fix(tests): resolve numerous frontend and backend test issues

perf3ct 2025-06-28 22:50:40 +00:00
parent edd0c7514f
commit fbf89c213d
5 changed files with 615 additions and 55 deletions

View File

@ -7,6 +7,9 @@ const mockList = vi.fn();
const mockUpload = vi.fn();
const mockDownload = vi.fn();
const mockDeleteLowConfidence = vi.fn();
const mockGetFailedOcrDocuments = vi.fn();
const mockGetFailedDocuments = vi.fn();
const mockRetryOcr = vi.fn();
// Mock the entire api module
vi.mock('../api', async () => {
@ -19,6 +22,9 @@ vi.mock('../api', async () => {
upload: mockUpload,
download: mockDownload,
deleteLowConfidence: mockDeleteLowConfidence,
getFailedOcrDocuments: mockGetFailedOcrDocuments,
getFailedDocuments: mockGetFailedDocuments,
retryOcr: mockRetryOcr,
},
};
});
@ -490,4 +496,274 @@ describe('documentService.deleteLowConfidence', () => {
expect(mockDeleteLowConfidence).toHaveBeenCalledWith(confidence, true);
}
});
});
describe('documentService.getFailedOcrDocuments', () => {
const mockFailedOcrResponse = {
documents: [
{
id: 'doc-1',
filename: 'failed_doc1.pdf',
failure_reason: 'low_ocr_confidence',
failure_stage: 'ocr',
created_at: '2024-01-01T10:00:00Z',
retry_count: 1
},
{
id: 'doc-2',
filename: 'failed_doc2.pdf',
failure_reason: 'pdf_parsing_error',
failure_stage: 'ocr',
created_at: '2024-01-01T11:00:00Z',
retry_count: 0
}
],
pagination: {
total: 2,
limit: 50,
offset: 0,
has_more: false
},
statistics: {
total_failed: 2,
failure_categories: [
{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 },
{ reason: 'pdf_parsing_error', display_name: 'PDF Parsing Error', count: 1 }
]
}
};
it('should fetch failed OCR documents successfully', async () => {
const mockResponse = {
data: mockFailedOcrResponse,
status: 200,
statusText: 'OK',
headers: {},
config: {},
};
mockGetFailedOcrDocuments.mockResolvedValue(mockResponse);
const result = await documentService.getFailedOcrDocuments(50, 0);
expect(mockGetFailedOcrDocuments).toHaveBeenCalledWith(50, 0);
expect(result.data).toEqual(mockFailedOcrResponse);
expect(result.data.documents).toHaveLength(2);
expect(result.data.documents[0].failure_stage).toBe('ocr');
expect(result.data.pagination.total).toBe(2);
});
it('should handle pagination parameters correctly', async () => {
mockGetFailedOcrDocuments.mockResolvedValue({ data: mockFailedOcrResponse });
await documentService.getFailedOcrDocuments(25, 10);
expect(mockGetFailedOcrDocuments).toHaveBeenCalledWith(25, 10);
});
it('should use default pagination when not specified', async () => {
mockGetFailedOcrDocuments.mockResolvedValue({ data: mockFailedOcrResponse });
await documentService.getFailedOcrDocuments();
expect(mockGetFailedOcrDocuments).toHaveBeenCalledWith();
});
it('should handle empty results', async () => {
const emptyResponse = {
documents: [],
pagination: { total: 0, limit: 50, offset: 0, has_more: false },
statistics: { total_failed: 0, failure_categories: [] }
};
mockGetFailedOcrDocuments.mockResolvedValue({ data: emptyResponse });
const result = await documentService.getFailedOcrDocuments();
expect(result.data.documents).toHaveLength(0);
expect(result.data.pagination.total).toBe(0);
expect(result.data.statistics.total_failed).toBe(0);
});
it('should handle API errors', async () => {
const mockError = new Error('Network error');
mockGetFailedOcrDocuments.mockRejectedValue(mockError);
await expect(documentService.getFailedOcrDocuments()).rejects.toThrow('Network error');
});
});
describe('documentService.getFailedDocuments', () => {
const mockFailedDocumentsResponse = {
documents: [
{
id: 'doc-1',
filename: 'failed_doc1.pdf',
failure_reason: 'low_ocr_confidence',
failure_stage: 'ocr',
created_at: '2024-01-01T10:00:00Z',
retry_count: 1
},
{
id: 'doc-2',
filename: 'duplicate_doc.pdf',
failure_reason: 'duplicate_content',
failure_stage: 'ingestion',
created_at: '2024-01-01T11:00:00Z',
retry_count: 0
},
{
id: 'doc-3',
filename: 'large_file.pdf',
failure_reason: 'file_too_large',
failure_stage: 'validation',
created_at: '2024-01-01T12:00:00Z',
retry_count: 2
}
],
pagination: {
total: 3,
limit: 25,
offset: 0,
has_more: false
},
statistics: {
total_failed: 3,
failure_categories: [
{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 },
{ reason: 'duplicate_content', display_name: 'Duplicate Content', count: 1 },
{ reason: 'file_too_large', display_name: 'File Too Large', count: 1 }
]
}
};
it('should fetch failed documents with default parameters', async () => {
mockGetFailedDocuments.mockResolvedValue({ data: mockFailedDocumentsResponse });
const result = await documentService.getFailedDocuments();
expect(mockGetFailedDocuments).toHaveBeenCalledWith();
expect(result.data).toEqual(mockFailedDocumentsResponse);
expect(result.data.documents).toHaveLength(3);
});
it('should filter by stage parameter', async () => {
const ocrOnlyResponse = {
...mockFailedDocumentsResponse,
documents: [mockFailedDocumentsResponse.documents[0]], // Only OCR failure
pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
statistics: { total_failed: 1, failure_categories: [{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 }] }
};
mockGetFailedDocuments.mockResolvedValue({ data: ocrOnlyResponse });
const result = await documentService.getFailedDocuments(25, 0, 'ocr');
expect(mockGetFailedDocuments).toHaveBeenCalledWith(25, 0, 'ocr');
expect(result.data.documents).toHaveLength(1);
expect(result.data.documents[0].failure_stage).toBe('ocr');
});
it('should filter by reason parameter', async () => {
const duplicateOnlyResponse = {
...mockFailedDocumentsResponse,
documents: [mockFailedDocumentsResponse.documents[1]], // Only duplicate failure
pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
statistics: { total_failed: 1, failure_categories: [{ reason: 'duplicate_content', display_name: 'Duplicate Content', count: 1 }] }
};
mockGetFailedDocuments.mockResolvedValue({ data: duplicateOnlyResponse });
const result = await documentService.getFailedDocuments(25, 0, undefined, 'duplicate_content');
expect(mockGetFailedDocuments).toHaveBeenCalledWith(25, 0, undefined, 'duplicate_content');
expect(result.data.documents).toHaveLength(1);
expect(result.data.documents[0].failure_reason).toBe('duplicate_content');
});
it('should filter by both stage and reason', async () => {
const filteredResponse = {
...mockFailedDocumentsResponse,
documents: [mockFailedDocumentsResponse.documents[0]], // OCR + low_ocr_confidence
pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
statistics: { total_failed: 1, failure_categories: [{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 }] }
};
mockGetFailedDocuments.mockResolvedValue({ data: filteredResponse });
const result = await documentService.getFailedDocuments(25, 0, 'ocr', 'low_ocr_confidence');
expect(mockGetFailedDocuments).toHaveBeenCalledWith(25, 0, 'ocr', 'low_ocr_confidence');
expect(result.data.documents).toHaveLength(1);
expect(result.data.documents[0].failure_stage).toBe('ocr');
expect(result.data.documents[0].failure_reason).toBe('low_ocr_confidence');
});
it('should handle custom pagination', async () => {
mockGetFailedDocuments.mockResolvedValue({ data: mockFailedDocumentsResponse });
await documentService.getFailedDocuments(10, 20);
expect(mockGetFailedDocuments).toHaveBeenCalledWith(10, 20);
});
it('should handle empty results', async () => {
const emptyResponse = {
documents: [],
pagination: { total: 0, limit: 25, offset: 0, has_more: false },
statistics: { total_failed: 0, failure_categories: [] }
};
mockGetFailedDocuments.mockResolvedValue({ data: emptyResponse });
const result = await documentService.getFailedDocuments();
expect(result.data.documents).toHaveLength(0);
expect(result.data.statistics.total_failed).toBe(0);
});
});
describe('documentService.retryOcr', () => {
it('should retry OCR for a document successfully', async () => {
const mockRetryResponse = {
data: {
success: true,
message: 'OCR retry queued successfully',
document_id: 'doc-123'
},
status: 200,
statusText: 'OK',
headers: {},
config: {},
};
mockRetryOcr.mockResolvedValue(mockRetryResponse);
const result = await documentService.retryOcr('doc-123');
expect(mockRetryOcr).toHaveBeenCalledWith('doc-123');
expect(result.data.success).toBe(true);
expect(result.data.document_id).toBe('doc-123');
});
it('should handle retry errors', async () => {
const mockError = {
response: {
status: 404,
data: { error: 'Document not found' }
}
};
mockRetryOcr.mockRejectedValue(mockError);
await expect(documentService.retryOcr('non-existent-doc')).rejects.toMatchObject({
response: { status: 404 }
});
});
it('should handle network errors', async () => {
mockRetryOcr.mockRejectedValue(new Error('Network error'));
await expect(documentService.retryOcr('doc-123')).rejects.toThrow('Network error');
});
});
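
The assertions above pin down the wrapper signatures fairly tightly. What follows is a minimal sketch, assuming documentService is a thin passthrough over the api module and that api exposes a documents client with these methods (inferred from the mocks; the shipped implementation may add defaults or validation). Rest parameters forward exactly the arguments the caller passed, which the bare toHaveBeenCalledWith() assertions in the default-pagination cases rely on.

// Hedged sketch, not the shipped service: a thin passthrough whose
// argument order (limit, offset, stage, reason) is inferred from the
// assertions above.
import { api } from '../api';

export const documentService = {
  getFailedOcrDocuments: (...args: [limit?: number, offset?: number]) =>
    api.documents.getFailedOcrDocuments(...args),
  getFailedDocuments: (
    ...args: [limit?: number, offset?: number, stage?: string, reason?: string]
  ) => api.documents.getFailedDocuments(...args),
  retryOcr: (documentId: string) => api.documents.retryOcr(documentId),
};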

View File

@ -0,0 +1,283 @@
use crate::db::constraint_validation::ConstraintValidator;
/// Simple unit tests for the failed_documents functionality.
/// These tests focus on business logic and constraint validation
/// without requiring a live database connection at compile time.
#[cfg(test)]
mod failed_documents_unit_tests {
use super::*;
#[test]
fn test_constraint_validator_failure_reasons() {
// Test all valid failure reasons
let valid_reasons = [
"duplicate_content", "duplicate_filename", "unsupported_format",
"file_too_large", "file_corrupted", "access_denied",
"low_ocr_confidence", "ocr_timeout", "ocr_memory_limit",
"pdf_parsing_error", "storage_quota_exceeded", "network_error",
"permission_denied", "virus_detected", "invalid_structure",
"policy_violation", "other"
];
for reason in valid_reasons {
assert!(
ConstraintValidator::validate_failure_reason(reason).is_ok(),
"Expected '{}' to be valid",
reason
);
}
// Test invalid failure reasons
let invalid_reasons = [
"invalid_reason", "unknown", "timeout", "migration_completed",
"", "random_text", "failure", "error"
];
for reason in invalid_reasons {
assert!(
ConstraintValidator::validate_failure_reason(reason).is_err(),
"Expected '{}' to be invalid",
reason
);
}
}
#[test]
fn test_constraint_validator_failure_stages() {
// Test all valid failure stages
let valid_stages = [
"ingestion", "validation", "ocr", "storage", "processing", "sync"
];
for stage in valid_stages {
assert!(
ConstraintValidator::validate_failure_stage(stage).is_ok(),
"Expected '{}' to be valid",
stage
);
}
// Test invalid failure stages
let invalid_stages = [
"invalid_stage", "unknown", "failed", "error", "", "random_text"
];
for stage in invalid_stages {
assert!(
ConstraintValidator::validate_failure_stage(stage).is_err(),
"Expected '{}' to be invalid",
stage
);
}
}
#[test]
fn test_legacy_ocr_failure_mapping() {
let test_cases = [
(Some("low_ocr_confidence"), "low_ocr_confidence"),
(Some("timeout"), "ocr_timeout"),
(Some("memory_limit"), "ocr_memory_limit"),
(Some("pdf_parsing_error"), "pdf_parsing_error"),
(Some("corrupted"), "file_corrupted"),
(Some("file_corrupted"), "file_corrupted"),
(Some("unsupported_format"), "unsupported_format"),
(Some("access_denied"), "access_denied"),
(Some("unknown"), "other"),
(None, "other"),
(Some("unmapped_value"), "other"),
(Some(""), "other"),
];
for (input, expected) in test_cases {
let result = ConstraintValidator::map_legacy_ocr_failure_reason(input);
assert_eq!(
result, expected,
"Failed for input: {:?}. Expected '{}', got '{}'",
input, expected, result
);
}
}
#[test]
fn test_mapped_legacy_values_are_valid() {
// Ensure all mapped legacy values are actually valid according to our constraints
let legacy_values = [
Some("low_ocr_confidence"),
Some("timeout"),
Some("memory_limit"),
Some("pdf_parsing_error"),
Some("corrupted"),
Some("file_corrupted"),
Some("unsupported_format"),
Some("access_denied"),
Some("unknown"),
None,
Some("random_unmapped_value"),
];
for legacy_value in legacy_values {
let mapped = ConstraintValidator::map_legacy_ocr_failure_reason(legacy_value);
assert!(
ConstraintValidator::validate_failure_reason(mapped).is_ok(),
"Mapped value '{}' from legacy '{:?}' should be valid",
mapped, legacy_value
);
}
}
#[test]
fn test_batch_validation() {
// Test valid batch
let valid_batch = ["other", "low_ocr_confidence", "pdf_parsing_error", "duplicate_content"];
assert!(ConstraintValidator::validate_failure_reasons_batch(&valid_batch).is_ok());
// Test invalid batch
let invalid_batch = ["other", "invalid_reason", "timeout", "low_ocr_confidence"];
let result = ConstraintValidator::validate_failure_reasons_batch(&invalid_batch);
assert!(result.is_err());
let errors = result.unwrap_err();
assert_eq!(errors.len(), 2); // Should have 2 invalid reasons
assert!(errors.iter().any(|e| e.contains("invalid_reason")));
assert!(errors.iter().any(|e| e.contains("timeout")));
// Test empty batch
let empty_batch: &[&str] = &[];
assert!(ConstraintValidator::validate_failure_reasons_batch(empty_batch).is_ok());
}
#[test]
fn test_constraint_error_messages() {
let result = ConstraintValidator::validate_failure_reason("invalid_reason");
assert!(result.is_err());
let error_msg = result.unwrap_err();
assert!(error_msg.contains("Invalid failure_reason 'invalid_reason'"));
assert!(error_msg.contains("Valid values are:"));
assert!(error_msg.contains("low_ocr_confidence"));
assert!(error_msg.contains("other"));
let stage_result = ConstraintValidator::validate_failure_stage("invalid_stage");
assert!(stage_result.is_err());
let stage_error = stage_result.unwrap_err();
assert!(stage_error.contains("Invalid failure_stage 'invalid_stage'"));
assert!(stage_error.contains("Valid values are:"));
assert!(stage_error.contains("ingestion"));
assert!(stage_error.contains("ocr"));
}
#[test]
fn test_constraint_validation_comprehensive() {
// Test that our enum values comprehensively cover expected failure scenarios
// OCR-related failures
assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok());
assert!(ConstraintValidator::validate_failure_reason("ocr_timeout").is_ok());
assert!(ConstraintValidator::validate_failure_reason("ocr_memory_limit").is_ok());
assert!(ConstraintValidator::validate_failure_reason("pdf_parsing_error").is_ok());
// File-related failures
assert!(ConstraintValidator::validate_failure_reason("file_too_large").is_ok());
assert!(ConstraintValidator::validate_failure_reason("file_corrupted").is_ok());
assert!(ConstraintValidator::validate_failure_reason("unsupported_format").is_ok());
assert!(ConstraintValidator::validate_failure_reason("access_denied").is_ok());
// Duplicate detection
assert!(ConstraintValidator::validate_failure_reason("duplicate_content").is_ok());
assert!(ConstraintValidator::validate_failure_reason("duplicate_filename").is_ok());
// System-related failures
assert!(ConstraintValidator::validate_failure_reason("storage_quota_exceeded").is_ok());
assert!(ConstraintValidator::validate_failure_reason("network_error").is_ok());
assert!(ConstraintValidator::validate_failure_reason("permission_denied").is_ok());
// Security-related failures
assert!(ConstraintValidator::validate_failure_reason("virus_detected").is_ok());
assert!(ConstraintValidator::validate_failure_reason("policy_violation").is_ok());
assert!(ConstraintValidator::validate_failure_reason("invalid_structure").is_ok());
// Fallback
assert!(ConstraintValidator::validate_failure_reason("other").is_ok());
}
#[test]
fn test_failure_stages_comprehensive() {
// Test that our stage enum covers the document processing pipeline
// Initial processing stages
assert!(ConstraintValidator::validate_failure_stage("ingestion").is_ok());
assert!(ConstraintValidator::validate_failure_stage("validation").is_ok());
// Core processing stages
assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok());
assert!(ConstraintValidator::validate_failure_stage("processing").is_ok());
// Storage and sync stages
assert!(ConstraintValidator::validate_failure_stage("storage").is_ok());
assert!(ConstraintValidator::validate_failure_stage("sync").is_ok());
}
#[test]
fn test_legacy_mapping_completeness() {
// Ensure we handle all possible legacy OCR failure reasons that could exist
let legacy_ocr_reasons = [
"low_ocr_confidence",
"timeout",
"memory_limit",
"pdf_parsing_error",
"corrupted",
"file_corrupted",
"unsupported_format",
"access_denied",
"unknown",
"some_new_unmapped_reason"
];
for legacy_reason in legacy_ocr_reasons {
let mapped = ConstraintValidator::map_legacy_ocr_failure_reason(Some(legacy_reason));
// All mapped values should be valid
assert!(
ConstraintValidator::validate_failure_reason(mapped).is_ok(),
"Legacy reason '{}' maps to '{}' which should be valid",
legacy_reason, mapped
);
// Unmapped values should fall back to "other"
if !["low_ocr_confidence", "timeout", "memory_limit", "pdf_parsing_error",
"corrupted", "file_corrupted", "unsupported_format", "access_denied", "unknown"]
.contains(&legacy_reason) {
assert_eq!(mapped, "other", "Unmapped legacy reason should fall back to 'other'");
}
}
}
#[test]
fn test_case_sensitivity() {
// Our validation should be case-sensitive
assert!(ConstraintValidator::validate_failure_reason("Low_OCR_Confidence").is_err());
assert!(ConstraintValidator::validate_failure_reason("LOW_OCR_CONFIDENCE").is_err());
assert!(ConstraintValidator::validate_failure_reason("OCR").is_err());
assert!(ConstraintValidator::validate_failure_reason("INGESTION").is_err());
// Only exact lowercase matches should work
assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok());
assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok());
assert!(ConstraintValidator::validate_failure_stage("ingestion").is_ok());
}
#[test]
fn test_whitespace_handling() {
// Validation should not accept values with extra whitespace
assert!(ConstraintValidator::validate_failure_reason(" low_ocr_confidence").is_err());
assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence ").is_err());
assert!(ConstraintValidator::validate_failure_reason(" low_ocr_confidence ").is_err());
assert!(ConstraintValidator::validate_failure_stage(" ocr").is_err());
assert!(ConstraintValidator::validate_failure_stage("ocr ").is_err());
// Only exact matches should work
assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok());
assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok());
}
}
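
Taken together, these tests fully specify the validator's observable contract: exact-match validation with descriptive error messages, batch validation, and a legacy mapping with an "other" fallback. The following is a hedged sketch reconstructed from the assertions alone, not the shipped crate::db::constraint_validation implementation, included only to make that contract concrete:

pub struct ConstraintValidator;

impl ConstraintValidator {
    // Allowed vocabularies, copied from the test expectations above.
    const VALID_REASONS: &'static [&'static str] = &[
        "duplicate_content", "duplicate_filename", "unsupported_format",
        "file_too_large", "file_corrupted", "access_denied",
        "low_ocr_confidence", "ocr_timeout", "ocr_memory_limit",
        "pdf_parsing_error", "storage_quota_exceeded", "network_error",
        "permission_denied", "virus_detected", "invalid_structure",
        "policy_violation", "other",
    ];
    const VALID_STAGES: &'static [&'static str] =
        &["ingestion", "validation", "ocr", "storage", "processing", "sync"];

    /// Exact, case- and whitespace-sensitive membership check.
    pub fn validate_failure_reason(reason: &str) -> Result<(), String> {
        if Self::VALID_REASONS.iter().any(|&v| v == reason) {
            Ok(())
        } else {
            Err(format!(
                "Invalid failure_reason '{}'. Valid values are: {}",
                reason,
                Self::VALID_REASONS.join(", ")
            ))
        }
    }

    pub fn validate_failure_stage(stage: &str) -> Result<(), String> {
        if Self::VALID_STAGES.iter().any(|&v| v == stage) {
            Ok(())
        } else {
            Err(format!(
                "Invalid failure_stage '{}'. Valid values are: {}",
                stage,
                Self::VALID_STAGES.join(", ")
            ))
        }
    }

    /// One error message per invalid entry; an empty slice is Ok.
    pub fn validate_failure_reasons_batch(reasons: &[&str]) -> Result<(), Vec<String>> {
        let errors: Vec<String> = reasons
            .iter()
            .filter_map(|&r| Self::validate_failure_reason(r).err())
            .collect();
        if errors.is_empty() { Ok(()) } else { Err(errors) }
    }

    /// Legacy OCR reasons map onto the constrained vocabulary; anything
    /// unrecognized (including None and "") falls back to "other".
    pub fn map_legacy_ocr_failure_reason(legacy: Option<&str>) -> &'static str {
        match legacy {
            Some("low_ocr_confidence") => "low_ocr_confidence",
            Some("timeout") => "ocr_timeout",
            Some("memory_limit") => "ocr_memory_limit",
            Some("pdf_parsing_error") => "pdf_parsing_error",
            Some("corrupted") | Some("file_corrupted") => "file_corrupted",
            Some("unsupported_format") => "unsupported_format",
            Some("access_denied") => "access_denied",
            _ => "other",
        }
    }
}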

View File

@ -1,5 +1,4 @@
use sqlx::PgPool;
use crate::tests::helpers::setup_test_db;
#[cfg(test)]
mod migration_constraint_tests {
@ -18,17 +17,17 @@ mod migration_constraint_tests {
];
for reason in valid_reasons {
let result = sqlx::query!(
let result = sqlx::query(
r#"
INSERT INTO failed_documents (
user_id, filename, failure_reason, failure_stage, ingestion_source
) VALUES (
gen_random_uuid(), $1, $2, 'validation', 'test'
)
"#,
format!("test_file_{}.txt", reason),
reason
"#
)
.bind(format!("test_file_{}.txt", reason))
.bind(reason)
.execute(&pool)
.await;
@ -45,17 +44,17 @@ mod migration_constraint_tests {
];
for reason in invalid_reasons {
let result = sqlx::query!(
let result = sqlx::query(
r#"
INSERT INTO failed_documents (
user_id, filename, failure_reason, failure_stage, ingestion_source
) VALUES (
gen_random_uuid(), $1, $2, 'validation', 'test'
)
"#,
format!("test_file_{}.txt", reason),
reason
"#
)
.bind(format!("test_file_{}.txt", reason))
.bind(reason)
.execute(&pool)
.await;
@ -71,17 +70,17 @@ mod migration_constraint_tests {
];
for stage in valid_stages {
let result = sqlx::query!(
let result = sqlx::query(
r#"
INSERT INTO failed_documents (
user_id, filename, failure_reason, failure_stage, ingestion_source
) VALUES (
gen_random_uuid(), $1, 'other', $2, 'test'
)
"#,
format!("test_file_{}.txt", stage),
stage
"#
)
.bind(format!("test_file_{}.txt", stage))
.bind(stage)
.execute(&pool)
.await;
@ -123,17 +122,17 @@ mod migration_constraint_tests {
input_reason, expected_output);
// Test that the mapped value works in the database
let result = sqlx::query!(
let result = sqlx::query(
r#"
INSERT INTO failed_documents (
user_id, filename, failure_reason, failure_stage, ingestion_source
) VALUES (
gen_random_uuid(), $1, $2, 'ocr', 'migration'
)
"#,
format!("migration_test_{}.txt", input_reason.replace("/", "_")),
mapped_reason
"#
)
.bind(format!("migration_test_{}.txt", input_reason.replace("/", "_")))
.bind(mapped_reason)
.execute(&pool)
.await;
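
The recurring edit in this file swaps sqlx's query! macro for the plain sqlx::query function with explicit .bind() calls. query! type-checks the SQL against a live database (or sqlx offline metadata) at compile time, so dropping it lets the test crate build without a reachable database; validity is instead checked when the statement runs. A minimal sketch of the pattern, reusing the INSERT above (the helper name insert_failed is illustrative):

use sqlx::PgPool;

// Hedged miniature of the macro-to-function swap: sqlx::query!() would
// verify this statement against the schema during compilation, while
// sqlx::query() defers all checking to runtime.
async fn insert_failed(pool: &PgPool, filename: &str, reason: &str) -> Result<(), sqlx::Error> {
    sqlx::query(
        r#"
        INSERT INTO failed_documents (
            user_id, filename, failure_reason, failure_stage, ingestion_source
        ) VALUES (gen_random_uuid(), $1, $2, 'validation', 'test')
        "#,
    )
    .bind(filename) // $1
    .bind(reason)   // $2
    .execute(pool)
    .await?;
    Ok(())
}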

View File

@ -1,4 +1,4 @@
use sqlx::PgPool;
use sqlx::{PgPool, Row};
use uuid::Uuid;
#[cfg(test)]
@ -22,7 +22,7 @@ mod migration_integration_tests {
// Insert test documents
for (filename, failure_reason, error_msg) in &test_documents {
sqlx::query!(
sqlx::query(
r#"
INSERT INTO documents (
user_id, filename, original_filename, file_path, file_size,
@ -31,30 +31,29 @@ mod migration_integration_tests {
$1, $2, $2, '/fake/path', 1000, 'application/pdf',
'failed', $3, $4
)
"#,
user_id,
filename,
*failure_reason,
error_msg
"#
)
.bind(user_id)
.bind(filename)
.bind(*failure_reason)
.bind(error_msg)
.execute(&pool)
.await
.expect("Failed to insert test document");
}
// Count documents before migration
let before_count = sqlx::query_scalar!(
let before_count: i64 = sqlx::query_scalar(
"SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed'"
)
.fetch_one(&pool)
.await
.expect("Failed to count documents")
.unwrap_or(0);
.expect("Failed to count documents");
assert_eq!(before_count, test_documents.len() as i64);
// Simulate the migration logic
let migration_result = sqlx::query!(
let migration_result = sqlx::query(
r#"
INSERT INTO failed_documents (
user_id, filename, original_filename, file_path, file_size,
@ -87,13 +86,12 @@ mod migration_integration_tests {
assert!(migration_result.is_ok(), "Migration should succeed");
// Verify all documents were migrated
let migrated_count = sqlx::query_scalar!(
let migrated_count: i64 = sqlx::query_scalar(
"SELECT COUNT(*) FROM failed_documents WHERE ingestion_source = 'migration'"
)
.fetch_one(&pool)
.await
.expect("Failed to count migrated documents")
.unwrap_or(0);
.expect("Failed to count migrated documents");
assert_eq!(migrated_count, test_documents.len() as i64);
@ -108,24 +106,24 @@ mod migration_integration_tests {
];
for (filename, expected_reason) in mapping_tests {
let actual_reason = sqlx::query_scalar!(
"SELECT failure_reason FROM failed_documents WHERE filename = $1",
filename
let actual_reason: String = sqlx::query_scalar(
"SELECT failure_reason FROM failed_documents WHERE filename = $1"
)
.bind(filename)
.fetch_one(&pool)
.await
.expect("Failed to fetch failure reason");
assert_eq!(
actual_reason.as_deref(),
Some(expected_reason),
actual_reason,
expected_reason,
"Incorrect mapping for {}",
filename
);
}
// Test deletion of original failed documents
let delete_result = sqlx::query!(
let delete_result = sqlx::query(
"DELETE FROM documents WHERE ocr_status = 'failed'"
)
.execute(&pool)
@ -134,18 +132,17 @@ mod migration_integration_tests {
assert!(delete_result.is_ok(), "Delete should succeed");
// Verify cleanup
let remaining_failed = sqlx::query_scalar!(
let remaining_failed: i64 = sqlx::query_scalar(
"SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed'"
)
.fetch_one(&pool)
.await
.expect("Failed to count remaining documents")
.unwrap_or(0);
.expect("Failed to count remaining documents");
assert_eq!(remaining_failed, 0);
// Verify failed_documents table integrity
let failed_docs = sqlx::query!(
let failed_docs = sqlx::query(
"SELECT filename, failure_reason, failure_stage FROM failed_documents ORDER BY filename"
)
.fetch_all(&pool)
@ -156,11 +153,13 @@ mod migration_integration_tests {
for doc in &failed_docs {
// All should have proper stage
assert_eq!(doc.failure_stage, "ocr");
let stage: String = doc.get("failure_stage");
assert_eq!(stage, "ocr");
// All should have valid failure_reason
let reason: String = doc.get("failure_reason");
assert!(matches!(
doc.failure_reason.as_str(),
reason.as_str(),
"low_ocr_confidence" | "ocr_timeout" | "ocr_memory_limit" |
"file_corrupted" | "other"
));
@ -181,7 +180,7 @@ mod migration_integration_tests {
];
for (filename, failure_reason, error_msg) in &edge_cases {
sqlx::query!(
sqlx::query(
r#"
INSERT INTO documents (
user_id, filename, original_filename, file_path, file_size,
@ -190,19 +189,19 @@ mod migration_integration_tests {
$1, $2, $2, '/fake/path', 1000, 'application/pdf',
'failed', $3, $4
)
"#,
user_id,
filename,
*failure_reason,
error_msg
"#
)
.bind(user_id)
.bind(filename)
.bind(*failure_reason)
.bind(error_msg)
.execute(&pool)
.await
.expect("Failed to insert edge case document");
}
// Run migration on edge cases
let migration_result = sqlx::query!(
let migration_result = sqlx::query(
r#"
INSERT INTO failed_documents (
user_id, filename, failure_reason, failure_stage, ingestion_source
@ -231,7 +230,7 @@ mod migration_integration_tests {
assert!(migration_result.is_ok(), "Migration should handle edge cases");
// Verify all edge cases mapped to 'other' (since they're not in our mapping)
let edge_case_mappings = sqlx::query!(
let edge_case_mappings = sqlx::query(
"SELECT filename, failure_reason FROM failed_documents WHERE ingestion_source = 'migration_edge_test'"
)
.fetch_all(&pool)
@ -239,8 +238,10 @@ mod migration_integration_tests {
.expect("Failed to fetch edge case mappings");
for mapping in edge_case_mappings {
assert_eq!(mapping.failure_reason, "other",
"Edge case '{}' should map to 'other'", mapping.filename);
let filename: String = mapping.get("filename");
let failure_reason: String = mapping.get("failure_reason");
assert_eq!(failure_reason, "other",
"Edge case '{}' should map to 'other'", filename);
}
}
@ -250,7 +251,7 @@ mod migration_integration_tests {
// during migration, the constraints will catch it
// Try to insert data that violates constraints
let invalid_insert = sqlx::query!(
let invalid_insert = sqlx::query(
r#"
INSERT INTO failed_documents (
user_id, filename, failure_reason, failure_stage, ingestion_source
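
The other consequence of dropping the macros, visible throughout this file: query!() returns anonymous record structs with typed fields, while sqlx::query() and sqlx::query_scalar() return untyped results, so scalar bindings gain explicit type annotations and column access moves to sqlx::Row::get (hence the added Row import). A hedged miniature of the pattern (the helper name inspect_migration is illustrative):

use sqlx::{PgPool, Row};

// Scalars get an explicit Rust type on the binding; row columns are read
// by name via Row::get, mirroring the edits above.
async fn inspect_migration(pool: &PgPool) -> Result<(), sqlx::Error> {
    let migrated: i64 = sqlx::query_scalar(
        "SELECT COUNT(*) FROM failed_documents WHERE ingestion_source = 'migration'",
    )
    .fetch_one(pool)
    .await?;
    println!("{migrated} rows migrated");

    let rows = sqlx::query(
        "SELECT filename, failure_reason FROM failed_documents ORDER BY filename",
    )
    .fetch_all(pool)
    .await?;
    for row in &rows {
        let filename: String = row.get("filename");
        let reason: String = row.get("failure_reason");
        println!("{filename}: {reason}");
    }
    Ok(())
}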

View File

@ -18,4 +18,5 @@ mod sql_type_safety_tests;
mod users_tests;
mod generic_migration_tests;
mod migration_constraint_tests;
mod migration_integration_tests;
mod failed_documents_unit_tests;