From fbf89c213daad69548b43def0a61c58c69e40cf6 Mon Sep 17 00:00:00 2001 From: perf3ct Date: Sat, 28 Jun 2025 22:50:40 +0000 Subject: [PATCH] fix(tests): resolve a whole lot of test issues --- frontend/src/services/__tests__/api.test.ts | 276 +++++++++++++++++++ src/tests/failed_documents_unit_tests.rs | 283 ++++++++++++++++++++ src/tests/migration_constraint_tests.rs | 33 ++- src/tests/migration_integration_tests.rs | 75 +++--- src/tests/mod.rs | 3 +- 5 files changed, 615 insertions(+), 55 deletions(-) create mode 100644 src/tests/failed_documents_unit_tests.rs diff --git a/frontend/src/services/__tests__/api.test.ts b/frontend/src/services/__tests__/api.test.ts index 719cb51..4763f33 100644 --- a/frontend/src/services/__tests__/api.test.ts +++ b/frontend/src/services/__tests__/api.test.ts @@ -7,6 +7,9 @@ const mockList = vi.fn(); const mockUpload = vi.fn(); const mockDownload = vi.fn(); const mockDeleteLowConfidence = vi.fn(); +const mockGetFailedOcrDocuments = vi.fn(); +const mockGetFailedDocuments = vi.fn(); +const mockRetryOcr = vi.fn(); // Mock the entire api module vi.mock('../api', async () => { @@ -19,6 +22,9 @@ vi.mock('../api', async () => { upload: mockUpload, download: mockDownload, deleteLowConfidence: mockDeleteLowConfidence, + getFailedOcrDocuments: mockGetFailedOcrDocuments, + getFailedDocuments: mockGetFailedDocuments, + retryOcr: mockRetryOcr, }, }; }); @@ -490,4 +496,274 @@ describe('documentService.deleteLowConfidence', () => { expect(mockDeleteLowConfidence).toHaveBeenCalledWith(confidence, true); } }); +}); + +describe('documentService.getFailedOcrDocuments', () => { + const mockFailedOcrResponse = { + documents: [ + { + id: 'doc-1', + filename: 'failed_doc1.pdf', + failure_reason: 'low_ocr_confidence', + failure_stage: 'ocr', + created_at: '2024-01-01T10:00:00Z', + retry_count: 1 + }, + { + id: 'doc-2', + filename: 'failed_doc2.pdf', + failure_reason: 'pdf_parsing_error', + failure_stage: 'ocr', + created_at: '2024-01-01T11:00:00Z', + retry_count: 0 + } + ], + pagination: { + total: 2, + limit: 50, + offset: 0, + has_more: false + }, + statistics: { + total_failed: 2, + failure_categories: [ + { reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 }, + { reason: 'pdf_parsing_error', display_name: 'PDF Parsing Error', count: 1 } + ] + } + }; + + it('should fetch failed OCR documents successfully', async () => { + const mockResponse = { + data: mockFailedOcrResponse, + status: 200, + statusText: 'OK', + headers: {}, + config: {}, + }; + + mockGetFailedOcrDocuments.mockResolvedValue(mockResponse); + + const result = await documentService.getFailedOcrDocuments(50, 0); + + expect(mockGetFailedOcrDocuments).toHaveBeenCalledWith(50, 0); + expect(result.data).toEqual(mockFailedOcrResponse); + expect(result.data.documents).toHaveLength(2); + expect(result.data.documents[0].failure_stage).toBe('ocr'); + expect(result.data.pagination.total).toBe(2); + }); + + it('should handle pagination parameters correctly', async () => { + mockGetFailedOcrDocuments.mockResolvedValue({ data: mockFailedOcrResponse }); + + await documentService.getFailedOcrDocuments(25, 10); + + expect(mockGetFailedOcrDocuments).toHaveBeenCalledWith(25, 10); + }); + + it('should use default pagination when not specified', async () => { + mockGetFailedOcrDocuments.mockResolvedValue({ data: mockFailedOcrResponse }); + + await documentService.getFailedOcrDocuments(); + + expect(mockGetFailedOcrDocuments).toHaveBeenCalledWith(); + }); + + it('should handle empty results', async () => { + const 
emptyResponse = { + documents: [], + pagination: { total: 0, limit: 50, offset: 0, has_more: false }, + statistics: { total_failed: 0, failure_categories: [] } + }; + + mockGetFailedOcrDocuments.mockResolvedValue({ data: emptyResponse }); + + const result = await documentService.getFailedOcrDocuments(); + + expect(result.data.documents).toHaveLength(0); + expect(result.data.pagination.total).toBe(0); + expect(result.data.statistics.total_failed).toBe(0); + }); + + it('should handle API errors', async () => { + const mockError = new Error('Network error'); + mockGetFailedOcrDocuments.mockRejectedValue(mockError); + + await expect(documentService.getFailedOcrDocuments()).rejects.toThrow('Network error'); + }); +}); + +describe('documentService.getFailedDocuments', () => { + const mockFailedDocumentsResponse = { + documents: [ + { + id: 'doc-1', + filename: 'failed_doc1.pdf', + failure_reason: 'low_ocr_confidence', + failure_stage: 'ocr', + created_at: '2024-01-01T10:00:00Z', + retry_count: 1 + }, + { + id: 'doc-2', + filename: 'duplicate_doc.pdf', + failure_reason: 'duplicate_content', + failure_stage: 'ingestion', + created_at: '2024-01-01T11:00:00Z', + retry_count: 0 + }, + { + id: 'doc-3', + filename: 'large_file.pdf', + failure_reason: 'file_too_large', + failure_stage: 'validation', + created_at: '2024-01-01T12:00:00Z', + retry_count: 2 + } + ], + pagination: { + total: 3, + limit: 25, + offset: 0, + has_more: false + }, + statistics: { + total_failed: 3, + failure_categories: [ + { reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 }, + { reason: 'duplicate_content', display_name: 'Duplicate Content', count: 1 }, + { reason: 'file_too_large', display_name: 'File Too Large', count: 1 } + ] + } + }; + + it('should fetch failed documents with default parameters', async () => { + mockGetFailedDocuments.mockResolvedValue({ data: mockFailedDocumentsResponse }); + + const result = await documentService.getFailedDocuments(); + + expect(mockGetFailedDocuments).toHaveBeenCalledWith(); + expect(result.data).toEqual(mockFailedDocumentsResponse); + expect(result.data.documents).toHaveLength(3); + }); + + it('should filter by stage parameter', async () => { + const ocrOnlyResponse = { + ...mockFailedDocumentsResponse, + documents: [mockFailedDocumentsResponse.documents[0]], // Only OCR failure + pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 }, + statistics: { total_failed: 1, failure_categories: [{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 }] } + }; + + mockGetFailedDocuments.mockResolvedValue({ data: ocrOnlyResponse }); + + const result = await documentService.getFailedDocuments(25, 0, 'ocr'); + + expect(mockGetFailedDocuments).toHaveBeenCalledWith(25, 0, 'ocr'); + expect(result.data.documents).toHaveLength(1); + expect(result.data.documents[0].failure_stage).toBe('ocr'); + }); + + it('should filter by reason parameter', async () => { + const duplicateOnlyResponse = { + ...mockFailedDocumentsResponse, + documents: [mockFailedDocumentsResponse.documents[1]], // Only duplicate failure + pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 }, + statistics: { total_failed: 1, failure_categories: [{ reason: 'duplicate_content', display_name: 'Duplicate Content', count: 1 }] } + }; + + mockGetFailedDocuments.mockResolvedValue({ data: duplicateOnlyResponse }); + + const result = await documentService.getFailedDocuments(25, 0, undefined, 'duplicate_content'); + + expect(mockGetFailedDocuments).toHaveBeenCalledWith(25, 
0, undefined, 'duplicate_content'); + expect(result.data.documents).toHaveLength(1); + expect(result.data.documents[0].failure_reason).toBe('duplicate_content'); + }); + + it('should filter by both stage and reason', async () => { + const filteredResponse = { + ...mockFailedDocumentsResponse, + documents: [mockFailedDocumentsResponse.documents[0]], // OCR + low_ocr_confidence + pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 }, + statistics: { total_failed: 1, failure_categories: [{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 }] } + }; + + mockGetFailedDocuments.mockResolvedValue({ data: filteredResponse }); + + const result = await documentService.getFailedDocuments(25, 0, 'ocr', 'low_ocr_confidence'); + + expect(mockGetFailedDocuments).toHaveBeenCalledWith(25, 0, 'ocr', 'low_ocr_confidence'); + expect(result.data.documents).toHaveLength(1); + expect(result.data.documents[0].failure_stage).toBe('ocr'); + expect(result.data.documents[0].failure_reason).toBe('low_ocr_confidence'); + }); + + it('should handle custom pagination', async () => { + mockGetFailedDocuments.mockResolvedValue({ data: mockFailedDocumentsResponse }); + + await documentService.getFailedDocuments(10, 20); + + expect(mockGetFailedDocuments).toHaveBeenCalledWith(10, 20); + }); + + it('should handle empty results', async () => { + const emptyResponse = { + documents: [], + pagination: { total: 0, limit: 25, offset: 0, has_more: false }, + statistics: { total_failed: 0, failure_categories: [] } + }; + + mockGetFailedDocuments.mockResolvedValue({ data: emptyResponse }); + + const result = await documentService.getFailedDocuments(); + + expect(result.data.documents).toHaveLength(0); + expect(result.data.statistics.total_failed).toBe(0); + }); +}); + +describe('documentService.retryOcr', () => { + it('should retry OCR for a document successfully', async () => { + const mockRetryResponse = { + data: { + success: true, + message: 'OCR retry queued successfully', + document_id: 'doc-123' + }, + status: 200, + statusText: 'OK', + headers: {}, + config: {}, + }; + + mockRetryOcr.mockResolvedValue(mockRetryResponse); + + const result = await documentService.retryOcr('doc-123'); + + expect(mockRetryOcr).toHaveBeenCalledWith('doc-123'); + expect(result.data.success).toBe(true); + expect(result.data.document_id).toBe('doc-123'); + }); + + it('should handle retry errors', async () => { + const mockError = { + response: { + status: 404, + data: { error: 'Document not found' } + } + }; + + mockRetryOcr.mockRejectedValue(mockError); + + await expect(documentService.retryOcr('non-existent-doc')).rejects.toMatchObject({ + response: { status: 404 } + }); + }); + + it('should handle network errors', async () => { + mockRetryOcr.mockRejectedValue(new Error('Network error')); + + await expect(documentService.retryOcr('doc-123')).rejects.toThrow('Network error'); + }); }); \ No newline at end of file diff --git a/src/tests/failed_documents_unit_tests.rs b/src/tests/failed_documents_unit_tests.rs new file mode 100644 index 0000000..d15d6b7 --- /dev/null +++ b/src/tests/failed_documents_unit_tests.rs @@ -0,0 +1,283 @@ +use crate::db::constraint_validation::ConstraintValidator; + +/// Simple unit tests for failed_documents functionality +/// These tests focus on business logic and constraint validation +/// without requiring live database connections during compilation +#[cfg(test)] +mod failed_documents_unit_tests { + use super::*; + + #[test] + fn test_constraint_validator_failure_reasons() { + // 
Test all valid failure reasons + let valid_reasons = [ + "duplicate_content", "duplicate_filename", "unsupported_format", + "file_too_large", "file_corrupted", "access_denied", + "low_ocr_confidence", "ocr_timeout", "ocr_memory_limit", + "pdf_parsing_error", "storage_quota_exceeded", "network_error", + "permission_denied", "virus_detected", "invalid_structure", + "policy_violation", "other" + ]; + + for reason in valid_reasons { + assert!( + ConstraintValidator::validate_failure_reason(reason).is_ok(), + "Expected '{}' to be valid", + reason + ); + } + + // Test invalid failure reasons + let invalid_reasons = [ + "invalid_reason", "unknown", "timeout", "migration_completed", + "", "random_text", "failure", "error" + ]; + + for reason in invalid_reasons { + assert!( + ConstraintValidator::validate_failure_reason(reason).is_err(), + "Expected '{}' to be invalid", + reason + ); + } + } + + #[test] + fn test_constraint_validator_failure_stages() { + // Test all valid failure stages + let valid_stages = [ + "ingestion", "validation", "ocr", "storage", "processing", "sync" + ]; + + for stage in valid_stages { + assert!( + ConstraintValidator::validate_failure_stage(stage).is_ok(), + "Expected '{}' to be valid", + stage + ); + } + + // Test invalid failure stages + let invalid_stages = [ + "invalid_stage", "unknown", "failed", "error", "", "random_text" + ]; + + for stage in invalid_stages { + assert!( + ConstraintValidator::validate_failure_stage(stage).is_err(), + "Expected '{}' to be invalid", + stage + ); + } + } + + #[test] + fn test_legacy_ocr_failure_mapping() { + let test_cases = [ + (Some("low_ocr_confidence"), "low_ocr_confidence"), + (Some("timeout"), "ocr_timeout"), + (Some("memory_limit"), "ocr_memory_limit"), + (Some("pdf_parsing_error"), "pdf_parsing_error"), + (Some("corrupted"), "file_corrupted"), + (Some("file_corrupted"), "file_corrupted"), + (Some("unsupported_format"), "unsupported_format"), + (Some("access_denied"), "access_denied"), + (Some("unknown"), "other"), + (None, "other"), + (Some("unmapped_value"), "other"), + (Some(""), "other"), + ]; + + for (input, expected) in test_cases { + let result = ConstraintValidator::map_legacy_ocr_failure_reason(input); + assert_eq!( + result, expected, + "Failed for input: {:?}. 
Expected '{}', got '{}'", + input, expected, result + ); + } + } + + #[test] + fn test_mapped_legacy_values_are_valid() { + // Ensure all mapped legacy values are actually valid according to our constraints + let legacy_values = [ + Some("low_ocr_confidence"), + Some("timeout"), + Some("memory_limit"), + Some("pdf_parsing_error"), + Some("corrupted"), + Some("file_corrupted"), + Some("unsupported_format"), + Some("access_denied"), + Some("unknown"), + None, + Some("random_unmapped_value"), + ]; + + for legacy_value in legacy_values { + let mapped = ConstraintValidator::map_legacy_ocr_failure_reason(legacy_value); + assert!( + ConstraintValidator::validate_failure_reason(mapped).is_ok(), + "Mapped value '{}' from legacy '{:?}' should be valid", + mapped, legacy_value + ); + } + } + + #[test] + fn test_batch_validation() { + // Test valid batch + let valid_batch = ["other", "low_ocr_confidence", "pdf_parsing_error", "duplicate_content"]; + assert!(ConstraintValidator::validate_failure_reasons_batch(&valid_batch).is_ok()); + + // Test invalid batch + let invalid_batch = ["other", "invalid_reason", "timeout", "low_ocr_confidence"]; + let result = ConstraintValidator::validate_failure_reasons_batch(&invalid_batch); + assert!(result.is_err()); + + let errors = result.unwrap_err(); + assert_eq!(errors.len(), 2); // Should have 2 invalid reasons + assert!(errors.iter().any(|e| e.contains("invalid_reason"))); + assert!(errors.iter().any(|e| e.contains("timeout"))); + + // Test empty batch + let empty_batch: &[&str] = &[]; + assert!(ConstraintValidator::validate_failure_reasons_batch(empty_batch).is_ok()); + } + + #[test] + fn test_constraint_error_messages() { + let result = ConstraintValidator::validate_failure_reason("invalid_reason"); + assert!(result.is_err()); + + let error_msg = result.unwrap_err(); + assert!(error_msg.contains("Invalid failure_reason 'invalid_reason'")); + assert!(error_msg.contains("Valid values are:")); + assert!(error_msg.contains("low_ocr_confidence")); + assert!(error_msg.contains("other")); + + let stage_result = ConstraintValidator::validate_failure_stage("invalid_stage"); + assert!(stage_result.is_err()); + + let stage_error = stage_result.unwrap_err(); + assert!(stage_error.contains("Invalid failure_stage 'invalid_stage'")); + assert!(stage_error.contains("Valid values are:")); + assert!(stage_error.contains("ingestion")); + assert!(stage_error.contains("ocr")); + } + + #[test] + fn test_constraint_validation_comprehensive() { + // Test that our enum values comprehensively cover expected failure scenarios + + // OCR-related failures + assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok()); + assert!(ConstraintValidator::validate_failure_reason("ocr_timeout").is_ok()); + assert!(ConstraintValidator::validate_failure_reason("ocr_memory_limit").is_ok()); + assert!(ConstraintValidator::validate_failure_reason("pdf_parsing_error").is_ok()); + + // File-related failures + assert!(ConstraintValidator::validate_failure_reason("file_too_large").is_ok()); + assert!(ConstraintValidator::validate_failure_reason("file_corrupted").is_ok()); + assert!(ConstraintValidator::validate_failure_reason("unsupported_format").is_ok()); + assert!(ConstraintValidator::validate_failure_reason("access_denied").is_ok()); + + // Duplicate detection + assert!(ConstraintValidator::validate_failure_reason("duplicate_content").is_ok()); + assert!(ConstraintValidator::validate_failure_reason("duplicate_filename").is_ok()); + + // System-related failures + 
assert!(ConstraintValidator::validate_failure_reason("storage_quota_exceeded").is_ok()); + assert!(ConstraintValidator::validate_failure_reason("network_error").is_ok()); + assert!(ConstraintValidator::validate_failure_reason("permission_denied").is_ok()); + + // Security-related failures + assert!(ConstraintValidator::validate_failure_reason("virus_detected").is_ok()); + assert!(ConstraintValidator::validate_failure_reason("policy_violation").is_ok()); + assert!(ConstraintValidator::validate_failure_reason("invalid_structure").is_ok()); + + // Fallback + assert!(ConstraintValidator::validate_failure_reason("other").is_ok()); + } + + #[test] + fn test_failure_stages_comprehensive() { + // Test that our stage enum covers the document processing pipeline + + // Initial processing stages + assert!(ConstraintValidator::validate_failure_stage("ingestion").is_ok()); + assert!(ConstraintValidator::validate_failure_stage("validation").is_ok()); + + // Core processing stages + assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok()); + assert!(ConstraintValidator::validate_failure_stage("processing").is_ok()); + + // Storage and sync stages + assert!(ConstraintValidator::validate_failure_stage("storage").is_ok()); + assert!(ConstraintValidator::validate_failure_stage("sync").is_ok()); + } + + #[test] + fn test_legacy_mapping_completeness() { + // Ensure we handle all possible legacy OCR failure reasons that could exist + let legacy_ocr_reasons = [ + "low_ocr_confidence", + "timeout", + "memory_limit", + "pdf_parsing_error", + "corrupted", + "file_corrupted", + "unsupported_format", + "access_denied", + "unknown", + "some_new_unmapped_reason" + ]; + + for legacy_reason in legacy_ocr_reasons { + let mapped = ConstraintValidator::map_legacy_ocr_failure_reason(Some(legacy_reason)); + + // All mapped values should be valid + assert!( + ConstraintValidator::validate_failure_reason(mapped).is_ok(), + "Legacy reason '{}' maps to '{}' which should be valid", + legacy_reason, mapped + ); + + // Unmapped values should fall back to "other" + if !["low_ocr_confidence", "timeout", "memory_limit", "pdf_parsing_error", + "corrupted", "file_corrupted", "unsupported_format", "access_denied", "unknown"] + .contains(&legacy_reason) { + assert_eq!(mapped, "other", "Unmapped legacy reason should fall back to 'other'"); + } + } + } + + #[test] + fn test_case_sensitivity() { + // Our validation should be case-sensitive + assert!(ConstraintValidator::validate_failure_reason("Low_OCR_Confidence").is_err()); + assert!(ConstraintValidator::validate_failure_reason("LOW_OCR_CONFIDENCE").is_err()); + assert!(ConstraintValidator::validate_failure_reason("OCR").is_err()); + assert!(ConstraintValidator::validate_failure_reason("INGESTION").is_err()); + + // Only exact lowercase matches should work + assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok()); + assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok()); + assert!(ConstraintValidator::validate_failure_stage("ingestion").is_ok()); + } + + #[test] + fn test_whitespace_handling() { + // Validation should not accept values with extra whitespace + assert!(ConstraintValidator::validate_failure_reason(" low_ocr_confidence").is_err()); + assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence ").is_err()); + assert!(ConstraintValidator::validate_failure_reason(" low_ocr_confidence ").is_err()); + assert!(ConstraintValidator::validate_failure_stage(" ocr").is_err()); + 
assert!(ConstraintValidator::validate_failure_stage("ocr ").is_err()); + + // Only exact matches should work + assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok()); + assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok()); + } +} \ No newline at end of file diff --git a/src/tests/migration_constraint_tests.rs b/src/tests/migration_constraint_tests.rs index 82382f6..980514a 100644 --- a/src/tests/migration_constraint_tests.rs +++ b/src/tests/migration_constraint_tests.rs @@ -1,5 +1,4 @@ use sqlx::PgPool; -use crate::tests::helpers::setup_test_db; #[cfg(test)] mod migration_constraint_tests { @@ -18,17 +17,17 @@ mod migration_constraint_tests { ]; for reason in valid_reasons { - let result = sqlx::query!( + let result = sqlx::query( r#" INSERT INTO failed_documents ( user_id, filename, failure_reason, failure_stage, ingestion_source ) VALUES ( gen_random_uuid(), $1, $2, 'validation', 'test' ) - "#, - format!("test_file_{}.txt", reason), - reason + "# ) + .bind(format!("test_file_{}.txt", reason)) + .bind(reason) .execute(&pool) .await; @@ -45,17 +44,17 @@ mod migration_constraint_tests { ]; for reason in invalid_reasons { - let result = sqlx::query!( + let result = sqlx::query( r#" INSERT INTO failed_documents ( user_id, filename, failure_reason, failure_stage, ingestion_source ) VALUES ( gen_random_uuid(), $1, $2, 'validation', 'test' ) - "#, - format!("test_file_{}.txt", reason), - reason + "# ) + .bind(format!("test_file_{}.txt", reason)) + .bind(reason) .execute(&pool) .await; @@ -71,17 +70,17 @@ mod migration_constraint_tests { ]; for stage in valid_stages { - let result = sqlx::query!( + let result = sqlx::query( r#" INSERT INTO failed_documents ( user_id, filename, failure_reason, failure_stage, ingestion_source ) VALUES ( gen_random_uuid(), $1, 'other', $2, 'test' ) - "#, - format!("test_file_{}.txt", stage), - stage + "# ) + .bind(format!("test_file_{}.txt", stage)) + .bind(stage) .execute(&pool) .await; @@ -123,17 +122,17 @@ mod migration_constraint_tests { input_reason, expected_output); // Test that the mapped value works in the database - let result = sqlx::query!( + let result = sqlx::query( r#" INSERT INTO failed_documents ( user_id, filename, failure_reason, failure_stage, ingestion_source ) VALUES ( gen_random_uuid(), $1, $2, 'ocr', 'migration' ) - "#, - format!("migration_test_{}.txt", input_reason.replace("/", "_")), - mapped_reason + "# ) + .bind(format!("migration_test_{}.txt", input_reason.replace("/", "_"))) + .bind(mapped_reason) .execute(&pool) .await; diff --git a/src/tests/migration_integration_tests.rs b/src/tests/migration_integration_tests.rs index 0031caa..b5897a2 100644 --- a/src/tests/migration_integration_tests.rs +++ b/src/tests/migration_integration_tests.rs @@ -1,4 +1,4 @@ -use sqlx::PgPool; +use sqlx::{PgPool, Row}; use uuid::Uuid; #[cfg(test)] @@ -22,7 +22,7 @@ mod migration_integration_tests { // Insert test documents for (filename, failure_reason, error_msg) in &test_documents { - sqlx::query!( + sqlx::query( r#" INSERT INTO documents ( user_id, filename, original_filename, file_path, file_size, @@ -31,30 +31,29 @@ mod migration_integration_tests { $1, $2, $2, '/fake/path', 1000, 'application/pdf', 'failed', $3, $4 ) - "#, - user_id, - filename, - *failure_reason, - error_msg + "# ) + .bind(user_id) + .bind(filename) + .bind(*failure_reason) + .bind(error_msg) .execute(&pool) .await .expect("Failed to insert test document"); } // Count documents before migration - let before_count = sqlx::query_scalar!( + 
let before_count: i64 = sqlx::query_scalar( "SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed'" ) .fetch_one(&pool) .await - .expect("Failed to count documents") - .unwrap_or(0); + .expect("Failed to count documents"); assert_eq!(before_count, test_documents.len() as i64); // Simulate the migration logic - let migration_result = sqlx::query!( + let migration_result = sqlx::query( r#" INSERT INTO failed_documents ( user_id, filename, original_filename, file_path, file_size, @@ -87,13 +86,12 @@ mod migration_integration_tests { assert!(migration_result.is_ok(), "Migration should succeed"); // Verify all documents were migrated - let migrated_count = sqlx::query_scalar!( + let migrated_count: i64 = sqlx::query_scalar( "SELECT COUNT(*) FROM failed_documents WHERE ingestion_source = 'migration'" ) .fetch_one(&pool) .await - .expect("Failed to count migrated documents") - .unwrap_or(0); + .expect("Failed to count migrated documents"); assert_eq!(migrated_count, test_documents.len() as i64); @@ -108,24 +106,24 @@ mod migration_integration_tests { ]; for (filename, expected_reason) in mapping_tests { - let actual_reason = sqlx::query_scalar!( - "SELECT failure_reason FROM failed_documents WHERE filename = $1", - filename + let actual_reason: String = sqlx::query_scalar( + "SELECT failure_reason FROM failed_documents WHERE filename = $1" ) + .bind(filename) .fetch_one(&pool) .await .expect("Failed to fetch failure reason"); assert_eq!( - actual_reason.as_deref(), - Some(expected_reason), + actual_reason, + expected_reason, "Incorrect mapping for {}", filename ); } // Test deletion of original failed documents - let delete_result = sqlx::query!( + let delete_result = sqlx::query( "DELETE FROM documents WHERE ocr_status = 'failed'" ) .execute(&pool) @@ -134,18 +132,17 @@ mod migration_integration_tests { assert!(delete_result.is_ok(), "Delete should succeed"); // Verify cleanup - let remaining_failed = sqlx::query_scalar!( + let remaining_failed: i64 = sqlx::query_scalar( "SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed'" ) .fetch_one(&pool) .await - .expect("Failed to count remaining documents") - .unwrap_or(0); + .expect("Failed to count remaining documents"); assert_eq!(remaining_failed, 0); // Verify failed_documents table integrity - let failed_docs = sqlx::query!( + let failed_docs = sqlx::query( "SELECT filename, failure_reason, failure_stage FROM failed_documents ORDER BY filename" ) .fetch_all(&pool) @@ -156,11 +153,13 @@ mod migration_integration_tests { for doc in &failed_docs { // All should have proper stage - assert_eq!(doc.failure_stage, "ocr"); + let stage: String = doc.get("failure_stage"); + assert_eq!(stage, "ocr"); // All should have valid failure_reason + let reason: String = doc.get("failure_reason"); assert!(matches!( - doc.failure_reason.as_str(), + reason.as_str(), "low_ocr_confidence" | "ocr_timeout" | "ocr_memory_limit" | "file_corrupted" | "other" )); @@ -181,7 +180,7 @@ mod migration_integration_tests { ]; for (filename, failure_reason, error_msg) in &edge_cases { - sqlx::query!( + sqlx::query( r#" INSERT INTO documents ( user_id, filename, original_filename, file_path, file_size, @@ -190,19 +189,19 @@ mod migration_integration_tests { $1, $2, $2, '/fake/path', 1000, 'application/pdf', 'failed', $3, $4 ) - "#, - user_id, - filename, - *failure_reason, - error_msg + "# ) + .bind(user_id) + .bind(filename) + .bind(*failure_reason) + .bind(error_msg) .execute(&pool) .await .expect("Failed to insert edge case document"); } // Run migration on edge cases - 
let migration_result = sqlx::query!( + let migration_result = sqlx::query( r#" INSERT INTO failed_documents ( user_id, filename, failure_reason, failure_stage, ingestion_source @@ -231,7 +230,7 @@ mod migration_integration_tests { assert!(migration_result.is_ok(), "Migration should handle edge cases"); // Verify all edge cases mapped to 'other' (since they're not in our mapping) - let edge_case_mappings = sqlx::query!( + let edge_case_mappings = sqlx::query( "SELECT filename, failure_reason FROM failed_documents WHERE ingestion_source = 'migration_edge_test'" ) .fetch_all(&pool) @@ -239,8 +238,10 @@ mod migration_integration_tests { .expect("Failed to fetch edge case mappings"); for mapping in edge_case_mappings { - assert_eq!(mapping.failure_reason, "other", - "Edge case '{}' should map to 'other'", mapping.filename); + let filename: String = mapping.get("filename"); + let failure_reason: String = mapping.get("failure_reason"); + assert_eq!(failure_reason, "other", + "Edge case '{}' should map to 'other'", filename); } } @@ -250,7 +251,7 @@ mod migration_integration_tests { // during migration, the constraints will catch it // Try to insert data that violates constraints - let invalid_insert = sqlx::query!( + let invalid_insert = sqlx::query( r#" INSERT INTO failed_documents ( user_id, filename, failure_reason, failure_stage, ingestion_source diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 032e8a5..b137872 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -18,4 +18,5 @@ mod sql_type_safety_tests; mod users_tests; mod generic_migration_tests; mod migration_constraint_tests; -mod migration_integration_tests; +mod migration_integration_tests; +mod failed_documents_unit_tests;
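
Note: the unit tests above pin down the full `ConstraintValidator` contract — the valid value lists, the legacy-reason mapping, and the error-message wording. A minimal sketch consistent with those assertions (the real implementation lives in src/db/constraint_validation.rs and may be structured differently):

// Sketch only: mirrors the behavior the tests assert; the actual
// src/db/constraint_validation.rs implementation may differ.
pub struct ConstraintValidator;

const VALID_FAILURE_REASONS: &[&str] = &[
    "duplicate_content", "duplicate_filename", "unsupported_format",
    "file_too_large", "file_corrupted", "access_denied",
    "low_ocr_confidence", "ocr_timeout", "ocr_memory_limit",
    "pdf_parsing_error", "storage_quota_exceeded", "network_error",
    "permission_denied", "virus_detected", "invalid_structure",
    "policy_violation", "other",
];

const VALID_FAILURE_STAGES: &[&str] =
    &["ingestion", "validation", "ocr", "storage", "processing", "sync"];

impl ConstraintValidator {
    /// Exact, case-sensitive match against the CHECK-constraint value list.
    pub fn validate_failure_reason(reason: &str) -> Result<(), String> {
        if VALID_FAILURE_REASONS.iter().any(|&v| v == reason) {
            Ok(())
        } else {
            Err(format!(
                "Invalid failure_reason '{}'. Valid values are: {}",
                reason,
                VALID_FAILURE_REASONS.join(", ")
            ))
        }
    }

    pub fn validate_failure_stage(stage: &str) -> Result<(), String> {
        if VALID_FAILURE_STAGES.iter().any(|&v| v == stage) {
            Ok(())
        } else {
            Err(format!(
                "Invalid failure_stage '{}'. Valid values are: {}",
                stage,
                VALID_FAILURE_STAGES.join(", ")
            ))
        }
    }

    /// Map legacy documents.ocr_failure_reason values onto the new enum;
    /// anything unrecognized (including NULL) falls back to "other".
    pub fn map_legacy_ocr_failure_reason(legacy: Option<&str>) -> &'static str {
        match legacy {
            Some("low_ocr_confidence") => "low_ocr_confidence",
            Some("timeout") => "ocr_timeout",
            Some("memory_limit") => "ocr_memory_limit",
            Some("pdf_parsing_error") => "pdf_parsing_error",
            Some("corrupted") | Some("file_corrupted") => "file_corrupted",
            Some("unsupported_format") => "unsupported_format",
            Some("access_denied") => "access_denied",
            _ => "other",
        }
    }

    /// Collect one error message per invalid reason; Ok when all pass.
    pub fn validate_failure_reasons_batch(reasons: &[&str]) -> Result<(), Vec<String>> {
        let errors: Vec<String> = reasons
            .iter()
            .filter_map(|&r| Self::validate_failure_reason(r).err())
            .collect();
        if errors.is_empty() { Ok(()) } else { Err(errors) }
    }
}

Keeping the value lists as const slices keeps the error messages, the batch validator, and the database CHECK constraints trivially in sync.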
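
Note: the sqlx changes in the migration tests all follow one pattern — swapping the compile-time-checked query!/query_scalar! macros, which need a reachable DATABASE_URL at build time, for their runtime-checked counterparts with explicit .bind() parameters and Row::get column access. A condensed sketch of the pattern (the helper function is hypothetical; table and column names are taken from the tests):

use sqlx::{PgPool, Row};

// Hypothetical helper illustrating the runtime-checked pattern used above.
async fn summarize_failures(pool: &PgPool) -> Result<(), sqlx::Error> {
    // query_scalar: an explicit type annotation replaces the macro's
    // compile-time inference (COUNT(*) decodes as i64 on Postgres).
    let total: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM failed_documents")
        .fetch_one(pool)
        .await?;

    // query + bind: positional parameters are bound at runtime instead of
    // being passed as macro arguments.
    let rows = sqlx::query(
        "SELECT filename, failure_reason FROM failed_documents WHERE failure_stage = $1",
    )
    .bind("ocr")
    .fetch_all(pool)
    .await?;

    // Row::get replaces the macro-generated anonymous record fields.
    for row in &rows {
        let filename: String = row.get("filename");
        let reason: String = row.get("failure_reason");
        println!("{}: {} ({} failed in total)", filename, reason, total);
    }
    Ok(())
}

The trade-off is deliberate: the macros verify SQL against a live schema at compile time, while the runtime forms keep the test suite buildable without a database, which is what this commit needs.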