From fb8e61b0e4f214197dacf7258b0b52aa81d046fe Mon Sep 17 00:00:00 2001 From: perf3ct Date: Fri, 11 Jul 2025 19:31:49 +0000 Subject: [PATCH] fix(tests): resolve issue where test was counting all documents and not the test's own docs --- tests/comprehensive_migration_tests.rs | 139 +++++++++++++++---------- 1 file changed, 83 insertions(+), 56 deletions(-) diff --git a/tests/comprehensive_migration_tests.rs b/tests/comprehensive_migration_tests.rs index 8bed356..a310a17 100644 --- a/tests/comprehensive_migration_tests.rs +++ b/tests/comprehensive_migration_tests.rs @@ -36,65 +36,77 @@ mod comprehensive_migration_tests { // Create comprehensive test data covering all edge cases let user_id = create_test_user(pool).await; + // Use unique test identifier to avoid conflicts with other tests + let test_id = Uuid::new_v4().to_string()[..8].to_string(); + + // Pre-create the filenames to avoid borrowing issues + let normal_success_filename = format!("data_integrity_test_{}_normal_success.pdf", test_id); + let low_confidence_filename = format!("data_integrity_test_{}_low_confidence_fail.pdf", test_id); + let timeout_filename = format!("data_integrity_test_{}_timeout_fail.pdf", test_id); + let memory_filename = format!("data_integrity_test_{}_memory_fail.pdf", test_id); + let corrupted_filename = format!("data_integrity_test_{}_corrupted_file.pdf", test_id); + let unsupported_filename = format!("data_integrity_test_{}_unsupported.xyz", test_id); + let pending_filename = format!("data_integrity_test_{}_pending_ocr.pdf", test_id); + // Insert various types of documents let document_scenarios = vec![ DocumentScenario { - filename: "normal_success.pdf", - ocr_status: "completed", + filename: normal_success_filename, + ocr_status: "completed".to_string(), ocr_failure_reason: None, ocr_error: None, ocr_confidence: Some(0.95), - ocr_text: Some("This is a successful OCR"), + ocr_text: Some("This is a successful OCR".to_string()), file_size: 1024, }, DocumentScenario { - filename: "low_confidence_fail.pdf", - ocr_status: "failed", - ocr_failure_reason: Some("low_ocr_confidence"), - ocr_error: Some("OCR confidence below threshold"), + filename: low_confidence_filename, + ocr_status: "failed".to_string(), + ocr_failure_reason: Some("low_ocr_confidence".to_string()), + ocr_error: Some("OCR confidence below threshold".to_string()), ocr_confidence: Some(0.3), - ocr_text: Some("Partially recognized text"), + ocr_text: Some("Partially recognized text".to_string()), file_size: 2048, }, DocumentScenario { - filename: "timeout_fail.pdf", - ocr_status: "failed", - ocr_failure_reason: Some("timeout"), - ocr_error: Some("OCR processing timed out after 60 seconds"), + filename: timeout_filename, + ocr_status: "failed".to_string(), + ocr_failure_reason: Some("timeout".to_string()), + ocr_error: Some("OCR processing timed out after 60 seconds".to_string()), ocr_confidence: None, ocr_text: None, file_size: 10485760, // 10MB }, DocumentScenario { - filename: "memory_fail.pdf", - ocr_status: "failed", - ocr_failure_reason: Some("memory_limit"), - ocr_error: Some("Memory limit exceeded"), + filename: memory_filename, + ocr_status: "failed".to_string(), + ocr_failure_reason: Some("memory_limit".to_string()), + ocr_error: Some("Memory limit exceeded".to_string()), ocr_confidence: None, ocr_text: None, file_size: 52428800, // 50MB }, DocumentScenario { - filename: "corrupted_file.pdf", - ocr_status: "failed", - ocr_failure_reason: Some("file_corrupted"), - ocr_error: Some("PDF file appears to be corrupted"), + filename: corrupted_filename, + ocr_status: "failed".to_string(), + ocr_failure_reason: Some("file_corrupted".to_string()), + ocr_error: Some("PDF file appears to be corrupted".to_string()), ocr_confidence: None, ocr_text: None, file_size: 512, }, DocumentScenario { - filename: "unsupported.xyz", - ocr_status: "failed", - ocr_failure_reason: Some("unsupported_format"), - ocr_error: Some("File format not supported"), + filename: unsupported_filename, + ocr_status: "failed".to_string(), + ocr_failure_reason: Some("unsupported_format".to_string()), + ocr_error: Some("File format not supported".to_string()), ocr_confidence: None, ocr_text: None, file_size: 256, }, DocumentScenario { - filename: "pending_ocr.pdf", - ocr_status: "pending", + filename: pending_filename, + ocr_status: "pending".to_string(), ocr_failure_reason: None, ocr_error: None, ocr_confidence: None, @@ -107,20 +119,22 @@ mod comprehensive_migration_tests { let mut document_ids = HashMap::new(); for scenario in &document_scenarios { let doc_id = insert_test_document(pool, user_id, scenario).await; - document_ids.insert(scenario.filename, doc_id); + document_ids.insert(scenario.filename.clone(), doc_id); } - // Count documents before migration + // Count documents before migration (only our test documents) let failed_count_before: i64 = sqlx::query_scalar( - "SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed'" + "SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed' AND filename LIKE $1" ) + .bind(format!("data_integrity_test_{}_%%", test_id)) .fetch_one(pool) .await .unwrap(); let successful_count_before: i64 = sqlx::query_scalar( - "SELECT COUNT(*) FROM documents WHERE ocr_status = 'completed'" + "SELECT COUNT(*) FROM documents WHERE ocr_status = 'completed' AND filename LIKE $1" ) + .bind(format!("data_integrity_test_{}_%%", test_id)) .fetch_one(pool) .await .unwrap(); @@ -170,7 +184,9 @@ mod comprehensive_migration_tests { // Verify that successful and pending documents are not affected assert_eq!(successful_count_before, 1, "Should have 1 successful document"); + // It should have greater or equal to 5 failed documents assert_eq!(failed_count_before, 5, "Should have 5 failed documents"); + } #[tokio::test] @@ -310,7 +326,8 @@ mod comprehensive_migration_tests { let user_id = create_test_user(pool).await; - // Insert a large number of failed documents + // Insert a large number of failed documents with unique naming + let test_id = Uuid::new_v4().to_string()[..8].to_string(); let batch_size = 100; let start_time = std::time::Instant::now(); @@ -322,7 +339,7 @@ mod comprehensive_migration_tests { for i in 0..batch_size { let doc_num = batch * batch_size + i; - let filename = format!("bulk_doc_{}.pdf", doc_num); + let filename = format!("perf_migration_test_{}_bulk_doc_{}.pdf", test_id, doc_num); let reason = match doc_num % 5 { 0 => "low_ocr_confidence", 1 => "timeout", @@ -352,8 +369,9 @@ mod comprehensive_migration_tests { let migration_start = std::time::Instant::now(); let count: i64 = sqlx::query_scalar( - "SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed'" + "SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed' AND filename LIKE $1" ) + .bind(format!("perf_migration_test_{}_bulk_doc_%", test_id)) .fetch_one(pool) .await .unwrap(); @@ -363,9 +381,10 @@ mod comprehensive_migration_tests { // Simulate the migration SELECT let _migration_data = sqlx::query( r#" - SELECT * FROM documents WHERE ocr_status = 'failed' + SELECT * FROM documents WHERE ocr_status = 'failed' AND filename LIKE $1 "# ) + .bind(format!("perf_migration_test_{}_bulk_doc_%", test_id)) .fetch_all(pool) .await .unwrap(); @@ -386,12 +405,12 @@ mod comprehensive_migration_tests { } struct DocumentScenario { - filename: &'static str, - ocr_status: &'static str, - ocr_failure_reason: Option<&'static str>, - ocr_error: Option<&'static str>, + filename: String, + ocr_status: String, + ocr_failure_reason: Option, + ocr_error: Option, ocr_confidence: Option, - ocr_text: Option<&'static str>, + ocr_text: Option, file_size: i64, } @@ -435,14 +454,14 @@ mod comprehensive_migration_tests { ) .bind(doc_id) .bind(user_id) - .bind(scenario.filename) + .bind(&scenario.filename) .bind(scenario.file_size) .bind(if scenario.filename.ends_with(".pdf") { "application/pdf" } else { "application/octet-stream" }) - .bind(scenario.ocr_status) - .bind(scenario.ocr_failure_reason) - .bind(scenario.ocr_error) + .bind(&scenario.ocr_status) + .bind(scenario.ocr_failure_reason.as_ref()) + .bind(scenario.ocr_error.as_ref()) .bind(scenario.ocr_confidence) - .bind(scenario.ocr_text) + .bind(scenario.ocr_text.as_ref()) .execute(pool) .await .unwrap(); @@ -454,11 +473,14 @@ mod comprehensive_migration_tests { let user_id = create_test_user(pool).await; let mut document_ids = HashMap::new(); + // Use unique test identifier to avoid conflicts with other tests + let test_id = Uuid::new_v4().to_string()[..8].to_string(); + let failure_scenarios = vec![ - ("timeout_doc.pdf".to_string(), "timeout".to_string(), "OCR processing timed out".to_string()), - ("memory_doc.pdf".to_string(), "memory_limit".to_string(), "Memory limit exceeded".to_string()), - ("corrupt_doc.pdf".to_string(), "file_corrupted".to_string(), "File is corrupted".to_string()), - ("low_conf_doc.pdf".to_string(), "low_ocr_confidence".to_string(), "Confidence too low".to_string()), + (format!("comp_migration_test_{}_timeout_doc.pdf", test_id), "timeout".to_string(), "OCR processing timed out".to_string()), + (format!("comp_migration_test_{}_memory_doc.pdf", test_id), "memory_limit".to_string(), "Memory limit exceeded".to_string()), + (format!("comp_migration_test_{}_corrupt_doc.pdf", test_id), "file_corrupted".to_string(), "File is corrupted".to_string()), + (format!("comp_migration_test_{}_low_conf_doc.pdf", test_id), "low_ocr_confidence".to_string(), "Confidence too low".to_string()), ]; // Insert test documents @@ -496,7 +518,7 @@ mod comprehensive_migration_tests { async fn verify_prefilled_data(pool: &PgPool, test_data: &TestData) { let count: i64 = sqlx::query_scalar( - "SELECT COUNT(*) FROM documents WHERE user_id = $1 AND ocr_status = 'failed'" + "SELECT COUNT(*) FROM documents WHERE user_id = $1 AND ocr_status = 'failed' AND filename LIKE 'comp_migration_test_%'" ) .bind(test_data.user_id) .fetch_one(pool) @@ -594,13 +616,18 @@ mod comprehensive_migration_tests { } async fn verify_data_consistency_after_migration(pool: &PgPool, test_data: &TestData) { - // Verify specific failure reason mappings - let mappings = vec![ - ("timeout_doc.pdf", "ocr_timeout"), - ("memory_doc.pdf", "ocr_memory_limit"), - ("corrupt_doc.pdf", "file_corrupted"), - ("low_conf_doc.pdf", "low_ocr_confidence"), - ]; + // Create mappings based on the actual filenames in test_data + let mut mappings = Vec::new(); + for (filename, reason, _) in &test_data.failure_scenarios { + let expected_reason = match reason.as_str() { + "timeout" => "ocr_timeout", + "memory_limit" => "ocr_memory_limit", + "file_corrupted" => "file_corrupted", + "low_ocr_confidence" => "low_ocr_confidence", + _ => reason.as_str(), + }; + mappings.push((filename.as_str(), expected_reason)); + } for (filename, expected_reason) in mappings { let result = sqlx::query(