fix(tests): count only the test's own documents instead of all documents in the table

perf3ct 2025-07-11 19:31:49 +00:00
parent 3d530b008f
commit fb8e61b0e4
1 changed file with 83 additions and 56 deletions
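
The fix applies one pattern throughout: each test derives a short unique identifier, embeds it in every filename it inserts, and scopes its COUNT/SELECT queries with a matching LIKE filter, so documents created by other tests sharing the same database no longer skew the assertions. A minimal sketch of that pattern, assuming the sqlx and uuid crates the test file already uses; the helper names below are illustrative and not taken from the repository:

    use sqlx::PgPool;
    use uuid::Uuid;

    // Hypothetical helper showing the scoping pattern this commit applies.
    async fn count_failed_with_prefix(pool: &PgPool, prefix: &str) -> sqlx::Result<i64> {
        // Only rows whose filename starts with this test's unique prefix are counted,
        // so rows inserted by other tests never affect the result.
        sqlx::query_scalar("SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed' AND filename LIKE $1")
            .bind(format!("{}%", prefix))
            .fetch_one(pool)
            .await
    }

    async fn example(pool: &PgPool) -> sqlx::Result<i64> {
        // Each test bakes a fresh 8-character id into every filename it creates.
        let test_id = Uuid::new_v4().to_string()[..8].to_string();
        count_failed_with_prefix(pool, &format!("data_integrity_test_{}_", test_id)).await
    }

The diff below applies this scoping in three places: the data-integrity scenarios (data_integrity_test_ prefix), the bulk-performance documents (perf_migration_test_ prefix), and the comprehensive-migration fixtures (comp_migration_test_ prefix).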


@@ -36,65 +36,77 @@ mod comprehensive_migration_tests {
         // Create comprehensive test data covering all edge cases
         let user_id = create_test_user(pool).await;
+        // Use unique test identifier to avoid conflicts with other tests
+        let test_id = Uuid::new_v4().to_string()[..8].to_string();
+        // Pre-create the filenames to avoid borrowing issues
+        let normal_success_filename = format!("data_integrity_test_{}_normal_success.pdf", test_id);
+        let low_confidence_filename = format!("data_integrity_test_{}_low_confidence_fail.pdf", test_id);
+        let timeout_filename = format!("data_integrity_test_{}_timeout_fail.pdf", test_id);
+        let memory_filename = format!("data_integrity_test_{}_memory_fail.pdf", test_id);
+        let corrupted_filename = format!("data_integrity_test_{}_corrupted_file.pdf", test_id);
+        let unsupported_filename = format!("data_integrity_test_{}_unsupported.xyz", test_id);
+        let pending_filename = format!("data_integrity_test_{}_pending_ocr.pdf", test_id);
         // Insert various types of documents
         let document_scenarios = vec![
             DocumentScenario {
-                filename: "normal_success.pdf",
-                ocr_status: "completed",
+                filename: normal_success_filename,
+                ocr_status: "completed".to_string(),
                 ocr_failure_reason: None,
                 ocr_error: None,
                 ocr_confidence: Some(0.95),
-                ocr_text: Some("This is a successful OCR"),
+                ocr_text: Some("This is a successful OCR".to_string()),
                 file_size: 1024,
             },
             DocumentScenario {
-                filename: "low_confidence_fail.pdf",
-                ocr_status: "failed",
-                ocr_failure_reason: Some("low_ocr_confidence"),
-                ocr_error: Some("OCR confidence below threshold"),
+                filename: low_confidence_filename,
+                ocr_status: "failed".to_string(),
+                ocr_failure_reason: Some("low_ocr_confidence".to_string()),
+                ocr_error: Some("OCR confidence below threshold".to_string()),
                 ocr_confidence: Some(0.3),
-                ocr_text: Some("Partially recognized text"),
+                ocr_text: Some("Partially recognized text".to_string()),
                 file_size: 2048,
             },
             DocumentScenario {
-                filename: "timeout_fail.pdf",
-                ocr_status: "failed",
-                ocr_failure_reason: Some("timeout"),
-                ocr_error: Some("OCR processing timed out after 60 seconds"),
+                filename: timeout_filename,
+                ocr_status: "failed".to_string(),
+                ocr_failure_reason: Some("timeout".to_string()),
+                ocr_error: Some("OCR processing timed out after 60 seconds".to_string()),
                 ocr_confidence: None,
                 ocr_text: None,
                 file_size: 10485760, // 10MB
             },
             DocumentScenario {
-                filename: "memory_fail.pdf",
-                ocr_status: "failed",
-                ocr_failure_reason: Some("memory_limit"),
-                ocr_error: Some("Memory limit exceeded"),
+                filename: memory_filename,
+                ocr_status: "failed".to_string(),
+                ocr_failure_reason: Some("memory_limit".to_string()),
+                ocr_error: Some("Memory limit exceeded".to_string()),
                 ocr_confidence: None,
                 ocr_text: None,
                 file_size: 52428800, // 50MB
             },
             DocumentScenario {
-                filename: "corrupted_file.pdf",
-                ocr_status: "failed",
-                ocr_failure_reason: Some("file_corrupted"),
-                ocr_error: Some("PDF file appears to be corrupted"),
+                filename: corrupted_filename,
+                ocr_status: "failed".to_string(),
+                ocr_failure_reason: Some("file_corrupted".to_string()),
+                ocr_error: Some("PDF file appears to be corrupted".to_string()),
                 ocr_confidence: None,
                 ocr_text: None,
                 file_size: 512,
             },
             DocumentScenario {
-                filename: "unsupported.xyz",
-                ocr_status: "failed",
-                ocr_failure_reason: Some("unsupported_format"),
-                ocr_error: Some("File format not supported"),
+                filename: unsupported_filename,
+                ocr_status: "failed".to_string(),
+                ocr_failure_reason: Some("unsupported_format".to_string()),
+                ocr_error: Some("File format not supported".to_string()),
                 ocr_confidence: None,
                 ocr_text: None,
                 file_size: 256,
             },
             DocumentScenario {
-                filename: "pending_ocr.pdf",
-                ocr_status: "pending",
+                filename: pending_filename,
+                ocr_status: "pending".to_string(),
                 ocr_failure_reason: None,
                 ocr_error: None,
                 ocr_confidence: None,
@@ -107,20 +119,22 @@ mod comprehensive_migration_tests {
         let mut document_ids = HashMap::new();
         for scenario in &document_scenarios {
             let doc_id = insert_test_document(pool, user_id, scenario).await;
-            document_ids.insert(scenario.filename, doc_id);
+            document_ids.insert(scenario.filename.clone(), doc_id);
         }
-        // Count documents before migration
+        // Count documents before migration (only our test documents)
         let failed_count_before: i64 = sqlx::query_scalar(
-            "SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed'"
+            "SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed' AND filename LIKE $1"
         )
+        .bind(format!("data_integrity_test_{}_%%", test_id))
        .fetch_one(pool)
        .await
        .unwrap();
         let successful_count_before: i64 = sqlx::query_scalar(
-            "SELECT COUNT(*) FROM documents WHERE ocr_status = 'completed'"
+            "SELECT COUNT(*) FROM documents WHERE ocr_status = 'completed' AND filename LIKE $1"
         )
+        .bind(format!("data_integrity_test_{}_%%", test_id))
        .fetch_one(pool)
        .await
        .unwrap();
@@ -170,7 +184,9 @@ mod comprehensive_migration_tests {
         // Verify that successful and pending documents are not affected
         assert_eq!(successful_count_before, 1, "Should have 1 successful document");
+        // With the unique filename prefix, exactly 5 of the failed documents belong to this test
         assert_eq!(failed_count_before, 5, "Should have 5 failed documents");
     }
     #[tokio::test]
@@ -310,7 +326,8 @@ mod comprehensive_migration_tests {
         let user_id = create_test_user(pool).await;
-        // Insert a large number of failed documents
+        // Insert a large number of failed documents with unique naming
+        let test_id = Uuid::new_v4().to_string()[..8].to_string();
         let batch_size = 100;
         let start_time = std::time::Instant::now();
@@ -322,7 +339,7 @@ mod comprehensive_migration_tests {
         for i in 0..batch_size {
             let doc_num = batch * batch_size + i;
-            let filename = format!("bulk_doc_{}.pdf", doc_num);
+            let filename = format!("perf_migration_test_{}_bulk_doc_{}.pdf", test_id, doc_num);
             let reason = match doc_num % 5 {
                 0 => "low_ocr_confidence",
                 1 => "timeout",
@@ -352,8 +369,9 @@ mod comprehensive_migration_tests {
         let migration_start = std::time::Instant::now();
         let count: i64 = sqlx::query_scalar(
-            "SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed'"
+            "SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed' AND filename LIKE $1"
         )
+        .bind(format!("perf_migration_test_{}_bulk_doc_%", test_id))
        .fetch_one(pool)
        .await
        .unwrap();
@@ -363,9 +381,10 @@ mod comprehensive_migration_tests {
         // Simulate the migration SELECT
         let _migration_data = sqlx::query(
             r#"
-            SELECT * FROM documents WHERE ocr_status = 'failed'
+            SELECT * FROM documents WHERE ocr_status = 'failed' AND filename LIKE $1
             "#
         )
+        .bind(format!("perf_migration_test_{}_bulk_doc_%", test_id))
        .fetch_all(pool)
        .await
        .unwrap();
@@ -386,12 +405,12 @@ mod comprehensive_migration_tests {
     }
     struct DocumentScenario {
-        filename: &'static str,
-        ocr_status: &'static str,
-        ocr_failure_reason: Option<&'static str>,
-        ocr_error: Option<&'static str>,
+        filename: String,
+        ocr_status: String,
+        ocr_failure_reason: Option<String>,
+        ocr_error: Option<String>,
         ocr_confidence: Option<f32>,
-        ocr_text: Option<&'static str>,
+        ocr_text: Option<String>,
         file_size: i64,
     }
@@ -435,14 +454,14 @@ mod comprehensive_migration_tests {
         )
         .bind(doc_id)
         .bind(user_id)
-        .bind(scenario.filename)
+        .bind(&scenario.filename)
         .bind(scenario.file_size)
         .bind(if scenario.filename.ends_with(".pdf") { "application/pdf" } else { "application/octet-stream" })
-        .bind(scenario.ocr_status)
-        .bind(scenario.ocr_failure_reason)
-        .bind(scenario.ocr_error)
+        .bind(&scenario.ocr_status)
+        .bind(scenario.ocr_failure_reason.as_ref())
+        .bind(scenario.ocr_error.as_ref())
         .bind(scenario.ocr_confidence)
-        .bind(scenario.ocr_text)
+        .bind(scenario.ocr_text.as_ref())
         .execute(pool)
         .await
         .unwrap();
@@ -454,11 +473,14 @@ mod comprehensive_migration_tests {
         let user_id = create_test_user(pool).await;
         let mut document_ids = HashMap::new();
+        // Use unique test identifier to avoid conflicts with other tests
+        let test_id = Uuid::new_v4().to_string()[..8].to_string();
         let failure_scenarios = vec![
-            ("timeout_doc.pdf".to_string(), "timeout".to_string(), "OCR processing timed out".to_string()),
-            ("memory_doc.pdf".to_string(), "memory_limit".to_string(), "Memory limit exceeded".to_string()),
-            ("corrupt_doc.pdf".to_string(), "file_corrupted".to_string(), "File is corrupted".to_string()),
-            ("low_conf_doc.pdf".to_string(), "low_ocr_confidence".to_string(), "Confidence too low".to_string()),
+            (format!("comp_migration_test_{}_timeout_doc.pdf", test_id), "timeout".to_string(), "OCR processing timed out".to_string()),
+            (format!("comp_migration_test_{}_memory_doc.pdf", test_id), "memory_limit".to_string(), "Memory limit exceeded".to_string()),
+            (format!("comp_migration_test_{}_corrupt_doc.pdf", test_id), "file_corrupted".to_string(), "File is corrupted".to_string()),
+            (format!("comp_migration_test_{}_low_conf_doc.pdf", test_id), "low_ocr_confidence".to_string(), "Confidence too low".to_string()),
         ];
         // Insert test documents
@@ -496,7 +518,7 @@ mod comprehensive_migration_tests {
     async fn verify_prefilled_data(pool: &PgPool, test_data: &TestData) {
         let count: i64 = sqlx::query_scalar(
-            "SELECT COUNT(*) FROM documents WHERE user_id = $1 AND ocr_status = 'failed'"
+            "SELECT COUNT(*) FROM documents WHERE user_id = $1 AND ocr_status = 'failed' AND filename LIKE 'comp_migration_test_%'"
         )
         .bind(test_data.user_id)
         .fetch_one(pool)
@@ -594,13 +616,18 @@ mod comprehensive_migration_tests {
     }
     async fn verify_data_consistency_after_migration(pool: &PgPool, test_data: &TestData) {
-        // Verify specific failure reason mappings
-        let mappings = vec![
-            ("timeout_doc.pdf", "ocr_timeout"),
-            ("memory_doc.pdf", "ocr_memory_limit"),
-            ("corrupt_doc.pdf", "file_corrupted"),
-            ("low_conf_doc.pdf", "low_ocr_confidence"),
-        ];
+        // Create mappings based on the actual filenames in test_data
+        let mut mappings = Vec::new();
+        for (filename, reason, _) in &test_data.failure_scenarios {
+            let expected_reason = match reason.as_str() {
+                "timeout" => "ocr_timeout",
+                "memory_limit" => "ocr_memory_limit",
+                "file_corrupted" => "file_corrupted",
+                "low_ocr_confidence" => "low_ocr_confidence",
+                _ => reason.as_str(),
+            };
+            mappings.push((filename.as_str(), expected_reason));
+        }
         for (filename, expected_reason) in mappings {
             let result = sqlx::query(