/*! * OCR Failure Counting Verification Tests * * Tests to ensure that OCR failure counting and display works correctly: * - Verifies that failed OCR documents are properly counted * - Tests that failure categories are correctly categorized and counted * - Ensures the UI displays accurate failure statistics * - Tests edge cases with zero failures * - Verifies failure reason classification logic */ use reqwest::Client; use serde_json::Value; use std::time::Duration; use uuid::Uuid; use readur::models::{CreateUser, LoginRequest, LoginResponse, UserRole}; fn get_base_url() -> String { std::env::var("API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string()) } const TIMEOUT: Duration = Duration::from_secs(60); /// Test client for OCR failure counting verification struct OcrFailureCountingTestClient { client: Client, token: Option, user_id: Option, } impl OcrFailureCountingTestClient { fn new() -> Self { Self { client: Client::new(), token: None, user_id: None, } } /// Register and login a test user async fn register_and_login(&mut self, role: UserRole) -> Result> { // Health check let health_check = self.client .get(&format!("{}/api/health", get_base_url())) .timeout(Duration::from_secs(5)) .send() .await; if let Err(e) = health_check { eprintln!("Health check failed: {}. Is the server running at {}?", e, get_base_url()); return Err(format!("Server not running: {}", e).into()); } // Use UUID for guaranteed uniqueness let test_id = Uuid::new_v4().simple().to_string(); let nanos = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .unwrap() .as_nanos(); let username = format!("ocr_count_{}_{}_{}_{}", role.to_string(), test_id, nanos, Uuid::new_v4().simple()); let email = format!("ocr_count_{}_{}@{}.example.com", test_id, nanos, Uuid::new_v4().simple()); let password = "testpassword123"; // Register user let user_data = CreateUser { username: username.clone(), email: email.clone(), password: password.to_string(), role: Some(role), }; let register_response = self.client .post(&format!("{}/api/auth/register", get_base_url())) .json(&user_data) .timeout(TIMEOUT) .send() .await?; if !register_response.status().is_success() { let status = register_response.status(); let text = register_response.text().await?; return Err(format!("Registration failed: {}", text).into()); } // Login to get token let login_data = LoginRequest { username: username.clone(), password: password.to_string(), }; let login_response = self.client .post(&format!("{}/api/auth/login", get_base_url())) .json(&login_data) .timeout(TIMEOUT) .send() .await?; if !login_response.status().is_success() { return Err(format!("Login failed: {}", login_response.text().await?).into()); } let login_result: LoginResponse = login_response.json().await?; self.token = Some(login_result.token.clone()); self.user_id = Some(login_result.user.id.to_string()); Ok(login_result.token) } /// Get authorization header fn get_auth_header(&self) -> String { format!("Bearer {}", self.token.as_ref().unwrap()) } /// Get failed OCR documents and statistics async fn get_failed_ocr_documents(&self) -> Result> { let response = self.client .get(&format!("{}/api/documents/failed?stage=ocr", get_base_url())) .header("Authorization", self.get_auth_header()) .timeout(TIMEOUT) .send() .await?; if !response.status().is_success() { return Err(format!("Failed to get failed OCR documents: {}", response.text().await?).into()); } let result: Value = response.json().await?; Ok(result) } /// Upload a document (for creating test data) async fn upload_document(&self, filename: &str, content: &[u8]) -> Result> { let form = reqwest::multipart::Form::new() .part("file", reqwest::multipart::Part::bytes(content.to_vec()) .file_name(filename.to_string()) .mime_str("application/pdf")?); let response = self.client .post(&format!("{}/api/documents/upload", get_base_url())) .header("Authorization", self.get_auth_header()) .multipart(form) .timeout(TIMEOUT) .send() .await?; if !response.status().is_success() { return Err(format!("Upload failed: {}", response.text().await?).into()); } let document: Value = response.json().await?; Ok(document) } } #[tokio::test] async fn test_zero_failures_display_correctly() { let mut client = OcrFailureCountingTestClient::new(); let _token = match client.register_and_login(UserRole::User).await { Ok(t) => t, Err(e) => { eprintln!("Setup failed: {}", e); return; } }; // Get failed OCR documents for new user (should be zero) let result = client.get_failed_ocr_documents().await.unwrap(); // Verify zero failures are handled correctly assert_eq!(result["statistics"]["total_failed"], 0); assert!(result["documents"].is_array()); assert_eq!(result["documents"].as_array().unwrap().len(), 0); assert!(result["statistics"]["by_reason"].is_object()); assert_eq!(result["statistics"]["by_reason"].as_object().unwrap().len(), 0); // Verify pagination shows zero assert_eq!(result["pagination"]["total"], 0); assert_eq!(result["pagination"]["total_pages"], 0); println!("✅ Zero failures are displayed correctly"); } #[tokio::test] async fn test_failure_categories_structure() { let mut client = OcrFailureCountingTestClient::new(); let _token = match client.register_and_login(UserRole::User).await { Ok(t) => t, Err(e) => { eprintln!("Setup failed: {}", e); return; } }; let result = client.get_failed_ocr_documents().await.unwrap(); let by_reason = &result["statistics"]["by_reason"]; // Verify by_reason is an object assert!(by_reason.is_object()); // Check structure of any failure reasons that exist if let Some(reasons) = by_reason.as_object() { for (reason, count) in reasons { // Each entry should have a non-empty reason and numeric count assert!(!reason.is_empty(), "Reason should not be empty"); assert!(count.is_number(), "Count should be a number"); // Count should be non-negative let count_val = count.as_i64().unwrap(); assert!(count_val >= 0, "Failure count should be non-negative"); } } println!("✅ Failure categories have correct structure"); } #[tokio::test] async fn test_total_failed_matches_document_count() { let mut client = OcrFailureCountingTestClient::new(); let _token = match client.register_and_login(UserRole::User).await { Ok(t) => t, Err(e) => { eprintln!("Setup failed: {}", e); return; } }; let result = client.get_failed_ocr_documents().await.unwrap(); // Get the total failed count from statistics let total_failed = result["statistics"]["total_failed"].as_i64().unwrap(); // Get the actual number of documents returned let documents = result["documents"].as_array().unwrap(); let actual_count = documents.len() as i64; // For the first page, the actual count should match pagination.total if total <= limit let pagination_total = result["pagination"]["total"].as_i64().unwrap(); let limit = result["pagination"]["limit"].as_i64().unwrap(); // The documents count should be min(total_failed, limit) if we're on the first page let expected_documents_count = std::cmp::min(total_failed, limit); assert_eq!(actual_count, expected_documents_count, "Document count should match expected count for first page"); // Total failed should match pagination total assert_eq!(total_failed, pagination_total, "Total failed should match pagination total"); println!("✅ Total failed count matches document count and pagination"); } #[tokio::test] async fn test_failure_category_counts_sum_to_total() { let mut client = OcrFailureCountingTestClient::new(); let _token = match client.register_and_login(UserRole::User).await { Ok(t) => t, Err(e) => { eprintln!("Setup failed: {}", e); return; } }; let result = client.get_failed_ocr_documents().await.unwrap(); let total_failed = result["statistics"]["total_failed"].as_i64().unwrap(); let by_reason = result["statistics"]["by_reason"].as_object().unwrap(); // Sum up all reason counts let reason_sum: i64 = by_reason .values() .map(|count| count.as_i64().unwrap()) .sum(); // Reason counts should sum to total failed assert_eq!(reason_sum, total_failed, "Sum of reason counts should equal total failed count"); println!("✅ Failure category counts sum to total failed count"); } #[tokio::test] async fn test_failure_reason_classification() { let mut client = OcrFailureCountingTestClient::new(); let _token = match client.register_and_login(UserRole::User).await { Ok(t) => t, Err(e) => { eprintln!("Setup failed: {}", e); return; } }; let result = client.get_failed_ocr_documents().await.unwrap(); let by_reason = result["statistics"]["by_reason"].as_object().unwrap(); // Check that known failure reasons are properly categorized let valid_reason_keys = vec![ "low_ocr_confidence", "ocr_timeout", "ocr_memory_limit", "pdf_parsing_error", "file_corrupted", "unsupported_format", "access_denied", "other" ]; for (reason_key, _count) in by_reason { assert!(valid_reason_keys.contains(&reason_key.as_str()), "Reason key '{}' should be one of the valid failure reasons", reason_key); } println!("✅ Failure reasons are properly classified"); } #[tokio::test] async fn test_document_failure_fields_present() { let mut client = OcrFailureCountingTestClient::new(); let _token = match client.register_and_login(UserRole::User).await { Ok(t) => t, Err(e) => { eprintln!("Setup failed: {}", e); return; } }; let result = client.get_failed_ocr_documents().await.unwrap(); let documents = result["documents"].as_array().unwrap(); // Check each failed document has required fields for document in documents { // Required fields for failed documents assert!(document.get("id").is_some(), "Document should have 'id' field"); assert!(document.get("filename").is_some(), "Document should have 'filename' field"); assert!(document.get("ocr_status").is_some(), "Document should have 'ocr_status' field"); assert!(document.get("ocr_error").is_some(), "Document should have 'ocr_error' field"); assert!(document.get("ocr_failure_reason").is_some(), "Document should have 'ocr_failure_reason' field"); assert!(document.get("failure_category").is_some(), "Document should have 'failure_category' field"); assert!(document.get("retry_count").is_some(), "Document should have 'retry_count' field"); assert!(document.get("can_retry").is_some(), "Document should have 'can_retry' field"); // Verify OCR status is 'failed' assert_eq!(document["ocr_status"], "failed", "OCR status should be 'failed'"); // Verify retry count is a non-negative number let retry_count = document["retry_count"].as_i64().unwrap(); assert!(retry_count >= 0, "Retry count should be non-negative"); // Verify can_retry is a boolean assert!(document["can_retry"].is_boolean(), "'can_retry' should be a boolean"); } println!("✅ Failed documents have all required fields"); } #[tokio::test] async fn test_pagination_consistency() { let mut client = OcrFailureCountingTestClient::new(); let _token = match client.register_and_login(UserRole::User).await { Ok(t) => t, Err(e) => { eprintln!("Setup failed: {}", e); return; } }; // Test different pagination parameters let response1 = client.client .get(&format!("{}/api/documents/failed?stage=ocr&limit=10&offset=0", get_base_url())) .header("Authorization", client.get_auth_header()) .timeout(TIMEOUT) .send() .await.unwrap(); let result1: Value = response1.json().await.unwrap(); let response2 = client.client .get(&format!("{}/api/documents/failed?stage=ocr&limit=5&offset=0", get_base_url())) .header("Authorization", client.get_auth_header()) .timeout(TIMEOUT) .send() .await.unwrap(); let result2: Value = response2.json().await.unwrap(); // Both should have same total count assert_eq!(result1["pagination"]["total"], result2["pagination"]["total"], "Total count should be consistent across different pagination requests"); assert_eq!(result1["statistics"]["total_failed"], result2["statistics"]["total_failed"], "Total failed count should be consistent across different pagination requests"); // Verify pagination parameters are respected assert_eq!(result1["pagination"]["limit"], 10); assert_eq!(result2["pagination"]["limit"], 5); assert_eq!(result1["pagination"]["offset"], 0); assert_eq!(result2["pagination"]["offset"], 0); // Documents array length should not exceed limit let docs1_len = result1["documents"].as_array().unwrap().len(); let docs2_len = result2["documents"].as_array().unwrap().len(); assert!(docs1_len <= 10, "Documents should not exceed limit of 10"); assert!(docs2_len <= 5, "Documents should not exceed limit of 5"); println!("✅ Pagination parameters are consistent and respected"); } #[tokio::test] async fn test_statistics_are_always_present() { let mut client = OcrFailureCountingTestClient::new(); let _token = match client.register_and_login(UserRole::User).await { Ok(t) => t, Err(e) => { eprintln!("Setup failed: {}", e); return; } }; let result = client.get_failed_ocr_documents().await.unwrap(); // Statistics should always be present assert!(result.get("statistics").is_some(), "Statistics should always be present"); let statistics = &result["statistics"]; assert!(statistics.get("total_failed").is_some(), "total_failed should always be present"); assert!(statistics.get("by_reason").is_some(), "by_reason should always be present"); assert!(statistics.get("by_stage").is_some(), "by_stage should always be present"); // Values should be valid even if zero assert!(statistics["total_failed"].is_number(), "total_failed should be a number"); assert!(statistics["by_reason"].is_object(), "by_reason should be an object"); assert!(statistics["by_stage"].is_object(), "by_stage should be an object"); let total_failed = statistics["total_failed"].as_i64().unwrap(); assert!(total_failed >= 0, "total_failed should be non-negative"); println!("✅ Statistics are always present and valid"); }