Readur/tests/integration_ocr_failure_cou...

445 lines
16 KiB
Rust

/*!
* OCR Failure Counting Verification Tests
*
* Tests to ensure that OCR failure counting and display works correctly:
* - Verifies that failed OCR documents are properly counted
* - Tests that failures are correctly categorized and counted
* - Ensures the UI displays accurate failure statistics
* - Tests edge cases with zero failures
* - Verifies failure reason classification logic
*/
use reqwest::Client;
use serde_json::Value;
use std::time::Duration;
use uuid::Uuid;
use readur::models::{CreateUser, LoginRequest, LoginResponse, UserRole};
/// Returns the base URL of the API server under test.
///
/// The `API_URL` environment variable takes precedence; when it cannot be
/// read, the local default `http://localhost:8000` is used.
fn get_base_url() -> String {
    match std::env::var("API_URL") {
        Ok(url) => url,
        Err(_) => String::from("http://localhost:8000"),
    }
}
/// Timeout applied to each API request issued by these tests; the initial
/// health check in `register_and_login` uses its own shorter 5-second timeout.
const TIMEOUT: Duration = Duration::from_secs(60);
/// Test client for OCR failure counting verification.
///
/// Bundles a `reqwest` client with the credentials obtained by
/// `register_and_login`, so later requests can carry a bearer token.
struct OcrFailureCountingTestClient {
    /// HTTP client used for every request made through this test client.
    client: Client,
    /// Token returned by the login endpoint; `None` until `register_and_login` succeeds.
    token: Option<String>,
    /// ID of the logged-in user, stored as a string; `None` until `register_and_login` succeeds.
    user_id: Option<String>,
}
impl OcrFailureCountingTestClient {
fn new() -> Self {
Self {
client: Client::new(),
token: None,
user_id: None,
}
}
/// Register and login a test user
async fn register_and_login(&mut self, role: UserRole) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
// Health check
let health_check = self.client
.get(&format!("{}/api/health", get_base_url()))
.timeout(Duration::from_secs(5))
.send()
.await;
if let Err(e) = health_check {
eprintln!("Health check failed: {}. Is the server running at {}?", e, get_base_url());
return Err(format!("Server not running: {}", e).into());
}
// Use UUID for guaranteed uniqueness
let test_id = Uuid::new_v4().simple().to_string();
let nanos = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos();
let username = format!("ocr_count_{}_{}_{}_{}", role.to_string(), test_id, nanos, Uuid::new_v4().simple());
let email = format!("ocr_count_{}_{}@{}.example.com", test_id, nanos, Uuid::new_v4().simple());
let password = "testpassword123";
// Register user
let user_data = CreateUser {
username: username.clone(),
email: email.clone(),
password: password.to_string(),
role: Some(role),
};
let register_response = self.client
.post(&format!("{}/api/auth/register", get_base_url()))
.json(&user_data)
.timeout(TIMEOUT)
.send()
.await?;
if !register_response.status().is_success() {
let status = register_response.status();
let text = register_response.text().await?;
return Err(format!("Registration failed: {}", text).into());
}
// Login to get token
let login_data = LoginRequest {
username: username.clone(),
password: password.to_string(),
};
let login_response = self.client
.post(&format!("{}/api/auth/login", get_base_url()))
.json(&login_data)
.timeout(TIMEOUT)
.send()
.await?;
if !login_response.status().is_success() {
return Err(format!("Login failed: {}", login_response.text().await?).into());
}
let login_result: LoginResponse = login_response.json().await?;
self.token = Some(login_result.token.clone());
self.user_id = Some(login_result.user.id.to_string());
Ok(login_result.token)
}
/// Get authorization header
fn get_auth_header(&self) -> String {
format!("Bearer {}", self.token.as_ref().unwrap())
}
/// Get failed OCR documents and statistics
async fn get_failed_ocr_documents(&self) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
let response = self.client
.get(&format!("{}/api/documents/failed?stage=ocr", get_base_url()))
.header("Authorization", self.get_auth_header())
.timeout(TIMEOUT)
.send()
.await?;
if !response.status().is_success() {
return Err(format!("Failed to get failed OCR documents: {}", response.text().await?).into());
}
let result: Value = response.json().await?;
Ok(result)
}
/// Upload a document (for creating test data)
async fn upload_document(&self, filename: &str, content: &[u8]) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
let form = reqwest::multipart::Form::new()
.part("file", reqwest::multipart::Part::bytes(content.to_vec())
.file_name(filename.to_string())
.mime_str("application/pdf")?);
let response = self.client
.post(&format!("{}/api/documents/upload", get_base_url()))
.header("Authorization", self.get_auth_header())
.multipart(form)
.timeout(TIMEOUT)
.send()
.await?;
if !response.status().is_success() {
return Err(format!("Upload failed: {}", response.text().await?).into());
}
let document: Value = response.json().await?;
Ok(document)
}
}
/// A freshly registered user has no failed OCR documents, so the endpoint
/// must report zero in the statistics, the document list and the pagination.
///
/// When setup fails (for example the server is unreachable) the test logs
/// the reason and returns without asserting anything.
#[tokio::test]
async fn test_zero_failures_display_correctly() {
    let mut client = OcrFailureCountingTestClient::new();
    if let Err(e) = client.register_and_login(UserRole::User).await {
        eprintln!("Setup failed: {}", e);
        return;
    }

    // The user was just created, so nothing can have failed yet.
    let response = client.get_failed_ocr_documents().await.unwrap();
    let statistics = &response["statistics"];
    let documents = &response["documents"];
    let by_reason = &statistics["by_reason"];
    let pagination = &response["pagination"];

    // Statistics and document list are present but empty.
    assert_eq!(statistics["total_failed"], 0);
    assert!(documents.is_array());
    assert_eq!(documents.as_array().unwrap().len(), 0);
    assert!(by_reason.is_object());
    assert_eq!(by_reason.as_object().unwrap().len(), 0);

    // Pagination agrees with the empty result set.
    assert_eq!(pagination["total"], 0);
    assert_eq!(pagination["total_pages"], 0);

    println!("✅ Zero failures are displayed correctly");
}
/// Checks the shape of `statistics.by_reason`: it must be an object whose
/// keys are non-empty reason names and whose values are non-negative counts.
///
/// When setup fails the test logs the reason and returns without asserting.
#[tokio::test]
async fn test_failure_categories_structure() {
    let mut client = OcrFailureCountingTestClient::new();
    if let Err(e) = client.register_and_login(UserRole::User).await {
        eprintln!("Setup failed: {}", e);
        return;
    }

    let response = client.get_failed_ocr_documents().await.unwrap();
    let by_reason = &response["statistics"]["by_reason"];

    assert!(by_reason.is_object());

    // Walks every (reason, count) entry when `by_reason` is an object and
    // nothing otherwise.
    for (reason, count) in by_reason.as_object().into_iter().flatten() {
        assert!(!reason.is_empty(), "Reason should not be empty");
        assert!(count.is_number(), "Count should be a number");

        let occurrences = count.as_i64().unwrap();
        assert!(occurrences >= 0, "Failure count should be non-negative");
    }

    println!("✅ Failure categories have correct structure");
}
/// Cross-checks the three counts in a default (first-page) response:
/// `statistics.total_failed`, `pagination.total` and the number of returned
/// documents, which may be capped by `pagination.limit`.
///
/// When setup fails the test logs the reason and returns without asserting.
#[tokio::test]
async fn test_total_failed_matches_document_count() {
    let mut client = OcrFailureCountingTestClient::new();
    if let Err(e) = client.register_and_login(UserRole::User).await {
        eprintln!("Setup failed: {}", e);
        return;
    }

    let response = client.get_failed_ocr_documents().await.unwrap();
    let pagination = &response["pagination"];

    let total_failed = response["statistics"]["total_failed"].as_i64().unwrap();
    let actual_count = response["documents"].as_array().unwrap().len() as i64;
    let pagination_total = pagination["total"].as_i64().unwrap();
    let limit = pagination["limit"].as_i64().unwrap();

    // A first page holds every failure unless the limit cuts it short.
    let expected_documents_count = total_failed.min(limit);
    assert_eq!(
        actual_count, expected_documents_count,
        "Document count should match expected count for first page"
    );

    assert_eq!(
        total_failed, pagination_total,
        "Total failed should match pagination total"
    );

    println!("✅ Total failed count matches document count and pagination");
}
/// Verifies that the per-reason counts in `statistics.by_reason` add up to
/// `statistics.total_failed`.
///
/// When setup fails the test logs the reason and returns without asserting.
#[tokio::test]
async fn test_failure_category_counts_sum_to_total() {
    let mut client = OcrFailureCountingTestClient::new();
    if let Err(e) = client.register_and_login(UserRole::User).await {
        eprintln!("Setup failed: {}", e);
        return;
    }

    let response = client.get_failed_ocr_documents().await.unwrap();
    let statistics = &response["statistics"];
    let total_failed = statistics["total_failed"].as_i64().unwrap();
    let by_reason = statistics["by_reason"].as_object().unwrap();

    // Accumulate the count recorded for every failure reason.
    let mut reason_sum: i64 = 0;
    for count in by_reason.values() {
        reason_sum += count.as_i64().unwrap();
    }

    assert_eq!(
        reason_sum, total_failed,
        "Sum of reason counts should equal total failed count"
    );

    println!("✅ Failure category counts sum to total failed count");
}
/// Verifies that every key in `statistics.by_reason` is one of the failure
/// reasons this test recognizes.
///
/// When setup fails the test logs the reason and returns without asserting.
#[tokio::test]
async fn test_failure_reason_classification() {
    // Fixed list of accepted reason keys; a const array avoids allocating a
    // `Vec` for data that never changes.
    const VALID_REASON_KEYS: [&str; 8] = [
        "low_ocr_confidence",
        "ocr_timeout",
        "ocr_memory_limit",
        "pdf_parsing_error",
        "file_corrupted",
        "unsupported_format",
        "access_denied",
        "other",
    ];

    let mut client = OcrFailureCountingTestClient::new();
    let _token = match client.register_and_login(UserRole::User).await {
        Ok(t) => t,
        Err(e) => {
            eprintln!("Setup failed: {}", e);
            return;
        }
    };

    let result = client.get_failed_ocr_documents().await.unwrap();
    let by_reason = result["statistics"]["by_reason"].as_object().unwrap();

    // Only the keys matter here; the counts are covered by other tests.
    for reason_key in by_reason.keys() {
        assert!(
            VALID_REASON_KEYS.contains(&reason_key.as_str()),
            "Reason key '{}' should be one of the valid failure reasons",
            reason_key
        );
    }

    println!("✅ Failure reasons are properly classified");
}
/// Verifies that each returned failed document carries the expected fields
/// and that `ocr_status`, `retry_count` and `can_retry` hold sensible values.
///
/// When setup fails the test logs the reason and returns without asserting.
/// With no failed documents the per-document checks are simply skipped.
#[tokio::test]
async fn test_document_failure_fields_present() {
    // Keys every failed document must contain, in the order they are checked.
    const REQUIRED_FIELDS: [&str; 8] = [
        "id",
        "filename",
        "ocr_status",
        "ocr_error",
        "ocr_failure_reason",
        "failure_category",
        "retry_count",
        "can_retry",
    ];

    let mut client = OcrFailureCountingTestClient::new();
    if let Err(e) = client.register_and_login(UserRole::User).await {
        eprintln!("Setup failed: {}", e);
        return;
    }

    let response = client.get_failed_ocr_documents().await.unwrap();
    let documents = response["documents"].as_array().unwrap();

    for document in documents {
        for field in REQUIRED_FIELDS.iter() {
            assert!(
                document.get(*field).is_some(),
                "Document should have '{}' field",
                field
            );
        }

        assert_eq!(document["ocr_status"], "failed", "OCR status should be 'failed'");

        let retry_count = document["retry_count"].as_i64().unwrap();
        assert!(retry_count >= 0, "Retry count should be non-negative");

        assert!(document["can_retry"].is_boolean(), "'can_retry' should be a boolean");
    }

    println!("✅ Failed documents have all required fields");
}
#[tokio::test]
async fn test_pagination_consistency() {
let mut client = OcrFailureCountingTestClient::new();
let _token = match client.register_and_login(UserRole::User).await {
Ok(t) => t,
Err(e) => {
eprintln!("Setup failed: {}", e);
return;
}
};
// Test different pagination parameters
let response1 = client.client
.get(&format!("{}/api/documents/failed?stage=ocr&limit=10&offset=0", get_base_url()))
.header("Authorization", client.get_auth_header())
.timeout(TIMEOUT)
.send()
.await.unwrap();
let result1: Value = response1.json().await.unwrap();
let response2 = client.client
.get(&format!("{}/api/documents/failed?stage=ocr&limit=5&offset=0", get_base_url()))
.header("Authorization", client.get_auth_header())
.timeout(TIMEOUT)
.send()
.await.unwrap();
let result2: Value = response2.json().await.unwrap();
// Both should have same total count
assert_eq!(result1["pagination"]["total"], result2["pagination"]["total"],
"Total count should be consistent across different pagination requests");
assert_eq!(result1["statistics"]["total_failed"], result2["statistics"]["total_failed"],
"Total failed count should be consistent across different pagination requests");
// Verify pagination parameters are respected
assert_eq!(result1["pagination"]["limit"], 10);
assert_eq!(result2["pagination"]["limit"], 5);
assert_eq!(result1["pagination"]["offset"], 0);
assert_eq!(result2["pagination"]["offset"], 0);
// Documents array length should not exceed limit
let docs1_len = result1["documents"].as_array().unwrap().len();
let docs2_len = result2["documents"].as_array().unwrap().len();
assert!(docs1_len <= 10, "Documents should not exceed limit of 10");
assert!(docs2_len <= 5, "Documents should not exceed limit of 5");
println!("✅ Pagination parameters are consistent and respected");
}
/// Verifies that the response always contains a `statistics` object with
/// `total_failed`, `by_reason` and `by_stage`, and that each has the expected
/// JSON type.
///
/// When setup fails the test logs the reason and returns without asserting.
#[tokio::test]
async fn test_statistics_are_always_present() {
    let mut client = OcrFailureCountingTestClient::new();
    if let Err(e) = client.register_and_login(UserRole::User).await {
        eprintln!("Setup failed: {}", e);
        return;
    }

    let response = client.get_failed_ocr_documents().await.unwrap();

    assert!(response.get("statistics").is_some(), "Statistics should always be present");
    let statistics = &response["statistics"];

    // Presence is checked for every key before any type is inspected.
    for key in ["total_failed", "by_reason", "by_stage"].iter() {
        assert!(statistics.get(*key).is_some(), "{} should always be present", key);
    }

    // Types must hold even when the counts are zero.
    assert!(statistics["total_failed"].is_number(), "total_failed should be a number");
    for key in ["by_reason", "by_stage"].iter() {
        assert!(statistics[*key].is_object(), "{} should be an object", key);
    }

    let total_failed = statistics["total_failed"].as_i64().unwrap();
    assert!(total_failed >= 0, "total_failed should be non-negative");

    println!("✅ Statistics are always present and valid");
}