Readur/tests/integration_ocr_retry_tests.rs

486 lines
19 KiB
Rust
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use reqwest::Client;
use serde_json::{json, Value};
use std::time::Duration;
use uuid::Uuid;
use readur::models::{CreateUser, LoginRequest, LoginResponse, UserRole};
/// Returns the base URL of the server under test, without a trailing slash.
///
/// The value is read from the `API_URL` environment variable. Surrounding
/// whitespace and trailing `/` characters are removed so that callers can
/// safely build URLs as `format!("{}/api/...", get_base_url())` without
/// producing a double slash. When the variable is unset, not valid Unicode,
/// or empty after trimming, `http://localhost:8000` is returned.
fn get_base_url() -> String {
    const DEFAULT_BASE_URL: &str = "http://localhost:8000";

    std::env::var("API_URL")
        .ok()
        .map(|raw| raw.trim().trim_end_matches('/').to_string())
        // An empty override would turn every request URL into a relative path.
        .filter(|url| !url.is_empty())
        .unwrap_or_else(|| DEFAULT_BASE_URL.to_string())
}
/// Per-request timeout (60 seconds) passed to `.timeout(...)` on the API
/// requests in this file; the initial health check uses its own 10-second limit.
const TIMEOUT: Duration = Duration::from_secs(60);
/// Fixture shared by the OCR-retry integration tests.
///
/// Bundles the HTTP client with the token obtained during setup so each test
/// can issue authenticated requests against the running server.
struct OcrRetryTestHelper {
    /// HTTP client reused for every request made through the helper.
    client: Client,
    /// Token returned by the login endpoint; sent as a `Bearer` credential.
    token: String,
}
impl OcrRetryTestHelper {
async fn new() -> Result<Self, Box<dyn std::error::Error + Send + Sync>> {
let client = Client::new();
// First check if server is running with better error handling
let health_check = client
.get(&format!("{}/api/health", get_base_url()))
.timeout(Duration::from_secs(10))
.send()
.await;
match health_check {
Ok(response) => {
if !response.status().is_success() {
let status = response.status();
let text = response.text().await.unwrap_or_else(|_| "Unable to read response".to_string());
return Err(format!("Health check failed with status {}: {}. Is the server running at {}?", status, text, get_base_url()).into());
}
println!("✅ Server health check passed at {}", get_base_url());
}
Err(e) => {
eprintln!("❌ Cannot connect to server at {}: {}", get_base_url(), e);
eprintln!("💡 To run integration tests, start the server first:");
eprintln!(" cargo run");
eprintln!(" Then run tests in another terminal:");
eprintln!(" cargo test --test integration_ocr_retry_tests");
return Err(format!("Server not reachable: {}", e).into());
}
}
// Create a test admin user
let test_id = Uuid::new_v4().simple().to_string();
let nanos = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos();
let username = format!("ocr_retry_admin_{}_{}", test_id, nanos);
let email = format!("ocr_retry_admin_{}@{}.example.com", test_id, nanos);
let password = "testpassword123";
// Register admin user
let user_data = CreateUser {
username: username.clone(),
email: email.clone(),
password: password.to_string(),
role: Some(UserRole::Admin),
};
let register_response = client
.post(&format!("{}/api/auth/register", get_base_url()))
.json(&user_data)
.timeout(TIMEOUT)
.send()
.await?;
if !register_response.status().is_success() {
return Err(format!("Registration failed: {}", register_response.text().await?).into());
}
// Login with the new user
let login_data = LoginRequest {
username: username.clone(),
password: password.to_string(),
};
let login_response = client
.post(&format!("{}/api/auth/login", get_base_url()))
.json(&login_data)
.timeout(TIMEOUT)
.send()
.await?;
if !login_response.status().is_success() {
return Err(format!("Login failed: {}", login_response.text().await?).into());
}
let login_result: LoginResponse = login_response.json().await?;
let token = login_result.token;
Ok(Self { client, token })
}
fn get_auth_header(&self) -> String {
format!("Bearer {}", self.token)
}
async fn get_retry_stats(&self) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
let response = self.client
.get(&format!("{}/api/documents/ocr/retry-stats", get_base_url()))
.header("Authorization", self.get_auth_header())
.timeout(TIMEOUT)
.send()
.await?;
let status = response.status();
let response_text = response.text().await?;
if !status.is_success() {
return Err(format!("Failed to get retry stats (status {}): {}", status, response_text).into());
}
// Try to parse the JSON and provide better error messages
match serde_json::from_str::<Value>(&response_text) {
Ok(result) => Ok(result),
Err(e) => {
eprintln!("JSON parsing failed for retry stats response:");
eprintln!("Status: {}", status);
eprintln!("Response text: {}", response_text);
Err(format!("Failed to parse JSON response: {}. Raw response: {}", e, response_text).into())
}
}
}
async fn get_retry_recommendations(&self) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
let response = self.client
.get(&format!("{}/api/documents/ocr/retry-recommendations", get_base_url()))
.header("Authorization", self.get_auth_header())
.timeout(TIMEOUT)
.send()
.await?;
let status = response.status();
let response_text = response.text().await?;
if !status.is_success() {
return Err(format!("Failed to get retry recommendations (status {}): {}", status, response_text).into());
}
// Try to parse the JSON and provide better error messages
match serde_json::from_str::<Value>(&response_text) {
Ok(result) => Ok(result),
Err(e) => {
eprintln!("JSON parsing failed for retry recommendations response:");
eprintln!("Status: {}", status);
eprintln!("Response text: {}", response_text);
Err(format!("Failed to parse JSON response: {}. Raw response: {}", e, response_text).into())
}
}
}
async fn bulk_retry_ocr(&self, mode: &str, document_ids: Option<Vec<String>>, preview_only: bool) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
let mut request_body = json!({
"mode": mode,
"preview_only": preview_only
});
if let Some(ids) = document_ids {
request_body["document_ids"] = json!(ids);
}
let response = self.client
.post(&format!("{}/api/documents/ocr/bulk-retry", get_base_url()))
.header("Authorization", self.get_auth_header())
.json(&request_body)
.timeout(TIMEOUT)
.send()
.await?;
let status = response.status();
let response_text = response.text().await?;
if !status.is_success() {
return Err(format!("Failed to bulk retry OCR (status {}): {}", status, response_text).into());
}
// Try to parse the JSON and provide better error messages
match serde_json::from_str::<Value>(&response_text) {
Ok(result) => Ok(result),
Err(e) => {
eprintln!("JSON parsing failed for bulk retry response:");
eprintln!("Status: {}", status);
eprintln!("Response text: {}", response_text);
Err(format!("Failed to parse JSON response: {}. Raw response: {}", e, response_text).into())
}
}
}
async fn get_document_retry_history(&self, document_id: &str) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
let response = self.client
.get(&format!("{}/api/documents/{}/ocr/retry-history", get_base_url(), document_id))
.header("Authorization", self.get_auth_header())
.timeout(TIMEOUT)
.send()
.await?;
if !response.status().is_success() {
return Err(format!("Failed to get retry history: {}", response.text().await?).into());
}
let result: Value = response.json().await?;
Ok(result)
}
async fn get_failed_documents(&self) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
let response = self.client
.get(&format!("{}/api/documents/failed", get_base_url()))
.header("Authorization", self.get_auth_header())
.timeout(TIMEOUT)
.send()
.await?;
if !response.status().is_success() {
return Err(format!("Failed to get failed documents: {}", response.text().await?).into());
}
let result: Value = response.json().await?;
Ok(result)
}
async fn create_failed_test_document(&self) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
// Upload a simple text file first
let test_content = "This is a test document for OCR retry testing.";
let file_part = reqwest::multipart::Part::bytes(test_content.as_bytes())
.file_name("test_retry_document.txt")
.mime_str("text/plain")?;
let form = reqwest::multipart::Form::new()
.part("file", file_part);
let response = self.client
.post(&format!("{}/api/documents", get_base_url()))
.header("Authorization", self.get_auth_header())
.multipart(form)
.timeout(TIMEOUT)
.send()
.await?;
if !response.status().is_success() {
return Err(format!("Failed to upload test document: {}", response.text().await?).into());
}
let upload_result: Value = response.json().await?;
let doc_id = upload_result["id"].as_str()
.ok_or("No document ID in upload response")?
.to_string();
// Wait a moment for processing
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
// Manually mark the document as failed via direct database manipulation isn't available,
// so we'll just return the document ID and use it for testing the endpoint structure
Ok(doc_id)
}
}
/// Checks that the retry-stats endpoint answers with the expected JSON shape.
///
/// The test is skipped (returns early) when the helper cannot be set up, and
/// panics when the endpoint call itself fails.
#[tokio::test]
async fn test_ocr_retry_stats_endpoint() {
    let helper = match OcrRetryTestHelper::new().await {
        Err(setup_err) => {
            println!(" Skipping OCR retry stats test (setup failed): {}", setup_err);
            return;
        }
        Ok(ready) => ready,
    };

    // Fetch the statistics; any failure here is a real test failure.
    let stats = match helper.get_retry_stats().await {
        Ok(body) => body,
        Err(err) => {
            println!(" OCR retry stats test failed: {}", err);
            println!("💡 This might indicate a server issue or missing endpoint implementation");
            panic!("OCR retry stats endpoint failed: {}", err);
        }
    };
    println!(" OCR retry stats endpoint working");

    // Response structure: two arrays and one numeric counter.
    assert!(stats["failure_reasons"].is_array(), "Should have failure_reasons array");
    assert!(stats["file_types"].is_array(), "Should have file_types array");
    assert!(stats["total_failed"].is_number(), "Should have total_failed count");

    println!("📊 Total failed documents: {}", stats["total_failed"]);
}
/// Checks that the retry-recommendations endpoint answers with the expected
/// JSON shape and prints a one-line summary per recommendation.
///
/// The test is skipped (returns early) when the helper cannot be set up, and
/// panics when the endpoint call itself fails.
#[tokio::test]
async fn test_ocr_retry_recommendations_endpoint() {
    let helper = match OcrRetryTestHelper::new().await {
        Err(setup_err) => {
            println!(" Skipping OCR retry recommendations test (setup failed): {}", setup_err);
            return;
        }
        Ok(ready) => ready,
    };

    // Fetch the recommendations; any failure here is a real test failure.
    let payload = match helper.get_retry_recommendations().await {
        Ok(body) => body,
        Err(err) => {
            println!(" OCR retry recommendations test failed: {}", err);
            println!("💡 This might indicate a server issue or missing endpoint implementation");
            panic!("OCR retry recommendations endpoint failed: {}", err);
        }
    };
    println!(" OCR retry recommendations endpoint working");

    // Response structure: a list plus its numeric total.
    assert!(payload["recommendations"].is_array(), "Should have recommendations array");
    assert!(payload["total_recommendations"].is_number(), "Should have total count");

    let entries = payload["recommendations"].as_array().unwrap();
    println!("💡 Got {} retry recommendations", entries.len());
    for entry in entries {
        // Missing fields fall back to placeholder values; this is output only.
        let title = entry["title"].as_str().unwrap_or("Unknown");
        let document_count = entry["document_count"].as_i64().unwrap_or(0);
        let success_percent = (entry["estimated_success_rate"].as_f64().unwrap_or(0.0) * 100.0) as i32;
        println!(" - {}: {} documents ({}% success rate)", title, document_count, success_percent);
    }
}
/// Checks that a bulk retry in preview mode reports matches without queueing
/// any document.
///
/// The test is skipped (returns early) when the helper cannot be set up, and
/// panics when the endpoint call itself fails.
#[tokio::test]
async fn test_bulk_retry_preview_mode() {
    let helper = match OcrRetryTestHelper::new().await {
        Err(setup_err) => {
            println!(" Skipping bulk retry preview test (setup failed): {}", setup_err);
            return;
        }
        Ok(ready) => ready,
    };

    // mode "all", no explicit document IDs, preview_only = true.
    let preview = match helper.bulk_retry_ocr("all", None, true).await {
        Ok(body) => body,
        Err(err) => {
            println!(" Bulk retry preview test failed: {}", err);
            println!("💡 This might indicate a server issue or missing endpoint implementation");
            panic!("Bulk retry preview failed: {}", err);
        }
    };
    println!(" Bulk retry preview mode working");

    // Response structure.
    let succeeded = preview["success"].as_bool().unwrap_or(false);
    assert!(succeeded, "Should be successful");
    assert!(preview["matched_count"].is_number(), "Should have matched_count");
    assert!(preview["queued_count"].is_number(), "Should have queued_count");
    assert!(preview["documents"].is_array(), "Should have documents array");
    let message = preview["message"].as_str().unwrap_or("");
    assert!(message.contains("Preview"), "Should indicate preview mode");

    // A preview must not enqueue anything; the fallback of 1 makes a
    // non-integer value fail the comparison as well.
    let queued = preview["queued_count"].as_u64().unwrap_or(1);
    assert_eq!(queued, 0, "Preview mode should not queue any documents");

    println!("📋 Preview found {} documents that would be retried", preview["matched_count"]);
}
/// Checks that the per-document retry-history endpoint answers with the
/// expected JSON shape.
///
/// A document is uploaded first to obtain a real ID. If that upload fails,
/// the endpoint is probed with the all-zero UUID instead, and either a
/// success or an error mentioning `404` is accepted. The test is skipped
/// (returns early) when the helper cannot be set up.
#[tokio::test]
async fn test_document_retry_history() {
    let helper = match OcrRetryTestHelper::new().await {
        Err(setup_err) => {
            println!(" Skipping retry history test (setup failed): {}", setup_err);
            return;
        }
        Ok(ready) => ready,
    };

    // Upload a document to get an ID to query (the helper does not actually
    // put the document into a failed state).
    println!("🔄 Creating a test failed document...");
    let doc_id = match helper.create_failed_test_document().await {
        Ok(created_id) => {
            println!(" Created test failed document with ID: {}", created_id);
            created_id
        }
        Err(upload_err) => {
            println!(" Could not create test failed document: {}", upload_err);
            // Fall back to the all-zero UUID to verify the endpoint does not crash.
            let test_uuid = "00000000-0000-0000-0000-000000000000";
            match helper.get_document_retry_history(test_uuid).await {
                Ok(_) => {
                    println!(" Document retry history endpoint working (with test UUID)");
                }
                // A 404 is expected for a non-existent document - that's fine
                Err(retry_err) if retry_err.to_string().contains("404") => {
                    println!(" Document retry history endpoint working (404 for non-existent document is expected)");
                }
                Err(retry_err) => {
                    println!(" Document retry history test failed even with test UUID: {}", retry_err);
                    panic!("Document retry history failed: {}", retry_err);
                }
            }
            return;
        }
    };

    // Query the retry history of the uploaded document.
    let history = match helper.get_document_retry_history(&doc_id).await {
        Ok(body) => body,
        Err(err) => {
            println!(" Document retry history test failed: {}", err);
            println!("💡 This might indicate a server issue or missing endpoint implementation");
            panic!("Document retry history failed: {}", err);
        }
    };
    println!(" Document retry history endpoint working");

    // Response structure.
    assert!(history["document_id"].is_string(), "Should have document_id");
    assert!(history["retry_history"].is_array(), "Should have retry_history array");
    assert!(history["total_retries"].is_number(), "Should have total_retries count");

    let total_retries = history["total_retries"].as_i64().unwrap_or(0);
    println!("📜 Document {} has {} retry attempts", doc_id, total_retries);
}
/// Checks that a filtered bulk-retry preview only returns documents matching
/// the requested MIME type and size limit.
///
/// The test is skipped (returns early) when the helper cannot be set up.
/// Once setup succeeded, a transport error or a non-success status fails the
/// test, in line with the other endpoint tests in this file; previously those
/// cases were only printed and the test passed regardless.
#[tokio::test]
async fn test_filtered_bulk_retry_preview() {
    /// Upper bound sent in the filter and checked on every returned document (5 MB).
    const MAX_FILE_SIZE: i64 = 5_242_880;

    let helper = match OcrRetryTestHelper::new().await {
        Ok(h) => h,
        Err(e) => {
            println!(" Skipping filtered bulk retry test (setup failed): {}", e);
            return;
        }
    };

    // Test filtered retry with specific criteria
    let request_body = json!({
        "mode": "filter",
        "preview_only": true,
        "filter": {
            "mime_types": ["application/pdf"],
            "max_file_size": MAX_FILE_SIZE,
            "limit": 10
        }
    });
    let response = helper
        .client
        .post(format!("{}/api/documents/ocr/bulk-retry", get_base_url()))
        .header("Authorization", helper.get_auth_header())
        .json(&request_body)
        .timeout(TIMEOUT)
        .send()
        .await;

    let res = match response {
        Ok(res) => res,
        Err(e) => {
            println!(" Filtered bulk retry request failed: {}", e);
            panic!("Filtered bulk retry request failed: {}", e);
        }
    };

    let status = res.status();
    if !status.is_success() {
        let error_text = res.text().await.unwrap_or_else(|_| "Unknown error".to_string());
        println!(" Filtered bulk retry failed with status {}: {}", status, error_text);
        panic!("Filtered bulk retry failed with status {}: {}", status, error_text);
    }

    let result: Value = res
        .json()
        .await
        .expect("Filtered bulk retry response should be valid JSON");
    println!(" Filtered bulk retry preview working");

    // Verify filtering worked
    let documents = result["documents"]
        .as_array()
        .expect("Should have documents array");
    for doc in documents {
        let mime_type = doc["mime_type"].as_str().unwrap_or("");
        assert_eq!(mime_type, "application/pdf", "Should only return PDF documents");
        let file_size = doc["file_size"].as_i64().unwrap_or(0);
        assert!(file_size <= MAX_FILE_SIZE, "Should only return files <= 5MB");
    }
    println!("🔍 Filtered preview found {} matching documents", documents.len());
}