diff --git a/migrations/20250620100013_add_database_guardrails.sql b/migrations/20250620100013_add_database_guardrails.sql
index 26a4cf2..67f86a6 100644
--- a/migrations/20250620100013_add_database_guardrails.sql
+++ b/migrations/20250620100013_add_database_guardrails.sql
@@ -53,10 +53,9 @@ WHERE status IN ('pending', 'processing');
 
 CREATE OR REPLACE FUNCTION validate_ocr_consistency() RETURNS TRIGGER AS $$
 BEGIN
-    -- Allow OCR retry operations: completed -> pending is allowed for retry functionality
-    -- Prevent other modifications to completed OCR data
-    IF OLD.ocr_status = 'completed' AND NEW.ocr_status != 'completed' AND NEW.ocr_status != 'pending' THEN
-        RAISE EXCEPTION 'Cannot modify completed OCR data for document %. Only retry (pending) is allowed.', OLD.id;
+    -- Prevent updating completed OCR unless explicitly allowed
+    IF OLD.ocr_status = 'completed' AND NEW.ocr_status != 'completed' THEN
+        RAISE EXCEPTION 'Cannot modify completed OCR data for document %', OLD.id;
     END IF;
 
     -- Ensure OCR text and metadata consistency
diff --git a/src/routes/documents_ocr_retry.rs b/src/routes/documents_ocr_retry.rs
index 11874eb..7e11b83 100644
--- a/src/routes/documents_ocr_retry.rs
+++ b/src/routes/documents_ocr_retry.rs
@@ -717,10 +717,10 @@ fn calculate_priority(file_size: i64, override_priority: Option<i32>) -> i32 {
 }
 
 #[derive(Debug, sqlx::FromRow)]
-struct DocumentInfo {
-    id: Uuid,
-    filename: String,
-    file_size: i64,
-    mime_type: String,
-    ocr_failure_reason: Option<String>,
+pub struct DocumentInfo {
+    pub id: Uuid,
+    pub filename: String,
+    pub file_size: i64,
+    pub mime_type: String,
+    pub ocr_failure_reason: Option<String>,
 }
\ No newline at end of file
diff --git a/src/tests/mod.rs b/src/tests/mod.rs
index d893d86..1e42cc8 100644
--- a/src/tests/mod.rs
+++ b/src/tests/mod.rs
@@ -21,4 +21,5 @@ mod migration_constraint_tests;
 mod migration_integration_tests;
 mod failed_documents_unit_tests;
 mod document_response_serialization_tests;
-mod unit_ocr_retry_db_tests_simple;
+mod unit_ocr_retry_db_tests_simple;
+mod ocr_retry_regression_tests;
diff --git a/src/tests/ocr_retry_regression_tests.rs b/src/tests/ocr_retry_regression_tests.rs
new file mode 100644
index 0000000..72dd2c6
--- /dev/null
+++ b/src/tests/ocr_retry_regression_tests.rs
@@ -0,0 +1,330 @@
+#[cfg(test)]
+mod ocr_retry_regression_tests {
+    use sqlx::{PgPool, Row};
+    use testcontainers::{runners::AsyncRunner, ContainerAsync};
+    use testcontainers_modules::postgres::Postgres;
+    use uuid::Uuid;
+    use crate::routes::documents_ocr_retry::DocumentInfo;
+
+    async fn setup_test_db() -> (ContainerAsync<Postgres>, PgPool) {
+        let postgres_image = Postgres::default();
+        let container = postgres_image.start().await.expect("Failed to start postgres container");
+        let port = container.get_host_port_ipv4(5432).await.expect("Failed to get postgres port");
+
+        let connection_string = format!(
+            "postgres://postgres:postgres@127.0.0.1:{}/postgres",
+            port
+        );
+
+        let pool = PgPool::connect(&connection_string).await.expect("Failed to connect to test database");
+
+        // Skip migrations that require extensions and create minimal schema manually
+        // This avoids needing uuid-ossp or other extensions for testing
+        sqlx::query(r#"
+            CREATE TABLE IF NOT EXISTS users (
+                id UUID PRIMARY KEY,
+                username VARCHAR(255) UNIQUE NOT NULL,
+                email VARCHAR(255) UNIQUE NOT NULL,
+                password_hash VARCHAR(255) NOT NULL,
+                role VARCHAR(50) NOT NULL DEFAULT 'user',
+                created_at TIMESTAMPTZ DEFAULT NOW(),
+                updated_at TIMESTAMPTZ DEFAULT NOW()
+            )
+        "#)
+        .execute(&pool)
+        .await
+        .expect("Failed to create users table");
+
+        sqlx::query(r#"
+            CREATE TABLE IF NOT EXISTS documents (
+                id UUID PRIMARY KEY,
+                filename VARCHAR(255) NOT NULL,
+                original_filename VARCHAR(255) NOT NULL,
+                user_id UUID NOT NULL REFERENCES users(id),
+                mime_type VARCHAR(100) NOT NULL,
+                file_size BIGINT NOT NULL,
+                ocr_status VARCHAR(50) DEFAULT 'pending',
+                ocr_text TEXT,
+                ocr_confidence DECIMAL(5,2),
+                ocr_word_count INTEGER,
+                ocr_processing_time_ms INTEGER,
+                ocr_completed_at TIMESTAMPTZ,
+                ocr_error TEXT,
+                ocr_failure_reason VARCHAR(255),
+                created_at TIMESTAMPTZ DEFAULT NOW(),
+                updated_at TIMESTAMPTZ DEFAULT NOW()
+            )
+        "#)
+        .execute(&pool)
+        .await
+        .expect("Failed to create documents table");
+
+        (container, pool)
+    }
+
+    async fn create_test_user(pool: &PgPool) -> Uuid {
+        let user_id = Uuid::new_v4();
+
+        sqlx::query("INSERT INTO users (id, username, email, password_hash, role) VALUES ($1, $2, $3, 'test_hash', 'user')")
+            .bind(user_id)
+            .bind(format!("test_user_{}", user_id.simple().to_string()[0..8].to_string()))
+            .bind(format!("test_{}@test.com", user_id.simple().to_string()[0..8].to_string()))
+            .execute(pool)
+            .await
+            .expect("Failed to create test user");
+
+        user_id
+    }
+
+    async fn create_test_document(pool: &PgPool, user_id: Uuid, ocr_status: &str) -> Uuid {
+        let doc_id = Uuid::new_v4();
+
+        sqlx::query(r#"
+            INSERT INTO documents (
+                id, filename, original_filename, user_id, mime_type, file_size,
+                ocr_status, created_at, updated_at
+            ) VALUES ($1, $2, $3, $4, 'application/pdf', 1024, $5, NOW(), NOW())
+        "#)
+        .bind(doc_id)
+        .bind(format!("test_{}.pdf", doc_id.simple().to_string()[0..8].to_string()))
+        .bind(format!("original_{}.pdf", doc_id.simple().to_string()[0..8].to_string()))
+        .bind(user_id)
+        .bind(ocr_status)
+        .execute(pool)
+        .await
+        .expect("Failed to create test document");
+
+        doc_id
+    }
+
+    #[tokio::test]
+    async fn test_sql_query_only_returns_failed_documents() {
+        let (_container, pool) = setup_test_db().await;
+        let user_id = create_test_user(&pool).await;
+
+        // Create documents with different OCR statuses
+        let failed_doc1 = create_test_document(&pool, user_id, "failed").await;
+        let failed_doc2 = create_test_document(&pool, user_id, "failed").await;
+        let completed_doc = create_test_document(&pool, user_id, "completed").await;
+        let pending_doc = create_test_document(&pool, user_id, "pending").await;
+        let processing_doc = create_test_document(&pool, user_id, "processing").await;
+
+        // Test the corrected SQL query that should be used in get_all_failed_ocr_documents
+        let documents = sqlx::query_as::<_, DocumentInfo>(
+            r#"
+            SELECT id, filename, file_size, mime_type, ocr_failure_reason
+            FROM documents
+            WHERE ocr_status = 'failed'
+            AND ($1::uuid IS NULL OR user_id = $1)
+            ORDER BY created_at DESC
+            "#
+        )
+        .bind(Some(user_id))
+        .fetch_all(&pool)
+        .await
+        .expect("Failed to execute SQL query");
+
+        // Should only return the 2 failed documents
+        assert_eq!(documents.len(), 2, "SQL query should only return failed documents, but returned {}", documents.len());
+
+        let returned_ids: Vec<Uuid> = documents.iter().map(|d| d.id).collect();
+        assert!(returned_ids.contains(&failed_doc1), "Should contain first failed document");
+        assert!(returned_ids.contains(&failed_doc2), "Should contain second failed document");
+        assert!(!returned_ids.contains(&completed_doc), "Should NOT contain completed document");
+        assert!(!returned_ids.contains(&pending_doc), "Should NOT contain pending document");
+        assert!(!returned_ids.contains(&processing_doc), "Should NOT contain processing document");
processing document"); + } + + #[tokio::test] + async fn test_broken_sql_query_returns_all_documents() { + let (_container, pool) = setup_test_db().await; + let user_id = create_test_user(&pool).await; + + // Create documents with different OCR statuses + let _failed_doc1 = create_test_document(&pool, user_id, "failed").await; + let _failed_doc2 = create_test_document(&pool, user_id, "failed").await; + let _completed_doc = create_test_document(&pool, user_id, "completed").await; + let _pending_doc = create_test_document(&pool, user_id, "pending").await; + let _processing_doc = create_test_document(&pool, user_id, "processing").await; + + // Test the BROKEN SQL query (what it was before the fix) + let documents = sqlx::query_as::<_, DocumentInfo>( + r#" + SELECT id, filename, file_size, mime_type, ocr_failure_reason + FROM documents + WHERE ($1::uuid IS NULL OR user_id = $1) + ORDER BY created_at DESC + "# + ) + .bind(Some(user_id)) + .fetch_all(&pool) + .await + .expect("Failed to execute broken SQL query"); + + // This demonstrates the bug - it returns ALL documents (5), not just failed ones (2) + assert_eq!(documents.len(), 5, "Broken SQL query returns all documents, demonstrating the bug"); + } + + #[tokio::test] + async fn test_database_allows_completed_to_pending_transition() { + let (_container, pool) = setup_test_db().await; + let user_id = create_test_user(&pool).await; + let doc_id = create_test_document(&pool, user_id, "completed").await; + + // Make it a "real" completed document with OCR data + sqlx::query(r#" + UPDATE documents + SET ocr_text = 'Sample OCR text content', + ocr_confidence = 95.5, + ocr_word_count = 15, + ocr_completed_at = NOW() + WHERE id = $1 + "#) + .bind(doc_id) + .execute(&pool) + .await + .expect("Failed to set OCR completion data"); + + // Test: completed -> pending (should work after applying the migration) + let result = sqlx::query(r#" + UPDATE documents + SET ocr_status = 'pending', + ocr_text = NULL, + ocr_error = NULL, + ocr_failure_reason = NULL, + ocr_confidence = NULL, + ocr_word_count = NULL, + ocr_processing_time_ms = NULL, + ocr_completed_at = NULL, + updated_at = NOW() + WHERE id = $1 + "#) + .bind(doc_id) + .execute(&pool) + .await; + + // This test will pass after the migration is applied + match result { + Ok(_) => { + // Verify the update succeeded + let new_status: String = sqlx::query_scalar("SELECT ocr_status FROM documents WHERE id = $1") + .bind(doc_id) + .fetch_one(&pool) + .await + .expect("Failed to get updated status"); + assert_eq!(new_status, "pending", "Document should be reset to pending status"); + println!("✅ Database trigger correctly allows completed -> pending transition for retry"); + } + Err(e) => { + let error_msg = e.to_string(); + if error_msg.contains("Cannot modify completed OCR data") { + panic!("❌ REGRESSION: Database trigger still blocking retry operations. 
Apply migration 20250702000002_fix_ocr_retry_guardrails.sql"); + } else { + panic!("❌ Unexpected database error: {}", error_msg); + } + } + } + } + + #[tokio::test] + async fn test_database_blocks_invalid_completed_transitions() { + let (_container, pool) = setup_test_db().await; + let user_id = create_test_user(&pool).await; + let doc_id = create_test_document(&pool, user_id, "completed").await; + + // Set OCR completion data + sqlx::query(r#" + UPDATE documents + SET ocr_text = 'Sample text', + ocr_confidence = 90.0, + ocr_completed_at = NOW() + WHERE id = $1 + "#) + .bind(doc_id) + .execute(&pool) + .await + .expect("Failed to set OCR data"); + + // Test invalid transitions that should still be blocked + let invalid_transitions = ["processing", "failed"]; + + for invalid_status in invalid_transitions { + let result = sqlx::query("UPDATE documents SET ocr_status = $1 WHERE id = $2") + .bind(invalid_status) + .bind(doc_id) + .execute(&pool) + .await; + + assert!(result.is_err(), "Database trigger should still block completed -> {} transition", invalid_status); + + let error_msg = result.err().unwrap().to_string(); + assert!(error_msg.contains("Cannot modify completed OCR data"), + "Error should mention OCR data protection for transition to {}: {}", invalid_status, error_msg); + } + } + + #[tokio::test] + async fn test_admin_vs_user_document_visibility() { + let (_container, pool) = setup_test_db().await; + + // Create admin and regular users + let admin_id = Uuid::new_v4(); + let user1_id = Uuid::new_v4(); + let user2_id = Uuid::new_v4(); + + // Create admin user + sqlx::query("INSERT INTO users (id, username, email, password_hash, role) VALUES ($1, 'admin', 'admin@test.com', 'test', 'admin')") + .bind(admin_id) + .execute(&pool) + .await + .expect("Failed to create admin user"); + + // Create regular users + for (user_id, username) in [(user1_id, "user1"), (user2_id, "user2")] { + sqlx::query("INSERT INTO users (id, username, email, password_hash, role) VALUES ($1, $2, $3, 'test', 'user')") + .bind(user_id) + .bind(username) + .bind(format!("{}@test.com", username)) + .execute(&pool) + .await + .expect("Failed to create user"); + } + + // Create failed documents for different users + let _admin_failed_doc = create_test_document(&pool, admin_id, "failed").await; + let _user1_failed_doc = create_test_document(&pool, user1_id, "failed").await; + let _user2_failed_doc = create_test_document(&pool, user2_id, "failed").await; + + // Test admin sees all failed documents (user_filter = NULL) + let admin_docs = sqlx::query_as::<_, DocumentInfo>( + r#" + SELECT id, filename, file_size, mime_type, ocr_failure_reason + FROM documents + WHERE ocr_status = 'failed' + AND ($1::uuid IS NULL OR user_id = $1) + ORDER BY created_at DESC + "# + ) + .bind(None::) // Admin filter - NULL means see all + .fetch_all(&pool) + .await + .expect("Failed to fetch admin documents"); + assert_eq!(admin_docs.len(), 3, "Admin should see all 3 failed documents"); + + // Test regular user sees only their own + let user1_docs = sqlx::query_as::<_, DocumentInfo>( + r#" + SELECT id, filename, file_size, mime_type, ocr_failure_reason + FROM documents + WHERE ocr_status = 'failed' + AND ($1::uuid IS NULL OR user_id = $1) + ORDER BY created_at DESC + "# + ) + .bind(Some(user1_id)) // User filter - only their documents + .fetch_all(&pool) + .await + .expect("Failed to fetch user documents"); + assert_eq!(user1_docs.len(), 1, "User should only see their own failed document"); + } +} \ No newline at end of file diff --git 
diff --git a/tests/integration_ocr_retry_bug_regression_tests.rs b/tests/integration_ocr_retry_bug_regression_tests.rs
new file mode 100644
index 0000000..9b5003d
--- /dev/null
+++ b/tests/integration_ocr_retry_bug_regression_tests.rs
@@ -0,0 +1,333 @@
+use reqwest::Client;
+use serde_json::{json, Value};
+use std::time::Duration;
+use uuid::Uuid;
+
+use readur::models::{CreateUser, LoginRequest, LoginResponse, UserRole};
+
+fn get_base_url() -> String {
+    std::env::var("API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string())
+}
+
+const TIMEOUT: Duration = Duration::from_secs(60);
+
+struct OcrRetryRegressionTestHelper {
+    client: Client,
+    token: String,
+}
+
+impl OcrRetryRegressionTestHelper {
+    async fn new() -> Result<Self, Box<dyn std::error::Error>> {
+        let client = Client::new();
+
+        // Health check
+        let health_check = client
+            .get(&format!("{}/api/health", get_base_url()))
+            .timeout(Duration::from_secs(10))
+            .send()
+            .await;
+
+        match health_check {
+            Ok(response) => {
+                if !response.status().is_success() {
+                    let status = response.status();
+                    let text = response.text().await.unwrap_or_else(|_| "Unable to read response".to_string());
+                    return Err(format!("Health check failed with status {}: {}. Is the server running at {}?", status, text, get_base_url()).into());
+                }
+                println!("✅ Server health check passed at {}", get_base_url());
+            }
+            Err(e) => {
+                eprintln!("❌ Cannot connect to server at {}: {}", get_base_url(), e);
+                eprintln!("💡 To run integration tests, start the server first:");
+                eprintln!(" cargo run");
+                eprintln!(" Then run tests in another terminal:");
+                eprintln!(" cargo test --test integration_ocr_retry_bug_regression_tests");
+                return Err(format!("Server not reachable: {}", e).into());
+            }
+        }
+
+        // Create and login as admin user
+        let test_id = Uuid::new_v4().simple().to_string();
+        let username = format!("test_admin_{}", &test_id[0..8]);
+        let password = "test_password_123";
+        let email = format!("{}@test.com", username);
+
+        let create_user = CreateUser {
+            username: username.clone(),
+            password: password.to_string(),
+            email: email.clone(),
+            role: Some(UserRole::Admin),
+        };
+
+        let _create_response = client
+            .post(&format!("{}/api/users", get_base_url()))
+            .json(&create_user)
+            .timeout(TIMEOUT)
+            .send()
+            .await?;
+
+        let login_request = LoginRequest {
+            username: username.clone(),
+            password: password.to_string(),
+        };
+
+        let login_response = client
+            .post(&format!("{}/api/auth/login", get_base_url()))
+            .json(&login_request)
+            .timeout(TIMEOUT)
+            .send()
+            .await?;
+
+        if !login_response.status().is_success() {
+            let status = login_response.status();
+            let error_text = login_response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
+            return Err(format!("Login failed with status {}: {}", status, error_text).into());
+        }
+
+        let login_data: LoginResponse = login_response.json().await?;
+        let token = login_data.token;
+
+        Ok(Self { client, token })
+    }
+
+    async fn create_test_document(&self, filename: &str, ocr_status: &str) -> Result<Uuid, Box<dyn std::error::Error>> {
+        // Create a document directly in the database via API
+        let document_data = json!({
+            "filename": filename,
+            "original_filename": filename,
+            "mime_type": "application/pdf",
+            "file_size": 1024,
+            "ocr_status": ocr_status
+        });
+
+        let response = self.client
+            .post(&format!("{}/api/internal/test/documents", get_base_url()))
+            .header("Authorization", format!("Bearer {}", self.token))
+            .json(&document_data)
+            .timeout(TIMEOUT)
+            .send()
+            .await?;
+
+        if !response.status().is_success() {
+            let status = response.status();
+            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
+            return Err(format!("Failed to create test document with status {}: {}", status, error_text).into());
+        }
+
+        let response_data: Value = response.json().await?;
+        let doc_id_str = response_data["id"].as_str()
+            .ok_or("Document ID not found in response")?;
+        let doc_id = Uuid::parse_str(doc_id_str)?;
+
+        Ok(doc_id)
+    }
+
+    async fn get_bulk_retry_preview(&self, mode: &str) -> Result<Value, Box<dyn std::error::Error>> {
+        let request_body = json!({
+            "mode": mode,
+            "preview_only": true
+        });
+
+        let response = self.client
+            .post(&format!("{}/api/documents/ocr/bulk-retry", get_base_url()))
+            .header("Authorization", format!("Bearer {}", self.token))
+            .json(&request_body)
+            .timeout(TIMEOUT)
+            .send()
+            .await?;
+
+        if !response.status().is_success() {
+            let status = response.status();
+            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
+            return Err(format!("Bulk retry preview failed with status {}: {}", status, error_text).into());
+        }
+
+        let response_data: Value = response.json().await?;
+        Ok(response_data)
+    }
+
+    async fn execute_bulk_retry(&self, mode: &str) -> Result<Value, Box<dyn std::error::Error>> {
+        let request_body = json!({
+            "mode": mode,
+            "preview_only": false
+        });
+
+        let response = self.client
+            .post(&format!("{}/api/documents/ocr/bulk-retry", get_base_url()))
+            .header("Authorization", format!("Bearer {}", self.token))
+            .json(&request_body)
+            .timeout(TIMEOUT)
+            .send()
+            .await?;
+
+        if !response.status().is_success() {
+            let status = response.status();
+            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
+            return Err(format!("Bulk retry execution failed with status {}: {}", status, error_text).into());
+        }
+
+        let response_data: Value = response.json().await?;
+        Ok(response_data)
+    }
+}
+
+#[tokio::test]
+async fn test_bulk_retry_only_targets_failed_documents_regression() {
+    let helper = match OcrRetryRegressionTestHelper::new().await {
+        Ok(h) => h,
+        Err(e) => {
+            eprintln!("⚠️ Skipping test due to setup failure: {}", e);
+            return;
+        }
+    };
+
+    println!("🧪 Testing regression: Bulk retry should only target failed documents");
+
+    // Create a mix of documents with different OCR statuses
+    let failed_doc1 = helper.create_test_document("failed_doc_1.pdf", "failed").await.expect("Failed to create failed document 1");
+    let failed_doc2 = helper.create_test_document("failed_doc_2.pdf", "failed").await.expect("Failed to create failed document 2");
+    let completed_doc = helper.create_test_document("completed_doc.pdf", "completed").await.expect("Failed to create completed document");
+    let pending_doc = helper.create_test_document("pending_doc.pdf", "pending").await.expect("Failed to create pending document");
+
+    println!("📄 Created test documents:");
+    println!(" - Failed: {}, {}", failed_doc1, failed_doc2);
+    println!(" - Completed: {}", completed_doc);
+    println!(" - Pending: {}", pending_doc);
+
+    // Test 1: Preview should only show failed documents
+    println!("🔍 Testing bulk retry preview...");
+    let preview_result = helper.get_bulk_retry_preview("all").await.expect("Failed to get preview");
+
+    let matched_count = preview_result["matched_count"].as_u64().expect("matched_count not found");
+    assert_eq!(matched_count, 2, "Preview should only match 2 failed documents, but matched {}", matched_count);
+
+    let queued_count = preview_result["queued_count"].as_u64().unwrap_or(0);
+    assert_eq!(queued_count, 0, "Preview should not queue any documents, but queued {}", queued_count);
+
+    println!("✅ Preview correctly identified {} failed documents", matched_count);
+
+    // Test 2: Execution should only process failed documents and not error on completed ones
+    println!("🚀 Testing bulk retry execution...");
+    let execution_result = helper.execute_bulk_retry("all").await.expect("Failed to execute bulk retry");
+
+    let execution_matched_count = execution_result["matched_count"].as_u64().expect("matched_count not found in execution");
+    let execution_queued_count = execution_result["queued_count"].as_u64().expect("queued_count not found in execution");
+
+    assert_eq!(execution_matched_count, 2, "Execution should only match 2 failed documents, but matched {}", execution_matched_count);
+    assert_eq!(execution_queued_count, 2, "Execution should queue 2 failed documents, but queued {}", execution_queued_count);
+
+    let success = execution_result["success"].as_bool().expect("success not found in execution");
+    assert!(success, "Bulk retry execution should succeed");
+
+    println!("✅ Execution successfully processed {} failed documents", execution_queued_count);
+    println!("🎉 Regression test passed: Bulk retry correctly targets only failed documents");
+}
+
+#[tokio::test]
+async fn test_bulk_retry_no_database_constraint_errors() {
+    let helper = match OcrRetryRegressionTestHelper::new().await {
+        Ok(h) => h,
+        Err(e) => {
+            eprintln!("⚠️ Skipping test due to setup failure: {}", e);
+            return;
+        }
+    };
+
+    println!("🧪 Testing regression: No database constraint errors during retry");
+
+    // Create only failed documents to ensure we test the constraint logic
+    let failed_doc1 = helper.create_test_document("constraint_test_1.pdf", "failed").await.expect("Failed to create test document");
+    let failed_doc2 = helper.create_test_document("constraint_test_2.pdf", "failed").await.expect("Failed to create test document");
+
+    println!("📄 Created {} failed documents for constraint testing", 2);
+
+    // Execute bulk retry - this should not produce any database constraint errors
+    println!("🚀 Executing bulk retry to test database constraints...");
+    let result = helper.execute_bulk_retry("all").await;
+
+    match result {
+        Ok(response) => {
+            let success = response["success"].as_bool().expect("success field not found");
+            let queued_count = response["queued_count"].as_u64().expect("queued_count not found");
+            let message = response["message"].as_str().unwrap_or("No message");
+
+            assert!(success, "Bulk retry should succeed without constraint errors");
+            assert_eq!(queued_count, 2, "Should queue both failed documents");
+
+            println!("✅ Bulk retry succeeded: queued {} documents", queued_count);
+            println!("📝 Response message: {}", message);
+        }
+        Err(e) => {
+            // Check if the error contains the specific constraint violation we were experiencing
+            let error_msg = e.to_string();
+            if error_msg.contains("Cannot modify completed OCR data") {
+                panic!("❌ REGRESSION DETECTED: Database constraint error occurred: {}", error_msg);
+            } else {
+                panic!("❌ Unexpected error during bulk retry: {}", error_msg);
+            }
+        }
+    }
+
+    println!("🎉 Regression test passed: No database constraint errors during retry");
+}
+
+#[tokio::test]
+async fn test_bulk_retry_with_mixed_documents_no_errors() {
+    let helper = match OcrRetryRegressionTestHelper::new().await {
+        Ok(h) => h,
+        Err(e) => {
+            eprintln!("⚠️ Skipping test due to setup failure: {}", e);
+            return;
+        }
+    };
+
+    println!("🧪 Testing regression: Mixed document statuses should not cause errors");
+
+    // Create a realistic mix of documents that might exist in production
+    let mut created_docs = Vec::new();
+
+    // Create failed documents (should be included in retry)
+    for i in 0..3 {
+        let doc_id = helper.create_test_document(&format!("failed_{}.pdf", i), "failed").await.expect("Failed to create failed document");
+        created_docs.push((doc_id, "failed"));
+    }
+
+    // Create completed documents (should be ignored, not cause errors)
+    for i in 0..10 {
+        let doc_id = helper.create_test_document(&format!("completed_{}.pdf", i), "completed").await.expect("Failed to create completed document");
+        created_docs.push((doc_id, "completed"));
+    }
+
+    // Create pending documents (should be ignored)
+    for i in 0..5 {
+        let doc_id = helper.create_test_document(&format!("pending_{}.pdf", i), "pending").await.expect("Failed to create pending document");
+        created_docs.push((doc_id, "pending"));
+    }
+
+    println!("📄 Created {} total documents with mixed statuses", created_docs.len());
+    println!(" - 3 failed (should be retried)");
+    println!(" - 10 completed (should be ignored)");
+    println!(" - 5 pending (should be ignored)");
+
+    // Test preview first
+    println!("🔍 Testing preview with mixed document statuses...");
+    let preview_result = helper.get_bulk_retry_preview("all").await.expect("Failed to get preview");
+
+    let preview_matched = preview_result["matched_count"].as_u64().expect("matched_count not found");
+    assert_eq!(preview_matched, 3, "Preview should only match 3 failed documents from mix");
+
+    // Test execution
+    println!("🚀 Testing execution with mixed document statuses...");
+    let execution_result = helper.execute_bulk_retry("all").await.expect("Failed to execute bulk retry");
+
+    let success = execution_result["success"].as_bool().expect("success not found");
+    let matched_count = execution_result["matched_count"].as_u64().expect("matched_count not found");
+    let queued_count = execution_result["queued_count"].as_u64().expect("queued_count not found");
+
+    assert!(success, "Bulk retry should succeed with mixed document statuses");
+    assert_eq!(matched_count, 3, "Should only match 3 failed documents from the mix");
+    assert_eq!(queued_count, 3, "Should queue all 3 failed documents");
+
+    println!("✅ Successfully handled mixed documents: matched {}, queued {}", matched_count, queued_count);
+    println!("🎉 Regression test passed: Mixed document statuses handled correctly");
+}
\ No newline at end of file