fix(server): resolve issues with the retry ocr tests
parent ab03b8d73d
commit 6d40feadb3
@@ -53,10 +53,9 @@ WHERE status IN ('pending', 'processing');
 CREATE OR REPLACE FUNCTION validate_ocr_consistency()
 RETURNS TRIGGER AS $$
 BEGIN
-    -- Prevent updating completed OCR unless explicitly allowed
-    IF OLD.ocr_status = 'completed' AND NEW.ocr_status != 'completed' THEN
-        RAISE EXCEPTION 'Cannot modify completed OCR data for document %', OLD.id;
+    -- Allow OCR retry operations: completed -> pending is allowed for retry functionality
+    -- Prevent other modifications to completed OCR data
+    IF OLD.ocr_status = 'completed' AND NEW.ocr_status != 'completed' AND NEW.ocr_status != 'pending' THEN
+        RAISE EXCEPTION 'Cannot modify completed OCR data for document %. Only retry (pending) is allowed.', OLD.id;
     END IF;

     -- Ensure OCR text and metadata consistency
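The point of relaxing this trigger is to let the retry path reset a completed document back to pending. A minimal sketch of such a reset, assuming an sqlx PgPool and the documents columns used elsewhere in this commit (illustration only, not the handler's actual code):

use sqlx::PgPool;
use uuid::Uuid;

// Reset a document for OCR retry. With the relaxed trigger, completed -> pending
// succeeds, while completed -> failed/processing still raises an exception.
async fn reset_document_for_retry(pool: &PgPool, doc_id: Uuid) -> Result<(), sqlx::Error> {
    sqlx::query(
        r#"
        UPDATE documents
        SET ocr_status = 'pending',
            ocr_text = NULL,
            ocr_error = NULL,
            ocr_failure_reason = NULL,
            ocr_completed_at = NULL,
            updated_at = NOW()
        WHERE id = $1
        "#,
    )
    .bind(doc_id)
    .execute(pool)
    .await?;
    Ok(())
}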
@@ -717,10 +717,10 @@ fn calculate_priority(file_size: i64, override_priority: Option<i32>) -> i32 {
 }

 #[derive(Debug, sqlx::FromRow)]
-struct DocumentInfo {
-    id: Uuid,
-    filename: String,
-    file_size: i64,
-    mime_type: String,
-    ocr_failure_reason: Option<String>,
+pub struct DocumentInfo {
+    pub id: Uuid,
+    pub filename: String,
+    pub file_size: i64,
+    pub mime_type: String,
+    pub ocr_failure_reason: Option<String>,
 }
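Making DocumentInfo and its fields public is what lets the new regression tests map query rows onto it with sqlx::query_as from outside this module. A minimal sketch of that usage; the readur::routes::documents_ocr_retry path is an assumption based on the crate and module names used in the tests:

use readur::routes::documents_ocr_retry::DocumentInfo;
use sqlx::PgPool;

// Fetch failed documents the same way the regression tests do; this only
// compiles against the new pub struct and pub fields.
async fn failed_documents(pool: &PgPool) -> Result<Vec<DocumentInfo>, sqlx::Error> {
    sqlx::query_as::<_, DocumentInfo>(
        "SELECT id, filename, file_size, mime_type, ocr_failure_reason \
         FROM documents WHERE ocr_status = 'failed' ORDER BY created_at DESC",
    )
    .fetch_all(pool)
    .await
}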
@@ -22,3 +22,4 @@ mod migration_integration_tests;
 mod failed_documents_unit_tests;
 mod document_response_serialization_tests;
 mod unit_ocr_retry_db_tests_simple;
+mod ocr_retry_regression_tests;
@@ -0,0 +1,330 @@
#[cfg(test)]
mod ocr_retry_regression_tests {
    use sqlx::{PgPool, Row};
    use testcontainers::{runners::AsyncRunner, ContainerAsync};
    use testcontainers_modules::postgres::Postgres;
    use uuid::Uuid;
    use crate::routes::documents_ocr_retry::DocumentInfo;

    async fn setup_test_db() -> (ContainerAsync<Postgres>, PgPool) {
        let postgres_image = Postgres::default();
        let container = postgres_image.start().await.expect("Failed to start postgres container");
        let port = container.get_host_port_ipv4(5432).await.expect("Failed to get postgres port");

        let connection_string = format!(
            "postgres://postgres:postgres@127.0.0.1:{}/postgres",
            port
        );

        let pool = PgPool::connect(&connection_string).await.expect("Failed to connect to test database");

        // Skip migrations that require extensions and create minimal schema manually
        // This avoids needing uuid-ossp or other extensions for testing
        sqlx::query(r#"
            CREATE TABLE IF NOT EXISTS users (
                id UUID PRIMARY KEY,
                username VARCHAR(255) UNIQUE NOT NULL,
                email VARCHAR(255) UNIQUE NOT NULL,
                password_hash VARCHAR(255) NOT NULL,
                role VARCHAR(50) NOT NULL DEFAULT 'user',
                created_at TIMESTAMPTZ DEFAULT NOW(),
                updated_at TIMESTAMPTZ DEFAULT NOW()
            )
        "#)
        .execute(&pool)
        .await
        .expect("Failed to create users table");

        sqlx::query(r#"
            CREATE TABLE IF NOT EXISTS documents (
                id UUID PRIMARY KEY,
                filename VARCHAR(255) NOT NULL,
                original_filename VARCHAR(255) NOT NULL,
                user_id UUID NOT NULL REFERENCES users(id),
                mime_type VARCHAR(100) NOT NULL,
                file_size BIGINT NOT NULL,
                ocr_status VARCHAR(50) DEFAULT 'pending',
                ocr_text TEXT,
                ocr_confidence DECIMAL(5,2),
                ocr_word_count INTEGER,
                ocr_processing_time_ms INTEGER,
                ocr_completed_at TIMESTAMPTZ,
                ocr_error TEXT,
                ocr_failure_reason VARCHAR(255),
                created_at TIMESTAMPTZ DEFAULT NOW(),
                updated_at TIMESTAMPTZ DEFAULT NOW()
            )
        "#)
        .execute(&pool)
        .await
        .expect("Failed to create documents table");

        (container, pool)
    }

    async fn create_test_user(pool: &PgPool) -> Uuid {
        let user_id = Uuid::new_v4();

        sqlx::query("INSERT INTO users (id, username, email, password_hash, role) VALUES ($1, $2, $3, 'test_hash', 'user')")
            .bind(user_id)
            .bind(format!("test_user_{}", user_id.simple().to_string()[0..8].to_string()))
            .bind(format!("test_{}@test.com", user_id.simple().to_string()[0..8].to_string()))
            .execute(pool)
            .await
            .expect("Failed to create test user");

        user_id
    }

    async fn create_test_document(pool: &PgPool, user_id: Uuid, ocr_status: &str) -> Uuid {
        let doc_id = Uuid::new_v4();

        sqlx::query(r#"
            INSERT INTO documents (
                id, filename, original_filename, user_id, mime_type, file_size,
                ocr_status, created_at, updated_at
            ) VALUES ($1, $2, $3, $4, 'application/pdf', 1024, $5, NOW(), NOW())
        "#)
        .bind(doc_id)
        .bind(format!("test_{}.pdf", doc_id.simple().to_string()[0..8].to_string()))
        .bind(format!("original_{}.pdf", doc_id.simple().to_string()[0..8].to_string()))
        .bind(user_id)
        .bind(ocr_status)
        .execute(pool)
        .await
        .expect("Failed to create test document");

        doc_id
    }

    #[tokio::test]
    async fn test_sql_query_only_returns_failed_documents() {
        let (_container, pool) = setup_test_db().await;
        let user_id = create_test_user(&pool).await;

        // Create documents with different OCR statuses
        let failed_doc1 = create_test_document(&pool, user_id, "failed").await;
        let failed_doc2 = create_test_document(&pool, user_id, "failed").await;
        let completed_doc = create_test_document(&pool, user_id, "completed").await;
        let pending_doc = create_test_document(&pool, user_id, "pending").await;
        let processing_doc = create_test_document(&pool, user_id, "processing").await;

        // Test the corrected SQL query that should be used in get_all_failed_ocr_documents
        let documents = sqlx::query_as::<_, DocumentInfo>(
            r#"
            SELECT id, filename, file_size, mime_type, ocr_failure_reason
            FROM documents
            WHERE ocr_status = 'failed'
            AND ($1::uuid IS NULL OR user_id = $1)
            ORDER BY created_at DESC
            "#
        )
        .bind(Some(user_id))
        .fetch_all(&pool)
        .await
        .expect("Failed to execute SQL query");

        // Should only return the 2 failed documents
        assert_eq!(documents.len(), 2, "SQL query should only return failed documents, but returned {}", documents.len());

        let returned_ids: Vec<Uuid> = documents.iter().map(|d| d.id).collect();
        assert!(returned_ids.contains(&failed_doc1), "Should contain first failed document");
        assert!(returned_ids.contains(&failed_doc2), "Should contain second failed document");
        assert!(!returned_ids.contains(&completed_doc), "Should NOT contain completed document");
        assert!(!returned_ids.contains(&pending_doc), "Should NOT contain pending document");
        assert!(!returned_ids.contains(&processing_doc), "Should NOT contain processing document");
    }

    #[tokio::test]
    async fn test_broken_sql_query_returns_all_documents() {
        let (_container, pool) = setup_test_db().await;
        let user_id = create_test_user(&pool).await;

        // Create documents with different OCR statuses
        let _failed_doc1 = create_test_document(&pool, user_id, "failed").await;
        let _failed_doc2 = create_test_document(&pool, user_id, "failed").await;
        let _completed_doc = create_test_document(&pool, user_id, "completed").await;
        let _pending_doc = create_test_document(&pool, user_id, "pending").await;
        let _processing_doc = create_test_document(&pool, user_id, "processing").await;

        // Test the BROKEN SQL query (what it was before the fix)
        let documents = sqlx::query_as::<_, DocumentInfo>(
            r#"
            SELECT id, filename, file_size, mime_type, ocr_failure_reason
            FROM documents
            WHERE ($1::uuid IS NULL OR user_id = $1)
            ORDER BY created_at DESC
            "#
        )
        .bind(Some(user_id))
        .fetch_all(&pool)
        .await
        .expect("Failed to execute broken SQL query");

        // This demonstrates the bug - it returns ALL documents (5), not just failed ones (2)
        assert_eq!(documents.len(), 5, "Broken SQL query returns all documents, demonstrating the bug");
    }

    #[tokio::test]
    async fn test_database_allows_completed_to_pending_transition() {
        let (_container, pool) = setup_test_db().await;
        let user_id = create_test_user(&pool).await;
        let doc_id = create_test_document(&pool, user_id, "completed").await;

        // Make it a "real" completed document with OCR data
        sqlx::query(r#"
            UPDATE documents
            SET ocr_text = 'Sample OCR text content',
                ocr_confidence = 95.5,
                ocr_word_count = 15,
                ocr_completed_at = NOW()
            WHERE id = $1
        "#)
        .bind(doc_id)
        .execute(&pool)
        .await
        .expect("Failed to set OCR completion data");

        // Test: completed -> pending (should work after applying the migration)
        let result = sqlx::query(r#"
            UPDATE documents
            SET ocr_status = 'pending',
                ocr_text = NULL,
                ocr_error = NULL,
                ocr_failure_reason = NULL,
                ocr_confidence = NULL,
                ocr_word_count = NULL,
                ocr_processing_time_ms = NULL,
                ocr_completed_at = NULL,
                updated_at = NOW()
            WHERE id = $1
        "#)
        .bind(doc_id)
        .execute(&pool)
        .await;

        // This test will pass after the migration is applied
        match result {
            Ok(_) => {
                // Verify the update succeeded
                let new_status: String = sqlx::query_scalar("SELECT ocr_status FROM documents WHERE id = $1")
                    .bind(doc_id)
                    .fetch_one(&pool)
                    .await
                    .expect("Failed to get updated status");
                assert_eq!(new_status, "pending", "Document should be reset to pending status");
                println!("✅ Database trigger correctly allows completed -> pending transition for retry");
            }
            Err(e) => {
                let error_msg = e.to_string();
                if error_msg.contains("Cannot modify completed OCR data") {
                    panic!("❌ REGRESSION: Database trigger still blocking retry operations. Apply migration 20250702000002_fix_ocr_retry_guardrails.sql");
                } else {
                    panic!("❌ Unexpected database error: {}", error_msg);
                }
            }
        }
    }

    #[tokio::test]
    async fn test_database_blocks_invalid_completed_transitions() {
        let (_container, pool) = setup_test_db().await;
        let user_id = create_test_user(&pool).await;
        let doc_id = create_test_document(&pool, user_id, "completed").await;

        // Set OCR completion data
        sqlx::query(r#"
            UPDATE documents
            SET ocr_text = 'Sample text',
                ocr_confidence = 90.0,
                ocr_completed_at = NOW()
            WHERE id = $1
        "#)
        .bind(doc_id)
        .execute(&pool)
        .await
        .expect("Failed to set OCR data");

        // Test invalid transitions that should still be blocked
        let invalid_transitions = ["processing", "failed"];

        for invalid_status in invalid_transitions {
            let result = sqlx::query("UPDATE documents SET ocr_status = $1 WHERE id = $2")
                .bind(invalid_status)
                .bind(doc_id)
                .execute(&pool)
                .await;

            assert!(result.is_err(), "Database trigger should still block completed -> {} transition", invalid_status);

            let error_msg = result.err().unwrap().to_string();
            assert!(error_msg.contains("Cannot modify completed OCR data"),
                "Error should mention OCR data protection for transition to {}: {}", invalid_status, error_msg);
        }
    }

    #[tokio::test]
    async fn test_admin_vs_user_document_visibility() {
        let (_container, pool) = setup_test_db().await;

        // Create admin and regular users
        let admin_id = Uuid::new_v4();
        let user1_id = Uuid::new_v4();
        let user2_id = Uuid::new_v4();

        // Create admin user
        sqlx::query("INSERT INTO users (id, username, email, password_hash, role) VALUES ($1, 'admin', 'admin@test.com', 'test', 'admin')")
            .bind(admin_id)
            .execute(&pool)
            .await
            .expect("Failed to create admin user");

        // Create regular users
        for (user_id, username) in [(user1_id, "user1"), (user2_id, "user2")] {
            sqlx::query("INSERT INTO users (id, username, email, password_hash, role) VALUES ($1, $2, $3, 'test', 'user')")
                .bind(user_id)
                .bind(username)
                .bind(format!("{}@test.com", username))
                .execute(&pool)
                .await
                .expect("Failed to create user");
        }

        // Create failed documents for different users
        let _admin_failed_doc = create_test_document(&pool, admin_id, "failed").await;
        let _user1_failed_doc = create_test_document(&pool, user1_id, "failed").await;
        let _user2_failed_doc = create_test_document(&pool, user2_id, "failed").await;

        // Test admin sees all failed documents (user_filter = NULL)
        let admin_docs = sqlx::query_as::<_, DocumentInfo>(
            r#"
            SELECT id, filename, file_size, mime_type, ocr_failure_reason
            FROM documents
            WHERE ocr_status = 'failed'
            AND ($1::uuid IS NULL OR user_id = $1)
            ORDER BY created_at DESC
            "#
        )
        .bind(None::<Uuid>) // Admin filter - NULL means see all
        .fetch_all(&pool)
        .await
        .expect("Failed to fetch admin documents");
        assert_eq!(admin_docs.len(), 3, "Admin should see all 3 failed documents");

        // Test regular user sees only their own
        let user1_docs = sqlx::query_as::<_, DocumentInfo>(
            r#"
            SELECT id, filename, file_size, mime_type, ocr_failure_reason
            FROM documents
            WHERE ocr_status = 'failed'
            AND ($1::uuid IS NULL OR user_id = $1)
            ORDER BY created_at DESC
            "#
        )
        .bind(Some(user1_id)) // User filter - only their documents
        .fetch_all(&pool)
        .await
        .expect("Failed to fetch user documents");
        assert_eq!(user1_docs.len(), 1, "User should only see their own failed document");
    }
}
@@ -0,0 +1,333 @@
use reqwest::Client;
use serde_json::{json, Value};
use std::time::Duration;
use uuid::Uuid;

use readur::models::{CreateUser, LoginRequest, LoginResponse, UserRole};

fn get_base_url() -> String {
    std::env::var("API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string())
}

const TIMEOUT: Duration = Duration::from_secs(60);

struct OcrRetryRegressionTestHelper {
    client: Client,
    token: String,
}

impl OcrRetryRegressionTestHelper {
    async fn new() -> Result<Self, Box<dyn std::error::Error + Send + Sync>> {
        let client = Client::new();

        // Health check
        let health_check = client
            .get(&format!("{}/api/health", get_base_url()))
            .timeout(Duration::from_secs(10))
            .send()
            .await;

        match health_check {
            Ok(response) => {
                if !response.status().is_success() {
                    let status = response.status();
                    let text = response.text().await.unwrap_or_else(|_| "Unable to read response".to_string());
                    return Err(format!("Health check failed with status {}: {}. Is the server running at {}?", status, text, get_base_url()).into());
                }
                println!("✅ Server health check passed at {}", get_base_url());
            }
            Err(e) => {
                eprintln!("❌ Cannot connect to server at {}: {}", get_base_url(), e);
                eprintln!("💡 To run integration tests, start the server first:");
                eprintln!(" cargo run");
                eprintln!(" Then run tests in another terminal:");
                eprintln!(" cargo test --test integration_ocr_retry_bug_regression_tests");
                return Err(format!("Server not reachable: {}", e).into());
            }
        }

        // Create and login as admin user
        let test_id = Uuid::new_v4().simple().to_string();
        let username = format!("test_admin_{}", &test_id[0..8]);
        let password = "test_password_123";
        let email = format!("{}@test.com", username);

        let create_user = CreateUser {
            username: username.clone(),
            password: password.to_string(),
            email: email.clone(),
            role: Some(UserRole::Admin),
        };

        let _create_response = client
            .post(&format!("{}/api/users", get_base_url()))
            .json(&create_user)
            .timeout(TIMEOUT)
            .send()
            .await?;

        let login_request = LoginRequest {
            username: username.clone(),
            password: password.to_string(),
        };

        let login_response = client
            .post(&format!("{}/api/auth/login", get_base_url()))
            .json(&login_request)
            .timeout(TIMEOUT)
            .send()
            .await?;

        if !login_response.status().is_success() {
            let status = login_response.status();
            let error_text = login_response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
            return Err(format!("Login failed with status {}: {}", status, error_text).into());
        }

        let login_data: LoginResponse = login_response.json().await?;
        let token = login_data.token;

        Ok(Self { client, token })
    }

    async fn create_test_document(&self, filename: &str, ocr_status: &str) -> Result<Uuid, Box<dyn std::error::Error + Send + Sync>> {
        // Create a document directly in the database via API
        let document_data = json!({
            "filename": filename,
            "original_filename": filename,
            "mime_type": "application/pdf",
            "file_size": 1024,
            "ocr_status": ocr_status
        });

        let response = self.client
            .post(&format!("{}/api/internal/test/documents", get_base_url()))
            .header("Authorization", format!("Bearer {}", self.token))
            .json(&document_data)
            .timeout(TIMEOUT)
            .send()
            .await?;

        if !response.status().is_success() {
            let status = response.status();
            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
            return Err(format!("Failed to create test document with status {}: {}", status, error_text).into());
        }

        let response_data: Value = response.json().await?;
        let doc_id_str = response_data["id"].as_str()
            .ok_or("Document ID not found in response")?;
        let doc_id = Uuid::parse_str(doc_id_str)?;

        Ok(doc_id)
    }

    async fn get_bulk_retry_preview(&self, mode: &str) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
        let request_body = json!({
            "mode": mode,
            "preview_only": true
        });

        let response = self.client
            .post(&format!("{}/api/documents/ocr/bulk-retry", get_base_url()))
            .header("Authorization", format!("Bearer {}", self.token))
            .json(&request_body)
            .timeout(TIMEOUT)
            .send()
            .await?;

        if !response.status().is_success() {
            let status = response.status();
            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
            return Err(format!("Bulk retry preview failed with status {}: {}", status, error_text).into());
        }

        let response_data: Value = response.json().await?;
        Ok(response_data)
    }

    async fn execute_bulk_retry(&self, mode: &str) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
        let request_body = json!({
            "mode": mode,
            "preview_only": false
        });

        let response = self.client
            .post(&format!("{}/api/documents/ocr/bulk-retry", get_base_url()))
            .header("Authorization", format!("Bearer {}", self.token))
            .json(&request_body)
            .timeout(TIMEOUT)
            .send()
            .await?;

        if !response.status().is_success() {
            let status = response.status();
            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
            return Err(format!("Bulk retry execution failed with status {}: {}", status, error_text).into());
        }

        let response_data: Value = response.json().await?;
        Ok(response_data)
    }
}
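The helpers above read the bulk-retry response through untyped JSON lookups. For reference, the response shape those lookups assume, with field names inferred from the assertions in the tests below rather than from a published schema, could be modeled roughly as:

use serde::Deserialize;

// Assumed shape of the /api/documents/ocr/bulk-retry response; inferred from the
// fields the tests access (success, matched_count, queued_count, message).
#[derive(Debug, Deserialize)]
struct BulkRetryResponse {
    success: bool,
    matched_count: u64,
    queued_count: u64,
    message: Option<String>,
}
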
#[tokio::test]
async fn test_bulk_retry_only_targets_failed_documents_regression() {
    let helper = match OcrRetryRegressionTestHelper::new().await {
        Ok(h) => h,
        Err(e) => {
            eprintln!("⚠️ Skipping test due to setup failure: {}", e);
            return;
        }
    };

    println!("🧪 Testing regression: Bulk retry should only target failed documents");

    // Create a mix of documents with different OCR statuses
    let failed_doc1 = helper.create_test_document("failed_doc_1.pdf", "failed").await.expect("Failed to create failed document 1");
    let failed_doc2 = helper.create_test_document("failed_doc_2.pdf", "failed").await.expect("Failed to create failed document 2");
    let completed_doc = helper.create_test_document("completed_doc.pdf", "completed").await.expect("Failed to create completed document");
    let pending_doc = helper.create_test_document("pending_doc.pdf", "pending").await.expect("Failed to create pending document");

    println!("📄 Created test documents:");
    println!(" - Failed: {}, {}", failed_doc1, failed_doc2);
    println!(" - Completed: {}", completed_doc);
    println!(" - Pending: {}", pending_doc);

    // Test 1: Preview should only show failed documents
    println!("🔍 Testing bulk retry preview...");
    let preview_result = helper.get_bulk_retry_preview("all").await.expect("Failed to get preview");

    let matched_count = preview_result["matched_count"].as_u64().expect("matched_count not found");
    assert_eq!(matched_count, 2, "Preview should only match 2 failed documents, but matched {}", matched_count);

    let queued_count = preview_result["queued_count"].as_u64().unwrap_or(0);
    assert_eq!(queued_count, 0, "Preview should not queue any documents, but queued {}", queued_count);

    println!("✅ Preview correctly identified {} failed documents", matched_count);

    // Test 2: Execution should only process failed documents and not error on completed ones
    println!("🚀 Testing bulk retry execution...");
    let execution_result = helper.execute_bulk_retry("all").await.expect("Failed to execute bulk retry");

    let execution_matched_count = execution_result["matched_count"].as_u64().expect("matched_count not found in execution");
    let execution_queued_count = execution_result["queued_count"].as_u64().expect("queued_count not found in execution");

    assert_eq!(execution_matched_count, 2, "Execution should only match 2 failed documents, but matched {}", execution_matched_count);
    assert_eq!(execution_queued_count, 2, "Execution should queue 2 failed documents, but queued {}", execution_queued_count);

    let success = execution_result["success"].as_bool().expect("success not found in execution");
    assert!(success, "Bulk retry execution should succeed");

    println!("✅ Execution successfully processed {} failed documents", execution_queued_count);
    println!("🎉 Regression test passed: Bulk retry correctly targets only failed documents");
}

#[tokio::test]
async fn test_bulk_retry_no_database_constraint_errors() {
    let helper = match OcrRetryRegressionTestHelper::new().await {
        Ok(h) => h,
        Err(e) => {
            eprintln!("⚠️ Skipping test due to setup failure: {}", e);
            return;
        }
    };

    println!("🧪 Testing regression: No database constraint errors during retry");

    // Create only failed documents to ensure we test the constraint logic
    let failed_doc1 = helper.create_test_document("constraint_test_1.pdf", "failed").await.expect("Failed to create test document");
    let failed_doc2 = helper.create_test_document("constraint_test_2.pdf", "failed").await.expect("Failed to create test document");

    println!("📄 Created {} failed documents for constraint testing", 2);

    // Execute bulk retry - this should not produce any database constraint errors
    println!("🚀 Executing bulk retry to test database constraints...");
    let result = helper.execute_bulk_retry("all").await;

    match result {
        Ok(response) => {
            let success = response["success"].as_bool().expect("success field not found");
            let queued_count = response["queued_count"].as_u64().expect("queued_count not found");
            let message = response["message"].as_str().unwrap_or("No message");

            assert!(success, "Bulk retry should succeed without constraint errors");
            assert_eq!(queued_count, 2, "Should queue both failed documents");

            println!("✅ Bulk retry succeeded: queued {} documents", queued_count);
            println!("📝 Response message: {}", message);
        }
        Err(e) => {
            // Check if the error contains the specific constraint violation we were experiencing
            let error_msg = e.to_string();
            if error_msg.contains("Cannot modify completed OCR data") {
                panic!("❌ REGRESSION DETECTED: Database constraint error occurred: {}", error_msg);
            } else {
                panic!("❌ Unexpected error during bulk retry: {}", error_msg);
            }
        }
    }

    println!("🎉 Regression test passed: No database constraint errors during retry");
}

#[tokio::test]
async fn test_bulk_retry_with_mixed_documents_no_errors() {
    let helper = match OcrRetryRegressionTestHelper::new().await {
        Ok(h) => h,
        Err(e) => {
            eprintln!("⚠️ Skipping test due to setup failure: {}", e);
            return;
        }
    };

    println!("🧪 Testing regression: Mixed document statuses should not cause errors");

    // Create a realistic mix of documents that might exist in production
    let mut created_docs = Vec::new();

    // Create failed documents (should be included in retry)
    for i in 0..3 {
        let doc_id = helper.create_test_document(&format!("failed_{}.pdf", i), "failed").await.expect("Failed to create failed document");
        created_docs.push((doc_id, "failed"));
    }

    // Create completed documents (should be ignored, not cause errors)
    for i in 0..10 {
        let doc_id = helper.create_test_document(&format!("completed_{}.pdf", i), "completed").await.expect("Failed to create completed document");
        created_docs.push((doc_id, "completed"));
    }

    // Create pending documents (should be ignored)
    for i in 0..5 {
        let doc_id = helper.create_test_document(&format!("pending_{}.pdf", i), "pending").await.expect("Failed to create pending document");
        created_docs.push((doc_id, "pending"));
    }

    println!("📄 Created {} total documents with mixed statuses", created_docs.len());
    println!(" - 3 failed (should be retried)");
    println!(" - 10 completed (should be ignored)");
    println!(" - 5 pending (should be ignored)");

    // Test preview first
    println!("🔍 Testing preview with mixed document statuses...");
    let preview_result = helper.get_bulk_retry_preview("all").await.expect("Failed to get preview");

    let preview_matched = preview_result["matched_count"].as_u64().expect("matched_count not found");
    assert_eq!(preview_matched, 3, "Preview should only match 3 failed documents from mix");

    // Test execution
    println!("🚀 Testing execution with mixed document statuses...");
    let execution_result = helper.execute_bulk_retry("all").await.expect("Failed to execute bulk retry");

    let success = execution_result["success"].as_bool().expect("success not found");
    let matched_count = execution_result["matched_count"].as_u64().expect("matched_count not found");
    let queued_count = execution_result["queued_count"].as_u64().expect("queued_count not found");

    assert!(success, "Bulk retry should succeed with mixed document statuses");
    assert_eq!(matched_count, 3, "Should only match 3 failed documents from the mix");
    assert_eq!(queued_count, 3, "Should queue all 3 failed documents");

    println!("✅ Successfully handled mixed documents: matched {}, queued {}", matched_count, queued_count);
    println!("🎉 Regression test passed: Mixed document statuses handled correctly");
}