From 65f42c2cd74835f1806df1aab171692acccc18bd Mon Sep 17 00:00:00 2001 From: perf3ct Date: Mon, 21 Jul 2025 20:43:37 +0000 Subject: [PATCH] fix(ocr): use proper failure reasons to avoid constraint violations in failed_documents table --- src/ocr/queue.rs | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/ocr/queue.rs b/src/ocr/queue.rs index dce48ec..8bccdf9 100644 --- a/src/ocr/queue.rs +++ b/src/ocr/queue.rs @@ -428,10 +428,13 @@ impl OcrQueueService { let error_msg = "OCR update failed validation (document may have been modified)"; warn!("{} for document {}", error_msg, item.document_id); + // Use classification function to determine proper failure reason + let (failure_reason, _should_suppress) = Self::classify_ocr_error(error_msg); + // Create failed document record using helper function let _ = self.create_failed_document_from_ocr_error( item.document_id, - "processing", + failure_reason, error_msg, item.attempts, ).await; @@ -443,10 +446,13 @@ impl OcrQueueService { let error_msg = format!("Transaction-safe OCR update failed: {}", e); error!("{}", error_msg); + // Use classification function to determine proper failure reason + let (failure_reason, _should_suppress) = Self::classify_ocr_error(&error_msg); + // Create failed document record using helper function let _ = self.create_failed_document_from_ocr_error( item.document_id, - "processing", + failure_reason, &error_msg, item.attempts, ).await; @@ -461,10 +467,13 @@ impl OcrQueueService { warn!("⚠️ No searchable content extracted for '{}' | Job: {} | Document: {} | 0 words", filename, item.id, item.document_id); + // Use classification function to determine proper failure reason + let (failure_reason, _should_suppress) = Self::classify_ocr_error(&error_msg); + // Create failed document record using helper function let _ = self.create_failed_document_from_ocr_error( item.document_id, - "no_extractable_text", + failure_reason, &error_msg, item.attempts, ).await; @@ -1196,8 +1205,12 @@ impl OcrQueueService { ("unsupported_format", false) } else if error_str.contains("too large") || error_str.contains("file size") { ("file_too_large", false) + } else if error_str.contains("No extractable text") || error_str.contains("0 words") { + ("low_ocr_confidence", false) // No extractable text treated as low confidence OCR + } else if error_str.contains("validation") || error_str.contains("document may have been modified") { + ("other", false) // Document validation failures use "other" } else { - ("other", false) + ("other", false) // Fallback for any unrecognized errors } } } \ No newline at end of file