diff --git a/migrations/20250620100013_add_database_guardrails.sql b/migrations/20250620100013_add_database_guardrails.sql
index 67f86a6..26a4cf2 100644
--- a/migrations/20250620100013_add_database_guardrails.sql
+++ b/migrations/20250620100013_add_database_guardrails.sql
@@ -53,9 +53,10 @@ WHERE status IN ('pending', 'processing');
 CREATE OR REPLACE FUNCTION validate_ocr_consistency()
 RETURNS TRIGGER AS $$
 BEGIN
-    -- Prevent updating completed OCR unless explicitly allowed
-    IF OLD.ocr_status = 'completed' AND NEW.ocr_status != 'completed' THEN
-        RAISE EXCEPTION 'Cannot modify completed OCR data for document %', OLD.id;
+    -- Allow OCR retry operations: completed -> pending is allowed for retry functionality
+    -- Prevent other modifications to completed OCR data
+    IF OLD.ocr_status = 'completed' AND NEW.ocr_status != 'completed' AND NEW.ocr_status != 'pending' THEN
+        RAISE EXCEPTION 'Cannot modify completed OCR data for document %. Only retry (pending) is allowed.', OLD.id;
     END IF;
 
     -- Ensure OCR text and metadata consistency
diff --git a/migrations/20250702000002_fix_ocr_retry_guardrails.sql b/migrations/20250702000002_fix_ocr_retry_guardrails.sql
new file mode 100644
index 0000000..96d904c
--- /dev/null
+++ b/migrations/20250702000002_fix_ocr_retry_guardrails.sql
@@ -0,0 +1,49 @@
+-- Fix OCR retry functionality by allowing completed -> pending transitions
+-- This addresses the issue where retry operations were blocked by database guardrails
+
+-- Update the OCR consistency validation function to allow retry operations
+--
+-- Trigger function contract (it reads both OLD and NEW and returns NEW):
+--   * raises an exception when a 'completed' row moves to any status other than
+--     'completed' or 'pending';
+--   * emits warnings (does not block the write) when completed OCR text lacks
+--     confidence/word count, or when the word count exceeds the text length;
+--   * stamps ocr_completed_at with NOW() on every transition into 'completed'.
+CREATE OR REPLACE FUNCTION validate_ocr_consistency()
+RETURNS TRIGGER AS $$
+BEGIN
+    -- Allow OCR retry operations: completed -> pending is allowed for retry functionality
+    -- Prevent other modifications to completed OCR data
+    -- NOTE(review): a NULL NEW.ocr_status makes this condition NULL, so completed -> NULL
+    -- is not rejected; confirm whether that is intended.
+    IF OLD.ocr_status = 'completed' AND NEW.ocr_status != 'completed' AND NEW.ocr_status != 'pending' THEN
+        RAISE EXCEPTION 'Cannot modify completed OCR data for document %. Only retry (pending) is allowed.', OLD.id;
+    END IF;
+
+    -- Ensure OCR text and metadata consistency
+    IF NEW.ocr_status = 'completed' AND NEW.ocr_text IS NOT NULL THEN
+        -- Check that confidence and word count are reasonable
+        IF NEW.ocr_confidence IS NULL OR NEW.ocr_word_count IS NULL THEN
+            RAISE WARNING 'OCR completed but missing confidence or word count for document %', NEW.id;
+        END IF;
+
+        -- Validate word count roughly matches text length
+        IF NEW.ocr_word_count > 0 AND length(NEW.ocr_text) < NEW.ocr_word_count THEN
+            RAISE WARNING 'OCR word count (%) seems too high for text length (%) in document %',
+                NEW.ocr_word_count, length(NEW.ocr_text), NEW.id;
+        END IF;
+    END IF;
+
+    -- Set completion timestamp when status changes to completed.
+    -- IS DISTINCT FROM (instead of !=) so a NULL previous status also counts as
+    -- "not completed"; with != the comparison yields NULL and the timestamp is skipped.
+    IF OLD.ocr_status IS DISTINCT FROM 'completed' AND NEW.ocr_status = 'completed' THEN
+        NEW.ocr_completed_at = NOW();
+    END IF;
+
+    RETURN NEW;
+END;
+$$ LANGUAGE plpgsql;
+
+-- Add comment to document the change
+COMMENT ON FUNCTION validate_ocr_consistency() IS 'Validates OCR data consistency during updates. Updated to allow retry operations (completed -> pending transitions).';
\ No newline at end of file
diff --git a/src/routes/documents_ocr_retry.rs b/src/routes/documents_ocr_retry.rs
index bce5f03..11874eb 100644
--- a/src/routes/documents_ocr_retry.rs
+++ b/src/routes/documents_ocr_retry.rs
@@ -521,7 +521,8 @@ async fn get_all_failed_ocr_documents(
         r#"
         SELECT id, filename, file_size, mime_type, ocr_failure_reason
         FROM documents
-        WHERE ($1::uuid IS NULL OR user_id = $1)
+        WHERE ocr_status = 'failed'
+          AND ($1::uuid IS NULL OR user_id = $1)
         ORDER BY created_at DESC
         "#
     )