-- Patch summary (text before the "diff --git" header is not part of the patch).
--
-- Target: migrations/20250628000001_backfill_ocr_confidence.sql
--
-- What the two hunks add:
--   1. Before the backfill UPDATE:
--        ALTER TABLE documents DISABLE TRIGGER trigger_validate_ocr_consistency;
--   2. After the backfill UPDATE:
--        ALTER TABLE documents ENABLE TRIGGER trigger_validate_ocr_consistency;
--
-- The UPDATE itself is unchanged context. It sets ocr_status = 'pending' on
-- rows where ocr_confidence = 85.0, ocr_status = 'completed' and ocr_text IS
-- NOT NULL. The remainder of its SET list lies between the two hunks and is
-- not shown in this patch.
--
-- NOTE(review): the added comment says the trigger is disabled "to allow this
-- migration"; presumably the trigger would otherwise reject the intermediate
-- row state. The trigger definition is not visible here - confirm.
-- NOTE(review): syntax looks like PostgreSQL. There, DISABLE TRIGGER applies to
-- every session (not just this one) until re-enabled, so concurrent writers
-- skip the consistency check while the UPDATE runs - confirm this is acceptable.
-- NOTE(review): if the migration runner does not wrap this file in a single
-- transaction, a failure in the UPDATE leaves the trigger disabled. Verify the
-- runner's transaction behaviour before relying on the ENABLE statement.
-- NOTE(review): the patch text below appears with its line breaks collapsed
-- onto one line; restore the original line structure before applying it.
diff --git a/migrations/20250628000001_backfill_ocr_confidence.sql b/migrations/20250628000001_backfill_ocr_confidence.sql index 829d5cd..478e8d7 100644 --- a/migrations/20250628000001_backfill_ocr_confidence.sql +++ b/migrations/20250628000001_backfill_ocr_confidence.sql @@ -2,6 +2,9 @@ -- Since OCR confidence was previously hardcoded to 85%, we need to reprocess -- these documents to get accurate confidence scores +-- Temporarily disable the OCR consistency trigger to allow this migration +ALTER TABLE documents DISABLE TRIGGER trigger_validate_ocr_consistency; + -- Mark documents with exactly 85% confidence as pending OCR reprocessing UPDATE documents SET ocr_status = 'pending', @@ -12,6 +15,9 @@ WHERE ocr_confidence = 85.0 AND ocr_status = 'completed' AND ocr_text IS NOT NULL; +-- Re-enable the OCR consistency trigger +ALTER TABLE documents ENABLE TRIGGER trigger_validate_ocr_consistency; + -- Add a comment explaining what we did COMMENT ON COLUMN documents.ocr_confidence IS 'OCR confidence percentage (0-100) from Tesseract. Documents with NULL confidence and pending status will be reprocessed.';