36 lines
1.5 KiB
SQL
36 lines
1.5 KiB
SQL
-- Populate OCR queue with documents that have pending OCR status
|
|
-- This migration addresses the issue where documents marked as pending
|
|
-- by the confidence backfill migration are not in the processing queue
|
|
|
|
-- Insert pending documents into OCR queue
|
|
INSERT INTO ocr_queue (document_id, priority, file_size, created_at)
|
|
SELECT
|
|
id,
|
|
-- Calculate priority based on file size (smaller files get higher priority)
|
|
CASE
|
|
WHEN file_size <= 1048576 THEN 10 -- <= 1MB: highest priority
|
|
WHEN file_size <= 5242880 THEN 8 -- 1-5MB: high priority
|
|
WHEN file_size <= 10485760 THEN 6 -- 5-10MB: medium priority
|
|
WHEN file_size <= 52428800 THEN 4 -- 10-50MB: low priority
|
|
ELSE 2 -- > 50MB: lowest priority
|
|
END as priority,
|
|
file_size,
|
|
NOW() as created_at
|
|
FROM documents
|
|
WHERE ocr_status = 'pending'
|
|
AND id NOT IN (SELECT document_id FROM ocr_queue) -- Avoid duplicates
|
|
AND file_path IS NOT NULL -- Only queue documents with files
|
|
AND (mime_type LIKE 'image/%' OR mime_type = 'application/pdf' OR mime_type = 'text/plain'); -- Only OCR-able types
|
|
|
|
-- Log the result
|
|
DO $$
|
|
DECLARE
|
|
queued_count INTEGER;
|
|
BEGIN
|
|
GET DIAGNOSTICS queued_count = ROW_COUNT;
|
|
RAISE NOTICE 'Added % pending documents to OCR queue for processing', queued_count;
|
|
END $$;
|
|
|
|
-- Create helpful index for monitoring
|
|
CREATE INDEX IF NOT EXISTS idx_ocr_queue_document_status
|
|
ON ocr_queue(document_id, status, created_at); |