-- Migration: Add multi-language OCR support -- This migration adds support for multiple OCR languages per user -- Add new columns for multi-language support ALTER TABLE settings ADD COLUMN preferred_languages JSONB DEFAULT '["eng"]'::jsonb, ADD COLUMN primary_language VARCHAR(10) DEFAULT 'eng', ADD COLUMN auto_detect_language_combination BOOLEAN DEFAULT false; -- Migrate existing ocr_language data to new preferred_languages array UPDATE settings SET preferred_languages = jsonb_build_array(COALESCE(ocr_language, 'eng')), primary_language = COALESCE(ocr_language, 'eng') WHERE preferred_languages = '["eng"]'::jsonb; -- Create index for efficient querying of preferred languages CREATE INDEX IF NOT EXISTS idx_settings_preferred_languages ON settings USING gin(preferred_languages); CREATE INDEX IF NOT EXISTS idx_settings_primary_language ON settings(primary_language); -- Add constraint to ensure primary_language is always in preferred_languages ALTER TABLE settings ADD CONSTRAINT check_primary_language_in_preferred CHECK (preferred_languages ? primary_language); -- Add constraint to limit number of preferred languages (max 4 for performance) ALTER TABLE settings ADD CONSTRAINT check_max_preferred_languages CHECK (jsonb_array_length(preferred_languages) <= 4); -- Add constraint to ensure valid primary language code (3-letter ISO codes) ALTER TABLE settings ADD CONSTRAINT check_valid_primary_language_code CHECK (primary_language ~ '^[a-z]{3}(_[A-Z]{2})?$'); -- Note: preferred_languages validation is handled in application code due to PostgreSQL subquery limitations in CHECK constraints -- Update existing users who don't have settings yet INSERT INTO settings (user_id, preferred_languages, primary_language, auto_detect_language_combination) SELECT u.id, '["eng"]'::jsonb, 'eng', false FROM users u WHERE NOT EXISTS ( SELECT 1 FROM settings s WHERE s.user_id = u.id ); -- Add comments for documentation COMMENT ON COLUMN settings.preferred_languages IS 'Array of 3-letter ISO language codes for OCR processing, max 4 languages'; COMMENT ON COLUMN settings.primary_language IS 'Primary language code that should be listed first in OCR processing'; COMMENT ON COLUMN settings.auto_detect_language_combination IS 'Whether to automatically suggest language combinations based on document content';