Readur/migrations/20250714000000_add_multi_la...

52 lines
2.3 KiB
SQL

-- Migration: Add multi-language OCR support
-- This migration adds support for multiple OCR languages per user
-- Add the columns that back per-user multi-language OCR preferences.
--   preferred_languages              JSON array of language codes, defaults to ["eng"]
--   primary_language                 single language code, defaults to 'eng'
--   auto_detect_language_combination opt-in flag, defaults to false
-- Every column is NOT NULL: existing rows are populated from the DEFAULT when
-- the column is added, and the CHECK constraints added later in this migration
-- (a CHECK is satisfied when its expression yields NULL) can then never be
-- bypassed by storing NULL.
ALTER TABLE settings
    ADD COLUMN preferred_languages JSONB NOT NULL DEFAULT '["eng"]'::jsonb,
    ADD COLUMN primary_language VARCHAR(10) NOT NULL DEFAULT 'eng',
    ADD COLUMN auto_detect_language_combination BOOLEAN NOT NULL DEFAULT false;
-- Backfill: carry each row's legacy single-language setting (ocr_language)
-- over into the new columns. Rows with a NULL ocr_language fall back to 'eng'.
-- Only rows still holding the '["eng"]' value in preferred_languages are touched.
-- NOTE(review): the legacy value is copied verbatim. A value longer than
-- 10 characters, or one that fails the primary_language format CHECK added
-- later in this migration, would make the migration fail. Confirm existing data.
UPDATE settings AS s
SET
    primary_language    = COALESCE(s.ocr_language, 'eng'),
    preferred_languages = jsonb_build_array(COALESCE(s.ocr_language, 'eng'))
WHERE s.preferred_languages = '["eng"]'::jsonb;
-- Indexes for looking settings up by language.
-- GIN over the JSONB array of preferred languages.
CREATE INDEX IF NOT EXISTS idx_settings_preferred_languages
    ON settings
    USING gin (preferred_languages);
-- Plain B-tree (the default index method) over the scalar primary language.
CREATE INDEX IF NOT EXISTS idx_settings_primary_language
    ON settings
    USING btree (primary_language);
-- Data-integrity rules for the language columns.
-- A CHECK constraint also passes when its expression evaluates to NULL, so
-- these rules rely on the columns never being NULL.

-- Rule 1: the primary language must be one of the preferred languages.
-- (`?` tests whether the string is a top-level element of the JSONB array.)
ALTER TABLE settings
    ADD CONSTRAINT check_primary_language_in_preferred
    CHECK (preferred_languages ? primary_language);

-- Rule 2: at most 4 preferred languages (kept small for performance).
ALTER TABLE settings
    ADD CONSTRAINT check_max_preferred_languages
    CHECK (jsonb_array_length(preferred_languages) <= 4);

-- Rule 3: the primary language is three lowercase letters, optionally followed
-- by an underscore and two uppercase letters.
-- NOTE(review): codes with a lowercase or longer suffix (e.g. a Tesseract-style
-- code such as chi_sim) do not match this pattern. Confirm against the set of
-- languages the application supports.
ALTER TABLE settings
    ADD CONSTRAINT check_valid_primary_language_code
    CHECK (primary_language ~ '^[a-z]{3}(_[A-Z]{2})?$');

-- The individual entries of preferred_languages are not format-checked here;
-- that validation is handled in application code due to PostgreSQL subquery
-- limitations in CHECK constraints.
-- Create a settings row with the default language configuration for every
-- user that does not have one yet (anti-join: keep only users with no match).
INSERT INTO settings (
    user_id,
    preferred_languages,
    primary_language,
    auto_detect_language_combination
)
SELECT
    account.id,
    '["eng"]'::jsonb,
    'eng',
    false
FROM users AS account
LEFT JOIN settings AS existing
    ON existing.user_id = account.id
WHERE existing.user_id IS NULL;
-- Attach a description to each new column so it is stored with the schema.
COMMENT ON COLUMN settings.preferred_languages
    IS 'Array of 3-letter ISO language codes for OCR processing, max 4 languages';

COMMENT ON COLUMN settings.primary_language
    IS 'Primary language code that should be listed first in OCR processing';

COMMENT ON COLUMN settings.auto_detect_language_combination
    IS 'Whether to automatically suggest language combinations based on document content';