-- Readur/migrations/20240101000000_initial_sche...
--
-- 78 lines
-- 3.3 KiB
-- SQL

-- Required extensions (both statements are idempotent).
--   pg_trgm  : supplies the gin_trgm_ops operator class used by the trigram
--              indexes on documents.
--   pgcrypto : supplies gen_random_uuid() on PostgreSQL 12 and older. From
--              PostgreSQL 13 the function is built into core, so there the
--              extension is redundant but does not change behavior.
CREATE EXTENSION IF NOT EXISTS "pg_trgm";
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
-- users: account records. Referenced by documents.user_id and
-- settings.user_id, both with ON DELETE CASCADE.
CREATE TABLE IF NOT EXISTS users (
    -- Surrogate key; generated by the database when the insert omits it.
    id            UUID         PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Login identifiers: mandatory and unique across the whole table.
    username      VARCHAR(255) NOT NULL UNIQUE,
    email         VARCHAR(255) NOT NULL UNIQUE,

    -- NOTE(review): presumably a password hash rather than plaintext;
    -- the hashing scheme is not visible here -- confirm in application code.
    password_hash VARCHAR(255) NOT NULL,

    -- Both default to NOW() on insert and are nullable. Nothing visible in
    -- this section refreshes updated_at on UPDATE.
    created_at    TIMESTAMPTZ  DEFAULT NOW(),
    updated_at    TIMESTAMPTZ  DEFAULT NOW()
);
-- documents: file metadata, text content and OCR results for each document.
CREATE TABLE IF NOT EXISTS documents (
    -- Surrogate key; generated by the database when the insert omits it.
    id                     UUID         PRIMARY KEY DEFAULT gen_random_uuid(),

    -- File identity and storage metadata (all mandatory).
    filename               VARCHAR(255) NOT NULL,
    original_filename      VARCHAR(255) NOT NULL,
    file_path              VARCHAR(500) NOT NULL,
    file_size              BIGINT       NOT NULL,  -- presumably bytes; TODO confirm
    mime_type              VARCHAR(100) NOT NULL,

    -- Text columns. Both are nullable; the content search indexes
    -- concatenate them after COALESCE-ing NULL to ''.
    content                TEXT,
    ocr_text               TEXT,

    -- OCR result fields. All nullable except for the status default.
    ocr_confidence         REAL,                   -- scale not defined here; TODO confirm
    ocr_word_count         INTEGER,
    ocr_processing_time_ms INTEGER,
    -- Free-form status string starting at 'pending'; no CHECK constraint
    -- restricts the allowed values.
    ocr_status             VARCHAR(20)  DEFAULT 'pending',
    ocr_error              TEXT,
    ocr_completed_at       TIMESTAMPTZ,

    -- Tag list stored as a text array; defaults to the empty array.
    tags                   TEXT[]       DEFAULT '{}',

    -- Both default to NOW() on insert and are nullable.
    created_at             TIMESTAMPTZ  DEFAULT NOW(),
    updated_at             TIMESTAMPTZ  DEFAULT NOW(),

    -- Owning user. Nullable; rows are removed when the referenced user is
    -- deleted.
    user_id                UUID         REFERENCES users(id) ON DELETE CASCADE
);
-- Indexes on documents.

-- Plain B-tree lookups on owner, filename and MIME type.
CREATE INDEX IF NOT EXISTS idx_documents_user_id
    ON documents (user_id);

CREATE INDEX IF NOT EXISTS idx_documents_filename
    ON documents (filename);

CREATE INDEX IF NOT EXISTS idx_documents_mime_type
    ON documents (mime_type);

-- GIN index over the tags array.
CREATE INDEX IF NOT EXISTS idx_documents_tags
    ON documents
    USING GIN (tags);

-- Full-text index over content and ocr_text joined by a single space, using
-- the 'english' text search configuration. This is an expression index:
-- queries must repeat the same expression for the planner to consider it.
CREATE INDEX IF NOT EXISTS idx_documents_content_search
    ON documents
    USING GIN (
        to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, ''))
    );

-- Trigram (pg_trgm) indexes: one on filename, one on the same concatenated
-- content/ocr_text expression used by the full-text index.
CREATE INDEX IF NOT EXISTS idx_documents_filename_trgm
    ON documents
    USING GIN (filename gin_trgm_ops);

CREATE INDEX IF NOT EXISTS idx_documents_content_trgm
    ON documents
    USING GIN (
        (COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')) gin_trgm_ops
    );

-- Partial B-tree indexes on the OCR metrics; rows where the metric is NULL
-- are left out of the index.
CREATE INDEX IF NOT EXISTS idx_documents_ocr_confidence
    ON documents (ocr_confidence)
    WHERE ocr_confidence IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_documents_ocr_word_count
    ON documents (ocr_word_count)
    WHERE ocr_word_count IS NOT NULL;
-- settings: per-user configuration values, each with a column default.
CREATE TABLE IF NOT EXISTS settings (
    -- Surrogate key; generated by the database when the insert omits it.
    id                         UUID        PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Owning user. UNIQUE allows at most one settings row per non-NULL
    -- user_id; the row is removed when the referenced user is deleted.
    user_id                    UUID        REFERENCES users(id) ON DELETE CASCADE UNIQUE,

    -- OCR job configuration.
    ocr_language               VARCHAR(10) DEFAULT 'eng',
    concurrent_ocr_jobs        INTEGER     DEFAULT 4,
    ocr_timeout_seconds        INTEGER     DEFAULT 300,

    -- Upload limits.
    max_file_size_mb           INTEGER     DEFAULT 50,
    allowed_file_types         TEXT[]      DEFAULT ARRAY['pdf', 'png', 'jpg', 'jpeg', 'tiff', 'bmp', 'txt'],

    -- Image handling toggles.
    auto_rotate_images         BOOLEAN     DEFAULT TRUE,
    enable_image_preprocessing BOOLEAN     DEFAULT TRUE,

    -- Search presentation and matching.
    search_results_per_page    INTEGER     DEFAULT 25,
    search_snippet_length      INTEGER     DEFAULT 200,
    fuzzy_search_threshold     REAL        DEFAULT 0.8,

    -- Retention and storage. retention_days has no default, so it is NULL
    -- unless set explicitly.
    retention_days             INTEGER,
    enable_auto_cleanup        BOOLEAN     DEFAULT FALSE,
    enable_compression         BOOLEAN     DEFAULT FALSE,

    -- Resource limits and scheduling.
    memory_limit_mb            INTEGER     DEFAULT 512,
    cpu_priority               VARCHAR(10) DEFAULT 'normal',
    enable_background_ocr      BOOLEAN     DEFAULT TRUE,

    -- OCR engine tuning.
    -- NOTE(review): the two *_mode integers look like OCR engine mode codes;
    -- their meaning is not defined in this file -- confirm against the OCR
    -- integration.
    ocr_page_segmentation_mode INTEGER     DEFAULT 3,
    ocr_engine_mode            INTEGER     DEFAULT 3,
    ocr_min_confidence         REAL        DEFAULT 30.0,  -- scale not defined here; TODO confirm
    ocr_dpi                    INTEGER     DEFAULT 300,
    ocr_enhance_contrast       BOOLEAN     DEFAULT TRUE,
    ocr_remove_noise           BOOLEAN     DEFAULT TRUE,
    ocr_detect_orientation     BOOLEAN     DEFAULT TRUE,
    ocr_whitelist_chars        TEXT,
    ocr_blacklist_chars        TEXT,

    -- Both default to NOW() on insert and are nullable.
    created_at                 TIMESTAMPTZ DEFAULT NOW(),
    updated_at                 TIMESTAMPTZ DEFAULT NOW()
);