79 lines
3.4 KiB
SQL
79 lines
3.4 KiB
SQL
-- Create extensions
|
|
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
|
CREATE EXTENSION IF NOT EXISTS "pg_trgm";
|
|
|
|
-- Create users table
|
|
CREATE TABLE IF NOT EXISTS users (
|
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
username VARCHAR(255) UNIQUE NOT NULL,
|
|
email VARCHAR(255) UNIQUE NOT NULL,
|
|
password_hash VARCHAR(255) NOT NULL,
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
|
);
|
|
|
|
-- Create documents table
|
|
CREATE TABLE IF NOT EXISTS documents (
|
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
filename VARCHAR(255) NOT NULL,
|
|
original_filename VARCHAR(255) NOT NULL,
|
|
file_path VARCHAR(500) NOT NULL,
|
|
file_size BIGINT NOT NULL,
|
|
mime_type VARCHAR(100) NOT NULL,
|
|
content TEXT,
|
|
ocr_text TEXT,
|
|
ocr_confidence REAL,
|
|
ocr_word_count INT,
|
|
ocr_processing_time_ms INT,
|
|
ocr_status VARCHAR(20) DEFAULT 'pending',
|
|
ocr_error TEXT,
|
|
ocr_completed_at TIMESTAMPTZ,
|
|
tags TEXT[] DEFAULT '{}',
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ DEFAULT NOW(),
|
|
user_id UUID REFERENCES users(id) ON DELETE CASCADE
|
|
);
|
|
|
|
-- Create indexes
|
|
CREATE INDEX IF NOT EXISTS idx_documents_user_id ON documents(user_id);
|
|
CREATE INDEX IF NOT EXISTS idx_documents_filename ON documents(filename);
|
|
CREATE INDEX IF NOT EXISTS idx_documents_mime_type ON documents(mime_type);
|
|
CREATE INDEX IF NOT EXISTS idx_documents_tags ON documents USING GIN(tags);
|
|
CREATE INDEX IF NOT EXISTS idx_documents_content_search ON documents USING GIN(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')));
|
|
CREATE INDEX IF NOT EXISTS idx_documents_filename_trgm ON documents USING GIN(filename gin_trgm_ops);
|
|
CREATE INDEX IF NOT EXISTS idx_documents_content_trgm ON documents USING GIN((COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')) gin_trgm_ops);
|
|
CREATE INDEX IF NOT EXISTS idx_documents_ocr_confidence ON documents(ocr_confidence) WHERE ocr_confidence IS NOT NULL;
|
|
CREATE INDEX IF NOT EXISTS idx_documents_ocr_word_count ON documents(ocr_word_count) WHERE ocr_word_count IS NOT NULL;
|
|
|
|
-- Create settings table
|
|
CREATE TABLE IF NOT EXISTS settings (
|
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
user_id UUID REFERENCES users(id) ON DELETE CASCADE UNIQUE,
|
|
ocr_language VARCHAR(10) DEFAULT 'eng',
|
|
concurrent_ocr_jobs INT DEFAULT 4,
|
|
ocr_timeout_seconds INT DEFAULT 300,
|
|
max_file_size_mb INT DEFAULT 50,
|
|
allowed_file_types TEXT[] DEFAULT ARRAY['pdf', 'png', 'jpg', 'jpeg', 'tiff', 'bmp', 'txt'],
|
|
auto_rotate_images BOOLEAN DEFAULT TRUE,
|
|
enable_image_preprocessing BOOLEAN DEFAULT TRUE,
|
|
search_results_per_page INT DEFAULT 25,
|
|
search_snippet_length INT DEFAULT 200,
|
|
fuzzy_search_threshold REAL DEFAULT 0.8,
|
|
retention_days INT,
|
|
enable_auto_cleanup BOOLEAN DEFAULT FALSE,
|
|
enable_compression BOOLEAN DEFAULT FALSE,
|
|
memory_limit_mb INT DEFAULT 512,
|
|
cpu_priority VARCHAR(10) DEFAULT 'normal',
|
|
enable_background_ocr BOOLEAN DEFAULT TRUE,
|
|
ocr_page_segmentation_mode INT DEFAULT 3,
|
|
ocr_engine_mode INT DEFAULT 3,
|
|
ocr_min_confidence REAL DEFAULT 30.0,
|
|
ocr_dpi INT DEFAULT 300,
|
|
ocr_enhance_contrast BOOLEAN DEFAULT true,
|
|
ocr_remove_noise BOOLEAN DEFAULT true,
|
|
ocr_detect_orientation BOOLEAN DEFAULT true,
|
|
ocr_whitelist_chars TEXT,
|
|
ocr_blacklist_chars TEXT,
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
|
); |