feat(migrations): try to fix the migrations service

This commit is contained in:
perf3ct 2025-06-13 15:14:13 +00:00
parent e1e949cf65
commit cd35f877b1
6 changed files with 62 additions and 4 deletions

View File

@ -8,6 +8,7 @@ services:
- JWT_SECRET=your-super-secret-jwt-key-change-this-in-production
- UPLOAD_PATH=/app/uploads
- WATCH_FOLDER=/app/watch
- RUST_BACKTRACE=1
volumes:
- uploads:/app/uploads
- watch:/app/watch

View File

@ -22,6 +22,12 @@ CREATE TABLE IF NOT EXISTS documents (
mime_type VARCHAR(100) NOT NULL,
content TEXT,
ocr_text TEXT,
ocr_confidence REAL,
ocr_word_count INT,
ocr_processing_time_ms INT,
ocr_status VARCHAR(20) DEFAULT 'pending',
ocr_error TEXT,
ocr_completed_at TIMESTAMPTZ,
tags TEXT[] DEFAULT '{}',
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
@ -36,6 +42,8 @@ CREATE INDEX IF NOT EXISTS idx_documents_tags ON documents USING GIN(tags);
CREATE INDEX IF NOT EXISTS idx_documents_content_search ON documents USING GIN(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')));
CREATE INDEX IF NOT EXISTS idx_documents_filename_trgm ON documents USING GIN(filename gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_documents_content_trgm ON documents USING GIN((COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')) gin_trgm_ops);
-- `CREATE TABLE IF NOT EXISTS documents` above does nothing when the table
-- already exists, so a database created before the OCR columns were part of
-- that definition would lack them and the index statements below would fail
-- with "column does not exist". Add the columns idempotently first; the types
-- and the 'pending' default mirror the table definition.
ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_confidence REAL;
ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_word_count INT;
ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_processing_time_ms INT;
ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_status VARCHAR(20) DEFAULT 'pending';
ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_error TEXT;
ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_completed_at TIMESTAMPTZ;
-- Partial indexes: only rows that actually carry the OCR metric are indexed.
CREATE INDEX IF NOT EXISTS idx_documents_ocr_confidence ON documents(ocr_confidence) WHERE ocr_confidence IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_documents_ocr_word_count ON documents(ocr_word_count) WHERE ocr_word_count IS NOT NULL;
-- Create settings table
CREATE TABLE IF NOT EXISTS settings (
@ -57,6 +65,15 @@ CREATE TABLE IF NOT EXISTS settings (
memory_limit_mb INT DEFAULT 512,
cpu_priority VARCHAR(10) DEFAULT 'normal',
enable_background_ocr BOOLEAN DEFAULT TRUE,
ocr_page_segmentation_mode INT DEFAULT 3,
ocr_engine_mode INT DEFAULT 3,
ocr_min_confidence REAL DEFAULT 30.0,
ocr_dpi INT DEFAULT 300,
ocr_enhance_contrast BOOLEAN DEFAULT true,
ocr_remove_noise BOOLEAN DEFAULT true,
ocr_detect_orientation BOOLEAN DEFAULT true,
ocr_whitelist_chars TEXT,
ocr_blacklist_chars TEXT,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);

View File

@ -322,9 +322,9 @@ impl Database {
pub async fn create_document(&self, document: Document) -> Result<Document> {
let row = sqlx::query(
r#"
INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, tags, created_at, updated_at, user_id)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, tags, created_at, updated_at, user_id
INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
"#
)
.bind(document.id)
@ -335,6 +335,12 @@ impl Database {
.bind(&document.mime_type)
.bind(&document.content)
.bind(&document.ocr_text)
.bind(document.ocr_confidence)
.bind(document.ocr_word_count)
.bind(document.ocr_processing_time_ms)
.bind(&document.ocr_status)
.bind(&document.ocr_error)
.bind(document.ocr_completed_at)
.bind(&document.tags)
.bind(document.created_at)
.bind(document.updated_at)
@ -355,6 +361,8 @@ impl Database {
ocr_word_count: row.get("ocr_word_count"),
ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"),
tags: row.get("tags"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
@ -393,6 +401,8 @@ impl Database {
ocr_word_count: row.get("ocr_word_count"),
ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"),
tags: row.get("tags"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
@ -431,6 +441,8 @@ impl Database {
ocr_word_count: row.get("ocr_word_count"),
ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"),
tags: row.get("tags"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
@ -499,6 +511,8 @@ impl Database {
ocr_word_count: row.get("ocr_word_count"),
ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"),
tags: row.get("tags"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
@ -580,7 +594,7 @@ impl Database {
CASE WHEN filename ILIKE '%' || "#
));
builder.push_bind(&search.query);
builder.push(&format!(r#"' || '%' THEN 0.8 ELSE 0 END,
builder.push(&format!(r#" || '%' THEN 0.8 ELSE 0 END,
ts_rank(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')), {}('english', "#, query_function));
builder.push_bind(&search.query);
builder.push(&format!(r#"))

View File

@ -62,6 +62,8 @@ impl FileService {
ocr_word_count: None,
ocr_processing_time_ms: None,
ocr_status: Some("pending".to_string()),
ocr_error: None,
ocr_completed_at: None,
tags: Vec::new(),
created_at: Utc::now(),
updated_at: Utc::now(),

View File

@ -82,6 +82,28 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
}
}
// Debug aid: query information_schema for the columns of public.documents
// (in ordinal order) and write each column name to the log.
let column_lookup = sqlx::query(
"SELECT column_name FROM information_schema.columns
WHERE table_name = 'documents' AND table_schema = 'public'
ORDER BY ordinal_position"
)
.fetch_all(&db.pool)
.await;
match column_lookup {
    // The lookup is diagnostic only: a failure is logged and not propagated.
    Err(lookup_err) => {
        error!("Failed to check columns: {}", lookup_err);
    }
    Ok(column_rows) => {
        info!("Columns in documents table:");
        for column_row in &column_rows {
            info!(" - {}", column_row.get::<String, _>("column_name"));
        }
    }
}
// Seed admin user
seed::seed_admin_user(&db).await?;

View File

@ -54,6 +54,8 @@ pub struct Document {
pub ocr_word_count: Option<i32>,
pub ocr_processing_time_ms: Option<i32>,
pub ocr_status: Option<String>,
pub ocr_error: Option<String>,
pub ocr_completed_at: Option<DateTime<Utc>>,
pub tags: Vec<String>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,