feat(server): implement enhanced OCR service throughout
This commit is contained in:
parent
d5f419ca18
commit
d61b1c3f4b
|
|
@ -24,6 +24,7 @@ RUN apt-get update && apt-get install -y \
|
|||
WORKDIR /app
|
||||
COPY Cargo.toml Cargo.lock ./
|
||||
COPY src ./src
|
||||
COPY migrations ./migrations
|
||||
RUN cargo build --release
|
||||
|
||||
# --- Runtime stage ---
|
||||
|
|
@ -41,6 +42,9 @@ WORKDIR /app
|
|||
# Copy backend binary
|
||||
COPY --from=backend-builder /app/target/release/readur /app/readur
|
||||
|
||||
# Copy migrations directory
|
||||
COPY --from=backend-builder /app/migrations /app/migrations
|
||||
|
||||
# Create necessary directories
|
||||
RUN mkdir -p /app/uploads /app/watch /app/frontend
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import React, { useState, useEffect } from 'react'
|
|||
import FileUpload from './FileUpload'
|
||||
import DocumentList from './DocumentList'
|
||||
import SearchBar from './SearchBar'
|
||||
import OcrAnalytics from './OcrAnalytics'
|
||||
import { Document, documentService } from '../services/api'
|
||||
|
||||
function Dashboard() {
|
||||
|
|
@ -55,6 +56,12 @@ function Dashboard() {
|
|||
<SearchBar onSearch={handleSearch} />
|
||||
</div>
|
||||
|
||||
{!searchResults && (
|
||||
<div className="mb-6">
|
||||
<OcrAnalytics documents={documents} />
|
||||
</div>
|
||||
)}
|
||||
|
||||
{searchResults && (
|
||||
<div className="mb-4">
|
||||
<button
|
||||
|
|
|
|||
|
|
@ -42,6 +42,78 @@ function DocumentList({ documents, loading }: DocumentListProps) {
|
|||
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]
|
||||
}
|
||||
|
||||
const getOcrStatusBadge = (document: Document) => {
|
||||
if (!document.has_ocr_text) {
|
||||
return null
|
||||
}
|
||||
|
||||
const confidence = document.ocr_confidence
|
||||
const status = document.ocr_status
|
||||
|
||||
if (status === 'failed') {
|
||||
return (
|
||||
<span className="ml-2 inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-red-100 text-red-800">
|
||||
OCR Failed
|
||||
</span>
|
||||
)
|
||||
}
|
||||
|
||||
if (status === 'processing') {
|
||||
return (
|
||||
<span className="ml-2 inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-yellow-100 text-yellow-800">
|
||||
Processing...
|
||||
</span>
|
||||
)
|
||||
}
|
||||
|
||||
if (confidence !== undefined) {
|
||||
let badgeClass = 'bg-green-100 text-green-800'
|
||||
let label = 'OCR'
|
||||
|
||||
if (confidence >= 80) {
|
||||
badgeClass = 'bg-green-100 text-green-800'
|
||||
label = `OCR ${confidence.toFixed(0)}%`
|
||||
} else if (confidence >= 60) {
|
||||
badgeClass = 'bg-yellow-100 text-yellow-800'
|
||||
label = `OCR ${confidence.toFixed(0)}%`
|
||||
} else {
|
||||
badgeClass = 'bg-orange-100 text-orange-800'
|
||||
label = `OCR ${confidence.toFixed(0)}%`
|
||||
}
|
||||
|
||||
return (
|
||||
<span className={`ml-2 inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium ${badgeClass}`}>
|
||||
{label}
|
||||
</span>
|
||||
)
|
||||
}
|
||||
|
||||
return (
|
||||
<span className="ml-2 inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-green-100 text-green-800">
|
||||
OCR
|
||||
</span>
|
||||
)
|
||||
}
|
||||
|
||||
const getOcrMetrics = (document: Document) => {
|
||||
if (!document.has_ocr_text || !document.ocr_word_count) {
|
||||
return null
|
||||
}
|
||||
|
||||
const metrics = []
|
||||
|
||||
if (document.ocr_word_count) {
|
||||
metrics.push(`${document.ocr_word_count} words`)
|
||||
}
|
||||
|
||||
if (document.ocr_processing_time_ms) {
|
||||
const seconds = (document.ocr_processing_time_ms / 1000).toFixed(1)
|
||||
metrics.push(`${seconds}s`)
|
||||
}
|
||||
|
||||
return metrics.length > 0 ? ` • ${metrics.join(' • ')}` : null
|
||||
}
|
||||
|
||||
if (loading) {
|
||||
return (
|
||||
<div className="text-center py-8">
|
||||
|
|
@ -74,11 +146,8 @@ function DocumentList({ documents, loading }: DocumentListProps) {
|
|||
</div>
|
||||
<div className="text-sm text-gray-500">
|
||||
{formatFileSize(document.file_size)} • {document.mime_type}
|
||||
{document.has_ocr_text && (
|
||||
<span className="ml-2 inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-green-100 text-green-800">
|
||||
OCR
|
||||
</span>
|
||||
)}
|
||||
{getOcrMetrics(document)}
|
||||
{getOcrStatusBadge(document)}
|
||||
</div>
|
||||
<div className="text-xs text-gray-400">
|
||||
{new Date(document.created_at).toLocaleDateString()}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,187 @@
|
|||
import React, { useState, useEffect } from 'react'
|
||||
import {
|
||||
ChartBarIcon,
|
||||
ClockIcon,
|
||||
DocumentTextIcon,
|
||||
ExclamationCircleIcon,
|
||||
} from '@heroicons/react/24/outline'
|
||||
import { Document } from '../services/api'
|
||||
|
||||
interface OcrAnalyticsProps {
|
||||
documents: Document[]
|
||||
}
|
||||
|
||||
interface OcrStats {
|
||||
totalDocuments: number
|
||||
documentsWithOcr: number
|
||||
averageConfidence: number
|
||||
highConfidenceCount: number
|
||||
lowConfidenceCount: number
|
||||
failedCount: number
|
||||
processingCount: number
|
||||
totalWords: number
|
||||
averageProcessingTime: number
|
||||
}
|
||||
|
||||
function OcrAnalytics({ documents }: OcrAnalyticsProps) {
|
||||
const [stats, setStats] = useState<OcrStats | null>(null)
|
||||
|
||||
useEffect(() => {
|
||||
if (documents.length === 0) {
|
||||
setStats(null)
|
||||
return
|
||||
}
|
||||
|
||||
const ocrDocuments = documents.filter(doc => doc.has_ocr_text)
|
||||
const completedOcr = ocrDocuments.filter(doc => doc.ocr_status === 'completed')
|
||||
const failedOcr = ocrDocuments.filter(doc => doc.ocr_status === 'failed')
|
||||
const processingOcr = ocrDocuments.filter(doc => doc.ocr_status === 'processing')
|
||||
|
||||
const confidenceScores = completedOcr
|
||||
.map(doc => doc.ocr_confidence)
|
||||
.filter((confidence): confidence is number => confidence !== undefined)
|
||||
|
||||
const wordCounts = completedOcr
|
||||
.map(doc => doc.ocr_word_count)
|
||||
.filter((count): count is number => count !== undefined)
|
||||
|
||||
const processingTimes = completedOcr
|
||||
.map(doc => doc.ocr_processing_time_ms)
|
||||
.filter((time): time is number => time !== undefined)
|
||||
|
||||
const averageConfidence = confidenceScores.length > 0
|
||||
? confidenceScores.reduce((sum, conf) => sum + conf, 0) / confidenceScores.length
|
||||
: 0
|
||||
|
||||
const totalWords = wordCounts.reduce((sum, count) => sum + count, 0)
|
||||
|
||||
const averageProcessingTime = processingTimes.length > 0
|
||||
? processingTimes.reduce((sum, time) => sum + time, 0) / processingTimes.length
|
||||
: 0
|
||||
|
||||
const highConfidenceCount = confidenceScores.filter(conf => conf >= 80).length
|
||||
const lowConfidenceCount = confidenceScores.filter(conf => conf < 60).length
|
||||
|
||||
setStats({
|
||||
totalDocuments: documents.length,
|
||||
documentsWithOcr: ocrDocuments.length,
|
||||
averageConfidence,
|
||||
highConfidenceCount,
|
||||
lowConfidenceCount,
|
||||
failedCount: failedOcr.length,
|
||||
processingCount: processingOcr.length,
|
||||
totalWords,
|
||||
averageProcessingTime,
|
||||
})
|
||||
}, [documents])
|
||||
|
||||
if (!stats || stats.documentsWithOcr === 0) {
|
||||
return null
|
||||
}
|
||||
|
||||
const formatTime = (ms: number) => {
|
||||
if (ms < 1000) return `${Math.round(ms)}ms`
|
||||
return `${(ms / 1000).toFixed(1)}s`
|
||||
}
|
||||
|
||||
const getConfidenceColor = (confidence: number) => {
|
||||
if (confidence >= 80) return 'text-green-600'
|
||||
if (confidence >= 60) return 'text-yellow-600'
|
||||
return 'text-orange-600'
|
||||
}
|
||||
|
||||
const successRate = ((stats.documentsWithOcr - stats.failedCount) / stats.documentsWithOcr) * 100
|
||||
|
||||
return (
|
||||
<div className="bg-white overflow-hidden shadow rounded-lg">
|
||||
<div className="p-5">
|
||||
<div className="flex items-center">
|
||||
<div className="flex-shrink-0">
|
||||
<ChartBarIcon className="h-6 w-6 text-gray-400" />
|
||||
</div>
|
||||
<div className="ml-5 w-0 flex-1">
|
||||
<dl>
|
||||
<dt className="text-sm font-medium text-gray-500 truncate">
|
||||
OCR Analytics
|
||||
</dt>
|
||||
<dd className="text-lg font-medium text-gray-900">
|
||||
{stats.documentsWithOcr} of {stats.totalDocuments} documents processed
|
||||
</dd>
|
||||
</dl>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="bg-gray-50 px-5 py-3">
|
||||
<div className="grid grid-cols-2 gap-4 sm:grid-cols-4">
|
||||
{/* Success Rate */}
|
||||
<div className="text-center">
|
||||
<div className="text-lg font-semibold text-gray-900">
|
||||
{successRate.toFixed(0)}%
|
||||
</div>
|
||||
<div className="text-xs text-gray-500">Success Rate</div>
|
||||
</div>
|
||||
|
||||
{/* Average Confidence */}
|
||||
<div className="text-center">
|
||||
<div className={`text-lg font-semibold ${getConfidenceColor(stats.averageConfidence)}`}>
|
||||
{stats.averageConfidence.toFixed(0)}%
|
||||
</div>
|
||||
<div className="text-xs text-gray-500">Avg Confidence</div>
|
||||
</div>
|
||||
|
||||
{/* Total Words */}
|
||||
<div className="text-center">
|
||||
<div className="text-lg font-semibold text-gray-900">
|
||||
{stats.totalWords.toLocaleString()}
|
||||
</div>
|
||||
<div className="text-xs text-gray-500">Words Extracted</div>
|
||||
</div>
|
||||
|
||||
{/* Average Processing Time */}
|
||||
<div className="text-center">
|
||||
<div className="text-lg font-semibold text-gray-900">
|
||||
{formatTime(stats.averageProcessingTime)}
|
||||
</div>
|
||||
<div className="text-xs text-gray-500">Avg Time</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Quality Distribution */}
|
||||
<div className="mt-4 pt-4 border-t border-gray-200">
|
||||
<div className="flex justify-between items-center text-sm">
|
||||
<div className="flex items-center space-x-4">
|
||||
<div className="flex items-center">
|
||||
<div className="w-2 h-2 bg-green-500 rounded-full mr-1"></div>
|
||||
<span className="text-gray-600">High Quality: {stats.highConfidenceCount}</span>
|
||||
</div>
|
||||
|
||||
{stats.lowConfidenceCount > 0 && (
|
||||
<div className="flex items-center">
|
||||
<div className="w-2 h-2 bg-orange-500 rounded-full mr-1"></div>
|
||||
<span className="text-gray-600">Low Quality: {stats.lowConfidenceCount}</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{stats.failedCount > 0 && (
|
||||
<div className="flex items-center">
|
||||
<div className="w-2 h-2 bg-red-500 rounded-full mr-1"></div>
|
||||
<span className="text-gray-600">Failed: {stats.failedCount}</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{stats.processingCount > 0 && (
|
||||
<div className="flex items-center">
|
||||
<div className="w-2 h-2 bg-yellow-500 rounded-full mr-1 animate-pulse"></div>
|
||||
<span className="text-gray-600">Processing: {stats.processingCount}</span>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export default OcrAnalytics
|
||||
|
|
@ -19,6 +19,10 @@ export interface Document {
|
|||
tags: string[]
|
||||
created_at: string
|
||||
has_ocr_text: boolean
|
||||
ocr_confidence?: number
|
||||
ocr_word_count?: number
|
||||
ocr_processing_time_ms?: number
|
||||
ocr_status?: string
|
||||
}
|
||||
|
||||
export interface SearchRequest {
|
||||
|
|
@ -53,6 +57,10 @@ export interface EnhancedDocument {
|
|||
tags: string[]
|
||||
created_at: string
|
||||
has_ocr_text: boolean
|
||||
ocr_confidence?: number
|
||||
ocr_word_count?: number
|
||||
ocr_processing_time_ms?: number
|
||||
ocr_status?: string
|
||||
search_rank?: number
|
||||
snippets: SearchSnippet[]
|
||||
}
|
||||
|
|
|
|||
20
src/db.rs
20
src/db.rs
|
|
@ -351,6 +351,10 @@ impl Database {
|
|||
mime_type: row.get("mime_type"),
|
||||
content: row.get("content"),
|
||||
ocr_text: row.get("ocr_text"),
|
||||
ocr_confidence: row.get("ocr_confidence"),
|
||||
ocr_word_count: row.get("ocr_word_count"),
|
||||
ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
|
||||
ocr_status: row.get("ocr_status"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -385,6 +389,10 @@ impl Database {
|
|||
mime_type: row.get("mime_type"),
|
||||
content: row.get("content"),
|
||||
ocr_text: row.get("ocr_text"),
|
||||
ocr_confidence: row.get("ocr_confidence"),
|
||||
ocr_word_count: row.get("ocr_word_count"),
|
||||
ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
|
||||
ocr_status: row.get("ocr_status"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -419,6 +427,10 @@ impl Database {
|
|||
mime_type: row.get("mime_type"),
|
||||
content: row.get("content"),
|
||||
ocr_text: row.get("ocr_text"),
|
||||
ocr_confidence: row.get("ocr_confidence"),
|
||||
ocr_word_count: row.get("ocr_word_count"),
|
||||
ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
|
||||
ocr_status: row.get("ocr_status"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -483,6 +495,10 @@ impl Database {
|
|||
mime_type: row.get("mime_type"),
|
||||
content: row.get("content"),
|
||||
ocr_text: row.get("ocr_text"),
|
||||
ocr_confidence: row.get("ocr_confidence"),
|
||||
ocr_word_count: row.get("ocr_word_count"),
|
||||
ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
|
||||
ocr_status: row.get("ocr_status"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -637,6 +653,10 @@ impl Database {
|
|||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
has_ocr_text: ocr_text.is_some(),
|
||||
ocr_confidence: row.get("ocr_confidence"),
|
||||
ocr_word_count: row.get("ocr_word_count"),
|
||||
ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
|
||||
ocr_status: row.get("ocr_status"),
|
||||
search_rank: Some(rank),
|
||||
snippets,
|
||||
});
|
||||
|
|
|
|||
|
|
@ -58,6 +58,10 @@ impl FileService {
|
|||
mime_type: mime_type.to_string(),
|
||||
content: None,
|
||||
ocr_text: None,
|
||||
ocr_confidence: None,
|
||||
ocr_word_count: None,
|
||||
ocr_processing_time_ms: None,
|
||||
ocr_status: Some("pending".to_string()),
|
||||
tags: Vec::new(),
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
|
|
|
|||
|
|
@ -2,8 +2,9 @@ pub mod auth;
|
|||
pub mod batch_ingest;
|
||||
pub mod config;
|
||||
pub mod db;
|
||||
pub mod enhanced_ocr; // Temporarily disabled due to compilation errors
|
||||
pub mod enhanced_ocr;
|
||||
pub mod file_service;
|
||||
pub mod migrations;
|
||||
pub mod models;
|
||||
pub mod ocr;
|
||||
pub mod ocr_queue;
|
||||
|
|
|
|||
|
|
@ -43,6 +43,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
|
||||
db.migrate().await?;
|
||||
|
||||
// Run automatic migrations
|
||||
if let Err(e) = readur::migrations::run_startup_migrations(&config.database_url, "migrations").await {
|
||||
error!("Failed to run migrations: {}", e);
|
||||
return Err(e.into());
|
||||
}
|
||||
|
||||
// Seed admin user
|
||||
seed::seed_admin_user(&db).await?;
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,229 @@
|
|||
use anyhow::Result;
|
||||
use sqlx::PgPool;
|
||||
use tracing::{info, warn, error};
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
/// Applies SQL migration files from a directory to a Postgres database,
/// recording applied versions in a `schema_migrations` table.
pub struct MigrationRunner {
    // Connection pool used for all migration queries.
    pool: PgPool,
    // Filesystem path containing `NNN_name.sql` migration files.
    migrations_dir: String,
}
|
||||
|
||||
/// A single migration file loaded from disk.
#[derive(Debug)]
pub struct Migration {
    /// Numeric version parsed from the filename prefix
    /// (e.g. "001_add_ocr_queue.sql" -> 1).
    pub version: i32,
    /// Filename without the ".sql" extension.
    pub name: String,
    /// Raw SQL text to execute.
    pub sql: String,
}
|
||||
|
||||
impl MigrationRunner {
|
||||
pub fn new(pool: PgPool, migrations_dir: String) -> Self {
|
||||
Self {
|
||||
pool,
|
||||
migrations_dir,
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize the migrations table if it doesn't exist
|
||||
pub async fn init(&self) -> Result<()> {
|
||||
sqlx::query(
|
||||
r#"
|
||||
CREATE TABLE IF NOT EXISTS schema_migrations (
|
||||
version INTEGER PRIMARY KEY,
|
||||
name VARCHAR(255) NOT NULL,
|
||||
applied_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
"#
|
||||
)
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
|
||||
info!("Migration system initialized");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load all migration files from the migrations directory
|
||||
pub fn load_migrations(&self) -> Result<Vec<Migration>> {
|
||||
let mut migrations = Vec::new();
|
||||
let migrations_path = Path::new(&self.migrations_dir);
|
||||
|
||||
if !migrations_path.exists() {
|
||||
warn!("Migrations directory not found: {}", self.migrations_dir);
|
||||
return Ok(migrations);
|
||||
}
|
||||
|
||||
let mut entries: Vec<_> = fs::read_dir(migrations_path)?
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter(|entry| {
|
||||
entry.path().extension()
|
||||
.and_then(|s| s.to_str())
|
||||
.map(|s| s == "sql")
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort by filename to ensure proper order
|
||||
entries.sort_by(|a, b| a.file_name().cmp(&b.file_name()));
|
||||
|
||||
for entry in entries {
|
||||
let filename = entry.file_name().to_string_lossy().to_string();
|
||||
|
||||
// Parse version from filename (e.g., "001_add_ocr_queue.sql" -> version 1)
|
||||
if let Some(version_str) = filename.split('_').next() {
|
||||
if let Ok(version) = version_str.parse::<i32>() {
|
||||
let sql = fs::read_to_string(entry.path())?;
|
||||
let name = filename.replace(".sql", "");
|
||||
|
||||
migrations.push(Migration {
|
||||
version,
|
||||
name,
|
||||
sql,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
migrations.sort_by_key(|m| m.version);
|
||||
Ok(migrations)
|
||||
}
|
||||
|
||||
/// Get the list of applied migration versions
|
||||
pub async fn get_applied_migrations(&self) -> Result<Vec<i32>> {
|
||||
let rows = sqlx::query_scalar::<_, i32>("SELECT version FROM schema_migrations ORDER BY version")
|
||||
.fetch_all(&self.pool)
|
||||
.await?;
|
||||
Ok(rows)
|
||||
}
|
||||
|
||||
/// Check if a specific migration has been applied
|
||||
pub async fn is_migration_applied(&self, version: i32) -> Result<bool> {
|
||||
let count: i64 = sqlx::query_scalar(
|
||||
"SELECT COUNT(*) FROM schema_migrations WHERE version = $1"
|
||||
)
|
||||
.bind(version)
|
||||
.fetch_one(&self.pool)
|
||||
.await?;
|
||||
|
||||
Ok(count > 0)
|
||||
}
|
||||
|
||||
/// Apply a single migration
|
||||
pub async fn apply_migration(&self, migration: &Migration) -> Result<()> {
|
||||
info!("Applying migration {}: {}", migration.version, migration.name);
|
||||
|
||||
// Start a transaction
|
||||
let mut tx = self.pool.begin().await?;
|
||||
|
||||
// Execute the migration SQL
|
||||
sqlx::query(&migration.sql)
|
||||
.execute(&mut *tx)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error!("Failed to apply migration {}: {}", migration.version, e);
|
||||
e
|
||||
})?;
|
||||
|
||||
// Record the migration as applied
|
||||
sqlx::query(
|
||||
"INSERT INTO schema_migrations (version, name) VALUES ($1, $2)"
|
||||
)
|
||||
.bind(migration.version)
|
||||
.bind(&migration.name)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
|
||||
// Commit the transaction
|
||||
tx.commit().await?;
|
||||
|
||||
info!("Successfully applied migration {}: {}", migration.version, migration.name);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run all pending migrations
|
||||
pub async fn run_migrations(&self) -> Result<()> {
|
||||
// Initialize migration system
|
||||
self.init().await?;
|
||||
|
||||
// Load all migrations
|
||||
let migrations = self.load_migrations()?;
|
||||
if migrations.is_empty() {
|
||||
info!("No migrations found");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Get applied migrations
|
||||
let applied = self.get_applied_migrations().await?;
|
||||
|
||||
// Find pending migrations
|
||||
let pending: Vec<&Migration> = migrations
|
||||
.iter()
|
||||
.filter(|m| !applied.contains(&m.version))
|
||||
.collect();
|
||||
|
||||
if pending.is_empty() {
|
||||
info!("All migrations are up to date");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
info!("Found {} pending migrations", pending.len());
|
||||
|
||||
// Apply each pending migration
|
||||
for migration in pending {
|
||||
self.apply_migration(migration).await?;
|
||||
}
|
||||
|
||||
info!("All migrations completed successfully");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get migration status summary
|
||||
pub async fn get_status(&self) -> Result<MigrationStatus> {
|
||||
self.init().await?;
|
||||
|
||||
let migrations = self.load_migrations()?;
|
||||
let applied = self.get_applied_migrations().await?;
|
||||
|
||||
let pending_count = migrations
|
||||
.iter()
|
||||
.filter(|m| !applied.contains(&m.version))
|
||||
.count();
|
||||
|
||||
Ok(MigrationStatus {
|
||||
total_migrations: migrations.len(),
|
||||
applied_migrations: applied.len(),
|
||||
pending_migrations: pending_count,
|
||||
latest_version: migrations.last().map(|m| m.version),
|
||||
current_version: applied.last().copied(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Summary of migration state as reported by `MigrationRunner::get_status`.
#[derive(Debug)]
pub struct MigrationStatus {
    /// Number of migration files discovered on disk.
    pub total_migrations: usize,
    /// Number of migrations recorded as applied in the database.
    pub applied_migrations: usize,
    /// Number of discovered migrations not yet applied.
    pub pending_migrations: usize,
    /// Highest version found on disk, if any.
    pub latest_version: Option<i32>,
    /// Highest version applied to the database, if any.
    pub current_version: Option<i32>,
}
|
||||
|
||||
impl MigrationStatus {
|
||||
pub fn is_up_to_date(&self) -> bool {
|
||||
self.pending_migrations == 0
|
||||
}
|
||||
|
||||
pub fn needs_migration(&self) -> bool {
|
||||
self.pending_migrations > 0
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience function to run migrations at startup
|
||||
pub async fn run_startup_migrations(database_url: &str, migrations_dir: &str) -> Result<()> {
|
||||
let pool = sqlx::PgPool::connect(database_url).await?;
|
||||
let runner = MigrationRunner::new(pool, migrations_dir.to_string());
|
||||
|
||||
info!("Running database migrations...");
|
||||
runner.run_migrations().await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -50,6 +50,10 @@ pub struct Document {
|
|||
pub mime_type: String,
|
||||
pub content: Option<String>,
|
||||
pub ocr_text: Option<String>,
|
||||
pub ocr_confidence: Option<f32>,
|
||||
pub ocr_word_count: Option<i32>,
|
||||
pub ocr_processing_time_ms: Option<i32>,
|
||||
pub ocr_status: Option<String>,
|
||||
pub tags: Vec<String>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
|
|
@ -66,6 +70,10 @@ pub struct DocumentResponse {
|
|||
pub tags: Vec<String>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub has_ocr_text: bool,
|
||||
pub ocr_confidence: Option<f32>,
|
||||
pub ocr_word_count: Option<i32>,
|
||||
pub ocr_processing_time_ms: Option<i32>,
|
||||
pub ocr_status: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema, IntoParams)]
|
||||
|
|
@ -122,6 +130,10 @@ pub struct EnhancedDocumentResponse {
|
|||
pub tags: Vec<String>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub has_ocr_text: bool,
|
||||
pub ocr_confidence: Option<f32>,
|
||||
pub ocr_word_count: Option<i32>,
|
||||
pub ocr_processing_time_ms: Option<i32>,
|
||||
pub ocr_status: Option<String>,
|
||||
pub search_rank: Option<f32>,
|
||||
pub snippets: Vec<SearchSnippet>,
|
||||
}
|
||||
|
|
@ -145,6 +157,10 @@ impl From<Document> for DocumentResponse {
|
|||
tags: doc.tags,
|
||||
created_at: doc.created_at,
|
||||
has_ocr_text: doc.ocr_text.is_some(),
|
||||
ocr_confidence: doc.ocr_confidence,
|
||||
ocr_word_count: doc.ocr_word_count,
|
||||
ocr_processing_time_ms: doc.ocr_processing_time_ms,
|
||||
ocr_status: doc.ocr_status,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,6 +55,10 @@ async fn search_documents(
|
|||
tags: doc.tags,
|
||||
created_at: doc.created_at,
|
||||
has_ocr_text: doc.ocr_text.is_some(),
|
||||
ocr_confidence: doc.ocr_confidence,
|
||||
ocr_word_count: doc.ocr_word_count,
|
||||
ocr_processing_time_ms: doc.ocr_processing_time_ms,
|
||||
ocr_status: doc.ocr_status,
|
||||
search_rank: None,
|
||||
snippets: Vec::new(),
|
||||
}).collect(),
|
||||
|
|
|
|||
Loading…
Reference in New Issue