feat(server): implement ocr enhanced service throughout

2025-06-12 22:12:50 -07:00 · 2025-06-12 22:12:50 -07:00 · d61b1c3f4b
parent d5f419ca18
commit d61b1c3f4b
12 changed files with 561 additions and 6 deletions
--- a/4
+++ b/4
@ -24,6 +24,7 @@ RUN apt-get update && apt-get install -y \
 WORKDIR /app
 COPY Cargo.toml Cargo.lock ./
 COPY src ./src
+COPY migrations ./migrations
 RUN cargo build --release

 # --- Runtime stage ---
@ -41,6 +42,9 @@ WORKDIR /app
 # Copy backend binary
 COPY --from=backend-builder /app/target/release/readur /app/readur

+# Copy migrations directory
+COPY --from=backend-builder /app/migrations /app/migrations
+
 # Create necessary directories
 RUN mkdir -p /app/uploads /app/watch /app/frontend

--- a/frontend/src/components/Dashboard.tsx
+++ b/frontend/src/components/Dashboard.tsx
@ -2,6 +2,7 @@ import React, { useState, useEffect } from 'react'
 import FileUpload from './FileUpload'
 import DocumentList from './DocumentList'
 import SearchBar from './SearchBar'
+import OcrAnalytics from './OcrAnalytics'
 import { Document, documentService } from '../services/api'

 function Dashboard() {
@ -55,6 +56,12 @@ function Dashboard() {
        <SearchBar onSearch={handleSearch} />
      </div>

+      {!searchResults && (
+        <div className="mb-6">
+          <OcrAnalytics documents={documents} />
+        </div>
+      )}
+
      {searchResults && (
        <div className="mb-4">
          <button
--- a/frontend/src/components/DocumentList.tsx
+++ b/frontend/src/components/DocumentList.tsx
@ -42,6 +42,78 @@ function DocumentList({ documents, loading }: DocumentListProps) {
    return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]
  }

+  /**
+   * Builds the OCR badge rendered after a document's size and MIME type.
+   *
+   * Precedence:
+   * 1. `ocr_status === 'failed'`     -> red "OCR Failed"
+   * 2. `ocr_status === 'processing'` -> yellow "Processing..."
+   * 3. no OCR text                   -> no badge (`null`)
+   * 4. numeric `ocr_confidence`      -> "OCR NN%": green at >= 80, yellow at >= 60, orange below
+   * 5. otherwise                     -> plain green "OCR"
+   */
+  const getOcrStatusBadge = (document: Document) => {
+    const badgeBase = 'ml-2 inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium'
+    const status = document.ocr_status
+
+    // Status badges are decided before the has_ocr_text check: a failed or
+    // still-running job usually has produced no text, so testing has_ocr_text
+    // first would hide exactly the documents these badges exist for.
+    if (status === 'failed') {
+      return (
+        <span className={`${badgeBase} bg-red-100 text-red-800`}>
+          OCR Failed
+        </span>
+      )
+    }
+
+    if (status === 'processing') {
+      return (
+        <span className={`${badgeBase} bg-yellow-100 text-yellow-800`}>
+          Processing...
+        </span>
+      )
+    }
+
+    if (!document.has_ocr_text) {
+      return null
+    }
+
+    const confidence = document.ocr_confidence
+
+    // `!= null` rather than `!== undefined`: an absent value may arrive as
+    // JSON null, and calling toFixed on null would throw during render.
+    if (confidence != null) {
+      let badgeClass = 'bg-orange-100 text-orange-800'
+      if (confidence >= 80) {
+        badgeClass = 'bg-green-100 text-green-800'
+      } else if (confidence >= 60) {
+        badgeClass = 'bg-yellow-100 text-yellow-800'
+      }
+      const label = `OCR ${confidence.toFixed(0)}%`
+
+      return (
+        <span className={`${badgeBase} ${badgeClass}`}>
+          {label}
+        </span>
+      )
+    }
+
+    // OCR text exists but no confidence was reported.
+    return (
+      <span className={`${badgeBase} bg-green-100 text-green-800`}>
+        OCR
+      </span>
+    )
+  }
+
+  /**
+   * Formats the OCR word count (and processing time, when known) as a
+   * " • "-separated suffix for the document's detail line.
+   *
+   * Returns null when the document has no OCR text or no (non-zero) word count.
+   */
+  const getOcrMetrics = (document: Document) => {
+    const {
+      has_ocr_text: hasText,
+      ocr_word_count: wordCount,
+      ocr_processing_time_ms: elapsedMs,
+    } = document
+
+    if (!(hasText && wordCount)) {
+      return null
+    }
+
+    // The word count is always present past the guard, so the list is never empty.
+    const parts = [`${wordCount} words`]
+
+    if (elapsedMs) {
+      parts.push(`${(elapsedMs / 1000).toFixed(1)}s`)
+    }
+
+    return ` • ${parts.join(' • ')}`
+  }
+
  if (loading) {
    return (
      <div className="text-center py-8">
@ -74,11 +146,8 @@ function DocumentList({ documents, loading }: DocumentListProps) {
                  </div>
                  <div className="text-sm text-gray-500">
                    {formatFileSize(document.file_size)} • {document.mime_type}
-                    {document.has_ocr_text && (
-                      <span className="ml-2 inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-green-100 text-green-800">
-                        OCR
-                      </span>
-                    )}
+                    {getOcrMetrics(document)}
+                    {getOcrStatusBadge(document)}
                  </div>
                  <div className="text-xs text-gray-400">
                    {new Date(document.created_at).toLocaleDateString()}
--- a/frontend/src/components/OcrAnalytics.tsx
+++ b/frontend/src/components/OcrAnalytics.tsx
@ -0,0 +1,187 @@
+import React, { useState, useEffect } from 'react'
+import {
+  ChartBarIcon,
+  ClockIcon,
+  DocumentTextIcon,
+  ExclamationCircleIcon,
+} from '@heroicons/react/24/outline'
+import { Document } from '../services/api'
+
+/** Props for the OcrAnalytics summary card. */
+interface OcrAnalyticsProps {
+  /** Documents to aggregate OCR statistics over. */
+  documents: Document[]
+}
+
+/** Aggregated OCR figures derived from a list of documents. */
+interface OcrStats {
+  /** Number of documents passed in. */
+  totalDocuments: number
+  /** Documents whose `has_ocr_text` flag is set. */
+  documentsWithOcr: number
+  /** Mean `ocr_confidence` over completed documents; 0 when none report one. */
+  averageConfidence: number
+  /** Completed documents with confidence >= 80. */
+  highConfidenceCount: number
+  /** Completed documents with confidence < 60. */
+  lowConfidenceCount: number
+  /** Documents whose `ocr_status` is 'failed'. */
+  failedCount: number
+  /** Documents whose `ocr_status` is 'processing'. */
+  processingCount: number
+  /** Sum of `ocr_word_count` over completed documents. */
+  totalWords: number
+  /** Mean `ocr_processing_time_ms` over completed documents, in milliseconds; 0 when none report one. */
+  averageProcessingTime: number
+}
+
+/** OcrStats plus the success count needed for the success rate; internal to this component. */
+interface OcrOutcomeStats extends OcrStats {
+  succeededCount: number
+}
+
+/**
+ * Summary card of OCR results for the given documents: how many were
+ * processed, success rate, average confidence, words extracted, average
+ * processing time, and a quality/failure breakdown.
+ *
+ * Renders nothing when there are no documents or none of them has OCR text.
+ */
+function OcrAnalytics({ documents }: OcrAnalyticsProps) {
+  const [stats, setStats] = useState<OcrOutcomeStats | null>(null)
+
+  useEffect(() => {
+    if (documents.length === 0) {
+      setStats(null)
+      return
+    }
+
+    // `typeof` also rejects JSON null, which a `!== undefined` check would let
+    // through and which would then inflate the denominators of the averages.
+    const isNumber = (value: number | undefined): value is number => typeof value === 'number'
+    const mean = (values: number[]) =>
+      values.length > 0 ? values.reduce((sum, value) => sum + value, 0) / values.length : 0
+
+    const ocrDocuments = documents.filter(doc => doc.has_ocr_text)
+    const completedOcr = ocrDocuments.filter(doc => doc.ocr_status === 'completed')
+
+    // Failed and in-flight jobs are counted across ALL documents: such jobs
+    // normally have no OCR text yet, so counting only documents with text
+    // would report (almost) none of them.
+    const failedCount = documents.filter(doc => doc.ocr_status === 'failed').length
+    const processingCount = documents.filter(doc => doc.ocr_status === 'processing').length
+    const succeededCount = ocrDocuments.filter(doc => doc.ocr_status !== 'failed').length
+
+    const confidenceScores = completedOcr.map(doc => doc.ocr_confidence).filter(isNumber)
+    const wordCounts = completedOcr.map(doc => doc.ocr_word_count).filter(isNumber)
+    const processingTimes = completedOcr.map(doc => doc.ocr_processing_time_ms).filter(isNumber)
+
+    setStats({
+      totalDocuments: documents.length,
+      documentsWithOcr: ocrDocuments.length,
+      averageConfidence: mean(confidenceScores),
+      highConfidenceCount: confidenceScores.filter(conf => conf >= 80).length,
+      lowConfidenceCount: confidenceScores.filter(conf => conf < 60).length,
+      failedCount,
+      processingCount,
+      totalWords: wordCounts.reduce((sum, count) => sum + count, 0),
+      averageProcessingTime: mean(processingTimes),
+      succeededCount,
+    })
+  }, [documents])
+
+  if (!stats || stats.documentsWithOcr === 0) {
+    return null
+  }
+
+  const formatTime = (ms: number) => {
+    if (ms < 1000) return `${Math.round(ms)}ms`
+    return `${(ms / 1000).toFixed(1)}s`
+  }
+
+  const getConfidenceColor = (confidence: number) => {
+    if (confidence >= 80) return 'text-green-600'
+    if (confidence >= 60) return 'text-yellow-600'
+    return 'text-orange-600'
+  }
+
+  // Successes over finished attempts. The denominator is never 0 here: the
+  // guard above guarantees at least one document with OCR text, and each such
+  // document is counted either as succeeded or as failed.
+  const successRate = (stats.succeededCount / (stats.succeededCount + stats.failedCount)) * 100
+
+  return (
+    <div className="bg-white overflow-hidden shadow rounded-lg">
+      <div className="p-5">
+        <div className="flex items-center">
+          <div className="flex-shrink-0">
+            <ChartBarIcon className="h-6 w-6 text-gray-400" />
+          </div>
+          <div className="ml-5 w-0 flex-1">
+            <dl>
+              <dt className="text-sm font-medium text-gray-500 truncate">
+                OCR Analytics
+              </dt>
+              <dd className="text-lg font-medium text-gray-900">
+                {stats.documentsWithOcr} of {stats.totalDocuments} documents processed
+              </dd>
+            </dl>
+          </div>
+        </div>
+      </div>
+
+      <div className="bg-gray-50 px-5 py-3">
+        <div className="grid grid-cols-2 gap-4 sm:grid-cols-4">
+          {/* Success Rate */}
+          <div className="text-center">
+            <div className="text-lg font-semibold text-gray-900">
+              {successRate.toFixed(0)}%
+            </div>
+            <div className="text-xs text-gray-500">Success Rate</div>
+          </div>
+
+          {/* Average Confidence */}
+          <div className="text-center">
+            <div className={`text-lg font-semibold ${getConfidenceColor(stats.averageConfidence)}`}>
+              {stats.averageConfidence.toFixed(0)}%
+            </div>
+            <div className="text-xs text-gray-500">Avg Confidence</div>
+          </div>
+
+          {/* Total Words */}
+          <div className="text-center">
+            <div className="text-lg font-semibold text-gray-900">
+              {stats.totalWords.toLocaleString()}
+            </div>
+            <div className="text-xs text-gray-500">Words Extracted</div>
+          </div>
+
+          {/* Average Processing Time */}
+          <div className="text-center">
+            <div className="text-lg font-semibold text-gray-900">
+              {formatTime(stats.averageProcessingTime)}
+            </div>
+            <div className="text-xs text-gray-500">Avg Time</div>
+          </div>
+        </div>
+
+        {/* Quality Distribution */}
+        <div className="mt-4 pt-4 border-t border-gray-200">
+          <div className="flex justify-between items-center text-sm">
+            <div className="flex items-center space-x-4">
+              <div className="flex items-center">
+                <div className="w-2 h-2 bg-green-500 rounded-full mr-1"></div>
+                <span className="text-gray-600">High Quality: {stats.highConfidenceCount}</span>
+              </div>
+
+              {stats.lowConfidenceCount > 0 && (
+                <div className="flex items-center">
+                  <div className="w-2 h-2 bg-orange-500 rounded-full mr-1"></div>
+                  <span className="text-gray-600">Low Quality: {stats.lowConfidenceCount}</span>
+                </div>
+              )}
+
+              {stats.failedCount > 0 && (
+                <div className="flex items-center">
+                  <div className="w-2 h-2 bg-red-500 rounded-full mr-1"></div>
+                  <span className="text-gray-600">Failed: {stats.failedCount}</span>
+                </div>
+              )}
+
+              {stats.processingCount > 0 && (
+                <div className="flex items-center">
+                  <div className="w-2 h-2 bg-yellow-500 rounded-full mr-1 animate-pulse"></div>
+                  <span className="text-gray-600">Processing: {stats.processingCount}</span>
+                </div>
+              )}
+            </div>
+          </div>
+        </div>
+      </div>
+    </div>
+  )
+}
+
+export default OcrAnalytics
--- a/frontend/src/services/api.ts
+++ b/frontend/src/services/api.ts
@ -19,6 +19,10 @@ export interface Document {
  tags: string[]
  created_at: string
  has_ocr_text: boolean
+  ocr_confidence?: number
+  ocr_word_count?: number
+  ocr_processing_time_ms?: number
+  ocr_status?: string
 }

 export interface SearchRequest {
@ -53,6 +57,10 @@ export interface EnhancedDocument {
  tags: string[]
  created_at: string
  has_ocr_text: boolean
+  ocr_confidence?: number
+  ocr_word_count?: number
+  ocr_processing_time_ms?: number
+  ocr_status?: string
  search_rank?: number
  snippets: SearchSnippet[]
 }
--- a/src/db.rs
+++ b/src/db.rs
@ -351,6 +351,10 @@ impl Database {
            mime_type: row.get("mime_type"),
            content: row.get("content"),
            ocr_text: row.get("ocr_text"),
+            ocr_confidence: row.get("ocr_confidence"),
+            ocr_word_count: row.get("ocr_word_count"),
+            ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
+            ocr_status: row.get("ocr_status"),
            tags: row.get("tags"),
            created_at: row.get("created_at"),
            updated_at: row.get("updated_at"),
@ -385,6 +389,10 @@ impl Database {
                mime_type: row.get("mime_type"),
                content: row.get("content"),
                ocr_text: row.get("ocr_text"),
+                ocr_confidence: row.get("ocr_confidence"),
+                ocr_word_count: row.get("ocr_word_count"),
+                ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
+                ocr_status: row.get("ocr_status"),
                tags: row.get("tags"),
                created_at: row.get("created_at"),
                updated_at: row.get("updated_at"),
@ -419,6 +427,10 @@ impl Database {
                mime_type: row.get("mime_type"),
                content: row.get("content"),
                ocr_text: row.get("ocr_text"),
+                ocr_confidence: row.get("ocr_confidence"),
+                ocr_word_count: row.get("ocr_word_count"),
+                ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
+                ocr_status: row.get("ocr_status"),
                tags: row.get("tags"),
                created_at: row.get("created_at"),
                updated_at: row.get("updated_at"),
@ -483,6 +495,10 @@ impl Database {
                mime_type: row.get("mime_type"),
                content: row.get("content"),
                ocr_text: row.get("ocr_text"),
+                ocr_confidence: row.get("ocr_confidence"),
+                ocr_word_count: row.get("ocr_word_count"),
+                ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
+                ocr_status: row.get("ocr_status"),
                tags: row.get("tags"),
                created_at: row.get("created_at"),
                updated_at: row.get("updated_at"),
@ -637,6 +653,10 @@ impl Database {
                tags: row.get("tags"),
                created_at: row.get("created_at"),
                has_ocr_text: ocr_text.is_some(),
+                ocr_confidence: row.get("ocr_confidence"),
+                ocr_word_count: row.get("ocr_word_count"),
+                ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
+                ocr_status: row.get("ocr_status"),
                search_rank: Some(rank),
                snippets,
            });
--- a/src/file_service.rs
+++ b/src/file_service.rs
@ -58,6 +58,10 @@ impl FileService {
            mime_type: mime_type.to_string(),
            content: None,
            ocr_text: None,
+            ocr_confidence: None,
+            ocr_word_count: None,
+            ocr_processing_time_ms: None,
+            ocr_status: Some("pending".to_string()),
            tags: Vec::new(),
            created_at: Utc::now(),
            updated_at: Utc::now(),
--- a/src/lib.rs
+++ b/src/lib.rs
@ -2,8 +2,9 @@ pub mod auth;
 pub mod batch_ingest;
 pub mod config;
 pub mod db;
-pub mod enhanced_ocr; // Temporarily disabled due to compilation errors
+pub mod enhanced_ocr;
 pub mod file_service;
+pub mod migrations;
 pub mod models;
 pub mod ocr;
 pub mod ocr_queue;
--- a/src/main.rs
+++ b/src/main.rs
@ -43,6 +43,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
    
    db.migrate().await?;
    
+    // Run automatic migrations
+    if let Err(e) = readur::migrations::run_startup_migrations(&config.database_url, "migrations").await {
+        error!("Failed to run migrations: {}", e);
+        return Err(e.into());
+    }
+    
    // Seed admin user
    seed::seed_admin_user(&db).await?;
    
--- a/src/migrations.rs
+++ b/src/migrations.rs
@ -0,0 +1,229 @@
+use anyhow::Result;
+use sqlx::PgPool;
+use tracing::{info, warn, error};
+use std::fs;
+use std::path::Path;
+
+/// Applies the `.sql` files found in a directory to the database and records
+/// each applied version in the `schema_migrations` table.
+pub struct MigrationRunner {
+    // Connection pool every migration statement is executed on.
+    pool: PgPool,
+    // Directory scanned for `<version>_<name>.sql` files.
+    migrations_dir: String,
+}
+
+/// One migration file loaded from disk.
+#[derive(Debug)]
+pub struct Migration {
+    /// Numeric prefix of the file name (the part before the first `_`).
+    pub version: i32,
+    /// File name with the `.sql` suffix removed.
+    pub name: String,
+    /// Full contents of the migration file.
+    pub sql: String,
+}
+
+impl MigrationRunner {
+    /// Creates a runner that applies the migrations in `migrations_dir` using `pool`.
+    pub fn new(pool: PgPool, migrations_dir: String) -> Self {
+        MigrationRunner { pool, migrations_dir }
+    }
+
+    /// Ensure the `schema_migrations` bookkeeping table exists.
+    ///
+    /// Safe to call repeatedly: the statement is `CREATE TABLE IF NOT EXISTS`.
+    pub async fn init(&self) -> Result<()> {
+        let create_bookkeeping_table = sqlx::query(
+            r#"
+            CREATE TABLE IF NOT EXISTS schema_migrations (
+                version INTEGER PRIMARY KEY,
+                name VARCHAR(255) NOT NULL,
+                applied_at TIMESTAMPTZ DEFAULT NOW()
+            );
+            "#
+        );
+
+        create_bookkeeping_table.execute(&self.pool).await?;
+        info!("Migration system initialized");
+
+        Ok(())
+    }
+
+    /// Load all migration files from the migrations directory
+    ///
+    /// Only files with a `.sql` extension whose name starts with a numeric
+    /// version (e.g. `001_add_ocr_queue.sql` -> version 1) are loaded; other
+    /// `.sql` files are skipped with a warning. The result is sorted by version.
+    ///
+    /// Returns an empty list (with a warning) when the directory does not exist.
+    ///
+    /// # Errors
+    /// Fails when the directory or a migration file cannot be read, or when two
+    /// files carry the same version number.
+    pub fn load_migrations(&self) -> Result<Vec<Migration>> {
+        let migrations_path = Path::new(&self.migrations_dir);
+
+        if !migrations_path.exists() {
+            warn!("Migrations directory not found: {}", self.migrations_dir);
+            return Ok(Vec::new());
+        }
+
+        let mut migrations = Vec::new();
+
+        for entry in fs::read_dir(migrations_path)? {
+            // Propagate I/O errors: silently dropping an unreadable entry
+            // could leave a migration unapplied without anyone noticing.
+            let path = entry?.path();
+
+            if path.extension().and_then(|ext| ext.to_str()) != Some("sql") {
+                continue;
+            }
+
+            // file_stem removes only the trailing extension, unlike a global
+            // replace of ".sql" which would also mangle names containing it.
+            let name = match path.file_stem() {
+                Some(stem) => stem.to_string_lossy().into_owned(),
+                None => continue,
+            };
+
+            let version = name
+                .split('_')
+                .next()
+                .and_then(|prefix| prefix.parse::<i32>().ok());
+
+            let version = match version {
+                Some(version) => version,
+                None => {
+                    warn!(
+                        "Skipping migration file without a numeric version prefix: {}",
+                        path.display()
+                    );
+                    continue;
+                }
+            };
+
+            let sql = fs::read_to_string(&path)?;
+            migrations.push(Migration { version, name, sql });
+        }
+
+        migrations.sort_by_key(|m| m.version);
+
+        // `version` is the primary key of schema_migrations, so a duplicate
+        // would only surface later as a failed insert; report it up front.
+        if let Some(pair) = migrations.windows(2).find(|pair| pair[0].version == pair[1].version) {
+            anyhow::bail!(
+                "Duplicate migration version {}: {} and {}",
+                pair[0].version,
+                pair[0].name,
+                pair[1].name
+            );
+        }
+
+        Ok(migrations)
+    }
+
+    /// Fetch the versions recorded in `schema_migrations`, in ascending order.
+    pub async fn get_applied_migrations(&self) -> Result<Vec<i32>> {
+        let applied_versions: Vec<i32> =
+            sqlx::query_scalar("SELECT version FROM schema_migrations ORDER BY version")
+                .fetch_all(&self.pool)
+                .await?;
+
+        Ok(applied_versions)
+    }
+
+    /// Report whether `version` is already recorded in `schema_migrations`.
+    pub async fn is_migration_applied(&self, version: i32) -> Result<bool> {
+        let lookup = sqlx::query_scalar(
+            "SELECT COUNT(*) FROM schema_migrations WHERE version = $1"
+        )
+        .bind(version);
+
+        let matching_rows: i64 = lookup.fetch_one(&self.pool).await?;
+
+        Ok(matching_rows > 0)
+    }
+
+    /// Apply a single migration
+    ///
+    /// The migration SQL and the `schema_migrations` bookkeeping row are
+    /// written in one transaction, so a failure in either leaves the database
+    /// unchanged and the migration still pending.
+    ///
+    /// # Errors
+    /// Fails when the transaction cannot be started or committed, when the
+    /// migration SQL fails, or when the version cannot be recorded.
+    pub async fn apply_migration(&self, migration: &Migration) -> Result<()> {
+        info!("Applying migration {}: {}", migration.version, migration.name);
+
+        // Start a transaction
+        let mut tx = self.pool.begin().await?;
+
+        // Execute the migration SQL. Migration files usually hold several
+        // statements; `sqlx::query` would send them as one prepared statement,
+        // which Postgres rejects, so the file is run as raw SQL instead.
+        sqlx::raw_sql(&migration.sql)
+            .execute(&mut *tx)
+            .await
+            .map_err(|e| {
+                error!("Failed to apply migration {}: {}", migration.version, e);
+                e
+            })?;
+
+        // Record the migration as applied
+        sqlx::query(
+            "INSERT INTO schema_migrations (version, name) VALUES ($1, $2)"
+        )
+        .bind(migration.version)
+        .bind(&migration.name)
+        .execute(&mut *tx)
+        .await?;
+
+        // Commit the transaction
+        tx.commit().await?;
+
+        info!("Successfully applied migration {}: {}", migration.version, migration.name);
+        Ok(())
+    }
+
+    /// Apply, in version order, every migration on disk that is not yet
+    /// recorded in `schema_migrations`. Stops at the first failure.
+    pub async fn run_migrations(&self) -> Result<()> {
+        self.init().await?;
+
+        let available = self.load_migrations()?;
+        if available.is_empty() {
+            info!("No migrations found");
+            return Ok(());
+        }
+
+        let already_applied = self.get_applied_migrations().await?;
+
+        // Collect first so the pending count can be logged before any work starts.
+        let mut queue: Vec<&Migration> = Vec::new();
+        for candidate in &available {
+            if !already_applied.contains(&candidate.version) {
+                queue.push(candidate);
+            }
+        }
+
+        match queue.len() {
+            0 => info!("All migrations are up to date"),
+            outstanding => {
+                info!("Found {} pending migrations", outstanding);
+
+                for step in queue {
+                    self.apply_migration(step).await?;
+                }
+
+                info!("All migrations completed successfully");
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Summarize how the migrations on disk compare with those recorded as applied.
+    ///
+    /// Creates the bookkeeping table first if it does not exist yet.
+    pub async fn get_status(&self) -> Result<MigrationStatus> {
+        self.init().await?;
+
+        let on_disk = self.load_migrations()?;
+        let applied = self.get_applied_migrations().await?;
+
+        let mut outstanding = 0usize;
+        for migration in &on_disk {
+            if !applied.contains(&migration.version) {
+                outstanding += 1;
+            }
+        }
+
+        // Both lists are sorted ascending, so `last()` is the highest version.
+        let latest_version = on_disk.last().map(|m| m.version);
+        let current_version = applied.last().copied();
+
+        let status = MigrationStatus {
+            total_migrations: on_disk.len(),
+            applied_migrations: applied.len(),
+            pending_migrations: outstanding,
+            latest_version,
+            current_version,
+        };
+
+        Ok(status)
+    }
+}
+
+/// Snapshot of migration progress as produced by `MigrationRunner::get_status`.
+#[derive(Debug)]
+pub struct MigrationStatus {
+    /// Number of migrations loaded from the migrations directory.
+    pub total_migrations: usize,
+    /// Number of versions recorded in `schema_migrations`.
+    pub applied_migrations: usize,
+    /// Number of loaded migrations whose version is not recorded as applied.
+    pub pending_migrations: usize,
+    /// Highest version found on disk, if any.
+    pub latest_version: Option<i32>,
+    /// Highest version recorded as applied, if any.
+    pub current_version: Option<i32>,
+}
+
+impl MigrationStatus {
+    /// True when no loaded migration is waiting to be applied.
+    pub fn is_up_to_date(&self) -> bool {
+        !self.needs_migration()
+    }
+
+    /// True when at least one loaded migration has not been applied yet.
+    pub fn needs_migration(&self) -> bool {
+        self.pending_migrations != 0
+    }
+}
+
+/// Convenience function to run migrations at startup
+///
+/// Opens a dedicated connection pool for `database_url`, applies every pending
+/// migration found in `migrations_dir`, and closes the pool again before
+/// returning, on success and on failure alike.
+///
+/// # Errors
+/// Fails when the database cannot be reached or when any migration fails.
+pub async fn run_startup_migrations(database_url: &str, migrations_dir: &str) -> Result<()> {
+    let pool = sqlx::PgPool::connect(database_url).await?;
+    // The pool handle is cheap to clone; keep one here so it can be closed below.
+    let runner = MigrationRunner::new(pool.clone(), migrations_dir.to_string());
+
+    info!("Running database migrations...");
+    let outcome = runner.run_migrations().await;
+
+    // This pool exists only for the migration run, so release its connections
+    // gracefully instead of leaving them to be torn down on drop.
+    pool.close().await;
+
+    outcome
+}
--- a/src/models.rs
+++ b/src/models.rs
@ -50,6 +50,10 @@ pub struct Document {
    pub mime_type: String,
    pub content: Option<String>,
    pub ocr_text: Option<String>,
+    pub ocr_confidence: Option<f32>,
+    pub ocr_word_count: Option<i32>,
+    pub ocr_processing_time_ms: Option<i32>,
+    pub ocr_status: Option<String>,
    pub tags: Vec<String>,
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
@ -66,6 +70,10 @@ pub struct DocumentResponse {
    pub tags: Vec<String>,
    pub created_at: DateTime<Utc>,
    pub has_ocr_text: bool,
+    pub ocr_confidence: Option<f32>,
+    pub ocr_word_count: Option<i32>,
+    pub ocr_processing_time_ms: Option<i32>,
+    pub ocr_status: Option<String>,
 }

 #[derive(Debug, Serialize, Deserialize, ToSchema, IntoParams)]
@ -122,6 +130,10 @@ pub struct EnhancedDocumentResponse {
    pub tags: Vec<String>,
    pub created_at: DateTime<Utc>,
    pub has_ocr_text: bool,
+    pub ocr_confidence: Option<f32>,
+    pub ocr_word_count: Option<i32>,
+    pub ocr_processing_time_ms: Option<i32>,
+    pub ocr_status: Option<String>,
    pub search_rank: Option<f32>,
    pub snippets: Vec<SearchSnippet>,
 }
@ -145,6 +157,10 @@ impl From<Document> for DocumentResponse {
            tags: doc.tags,
            created_at: doc.created_at,
            has_ocr_text: doc.ocr_text.is_some(),
+            ocr_confidence: doc.ocr_confidence,
+            ocr_word_count: doc.ocr_word_count,
+            ocr_processing_time_ms: doc.ocr_processing_time_ms,
+            ocr_status: doc.ocr_status,
        }
    }
 }
--- a/src/routes/search.rs
+++ b/src/routes/search.rs
@ -55,6 +55,10 @@ async fn search_documents(
            tags: doc.tags,
            created_at: doc.created_at,
            has_ocr_text: doc.ocr_text.is_some(),
+            ocr_confidence: doc.ocr_confidence,
+            ocr_word_count: doc.ocr_word_count,
+            ocr_processing_time_ms: doc.ocr_processing_time_ms,
+            ocr_status: doc.ocr_status,
            search_rank: None,
            snippets: Vec::new(),
        }).collect(),