diff --git a/frontend/src/components/OcrLanguageSelector/OcrLanguageSelector.tsx b/frontend/src/components/OcrLanguageSelector/OcrLanguageSelector.tsx new file mode 100644 index 0000000..c90da84 --- /dev/null +++ b/frontend/src/components/OcrLanguageSelector/OcrLanguageSelector.tsx @@ -0,0 +1,178 @@ +import React, { useState, useEffect } from 'react'; +import { + FormControl, + InputLabel, + Select, + MenuItem, + Typography, + Box, + Chip, + CircularProgress, + Alert, + SelectChangeEvent, +} from '@mui/material'; +import { Language as LanguageIcon } from '@mui/icons-material'; +import { ocrService, LanguageInfo } from '../../services/api'; + +interface OcrLanguageSelectorProps { + value?: string; + onChange: (language: string) => void; + label?: string; + size?: 'small' | 'medium'; + fullWidth?: boolean; + disabled?: boolean; + showCurrentIndicator?: boolean; + required?: boolean; + helperText?: string; +} + +const OcrLanguageSelector: React.FC = ({ + value = '', + onChange, + label = 'OCR Language', + size = 'medium', + fullWidth = true, + disabled = false, + showCurrentIndicator = true, + required = false, + helperText, +}) => { + const [languages, setLanguages] = useState([]); + const [currentUserLanguage, setCurrentUserLanguage] = useState('eng'); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(''); + + useEffect(() => { + fetchLanguages(); + }, []); + + const fetchLanguages = async () => { + try { + setLoading(true); + setError(''); + const response = await ocrService.getAvailableLanguages(); + setLanguages(response.data.available_languages); + setCurrentUserLanguage(response.data.current_user_language); + + // If no value is set, default to user's current language + if (!value) { + onChange(response.data.current_user_language); + } + } catch (err: any) { + setError(err.response?.data?.message || 'Failed to load OCR languages'); + // Fallback to English if API fails + setLanguages([ + { code: 'eng', name: 'English', installed: true } + ]); + if (!value) { + onChange('eng'); + } + } finally { + setLoading(false); + } + }; + + const handleChange = (event: SelectChangeEvent) => { + onChange(event.target.value); + }; + + const getLanguageDisplay = (langCode: string) => { + const language = languages.find(lang => lang.code === langCode); + return language ? language.name : langCode; + }; + + if (loading) { + return ( + + {label} + + + + Loading languages... + + + + ); + } + + if (error) { + return ( + + + Retry + + } + > + {error} + + + {label} + + + + ); + } + + return ( + + + {label} + + {helperText && ( + + {helperText} + + )} + + + {showCurrentIndicator && languages.length > 0 && ( + + {languages.length} language{languages.length !== 1 ? 's' : ''} available + {value && value !== currentUserLanguage && ( + • Selecting "{getLanguageDisplay(value)}" will update your default language + )} + + )} + + ); +}; + +export default OcrLanguageSelector; \ No newline at end of file diff --git a/frontend/src/components/OcrLanguageSelector/index.ts b/frontend/src/components/OcrLanguageSelector/index.ts new file mode 100644 index 0000000..3ad75a1 --- /dev/null +++ b/frontend/src/components/OcrLanguageSelector/index.ts @@ -0,0 +1 @@ +export { default } from './OcrLanguageSelector'; \ No newline at end of file diff --git a/frontend/src/components/OcrRetryDialog/OcrRetryDialog.tsx b/frontend/src/components/OcrRetryDialog/OcrRetryDialog.tsx new file mode 100644 index 0000000..4495fe5 --- /dev/null +++ b/frontend/src/components/OcrRetryDialog/OcrRetryDialog.tsx @@ -0,0 +1,158 @@ +import React, { useState } from 'react'; +import { + Dialog, + DialogTitle, + DialogContent, + DialogActions, + Button, + Box, + Typography, + CircularProgress, + Alert, + Divider, +} from '@mui/material'; +import { Refresh as RefreshIcon, Language as LanguageIcon } from '@mui/icons-material'; +import OcrLanguageSelector from '../OcrLanguageSelector'; +import { ocrService } from '../../services/api'; + +interface OcrRetryDialogProps { + open: boolean; + onClose: () => void; + document: { + id: string; + filename: string; + original_filename: string; + failure_category: string; + ocr_error: string; + retry_count: number; + } | null; + onRetrySuccess: (message: string) => void; + onRetryError: (message: string) => void; +} + +const OcrRetryDialog: React.FC = ({ + open, + onClose, + document, + onRetrySuccess, + onRetryError, +}) => { + const [selectedLanguage, setSelectedLanguage] = useState(''); + const [retrying, setRetrying] = useState(false); + + const handleRetry = async () => { + if (!document) return; + + try { + setRetrying(true); + const response = await ocrService.retryWithLanguage( + document.id, + selectedLanguage || undefined + ); + + if (response.data.success) { + const waitTime = response.data.estimated_wait_minutes || 'Unknown'; + const languageInfo = selectedLanguage ? ` with language "${selectedLanguage}"` : ''; + onRetrySuccess( + `OCR retry queued for "${document.filename}"${languageInfo}. Estimated wait time: ${waitTime} minutes.` + ); + onClose(); + } else { + onRetryError(response.data.message || 'Failed to retry OCR'); + } + } catch (error: any) { + console.error('Failed to retry OCR:', error); + onRetryError( + error.response?.data?.message || 'Failed to retry OCR processing' + ); + } finally { + setRetrying(false); + } + }; + + const handleClose = () => { + if (!retrying) { + setSelectedLanguage(''); + onClose(); + } + }; + + if (!document) return null; + + return ( + + + + + Retry OCR Processing + + + + + + + Document: {document.original_filename} + + + Previous attempts: {document.retry_count} + + + {document.failure_category && ( + + + Previous failure: {document.failure_category} + + {document.ocr_error && ( + + {document.ocr_error} + + )} + + )} + + + + + + + + OCR Language Selection + + + Choose a different language if the previous OCR attempt used the wrong language for this document. + + + + + + + The retry will use enhanced OCR processing and may take several minutes depending on document size and complexity. + + + + + + + + + + ); +}; + +export default OcrRetryDialog; \ No newline at end of file diff --git a/frontend/src/components/OcrRetryDialog/index.ts b/frontend/src/components/OcrRetryDialog/index.ts new file mode 100644 index 0000000..3211f47 --- /dev/null +++ b/frontend/src/components/OcrRetryDialog/index.ts @@ -0,0 +1 @@ +export { default } from './OcrRetryDialog'; \ No newline at end of file diff --git a/frontend/src/pages/SettingsPage.tsx b/frontend/src/pages/SettingsPage.tsx index b3be669..d0fef18 100644 --- a/frontend/src/pages/SettingsPage.tsx +++ b/frontend/src/pages/SettingsPage.tsx @@ -42,6 +42,7 @@ import { Edit as EditIcon, Delete as DeleteIcon, Add as AddIcon, Pause as PauseIcon, Stop as StopIcon } from '@mui/icons-material'; import { useAuth } from '../contexts/AuthContext'; import api, { queueService } from '../services/api'; +import OcrLanguageSelector from '../components/OcrLanguageSelector'; interface User { id: string; @@ -112,10 +113,6 @@ interface UserFormData { password: string; } -interface OcrLanguage { - code: string; - name: string; -} interface WebDAVFolderInfo { path: string; @@ -233,23 +230,6 @@ const SettingsPage: React.FC = () => { const [ocrStatus, setOcrStatus] = useState<{ is_paused: boolean; status: 'paused' | 'running' } | null>(null); const [ocrActionLoading, setOcrActionLoading] = useState(false); - const ocrLanguages: OcrLanguage[] = [ - { code: 'eng', name: 'English' }, - { code: 'spa', name: 'Spanish' }, - { code: 'fra', name: 'French' }, - { code: 'deu', name: 'German' }, - { code: 'ita', name: 'Italian' }, - { code: 'por', name: 'Portuguese' }, - { code: 'rus', name: 'Russian' }, - { code: 'jpn', name: 'Japanese' }, - { code: 'chi_sim', name: 'Chinese (Simplified)' }, - { code: 'chi_tra', name: 'Chinese (Traditional)' }, - { code: 'kor', name: 'Korean' }, - { code: 'ara', name: 'Arabic' }, - { code: 'hin', name: 'Hindi' }, - { code: 'nld', name: 'Dutch' }, - { code: 'pol', name: 'Polish' }, - ]; useEffect(() => { fetchSettings(); @@ -415,9 +395,6 @@ const SettingsPage: React.FC = () => { setTabValue(newValue); }; - const handleOcrLanguageChange = (event: SelectChangeEvent): void => { - handleSettingsChange('ocrLanguage', event.target.value); - }; const handleCpuPriorityChange = (event: SelectChangeEvent): void => { handleSettingsChange('cpuPriority', event.target.value); @@ -501,21 +478,13 @@ const SettingsPage: React.FC = () => { - - OCR Language - - + handleSettingsChange('ocrLanguage', language)} + disabled={loading} + showCurrentIndicator={false} + helperText="Default language for OCR text extraction from your documents" + /> { return api.get('/queue/stats') @@ -292,4 +307,22 @@ export const queueService = { resumeOcr: () => { return api.post('/queue/resume') }, +} + +export const ocrService = { + getAvailableLanguages: () => { + return api.get('/ocr/languages') + }, + + getHealthStatus: () => { + return api.get('/ocr/health') + }, + + retryWithLanguage: (documentId: string, language?: string) => { + const data: RetryOcrRequest = {} + if (language) { + data.language = language + } + return api.post(`/documents/${documentId}/retry-ocr`, data) + }, } \ No newline at end of file diff --git a/src/db/settings.rs b/src/db/settings.rs index 7e38f5c..e9167f9 100644 --- a/src/db/settings.rs +++ b/src/db/settings.rs @@ -384,4 +384,25 @@ impl Database { updated_at: row.get("updated_at"), }) } + + pub async fn update_user_ocr_language(&self, user_id: Uuid, language: &str) -> Result<()> { + self.with_retry(|| async { + sqlx::query( + r#" + INSERT INTO settings (user_id, ocr_language) + VALUES ($1, $2) + ON CONFLICT (user_id) DO UPDATE SET + ocr_language = $2, + updated_at = NOW() + "# + ) + .bind(user_id) + .bind(language) + .execute(&self.pool) + .await + .map_err(|e| anyhow::anyhow!("Failed to update OCR language: {}", e))?; + + Ok(()) + }).await + } } \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index db4d014..f28ea0e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -436,6 +436,7 @@ async fn main() -> anyhow::Result<()> { .nest("/api/metrics", readur::routes::metrics::router()) .nest("/metrics", readur::routes::prometheus_metrics::router()) .nest("/api/notifications", readur::routes::notifications::router()) + .nest("/api/ocr", readur::routes::ocr::router()) .nest("/api/queue", readur::routes::queue::router()) .nest("/api/search", readur::routes::search::router()) .nest("/api/settings", readur::routes::settings::router()) diff --git a/src/ocr/api.rs b/src/ocr/api.rs index e7a8087..8ce3b47 100644 --- a/src/ocr/api.rs +++ b/src/ocr/api.rs @@ -7,33 +7,43 @@ use axum::{ response::Json, }; use serde::{Deserialize, Serialize}; +use std::sync::Arc; -#[derive(Serialize)] +#[derive(Serialize, utoipa::ToSchema)] pub struct OcrHealthResponse { - status: String, - tesseract_installed: bool, - available_languages: Vec, - diagnostics: Option, - errors: Vec, + pub status: String, + pub tesseract_installed: bool, + pub available_languages: Vec, + pub diagnostics: Option, + pub errors: Vec, } -#[derive(Serialize)] +#[derive(Serialize, utoipa::ToSchema)] pub struct OcrErrorResponse { - error: String, - error_code: String, - details: Option, - is_recoverable: bool, + pub error: String, + pub error_code: String, + pub details: Option, + pub is_recoverable: bool, } -#[derive(Deserialize)] +#[derive(Deserialize, utoipa::ToSchema)] pub struct OcrRequest { - file_path: String, - language: Option, - use_fallback: Option, + pub file_path: String, + pub language: Option, + pub use_fallback: Option, } +#[utoipa::path( + get, + path = "/api/ocr/health", + tag = "ocr", + responses( + (status = 200, description = "OCR service health status", body = OcrHealthResponse), + (status = 500, description = "OCR service is unhealthy", body = OcrErrorResponse) + ) +)] pub async fn health_check( - State(_state): State, + State(_state): State>, ) -> Result, (StatusCode, Json)> { let service = EnhancedOcrService::new(); let diagnostics = service.get_diagnostics().await; @@ -72,8 +82,19 @@ pub async fn health_check( } } +#[utoipa::path( + post, + path = "/api/ocr/perform", + tag = "ocr", + request_body = OcrRequest, + responses( + (status = 200, description = "OCR text extraction successful", body = serde_json::Value), + (status = 400, description = "Bad request or invalid language", body = OcrErrorResponse), + (status = 500, description = "OCR processing failed", body = OcrErrorResponse) + ) +)] pub async fn perform_ocr( - State(_state): State, + State(_state): State>, Json(request): Json, ) -> Result, (StatusCode, Json)> { let service = EnhancedOcrService::new(); diff --git a/src/ocr/health.rs b/src/ocr/health.rs index e313d7e..77144de 100644 --- a/src/ocr/health.rs +++ b/src/ocr/health.rs @@ -73,11 +73,8 @@ impl OcrHealthChecker { }) } - pub fn get_available_languages(&self) -> Vec { - let tessdata_path = match self.get_tessdata_path() { - Ok(path) => path, - Err(_) => return vec![], - }; + pub fn get_available_languages(&self) -> Result, OcrError> { + let tessdata_path = self.get_tessdata_path()?; let mut languages = vec![]; if let Ok(entries) = std::fs::read_dir(&tessdata_path) { @@ -92,7 +89,18 @@ impl OcrHealthChecker { } languages.sort(); - languages + Ok(languages) + } + + pub fn validate_language(&self, lang: &str) -> Result<(), OcrError> { + // Check if language is supported + let available_languages = self.get_available_languages()?; + if !available_languages.contains(&lang.to_string()) { + return Err(OcrError::LanguageDataNotFound { + lang: lang.to_string(), + }); + } + Ok(()) } pub fn check_cpu_features(&self) -> CpuFeatures { @@ -240,7 +248,7 @@ impl OcrHealthChecker { pub fn get_full_diagnostics(&self) -> OcrDiagnostics { OcrDiagnostics { tesseract_version: self.check_tesseract_installation().ok(), - available_languages: self.get_available_languages(), + available_languages: self.get_available_languages().unwrap_or_else(|_| vec![]), tessdata_path: self.get_tessdata_path().ok(), cpu_features: self.check_cpu_features(), memory_available_mb: self.check_memory_available(), diff --git a/src/routes/documents.rs b/src/routes/documents.rs index 5082b1d..e0023cc 100644 --- a/src/routes/documents.rs +++ b/src/routes/documents.rs @@ -26,6 +26,11 @@ struct PaginationQuery { ocr_status: Option, } +#[derive(Deserialize, ToSchema)] +pub struct RetryOcrRequest { + pub language: Option, +} + #[derive(Deserialize, ToSchema)] struct FailedDocumentsQuery { limit: Option, @@ -152,6 +157,7 @@ async fn upload_document( .unwrap_or_else(|| crate::models::Settings::default()); let mut label_ids: Option> = None; + let mut ocr_language: Option = None; // First pass: collect all multipart fields while let Some(field) = multipart.next_field().await.map_err(|_| StatusCode::BAD_REQUEST)? { @@ -172,6 +178,23 @@ async fn upload_document( tracing::warn!("Failed to parse label_ids from upload: {} - Error: {}", label_ids_text, e); } } + } else if name == "ocr_language" { + let language = field.text().await.map_err(|_| StatusCode::BAD_REQUEST)?; + if !language.trim().is_empty() { + // Validate that the language is available + let health_checker = crate::ocr::health::OcrHealthChecker::new(); + match health_checker.validate_language(language.trim()) { + Ok(_) => { + ocr_language = Some(language.trim().to_string()); + tracing::info!("OCR language specified and validated: {}", language); + } + Err(e) => { + tracing::warn!("Invalid OCR language specified '{}': {}", language, e); + // Return early with bad request for invalid language + return Err(StatusCode::BAD_REQUEST); + } + } + } } else if name == "file" { let filename = field .file_name() @@ -214,6 +237,15 @@ async fn upload_document( let enable_background_ocr = settings.enable_background_ocr; if enable_background_ocr && should_queue_ocr { + // If a language was specified, update the user's OCR language setting for this session + if let Some(lang) = &ocr_language { + if let Err(e) = state.db.update_user_ocr_language(auth_user.user.id, lang).await { + tracing::warn!("Failed to update user OCR language to {}: {}", lang, e); + } else { + tracing::info!("Updated user {} OCR language to: {}", auth_user.user.id, lang); + } + } + // Use the shared queue service from AppState instead of creating a new one // Calculate priority based on file size let priority = match saved_document.file_size { @@ -550,6 +582,7 @@ async fn get_processed_image( params( ("id" = uuid::Uuid, Path, description = "Document ID") ), + request_body(content = RetryOcrRequest, description = "OCR retry options"), responses( (status = 200, description = "OCR retry queued successfully", body = String), (status = 404, description = "Document not found"), @@ -561,6 +594,7 @@ async fn retry_ocr( State(state): State>, auth_user: AuthUser, Path(document_id): Path, + Json(request): Json, ) -> Result, StatusCode> { // Check if document exists and belongs to user let document = state @@ -617,12 +651,36 @@ async fn retry_ocr( _ => 6, // > 50MB: lowest priority }; + // If a language was specified, validate and update the user's OCR language setting + if let Some(lang) = &request.language { + // Validate that the language is available + let health_checker = crate::ocr::health::OcrHealthChecker::new(); + match health_checker.validate_language(lang) { + Ok(_) => { + if let Err(e) = state.db.update_user_ocr_language(auth_user.user.id, lang).await { + tracing::warn!("Failed to update user OCR language to {}: {}", lang, e); + } else { + tracing::info!("Updated user {} OCR language to: {} for retry", auth_user.user.id, lang); + } + } + Err(e) => { + tracing::warn!("Invalid OCR language specified '{}' for retry: {}", lang, e); + return Ok(Json(serde_json::json!({ + "success": false, + "message": format!("Invalid OCR language '{}': {}", lang, e), + "error_code": "INVALID_LANGUAGE" + }))); + } + } + } + // Add to OCR queue with detailed logging match state.queue_service.enqueue_document(document_id, priority, document.file_size).await { Ok(queue_id) => { + let language_info = request.language.as_ref().map(|l| format!(" with language: {}", l)).unwrap_or_default(); tracing::info!( - "OCR retry queued for document {} ({}): queue_id={}, priority={}, size={}", - document_id, document.filename, queue_id, priority, document.file_size + "OCR retry queued for document {} ({}): queue_id={}, priority={}, size={}{}", + document_id, document.filename, queue_id, priority, document.file_size, language_info ); Ok(Json(serde_json::json!({ @@ -631,6 +689,7 @@ async fn retry_ocr( "queue_id": queue_id, "document_id": document_id, "priority": priority, + "language": request.language, "estimated_wait_minutes": calculate_estimated_wait_time(priority).await }))) } diff --git a/src/routes/mod.rs b/src/routes/mod.rs index 6b0a01d..8a81dc7 100644 --- a/src/routes/mod.rs +++ b/src/routes/mod.rs @@ -4,6 +4,7 @@ pub mod ignored_files; pub mod labels; pub mod metrics; pub mod notifications; +pub mod ocr; pub mod prometheus_metrics; pub mod queue; pub mod search; diff --git a/src/routes/ocr.rs b/src/routes/ocr.rs new file mode 100644 index 0000000..a7687b9 --- /dev/null +++ b/src/routes/ocr.rs @@ -0,0 +1,173 @@ +use axum::{ + extract::State, + http::StatusCode, + response::Json, + routing::get, + Router, +}; +use serde::Serialize; +use std::sync::Arc; +use utoipa::ToSchema; + +use crate::{ + auth::AuthUser, + ocr::health::OcrHealthChecker, + AppState, +}; + +#[derive(Serialize, ToSchema)] +pub struct AvailableLanguagesResponse { + pub available_languages: Vec, + pub current_user_language: String, +} + +#[derive(Serialize, ToSchema)] +pub struct LanguageInfo { + pub code: String, + pub name: String, + pub installed: bool, +} + +pub fn router() -> Router> { + Router::new() + .route("/health", get(crate::ocr::api::health_check)) + .route("/perform", axum::routing::post(crate::ocr::api::perform_ocr)) + .route("/languages", get(get_available_languages)) +} + +#[utoipa::path( + get, + path = "/api/ocr/languages", + tag = "ocr", + security( + ("bearer_auth" = []) + ), + responses( + (status = 200, description = "Available OCR languages and user's current language", body = AvailableLanguagesResponse), + (status = 401, description = "Unauthorized"), + (status = 500, description = "Internal server error") + ) +)] +async fn get_available_languages( + State(state): State>, + auth_user: AuthUser, +) -> Result, StatusCode> { + // Get user's current OCR language setting + let user_settings = state + .db + .get_user_settings(auth_user.user.id) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let current_language = user_settings + .map(|s| s.ocr_language) + .unwrap_or_else(|| "eng".to_string()); + + // Get available languages from Tesseract + let health_checker = OcrHealthChecker::new(); + let available_languages = match health_checker.get_available_languages() { + Ok(langs) => langs, + Err(_) => { + // Fallback to common languages if detection fails + vec!["eng".to_string()] + } + }; + + // Create language info with display names + let language_info: Vec = available_languages + .into_iter() + .map(|code| LanguageInfo { + name: get_language_display_name(&code), + installed: true, // If it's returned by get_available_languages, it's installed + code, + }) + .collect(); + + Ok(Json(AvailableLanguagesResponse { + available_languages: language_info, + current_user_language: current_language, + })) +} + +/// Convert language codes to human-readable names +fn get_language_display_name(code: &str) -> String { + match code { + "eng" => "English", + "spa" => "Spanish", + "fra" => "French", + "deu" => "German", + "ita" => "Italian", + "por" => "Portuguese", + "rus" => "Russian", + "jpn" => "Japanese", + "chi_sim" => "Chinese (Simplified)", + "chi_tra" => "Chinese (Traditional)", + "kor" => "Korean", + "ara" => "Arabic", + "hin" => "Hindi", + "tha" => "Thai", + "vie" => "Vietnamese", + "pol" => "Polish", + "nld" => "Dutch", + "dan" => "Danish", + "nor" => "Norwegian", + "swe" => "Swedish", + "fin" => "Finnish", + "ces" => "Czech", + "hun" => "Hungarian", + "tur" => "Turkish", + "heb" => "Hebrew", + "ukr" => "Ukrainian", + "bul" => "Bulgarian", + "ron" => "Romanian", + "hrv" => "Croatian", + "slk" => "Slovak", + "slv" => "Slovenian", + "est" => "Estonian", + "lav" => "Latvian", + "lit" => "Lithuanian", + "ell" => "Greek", + "cat" => "Catalan", + "eus" => "Basque", + "gla" => "Scottish Gaelic", + "gle" => "Irish", + "cym" => "Welsh", + "isl" => "Icelandic", + "mlt" => "Maltese", + "afr" => "Afrikaans", + "sqi" => "Albanian", + "aze" => "Azerbaijani", + "bel" => "Belarusian", + "ben" => "Bengali", + "bos" => "Bosnian", + "bre" => "Breton", + "kan" => "Kannada", + "kat" => "Georgian", + "kaz" => "Kazakh", + "kir" => "Kyrgyz", + "lao" => "Lao", + "lat" => "Latin", + "ltz" => "Luxembourgish", + "mkd" => "Macedonian", + "msa" => "Malay", + "mal" => "Malayalam", + "mar" => "Marathi", + "nep" => "Nepali", + "ori" => "Odia", + "pan" => "Punjabi", + "pus" => "Pashto", + "fas" => "Persian", + "san" => "Sanskrit", + "sin" => "Sinhala", + "srp" => "Serbian", + "tam" => "Tamil", + "tel" => "Telugu", + "tgk" => "Tajik", + "uzb" => "Uzbek", + "urd" => "Urdu", + _ => { + // For unknown codes, just return the code as-is + code + } + }.to_string() +} \ No newline at end of file diff --git a/src/swagger.rs b/src/swagger.rs index 673a06a..b9dbb57 100644 --- a/src/swagger.rs +++ b/src/swagger.rs @@ -109,6 +109,10 @@ use crate::{ crate::routes::webdav::get_webdav_sync_status, crate::routes::webdav::test_webdav_connection, crate::routes::webdav::estimate_webdav_crawl, + // OCR endpoints + crate::routes::ocr::get_available_languages, + crate::ocr::api::health_check, + crate::ocr::api::perform_ocr, // Ignored files endpoints crate::routes::ignored_files::list_ignored_files, crate::routes::ignored_files::get_ignored_file, @@ -136,7 +140,10 @@ use crate::{ Label, CreateLabel, UpdateLabel, LabelAssignment, LabelQuery, LabelBulkUpdateRequest, // Document schemas BulkDeleteRequest, DocumentListResponse, DocumentOcrResponse, DocumentOperationResponse, - BulkDeleteResponse, PaginationInfo, DocumentDuplicatesResponse + BulkDeleteResponse, PaginationInfo, DocumentDuplicatesResponse, crate::routes::documents::RetryOcrRequest, + // OCR schemas + crate::routes::ocr::AvailableLanguagesResponse, crate::routes::ocr::LanguageInfo, + crate::ocr::api::OcrHealthResponse, crate::ocr::api::OcrErrorResponse, crate::ocr::api::OcrRequest ) ), tags( @@ -152,6 +159,7 @@ use crate::{ (name = "sources", description = "Document source management endpoints"), (name = "webdav", description = "WebDAV synchronization endpoints"), (name = "ignored_files", description = "Ignored files management endpoints"), + (name = "ocr", description = "OCR service management endpoints"), (name = "health", description = "Health check endpoint"), ), modifiers(&SecurityAddon),