feat(server/client): support multiple OCR languages

This commit is contained in:
perf3ct 2025-06-29 22:51:06 +00:00
parent 929ea96e6f
commit b4ddf034b0
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
14 changed files with 698 additions and 66 deletions

View File

@ -0,0 +1,178 @@
import React, { useState, useEffect } from 'react';
import {
FormControl,
InputLabel,
Select,
MenuItem,
Typography,
Box,
Chip,
CircularProgress,
Alert,
SelectChangeEvent,
} from '@mui/material';
import { Language as LanguageIcon } from '@mui/icons-material';
import { ocrService, LanguageInfo } from '../../services/api';
/**
 * Props accepted by `OcrLanguageSelector`.
 */
interface OcrLanguageSelectorProps {
/**
 * Currently selected language code. When empty (the default), the component
 * calls `onChange` once the language list has been requested: with the user's
 * current language on success, or with `'eng'` if the request fails.
 */
value?: string;
/** Called with the language code chosen by the user (or auto-selected, see `value`). */
onChange: (language: string) => void;
/** Label shown on the select. Defaults to `'OCR Language'`. */
label?: string;
/** Size forwarded to the form control. Defaults to `'medium'`. */
size?: 'small' | 'medium';
/** Whether the form control stretches to its container width. Defaults to `true`. */
fullWidth?: boolean;
/**
 * Disables the select in the normal (loaded) state. Defaults to `false`.
 * The error-fallback select is always rendered disabled regardless of this prop.
 */
disabled?: boolean;
/**
 * When `true` (the default), marks the user's current language with a
 * "Current" chip and shows the "N languages available" caption below the select.
 */
showCurrentIndicator?: boolean;
/** Marks the form control as required. Defaults to `false`. */
required?: boolean;
/** Optional caption rendered underneath the select. */
helperText?: string;
}
/**
 * Extracts the server-supplied `message` from an error shaped like
 * `{ response: { data: { message } } }`. Returns `undefined` for any other
 * value (including `null`, strings, or errors without a response payload).
 */
const extractApiErrorMessage = (err: unknown): string | undefined => {
  if (typeof err !== 'object' || err === null) {
    return undefined;
  }
  const { response } = err as { response?: { data?: { message?: unknown } } };
  const message = response?.data?.message;
  return typeof message === 'string' && message.length > 0 ? message : undefined;
};

/**
 * Select box listing the OCR languages reported by `ocrService.getAvailableLanguages()`.
 *
 * Behaviour:
 * - While the list is loading, a spinner placeholder is rendered.
 * - If loading fails, a warning with a "Retry" control is shown together with
 *   a disabled English-only fallback select.
 * - If `value` is empty when the request settles, `onChange` is invoked with the
 *   user's current language (or `'eng'` on failure) so the parent always ends
 *   up with a concrete selection.
 */
const OcrLanguageSelector: React.FC<OcrLanguageSelectorProps> = ({
  value = '',
  onChange,
  label = 'OCR Language',
  size = 'medium',
  fullWidth = true,
  disabled = false,
  showCurrentIndicator = true,
  required = false,
  helperText,
}) => {
  const [languages, setLanguages] = useState<LanguageInfo[]>([]);
  const [currentUserLanguage, setCurrentUserLanguage] = useState<string>('eng');
  const [loading, setLoading] = useState<boolean>(true);
  const [error, setError] = useState<string>('');
  // Tracks whether the component is still mounted so that a request resolving
  // late neither updates local state nor calls back into the parent.
  const isMountedRef = React.useRef<boolean>(true);

  const fetchLanguages = async (): Promise<void> => {
    setLoading(true);
    setError('');
    try {
      const response = await ocrService.getAvailableLanguages();
      if (!isMountedRef.current) {
        return;
      }
      setLanguages(response.data.available_languages);
      setCurrentUserLanguage(response.data.current_user_language);
      // If no value is set, default to user's current language
      if (!value) {
        onChange(response.data.current_user_language);
      }
    } catch (err: unknown) {
      if (!isMountedRef.current) {
        return;
      }
      setError(extractApiErrorMessage(err) || 'Failed to load OCR languages');
      // Fallback to English if API fails
      setLanguages([
        { code: 'eng', name: 'English', installed: true }
      ]);
      if (!value) {
        onChange('eng');
      }
    } finally {
      if (isMountedRef.current) {
        setLoading(false);
      }
    }
  };

  useEffect(() => {
    isMountedRef.current = true;
    void fetchLanguages();
    return () => {
      isMountedRef.current = false;
    };
    // The list is fetched once on mount; later refreshes go through "Retry".
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  const handleChange = (event: SelectChangeEvent) => {
    onChange(event.target.value);
  };

  /** Resolves a language code to its display name, falling back to the code itself. */
  const getLanguageDisplay = (langCode: string) => {
    const language = languages.find(lang => lang.code === langCode);
    return language ? language.name : langCode;
  };

  if (loading) {
    return (
      <FormControl fullWidth={fullWidth} size={size}>
        <InputLabel>{label}</InputLabel>
        <Box sx={{ display: 'flex', alignItems: 'center', p: 2 }}>
          <CircularProgress size={20} sx={{ mr: 1 }} />
          <Typography variant="body2" color="text.secondary">
            Loading languages...
          </Typography>
        </Box>
      </FormControl>
    );
  }

  if (error) {
    return (
      <Box>
        <Alert
          severity="warning"
          sx={{ mb: 1 }}
          action={
            // role/tabIndex/onKeyDown make the retry control reachable and
            // operable from the keyboard, not just by mouse click.
            <Typography
              variant="button"
              role="button"
              tabIndex={0}
              onClick={() => {
                void fetchLanguages();
              }}
              onKeyDown={(event: React.KeyboardEvent) => {
                if (event.key === 'Enter' || event.key === ' ') {
                  event.preventDefault();
                  void fetchLanguages();
                }
              }}
              sx={{ cursor: 'pointer', textDecoration: 'underline' }}
            >
              Retry
            </Typography>
          }
        >
          {error}
        </Alert>
        <FormControl fullWidth={fullWidth} size={size} disabled>
          <InputLabel>{label}</InputLabel>
          <Select value="eng">
            <MenuItem value="eng">English (Fallback)</MenuItem>
          </Select>
        </FormControl>
      </Box>
    );
  }

  return (
    <Box>
      <FormControl fullWidth={fullWidth} size={size} disabled={disabled} required={required}>
        <InputLabel id="ocr-language-label">{label}</InputLabel>
        <Select
          labelId="ocr-language-label"
          value={value || currentUserLanguage}
          onChange={handleChange}
          label={label}
          startAdornment={<LanguageIcon sx={{ mr: 1, color: 'text.secondary' }} />}
        >
          {languages.map((language) => (
            <MenuItem key={language.code} value={language.code}>
              <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', width: '100%' }}>
                <Typography>{language.name}</Typography>
                <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
                  <Typography variant="caption" color="text.secondary">
                    {language.code}
                  </Typography>
                  {showCurrentIndicator && language.code === currentUserLanguage && (
                    <Chip
                      label="Current"
                      size="small"
                      color="primary"
                      variant="outlined"
                      sx={{ fontSize: '0.7rem', height: '20px' }}
                    />
                  )}
                </Box>
              </Box>
            </MenuItem>
          ))}
        </Select>
        {helperText && (
          <Typography variant="caption" color="text.secondary" sx={{ mt: 0.5, ml: 1.5 }}>
            {helperText}
          </Typography>
        )}
      </FormControl>
      {showCurrentIndicator && languages.length > 0 && (
        <Typography variant="caption" color="text.secondary" sx={{ display: 'block', mt: 1 }}>
          {languages.length} language{languages.length !== 1 ? 's' : ''} available
          {value && value !== currentUserLanguage && (
            <span> Selecting "{getLanguageDisplay(value)}" will update your default language</span>
          )}
        </Typography>
      )}
    </Box>
  );
};
export default OcrLanguageSelector;

View File

@ -0,0 +1 @@
export { default } from './OcrLanguageSelector';

View File

@ -0,0 +1,158 @@
import React, { useState } from 'react';
import {
Dialog,
DialogTitle,
DialogContent,
DialogActions,
Button,
Box,
Typography,
CircularProgress,
Alert,
Divider,
} from '@mui/material';
import { Refresh as RefreshIcon, Language as LanguageIcon } from '@mui/icons-material';
import OcrLanguageSelector from '../OcrLanguageSelector';
import { ocrService } from '../../services/api';
/**
 * Props accepted by `OcrRetryDialog`.
 */
interface OcrRetryDialogProps {
/** Whether the dialog is shown. */
open: boolean;
/** Called when the dialog should close (cancel, backdrop, or after a successful retry). */
onClose: () => void;
/**
 * The failed document to retry. When `null` the dialog renders nothing.
 */
document: {
id: string;
filename: string;
original_filename: string;
failure_category: string;
ocr_error: string;
retry_count: number;
} | null;
/** Called with a human-readable confirmation once the retry has been queued. */
onRetrySuccess: (message: string) => void;
/** Called with a human-readable message when the retry request is rejected or fails. */
onRetryError: (message: string) => void;
}
/**
 * Extracts the server-supplied `message` from an error shaped like
 * `{ response: { data: { message } } }`. Returns `undefined` for any other value.
 */
const extractRetryErrorMessage = (err: unknown): string | undefined => {
  if (typeof err !== 'object' || err === null) {
    return undefined;
  }
  const { response } = err as { response?: { data?: { message?: unknown } } };
  const message = response?.data?.message;
  return typeof message === 'string' && message.length > 0 ? message : undefined;
};

/**
 * Modal dialog that re-queues OCR for a failed document, optionally with a
 * different OCR language.
 *
 * The outcome is reported through `onRetrySuccess` / `onRetryError`; the dialog
 * closes itself only after a successful retry. While a request is in flight the
 * dialog cannot be dismissed and both buttons are disabled.
 */
const OcrRetryDialog: React.FC<OcrRetryDialogProps> = ({
  open,
  onClose,
  document,
  onRetrySuccess,
  onRetryError,
}) => {
  const [selectedLanguage, setSelectedLanguage] = useState<string>('');
  const [retrying, setRetrying] = useState<boolean>(false);

  const handleRetry = async (): Promise<void> => {
    if (!document) return;
    try {
      setRetrying(true);
      const response = await ocrService.retryWithLanguage(
        document.id,
        selectedLanguage || undefined
      );
      if (response.data.success) {
        // `??` rather than `||`: an estimate of 0 minutes is a valid answer and
        // must not be reported as "Unknown".
        const waitTime = response.data.estimated_wait_minutes ?? 'Unknown';
        const languageInfo = selectedLanguage ? ` with language "${selectedLanguage}"` : '';
        onRetrySuccess(
          `OCR retry queued for "${document.filename}"${languageInfo}. Estimated wait time: ${waitTime} minutes.`
        );
        // Clear the selection so the next document opened in this dialog does
        // not inherit the language picked for this one.
        setSelectedLanguage('');
        onClose();
      } else {
        onRetryError(response.data.message || 'Failed to retry OCR');
      }
    } catch (error: unknown) {
      console.error('Failed to retry OCR:', error);
      onRetryError(
        extractRetryErrorMessage(error) || 'Failed to retry OCR processing'
      );
    } finally {
      setRetrying(false);
    }
  };

  /** Dismisses the dialog unless a retry request is currently in flight. */
  const handleClose = () => {
    if (!retrying) {
      setSelectedLanguage('');
      onClose();
    }
  };

  if (!document) return null;

  return (
    <Dialog open={open} onClose={handleClose} maxWidth="sm" fullWidth>
      <DialogTitle>
        <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
          <RefreshIcon />
          <Typography variant="h6">Retry OCR Processing</Typography>
        </Box>
      </DialogTitle>
      <DialogContent>
        <Box sx={{ mb: 3 }}>
          <Typography variant="subtitle1" sx={{ fontWeight: 600, mb: 1 }}>
            Document: {document.original_filename}
          </Typography>
          <Typography variant="body2" color="text.secondary" sx={{ mb: 2 }}>
            Previous attempts: {document.retry_count}
          </Typography>
          {document.failure_category && (
            <Alert severity="warning" sx={{ mb: 2 }}>
              <Typography variant="body2">
                <strong>Previous failure:</strong> {document.failure_category}
              </Typography>
              {document.ocr_error && (
                <Typography variant="caption" sx={{ display: 'block', mt: 1 }}>
                  {document.ocr_error}
                </Typography>
              )}
            </Alert>
          )}
        </Box>
        <Divider sx={{ my: 2 }} />
        <Box sx={{ mb: 3 }}>
          <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 2, display: 'flex', alignItems: 'center', gap: 1 }}>
            <LanguageIcon fontSize="small" />
            OCR Language Selection
          </Typography>
          <Typography variant="body2" color="text.secondary" sx={{ mb: 2 }}>
            Choose a different language if the previous OCR attempt used the wrong language for this document.
          </Typography>
          <OcrLanguageSelector
            value={selectedLanguage}
            onChange={setSelectedLanguage}
            label="OCR Language (Optional)"
            size="medium"
            helperText="Leave empty to use your default language setting"
            showCurrentIndicator={true}
          />
        </Box>
        <Alert severity="info" sx={{ mt: 2 }}>
          <Typography variant="body2">
            The retry will use enhanced OCR processing and may take several minutes depending on document size and complexity.
          </Typography>
        </Alert>
      </DialogContent>
      <DialogActions sx={{ px: 3, pb: 3 }}>
        <Button onClick={handleClose} disabled={retrying}>
          Cancel
        </Button>
        <Button
          onClick={handleRetry}
          variant="contained"
          disabled={retrying}
          startIcon={retrying ? <CircularProgress size={20} /> : <RefreshIcon />}
        >
          {retrying ? 'Retrying...' : 'Retry OCR'}
        </Button>
      </DialogActions>
    </Dialog>
  );
};
export default OcrRetryDialog;

View File

@ -0,0 +1 @@
export { default } from './OcrRetryDialog';

View File

@ -42,6 +42,7 @@ import { Edit as EditIcon, Delete as DeleteIcon, Add as AddIcon,
Pause as PauseIcon, Stop as StopIcon } from '@mui/icons-material';
import { useAuth } from '../contexts/AuthContext';
import api, { queueService } from '../services/api';
import OcrLanguageSelector from '../components/OcrLanguageSelector';
interface User {
id: string;
@ -112,10 +113,6 @@ interface UserFormData {
password: string;
}
interface OcrLanguage {
code: string;
name: string;
}
interface WebDAVFolderInfo {
path: string;
@ -233,23 +230,6 @@ const SettingsPage: React.FC = () => {
const [ocrStatus, setOcrStatus] = useState<{ is_paused: boolean; status: 'paused' | 'running' } | null>(null);
const [ocrActionLoading, setOcrActionLoading] = useState(false);
const ocrLanguages: OcrLanguage[] = [
{ code: 'eng', name: 'English' },
{ code: 'spa', name: 'Spanish' },
{ code: 'fra', name: 'French' },
{ code: 'deu', name: 'German' },
{ code: 'ita', name: 'Italian' },
{ code: 'por', name: 'Portuguese' },
{ code: 'rus', name: 'Russian' },
{ code: 'jpn', name: 'Japanese' },
{ code: 'chi_sim', name: 'Chinese (Simplified)' },
{ code: 'chi_tra', name: 'Chinese (Traditional)' },
{ code: 'kor', name: 'Korean' },
{ code: 'ara', name: 'Arabic' },
{ code: 'hin', name: 'Hindi' },
{ code: 'nld', name: 'Dutch' },
{ code: 'pol', name: 'Polish' },
];
useEffect(() => {
fetchSettings();
@ -415,9 +395,6 @@ const SettingsPage: React.FC = () => {
setTabValue(newValue);
};
const handleOcrLanguageChange = (event: SelectChangeEvent<string>): void => {
handleSettingsChange('ocrLanguage', event.target.value);
};
const handleCpuPriorityChange = (event: SelectChangeEvent<string>): void => {
handleSettingsChange('cpuPriority', event.target.value);
@ -501,21 +478,13 @@ const SettingsPage: React.FC = () => {
<Divider sx={{ mb: 2 }} />
<Grid container spacing={2}>
<Grid item xs={12} md={6}>
<FormControl fullWidth>
<InputLabel>OCR Language</InputLabel>
<Select
value={settings.ocrLanguage}
label="OCR Language"
onChange={handleOcrLanguageChange}
disabled={loading}
>
{ocrLanguages.map((lang) => (
<MenuItem key={lang.code} value={lang.code}>
{lang.name}
</MenuItem>
))}
</Select>
</FormControl>
<OcrLanguageSelector
value={settings.ocrLanguage}
onChange={(language) => handleSettingsChange('ocrLanguage', language)}
disabled={loading}
showCurrentIndicator={false}
helperText="Default language for OCR text extraction from your documents"
/>
</Grid>
<Grid item xs={12} md={6}>
<TextField

View File

@ -272,6 +272,21 @@ export interface OcrActionResponse {
message: string
}
/** One OCR language entry as returned by `GET /ocr/languages`. */
export interface LanguageInfo {
// Language code used as the select value and sent back to the server (e.g. 'eng').
code: string
// Human-readable display name for the language.
name: string
// Whether the language is installed on the server.
installed: boolean
}
/** Response body of `GET /ocr/languages`. */
export interface AvailableLanguagesResponse {
// Languages the user can choose from.
available_languages: LanguageInfo[]
// Language code currently stored as the requesting user's OCR language.
current_user_language: string
}
/** Request body of `POST /documents/{id}/retry-ocr`. */
export interface RetryOcrRequest {
// Optional OCR language code for the retry; omitted when no language was chosen.
language?: string
}
export const queueService = {
getStats: () => {
return api.get<QueueStats>('/queue/stats')
@ -292,4 +307,22 @@ export const queueService = {
resumeOcr: () => {
return api.post<OcrActionResponse>('/queue/resume')
},
}
/** Client for the OCR-related API endpoints. */
export const ocrService = {
  /** Fetches the selectable OCR languages together with the user's current language. */
  getAvailableLanguages: () => api.get<AvailableLanguagesResponse>('/ocr/languages'),

  /** Fetches the OCR health report from `/ocr/health`. */
  getHealthStatus: () => api.get('/ocr/health'),

  /**
   * Re-queues OCR for a document.
   *
   * @param documentId - ID of the document to retry.
   * @param language - Optional OCR language code; left out of the request body when empty.
   */
  retryWithLanguage: (documentId: string, language?: string) => {
    const payload: RetryOcrRequest = language ? { language } : {}
    return api.post(`/documents/${documentId}/retry-ocr`, payload)
  },
}

View File

@ -384,4 +384,25 @@ impl Database {
updated_at: row.get("updated_at"),
})
}
/// Stores `language` as the OCR language for `user_id`.
///
/// Performs an upsert on the `settings` table: a row is inserted for the user,
/// or, if one already exists, its `ocr_language` is overwritten and
/// `updated_at` is set to `NOW()`.
///
/// The query is executed through `self.with_retry`
/// (NOTE(review): retry policy is defined elsewhere — confirm before relying on it).
///
/// # Errors
/// Returns an error with the message `Failed to update OCR language: ...` when
/// the query fails.
pub async fn update_user_ocr_language(&self, user_id: Uuid, language: &str) -> Result<()> {
self.with_retry(|| async {
// $1 = user_id, $2 = language; $2 is reused in the conflict branch.
sqlx::query(
r#"
INSERT INTO settings (user_id, ocr_language)
VALUES ($1, $2)
ON CONFLICT (user_id) DO UPDATE SET
ocr_language = $2,
updated_at = NOW()
"#
)
.bind(user_id)
.bind(language)
.execute(&self.pool)
.await
.map_err(|e| anyhow::anyhow!("Failed to update OCR language: {}", e))?;
Ok(())
}).await
}
}

View File

@ -436,6 +436,7 @@ async fn main() -> anyhow::Result<()> {
.nest("/api/metrics", readur::routes::metrics::router())
.nest("/metrics", readur::routes::prometheus_metrics::router())
.nest("/api/notifications", readur::routes::notifications::router())
.nest("/api/ocr", readur::routes::ocr::router())
.nest("/api/queue", readur::routes::queue::router())
.nest("/api/search", readur::routes::search::router())
.nest("/api/settings", readur::routes::settings::router())

View File

@ -7,33 +7,43 @@ use axum::{
response::Json,
};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
#[derive(Serialize)]
#[derive(Serialize, utoipa::ToSchema)]
pub struct OcrHealthResponse {
status: String,
tesseract_installed: bool,
available_languages: Vec<String>,
diagnostics: Option<String>,
errors: Vec<String>,
pub status: String,
pub tesseract_installed: bool,
pub available_languages: Vec<String>,
pub diagnostics: Option<String>,
pub errors: Vec<String>,
}
#[derive(Serialize)]
#[derive(Serialize, utoipa::ToSchema)]
pub struct OcrErrorResponse {
error: String,
error_code: String,
details: Option<String>,
is_recoverable: bool,
pub error: String,
pub error_code: String,
pub details: Option<String>,
pub is_recoverable: bool,
}
#[derive(Deserialize)]
#[derive(Deserialize, utoipa::ToSchema)]
pub struct OcrRequest {
file_path: String,
language: Option<String>,
use_fallback: Option<bool>,
pub file_path: String,
pub language: Option<String>,
pub use_fallback: Option<bool>,
}
#[utoipa::path(
get,
path = "/api/ocr/health",
tag = "ocr",
responses(
(status = 200, description = "OCR service health status", body = OcrHealthResponse),
(status = 500, description = "OCR service is unhealthy", body = OcrErrorResponse)
)
)]
pub async fn health_check(
State(_state): State<AppState>,
State(_state): State<Arc<AppState>>,
) -> Result<Json<OcrHealthResponse>, (StatusCode, Json<OcrErrorResponse>)> {
let service = EnhancedOcrService::new();
let diagnostics = service.get_diagnostics().await;
@ -72,8 +82,19 @@ pub async fn health_check(
}
}
#[utoipa::path(
post,
path = "/api/ocr/perform",
tag = "ocr",
request_body = OcrRequest,
responses(
(status = 200, description = "OCR text extraction successful", body = serde_json::Value),
(status = 400, description = "Bad request or invalid language", body = OcrErrorResponse),
(status = 500, description = "OCR processing failed", body = OcrErrorResponse)
)
)]
pub async fn perform_ocr(
State(_state): State<AppState>,
State(_state): State<Arc<AppState>>,
Json(request): Json<OcrRequest>,
) -> Result<Json<serde_json::Value>, (StatusCode, Json<OcrErrorResponse>)> {
let service = EnhancedOcrService::new();

View File

@ -73,11 +73,8 @@ impl OcrHealthChecker {
})
}
pub fn get_available_languages(&self) -> Vec<String> {
let tessdata_path = match self.get_tessdata_path() {
Ok(path) => path,
Err(_) => return vec![],
};
pub fn get_available_languages(&self) -> Result<Vec<String>, OcrError> {
let tessdata_path = self.get_tessdata_path()?;
let mut languages = vec![];
if let Ok(entries) = std::fs::read_dir(&tessdata_path) {
@ -92,7 +89,18 @@ impl OcrHealthChecker {
}
languages.sort();
languages
Ok(languages)
}
/// Verifies that `lang` is one of the languages reported by
/// `get_available_languages`.
///
/// # Errors
/// Propagates any error from `get_available_languages`, and returns
/// `OcrError::LanguageDataNotFound` when `lang` is not in the list.
pub fn validate_language(&self, lang: &str) -> Result<(), OcrError> {
    let installed = self.get_available_languages()?;
    let is_known = installed.iter().any(|candidate| candidate.as_str() == lang);

    if is_known {
        Ok(())
    } else {
        Err(OcrError::LanguageDataNotFound {
            lang: lang.to_string(),
        })
    }
}
pub fn check_cpu_features(&self) -> CpuFeatures {
@ -240,7 +248,7 @@ impl OcrHealthChecker {
pub fn get_full_diagnostics(&self) -> OcrDiagnostics {
OcrDiagnostics {
tesseract_version: self.check_tesseract_installation().ok(),
available_languages: self.get_available_languages(),
available_languages: self.get_available_languages().unwrap_or_else(|_| vec![]),
tessdata_path: self.get_tessdata_path().ok(),
cpu_features: self.check_cpu_features(),
memory_available_mb: self.check_memory_available(),

View File

@ -26,6 +26,11 @@ struct PaginationQuery {
ocr_status: Option<String>,
}
/// JSON body accepted by the OCR retry endpoint.
#[derive(Deserialize, ToSchema)]
pub struct RetryOcrRequest {
/// Optional OCR language code to use for the retry; `None` when the client
/// did not specify one.
pub language: Option<String>,
}
#[derive(Deserialize, ToSchema)]
struct FailedDocumentsQuery {
limit: Option<i64>,
@ -152,6 +157,7 @@ async fn upload_document(
.unwrap_or_else(|| crate::models::Settings::default());
let mut label_ids: Option<Vec<uuid::Uuid>> = None;
let mut ocr_language: Option<String> = None;
// First pass: collect all multipart fields
while let Some(field) = multipart.next_field().await.map_err(|_| StatusCode::BAD_REQUEST)? {
@ -172,6 +178,23 @@ async fn upload_document(
tracing::warn!("Failed to parse label_ids from upload: {} - Error: {}", label_ids_text, e);
}
}
} else if name == "ocr_language" {
let language = field.text().await.map_err(|_| StatusCode::BAD_REQUEST)?;
if !language.trim().is_empty() {
// Validate that the language is available
let health_checker = crate::ocr::health::OcrHealthChecker::new();
match health_checker.validate_language(language.trim()) {
Ok(_) => {
ocr_language = Some(language.trim().to_string());
tracing::info!("OCR language specified and validated: {}", language);
}
Err(e) => {
tracing::warn!("Invalid OCR language specified '{}': {}", language, e);
// Return early with bad request for invalid language
return Err(StatusCode::BAD_REQUEST);
}
}
}
} else if name == "file" {
let filename = field
.file_name()
@ -214,6 +237,15 @@ async fn upload_document(
let enable_background_ocr = settings.enable_background_ocr;
if enable_background_ocr && should_queue_ocr {
// If a language was specified, update the user's OCR language setting for this session
if let Some(lang) = &ocr_language {
if let Err(e) = state.db.update_user_ocr_language(auth_user.user.id, lang).await {
tracing::warn!("Failed to update user OCR language to {}: {}", lang, e);
} else {
tracing::info!("Updated user {} OCR language to: {}", auth_user.user.id, lang);
}
}
// Use the shared queue service from AppState instead of creating a new one
// Calculate priority based on file size
let priority = match saved_document.file_size {
@ -550,6 +582,7 @@ async fn get_processed_image(
params(
("id" = uuid::Uuid, Path, description = "Document ID")
),
request_body(content = RetryOcrRequest, description = "OCR retry options"),
responses(
(status = 200, description = "OCR retry queued successfully", body = String),
(status = 404, description = "Document not found"),
@ -561,6 +594,7 @@ async fn retry_ocr(
State(state): State<Arc<AppState>>,
auth_user: AuthUser,
Path(document_id): Path<uuid::Uuid>,
Json(request): Json<RetryOcrRequest>,
) -> Result<Json<serde_json::Value>, StatusCode> {
// Check if document exists and belongs to user
let document = state
@ -617,12 +651,36 @@ async fn retry_ocr(
_ => 6, // > 50MB: lowest priority
};
// If a language was specified, validate and update the user's OCR language setting
if let Some(lang) = &request.language {
// Validate that the language is available
let health_checker = crate::ocr::health::OcrHealthChecker::new();
match health_checker.validate_language(lang) {
Ok(_) => {
if let Err(e) = state.db.update_user_ocr_language(auth_user.user.id, lang).await {
tracing::warn!("Failed to update user OCR language to {}: {}", lang, e);
} else {
tracing::info!("Updated user {} OCR language to: {} for retry", auth_user.user.id, lang);
}
}
Err(e) => {
tracing::warn!("Invalid OCR language specified '{}' for retry: {}", lang, e);
return Ok(Json(serde_json::json!({
"success": false,
"message": format!("Invalid OCR language '{}': {}", lang, e),
"error_code": "INVALID_LANGUAGE"
})));
}
}
}
// Add to OCR queue with detailed logging
match state.queue_service.enqueue_document(document_id, priority, document.file_size).await {
Ok(queue_id) => {
let language_info = request.language.as_ref().map(|l| format!(" with language: {}", l)).unwrap_or_default();
tracing::info!(
"OCR retry queued for document {} ({}): queue_id={}, priority={}, size={}",
document_id, document.filename, queue_id, priority, document.file_size
"OCR retry queued for document {} ({}): queue_id={}, priority={}, size={}{}",
document_id, document.filename, queue_id, priority, document.file_size, language_info
);
Ok(Json(serde_json::json!({
@ -631,6 +689,7 @@ async fn retry_ocr(
"queue_id": queue_id,
"document_id": document_id,
"priority": priority,
"language": request.language,
"estimated_wait_minutes": calculate_estimated_wait_time(priority).await
})))
}

View File

@ -4,6 +4,7 @@ pub mod ignored_files;
pub mod labels;
pub mod metrics;
pub mod notifications;
pub mod ocr;
pub mod prometheus_metrics;
pub mod queue;
pub mod search;

173
src/routes/ocr.rs Normal file
View File

@ -0,0 +1,173 @@
use axum::{
extract::State,
http::StatusCode,
response::Json,
routing::get,
Router,
};
use serde::Serialize;
use std::sync::Arc;
use utoipa::ToSchema;
use crate::{
auth::AuthUser,
ocr::health::OcrHealthChecker,
AppState,
};
/// Response body of `GET /api/ocr/languages`.
#[derive(Serialize, ToSchema)]
pub struct AvailableLanguagesResponse {
/// Languages that can be selected for OCR.
pub available_languages: Vec<LanguageInfo>,
/// OCR language code stored in the requesting user's settings
/// (`"eng"` when the user has no settings row).
pub current_user_language: String,
}
/// A single selectable OCR language.
#[derive(Serialize, ToSchema)]
pub struct LanguageInfo {
/// Language code as reported by the OCR health checker (e.g. `eng`).
pub code: String,
/// Human-readable name; equals `code` when no display name is known.
pub name: String,
/// Whether the language is installed. The languages endpoint in this module
/// always sets this to `true`.
pub installed: bool,
}
/// Builds the OCR router: `GET /health`, `POST /perform` and `GET /languages`.
pub fn router() -> Router<Arc<AppState>> {
    use crate::ocr::api::{health_check, perform_ocr};
    use axum::routing::post;

    let routes = Router::new();
    let routes = routes.route("/health", get(health_check));
    let routes = routes.route("/perform", post(perform_ocr));
    routes.route("/languages", get(get_available_languages))
}
#[utoipa::path(
get,
path = "/api/ocr/languages",
tag = "ocr",
security(
("bearer_auth" = [])
),
responses(
(status = 200, description = "Available OCR languages and user's current language", body = AvailableLanguagesResponse),
(status = 401, description = "Unauthorized"),
(status = 500, description = "Internal server error")
)
)]
async fn get_available_languages(
State(state): State<Arc<AppState>>,
auth_user: AuthUser,
) -> Result<Json<AvailableLanguagesResponse>, StatusCode> {
// Get user's current OCR language setting
let user_settings = state
.db
.get_user_settings(auth_user.user.id)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let current_language = user_settings
.map(|s| s.ocr_language)
.unwrap_or_else(|| "eng".to_string());
// Get available languages from Tesseract
let health_checker = OcrHealthChecker::new();
let available_languages = match health_checker.get_available_languages() {
Ok(langs) => langs,
Err(_) => {
// Fallback to common languages if detection fails
vec!["eng".to_string()]
}
};
// Create language info with display names
let language_info: Vec<LanguageInfo> = available_languages
.into_iter()
.map(|code| LanguageInfo {
name: get_language_display_name(&code),
installed: true, // If it's returned by get_available_languages, it's installed
code,
})
.collect();
Ok(Json(AvailableLanguagesResponse {
available_languages: language_info,
current_user_language: current_language,
}))
}
/// Maps an OCR language code to its English display name.
///
/// Codes without a known display name are returned unchanged. Matching is
/// exact (case-sensitive).
fn get_language_display_name(code: &str) -> String {
    // (code, display name) pairs; codes are unique.
    const DISPLAY_NAMES: &[(&str, &str)] = &[
        ("eng", "English"),
        ("spa", "Spanish"),
        ("fra", "French"),
        ("deu", "German"),
        ("ita", "Italian"),
        ("por", "Portuguese"),
        ("rus", "Russian"),
        ("jpn", "Japanese"),
        ("chi_sim", "Chinese (Simplified)"),
        ("chi_tra", "Chinese (Traditional)"),
        ("kor", "Korean"),
        ("ara", "Arabic"),
        ("hin", "Hindi"),
        ("tha", "Thai"),
        ("vie", "Vietnamese"),
        ("pol", "Polish"),
        ("nld", "Dutch"),
        ("dan", "Danish"),
        ("nor", "Norwegian"),
        ("swe", "Swedish"),
        ("fin", "Finnish"),
        ("ces", "Czech"),
        ("hun", "Hungarian"),
        ("tur", "Turkish"),
        ("heb", "Hebrew"),
        ("ukr", "Ukrainian"),
        ("bul", "Bulgarian"),
        ("ron", "Romanian"),
        ("hrv", "Croatian"),
        ("slk", "Slovak"),
        ("slv", "Slovenian"),
        ("est", "Estonian"),
        ("lav", "Latvian"),
        ("lit", "Lithuanian"),
        ("ell", "Greek"),
        ("cat", "Catalan"),
        ("eus", "Basque"),
        ("gla", "Scottish Gaelic"),
        ("gle", "Irish"),
        ("cym", "Welsh"),
        ("isl", "Icelandic"),
        ("mlt", "Maltese"),
        ("afr", "Afrikaans"),
        ("sqi", "Albanian"),
        ("aze", "Azerbaijani"),
        ("bel", "Belarusian"),
        ("ben", "Bengali"),
        ("bos", "Bosnian"),
        ("bre", "Breton"),
        ("kan", "Kannada"),
        ("kat", "Georgian"),
        ("kaz", "Kazakh"),
        ("kir", "Kyrgyz"),
        ("lao", "Lao"),
        ("lat", "Latin"),
        ("ltz", "Luxembourgish"),
        ("mkd", "Macedonian"),
        ("msa", "Malay"),
        ("mal", "Malayalam"),
        ("mar", "Marathi"),
        ("nep", "Nepali"),
        ("ori", "Odia"),
        ("pan", "Punjabi"),
        ("pus", "Pashto"),
        ("fas", "Persian"),
        ("san", "Sanskrit"),
        ("sin", "Sinhala"),
        ("srp", "Serbian"),
        ("tam", "Tamil"),
        ("tel", "Telugu"),
        ("tgk", "Tajik"),
        ("uzb", "Uzbek"),
        ("urd", "Urdu"),
    ];

    // For unknown codes, just return the code as-is
    DISPLAY_NAMES
        .iter()
        .find(|(known_code, _)| *known_code == code)
        .map_or(code, |(_, display_name)| *display_name)
        .to_string()
}

View File

@ -109,6 +109,10 @@ use crate::{
crate::routes::webdav::get_webdav_sync_status,
crate::routes::webdav::test_webdav_connection,
crate::routes::webdav::estimate_webdav_crawl,
// OCR endpoints
crate::routes::ocr::get_available_languages,
crate::ocr::api::health_check,
crate::ocr::api::perform_ocr,
// Ignored files endpoints
crate::routes::ignored_files::list_ignored_files,
crate::routes::ignored_files::get_ignored_file,
@ -136,7 +140,10 @@ use crate::{
Label, CreateLabel, UpdateLabel, LabelAssignment, LabelQuery, LabelBulkUpdateRequest,
// Document schemas
BulkDeleteRequest, DocumentListResponse, DocumentOcrResponse, DocumentOperationResponse,
BulkDeleteResponse, PaginationInfo, DocumentDuplicatesResponse
BulkDeleteResponse, PaginationInfo, DocumentDuplicatesResponse, crate::routes::documents::RetryOcrRequest,
// OCR schemas
crate::routes::ocr::AvailableLanguagesResponse, crate::routes::ocr::LanguageInfo,
crate::ocr::api::OcrHealthResponse, crate::ocr::api::OcrErrorResponse, crate::ocr::api::OcrRequest
)
),
tags(
@ -152,6 +159,7 @@ use crate::{
(name = "sources", description = "Document source management endpoints"),
(name = "webdav", description = "WebDAV synchronization endpoints"),
(name = "ignored_files", description = "Ignored files management endpoints"),
(name = "ocr", description = "OCR service management endpoints"),
(name = "health", description = "Health check endpoint"),
),
modifiers(&SecurityAddon),