feat(server/client): support multiple OCR languages
This commit is contained in:
parent
929ea96e6f
commit
b4ddf034b0
|
|
@ -0,0 +1,178 @@
|
|||
import React, { useState, useEffect } from 'react';
|
||||
import {
|
||||
FormControl,
|
||||
InputLabel,
|
||||
Select,
|
||||
MenuItem,
|
||||
Typography,
|
||||
Box,
|
||||
Chip,
|
||||
CircularProgress,
|
||||
Alert,
|
||||
SelectChangeEvent,
|
||||
} from '@mui/material';
|
||||
import { Language as LanguageIcon } from '@mui/icons-material';
|
||||
import { ocrService, LanguageInfo } from '../../services/api';
|
||||
|
||||
/**
 * Props accepted by `OcrLanguageSelector`.
 *
 * Only `onChange` is required; every other member is optional and the
 * component supplies its own default when a member is omitted.
 */
interface OcrLanguageSelectorProps {
  /** Called with the language code the user picked (or the auto-selected default). */
  onChange: (language: string) => void;
  /** Currently selected language code; an empty/missing value lets the component pick a default. */
  value?: string;
  /** Text shown as the field label. */
  label?: string;
  /** Size passed to the surrounding form control. */
  size?: 'small' | 'medium';
  /** Whether the control stretches to the width of its container. */
  fullWidth?: boolean;
  /** Disables the dropdown. */
  disabled?: boolean;
  /** Shows the "Current" chip and the summary line under the dropdown. */
  showCurrentIndicator?: boolean;
  /** Marks the form control as required. */
  required?: boolean;
  /** Optional hint rendered below the dropdown. */
  helperText?: string;
}
|
||||
|
||||
/**
 * Dropdown for choosing an OCR language.
 *
 * On mount the component asks `ocrService.getAvailableLanguages()` for the
 * language list and the user's current language. While the request is in
 * flight a spinner is shown; if it fails, a warning with a "Retry" control and
 * a disabled English-only fallback dropdown are shown instead.
 *
 * When no `value` is supplied, `onChange` is invoked with the user's current
 * language (or `'eng'` if the request failed) so the parent always ends up
 * with a concrete selection.
 */
const OcrLanguageSelector: React.FC<OcrLanguageSelectorProps> = ({
  value = '',
  onChange,
  label = 'OCR Language',
  size = 'medium',
  fullWidth = true,
  disabled = false,
  showCurrentIndicator = true,
  required = false,
  helperText,
}) => {
  const [languages, setLanguages] = useState<LanguageInfo[]>([]);
  const [currentUserLanguage, setCurrentUserLanguage] = useState<string>('eng');
  const [loading, setLoading] = useState<boolean>(true);
  const [error, setError] = useState<string>('');

  // Always holds the props of the most recent render, so the asynchronous
  // fetch below never acts on a stale `value` / `onChange` captured at mount.
  const latestProps = React.useRef({ value, onChange });
  latestProps.current = { value, onChange };

  // Tracks whether the component is still mounted, so a response that arrives
  // after unmount does not touch state or call back into the parent.
  const mountedRef = React.useRef(true);

  const fetchLanguages = React.useCallback(async (): Promise<void> => {
    // Push a default to the parent only if it still has no selection.
    const selectDefaultIfEmpty = (language: string): void => {
      const { value: latestValue, onChange: notifyParent } = latestProps.current;
      if (!latestValue) {
        notifyParent(language);
      }
    };

    try {
      setLoading(true);
      setError('');
      const response = await ocrService.getAvailableLanguages();
      if (!mountedRef.current) return;

      setLanguages(response.data.available_languages);
      setCurrentUserLanguage(response.data.current_user_language);

      // If no value is set, default to the user's current language.
      selectDefaultIfEmpty(response.data.current_user_language);
    } catch (err: unknown) {
      if (!mountedRef.current) return;

      // Pull the server-provided message out of an axios-style error, if any.
      const serverMessage =
        typeof err === 'object' && err !== null
          ? (err as { response?: { data?: { message?: unknown } } }).response?.data?.message
          : undefined;
      setError(
        typeof serverMessage === 'string' && serverMessage
          ? serverMessage
          : 'Failed to load OCR languages'
      );

      // Fallback to English if the API fails.
      setLanguages([{ code: 'eng', name: 'English', installed: true }]);
      selectDefaultIfEmpty('eng');
    } finally {
      if (mountedRef.current) {
        setLoading(false);
      }
    }
  }, []);

  useEffect(() => {
    mountedRef.current = true;
    void fetchLanguages();
    return () => {
      mountedRef.current = false;
    };
  }, [fetchLanguages]);

  const handleChange = (event: SelectChangeEvent) => {
    onChange(event.target.value);
  };

  /** Human-readable name for a code, falling back to the code itself. */
  const getLanguageDisplay = (langCode: string) => {
    const language = languages.find((lang) => lang.code === langCode);
    return language ? language.name : langCode;
  };

  if (loading) {
    return (
      <FormControl fullWidth={fullWidth} size={size}>
        <InputLabel>{label}</InputLabel>
        <Box sx={{ display: 'flex', alignItems: 'center', p: 2 }}>
          <CircularProgress size={20} sx={{ mr: 1 }} />
          <Typography variant="body2" color="text.secondary">
            Loading languages...
          </Typography>
        </Box>
      </FormControl>
    );
  }

  if (error) {
    return (
      <Box>
        <Alert
          severity="warning"
          sx={{ mb: 1 }}
          action={
            // role/tabIndex/onKeyDown make the retry control reachable and
            // operable from the keyboard, not just with a mouse click.
            <Typography
              variant="button"
              role="button"
              tabIndex={0}
              onClick={() => {
                void fetchLanguages();
              }}
              onKeyDown={(event) => {
                if (event.key === 'Enter' || event.key === ' ') {
                  event.preventDefault();
                  void fetchLanguages();
                }
              }}
              sx={{ cursor: 'pointer', textDecoration: 'underline' }}
            >
              Retry
            </Typography>
          }
        >
          {error}
        </Alert>
        <FormControl fullWidth={fullWidth} size={size} disabled>
          <InputLabel>{label}</InputLabel>
          <Select value="eng">
            <MenuItem value="eng">English (Fallback)</MenuItem>
          </Select>
        </FormControl>
      </Box>
    );
  }

  const selectedCode = value || currentUserLanguage;
  // MUI Select warns and renders blank when its value matches no MenuItem, so
  // a selection missing from the fetched list gets its own placeholder option.
  const selectedIsListed = languages.some((language) => language.code === selectedCode);

  return (
    <Box>
      <FormControl fullWidth={fullWidth} size={size} disabled={disabled} required={required}>
        <InputLabel id="ocr-language-label">{label}</InputLabel>
        <Select
          labelId="ocr-language-label"
          value={selectedCode}
          onChange={handleChange}
          label={label}
          startAdornment={<LanguageIcon sx={{ mr: 1, color: 'text.secondary' }} />}
        >
          {!selectedIsListed && selectedCode !== '' ? (
            <MenuItem key={`unlisted-${selectedCode}`} value={selectedCode}>
              <Typography color="text.secondary">{selectedCode} (unavailable)</Typography>
            </MenuItem>
          ) : null}
          {languages.map((language) => (
            <MenuItem key={language.code} value={language.code}>
              <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', width: '100%' }}>
                <Typography>{language.name}</Typography>
                <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
                  <Typography variant="caption" color="text.secondary">
                    {language.code}
                  </Typography>
                  {showCurrentIndicator && language.code === currentUserLanguage && (
                    <Chip
                      label="Current"
                      size="small"
                      color="primary"
                      variant="outlined"
                      sx={{ fontSize: '0.7rem', height: '20px' }}
                    />
                  )}
                </Box>
              </Box>
            </MenuItem>
          ))}
        </Select>
        {helperText && (
          <Typography variant="caption" color="text.secondary" sx={{ mt: 0.5, ml: 1.5 }}>
            {helperText}
          </Typography>
        )}
      </FormControl>

      {showCurrentIndicator && languages.length > 0 && (
        <Typography variant="caption" color="text.secondary" sx={{ display: 'block', mt: 1 }}>
          {languages.length} language{languages.length !== 1 ? 's' : ''} available
          {value && value !== currentUserLanguage && (
            <span> • Selecting "{getLanguageDisplay(value)}" will update your default language</span>
          )}
        </Typography>
      )}
    </Box>
  );
};
|
||||
|
||||
export default OcrLanguageSelector;
|
||||
|
|
@ -0,0 +1 @@
|
|||
export { default } from './OcrLanguageSelector';
|
||||
|
|
@ -0,0 +1,158 @@
|
|||
import React, { useState } from 'react';
|
||||
import {
|
||||
Dialog,
|
||||
DialogTitle,
|
||||
DialogContent,
|
||||
DialogActions,
|
||||
Button,
|
||||
Box,
|
||||
Typography,
|
||||
CircularProgress,
|
||||
Alert,
|
||||
Divider,
|
||||
} from '@mui/material';
|
||||
import { Refresh as RefreshIcon, Language as LanguageIcon } from '@mui/icons-material';
|
||||
import OcrLanguageSelector from '../OcrLanguageSelector';
|
||||
import { ocrService } from '../../services/api';
|
||||
|
||||
/**
 * Props accepted by `OcrRetryDialog`.
 */
interface OcrRetryDialogProps {
  /** Whether the dialog is shown. */
  open: boolean;
  /** Called when the dialog should be dismissed (cancel, backdrop, or successful retry). */
  onClose: () => void;
  /**
   * The failed document to retry, or `null` when nothing is selected
   * (the dialog renders nothing in that case).
   */
  document: {
    /** Identifier sent to the retry endpoint. */
    id: string;
    /** File name used in the success message. */
    filename: string;
    /** File name shown in the dialog header. */
    original_filename: string;
    /** Category of the previous failure; shown in a warning when non-empty. */
    failure_category: string;
    /** Detailed error text of the previous failure; shown when non-empty. */
    ocr_error: string;
    /** Number of previous attempts, displayed to the user. */
    retry_count: number;
  } | null;
  /** Receives a human-readable confirmation once the retry has been queued. */
  onRetrySuccess: (message: string) => void;
  /** Receives a human-readable message when the retry could not be queued. */
  onRetryError: (message: string) => void;
}
|
||||
|
||||
/**
 * Modal dialog that re-queues OCR for a failed document, optionally with a
 * different OCR language.
 *
 * Shows the document name, the number of previous attempts and, when present,
 * the previous failure category and error. On confirmation it calls
 * `ocrService.retryWithLanguage` and reports the outcome through
 * `onRetrySuccess` / `onRetryError`. The dialog cannot be dismissed while a
 * retry request is in flight.
 */
const OcrRetryDialog: React.FC<OcrRetryDialogProps> = ({
  open,
  onClose,
  // Aliased so the prop does not shadow the global DOM `document` in this scope.
  document: failedDocument,
  onRetrySuccess,
  onRetryError,
}) => {
  const [selectedLanguage, setSelectedLanguage] = useState<string>('');
  const [retrying, setRetrying] = useState<boolean>(false);

  const handleRetry = async (): Promise<void> => {
    if (!failedDocument) return;

    try {
      setRetrying(true);
      const response = await ocrService.retryWithLanguage(
        failedDocument.id,
        selectedLanguage || undefined
      );

      if (response.data.success) {
        // `!= null` rather than truthiness: a wait of 0 minutes is a valid
        // estimate and must not be reported as unknown.
        const minutes = response.data.estimated_wait_minutes;
        const waitInfo = minutes != null ? `${minutes} minutes` : 'unknown';
        const languageInfo = selectedLanguage ? ` with language "${selectedLanguage}"` : '';
        onRetrySuccess(
          `OCR retry queued for "${failedDocument.filename}"${languageInfo}. Estimated wait time: ${waitInfo}.`
        );
        // Clear the choice so it does not carry over to the next document.
        setSelectedLanguage('');
        onClose();
      } else {
        onRetryError(response.data.message || 'Failed to retry OCR');
      }
    } catch (error: unknown) {
      console.error('Failed to retry OCR:', error);
      // Pull the server-provided message out of an axios-style error, if any.
      const serverMessage =
        typeof error === 'object' && error !== null
          ? (error as { response?: { data?: { message?: unknown } } }).response?.data?.message
          : undefined;
      onRetryError(
        typeof serverMessage === 'string' && serverMessage
          ? serverMessage
          : 'Failed to retry OCR processing'
      );
    } finally {
      setRetrying(false);
    }
  };

  const handleClose = () => {
    // Ignore dismiss requests while the retry request is still running.
    if (!retrying) {
      setSelectedLanguage('');
      onClose();
    }
  };

  if (!failedDocument) return null;

  return (
    <Dialog open={open} onClose={handleClose} maxWidth="sm" fullWidth>
      <DialogTitle>
        <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
          <RefreshIcon />
          <Typography variant="h6">Retry OCR Processing</Typography>
        </Box>
      </DialogTitle>

      <DialogContent>
        <Box sx={{ mb: 3 }}>
          <Typography variant="subtitle1" sx={{ fontWeight: 600, mb: 1 }}>
            Document: {failedDocument.original_filename}
          </Typography>
          <Typography variant="body2" color="text.secondary" sx={{ mb: 2 }}>
            Previous attempts: {failedDocument.retry_count}
          </Typography>

          {failedDocument.failure_category && (
            <Alert severity="warning" sx={{ mb: 2 }}>
              <Typography variant="body2">
                <strong>Previous failure:</strong> {failedDocument.failure_category}
              </Typography>
              {failedDocument.ocr_error && (
                <Typography variant="caption" sx={{ display: 'block', mt: 1 }}>
                  {failedDocument.ocr_error}
                </Typography>
              )}
            </Alert>
          )}
        </Box>

        <Divider sx={{ my: 2 }} />

        <Box sx={{ mb: 3 }}>
          <Typography variant="subtitle2" sx={{ fontWeight: 600, mb: 2, display: 'flex', alignItems: 'center', gap: 1 }}>
            <LanguageIcon fontSize="small" />
            OCR Language Selection
          </Typography>
          <Typography variant="body2" color="text.secondary" sx={{ mb: 2 }}>
            Choose a different language if the previous OCR attempt used the wrong language for this document.
          </Typography>
          <OcrLanguageSelector
            value={selectedLanguage}
            onChange={setSelectedLanguage}
            label="OCR Language (Optional)"
            size="medium"
            helperText="Leave empty to use your default language setting"
            showCurrentIndicator={true}
          />
        </Box>

        <Alert severity="info" sx={{ mt: 2 }}>
          <Typography variant="body2">
            The retry will use enhanced OCR processing and may take several minutes depending on document size and complexity.
          </Typography>
        </Alert>
      </DialogContent>

      <DialogActions sx={{ px: 3, pb: 3 }}>
        <Button onClick={handleClose} disabled={retrying}>
          Cancel
        </Button>
        <Button
          onClick={handleRetry}
          variant="contained"
          disabled={retrying}
          startIcon={retrying ? <CircularProgress size={20} /> : <RefreshIcon />}
        >
          {retrying ? 'Retrying...' : 'Retry OCR'}
        </Button>
      </DialogActions>
    </Dialog>
  );
};
|
||||
|
||||
export default OcrRetryDialog;
|
||||
|
|
@ -0,0 +1 @@
|
|||
export { default } from './OcrRetryDialog';
|
||||
|
|
@ -42,6 +42,7 @@ import { Edit as EditIcon, Delete as DeleteIcon, Add as AddIcon,
|
|||
Pause as PauseIcon, Stop as StopIcon } from '@mui/icons-material';
|
||||
import { useAuth } from '../contexts/AuthContext';
|
||||
import api, { queueService } from '../services/api';
|
||||
import OcrLanguageSelector from '../components/OcrLanguageSelector';
|
||||
|
||||
interface User {
|
||||
id: string;
|
||||
|
|
@ -112,10 +113,6 @@ interface UserFormData {
|
|||
password: string;
|
||||
}
|
||||
|
||||
interface OcrLanguage {
|
||||
code: string;
|
||||
name: string;
|
||||
}
|
||||
|
||||
interface WebDAVFolderInfo {
|
||||
path: string;
|
||||
|
|
@ -233,23 +230,6 @@ const SettingsPage: React.FC = () => {
|
|||
const [ocrStatus, setOcrStatus] = useState<{ is_paused: boolean; status: 'paused' | 'running' } | null>(null);
|
||||
const [ocrActionLoading, setOcrActionLoading] = useState(false);
|
||||
|
||||
const ocrLanguages: OcrLanguage[] = [
|
||||
{ code: 'eng', name: 'English' },
|
||||
{ code: 'spa', name: 'Spanish' },
|
||||
{ code: 'fra', name: 'French' },
|
||||
{ code: 'deu', name: 'German' },
|
||||
{ code: 'ita', name: 'Italian' },
|
||||
{ code: 'por', name: 'Portuguese' },
|
||||
{ code: 'rus', name: 'Russian' },
|
||||
{ code: 'jpn', name: 'Japanese' },
|
||||
{ code: 'chi_sim', name: 'Chinese (Simplified)' },
|
||||
{ code: 'chi_tra', name: 'Chinese (Traditional)' },
|
||||
{ code: 'kor', name: 'Korean' },
|
||||
{ code: 'ara', name: 'Arabic' },
|
||||
{ code: 'hin', name: 'Hindi' },
|
||||
{ code: 'nld', name: 'Dutch' },
|
||||
{ code: 'pol', name: 'Polish' },
|
||||
];
|
||||
|
||||
useEffect(() => {
|
||||
fetchSettings();
|
||||
|
|
@ -415,9 +395,6 @@ const SettingsPage: React.FC = () => {
|
|||
setTabValue(newValue);
|
||||
};
|
||||
|
||||
const handleOcrLanguageChange = (event: SelectChangeEvent<string>): void => {
|
||||
handleSettingsChange('ocrLanguage', event.target.value);
|
||||
};
|
||||
|
||||
const handleCpuPriorityChange = (event: SelectChangeEvent<string>): void => {
|
||||
handleSettingsChange('cpuPriority', event.target.value);
|
||||
|
|
@ -501,21 +478,13 @@ const SettingsPage: React.FC = () => {
|
|||
<Divider sx={{ mb: 2 }} />
|
||||
<Grid container spacing={2}>
|
||||
<Grid item xs={12} md={6}>
|
||||
<FormControl fullWidth>
|
||||
<InputLabel>OCR Language</InputLabel>
|
||||
<Select
|
||||
value={settings.ocrLanguage}
|
||||
label="OCR Language"
|
||||
onChange={handleOcrLanguageChange}
|
||||
disabled={loading}
|
||||
>
|
||||
{ocrLanguages.map((lang) => (
|
||||
<MenuItem key={lang.code} value={lang.code}>
|
||||
{lang.name}
|
||||
</MenuItem>
|
||||
))}
|
||||
</Select>
|
||||
</FormControl>
|
||||
<OcrLanguageSelector
|
||||
value={settings.ocrLanguage}
|
||||
onChange={(language) => handleSettingsChange('ocrLanguage', language)}
|
||||
disabled={loading}
|
||||
showCurrentIndicator={false}
|
||||
helperText="Default language for OCR text extraction from your documents"
|
||||
/>
|
||||
</Grid>
|
||||
<Grid item xs={12} md={6}>
|
||||
<TextField
|
||||
|
|
|
|||
|
|
@ -272,6 +272,21 @@ export interface OcrActionResponse {
|
|||
message: string
|
||||
}
|
||||
|
||||
/** One OCR language entry as returned by `GET /ocr/languages`. */
export interface LanguageInfo {
  /** Language code used when requesting OCR (e.g. `eng`). */
  code: string
  /** Human-readable display name for the code. */
  name: string
  /** Whether the language is reported as installed by the server. */
  installed: boolean
}
|
||||
|
||||
/** Response body of `GET /ocr/languages`. */
export interface AvailableLanguagesResponse {
  /** Languages the server offers for OCR. */
  available_languages: LanguageInfo[]
  /** Code of the OCR language currently configured for the requesting user. */
  current_user_language: string
}
|
||||
|
||||
/** Request body of `POST /documents/{id}/retry-ocr`. */
export interface RetryOcrRequest {
  /** Optional OCR language code to use for the retry; omitted when not chosen. */
  language?: string
}
|
||||
|
||||
export const queueService = {
|
||||
getStats: () => {
|
||||
return api.get<QueueStats>('/queue/stats')
|
||||
|
|
@ -292,4 +307,22 @@ export const queueService = {
|
|||
resumeOcr: () => {
|
||||
return api.post<OcrActionResponse>('/queue/resume')
|
||||
},
|
||||
}
|
||||
|
||||
/** Client helpers for the OCR-related REST endpoints. */
export const ocrService = {
  /** Fetches the OCR language list together with the user's current language. */
  getAvailableLanguages: () => api.get<AvailableLanguagesResponse>('/ocr/languages'),

  /** Fetches the OCR health report. */
  getHealthStatus: () => api.get('/ocr/health'),

  /**
   * Re-queues OCR for a document.
   *
   * @param documentId - Identifier of the document to retry.
   * @param language - Optional language code; only sent when non-empty.
   */
  retryWithLanguage: (documentId: string, language?: string) => {
    // An empty or missing language results in an empty request body object.
    const payload: RetryOcrRequest = language ? { language } : {}
    return api.post(`/documents/${documentId}/retry-ocr`, payload)
  },
}
|
||||
|
|
@ -384,4 +384,25 @@ impl Database {
|
|||
updated_at: row.get("updated_at"),
|
||||
})
|
||||
}
|
||||
|
||||
/// Stores `language` as the OCR language of `user_id` in the `settings` table.
///
/// The statement is an upsert: a row is inserted for the user when none
/// exists; otherwise the existing row's `ocr_language` is overwritten and its
/// `updated_at` is set to `NOW()`.
///
/// The query is executed through `with_retry` (its retry policy is not visible
/// from here). A failing query is reported as an error whose message starts
/// with "Failed to update OCR language".
///
/// NOTE(review): `language` is written as given; this method does not check
/// it against the installed languages, so callers are expected to validate it.
pub async fn update_user_ocr_language(&self, user_id: Uuid, language: &str) -> Result<()> {
|
||||
self.with_retry(|| async {
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT INTO settings (user_id, ocr_language)
|
||||
VALUES ($1, $2)
|
||||
ON CONFLICT (user_id) DO UPDATE SET
|
||||
ocr_language = $2,
|
||||
updated_at = NOW()
|
||||
"#
|
||||
)
|
||||
.bind(user_id)
|
||||
.bind(language)
|
||||
.execute(&self.pool)
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("Failed to update OCR language: {}", e))?;
|
||||
|
||||
Ok(())
|
||||
}).await
|
||||
}
|
||||
}
|
||||
|
|
@ -436,6 +436,7 @@ async fn main() -> anyhow::Result<()> {
|
|||
.nest("/api/metrics", readur::routes::metrics::router())
|
||||
.nest("/metrics", readur::routes::prometheus_metrics::router())
|
||||
.nest("/api/notifications", readur::routes::notifications::router())
|
||||
.nest("/api/ocr", readur::routes::ocr::router())
|
||||
.nest("/api/queue", readur::routes::queue::router())
|
||||
.nest("/api/search", readur::routes::search::router())
|
||||
.nest("/api/settings", readur::routes::settings::router())
|
||||
|
|
|
|||
|
|
@ -7,33 +7,43 @@ use axum::{
|
|||
response::Json,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::Arc;
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[derive(Serialize, utoipa::ToSchema)]
|
||||
pub struct OcrHealthResponse {
|
||||
status: String,
|
||||
tesseract_installed: bool,
|
||||
available_languages: Vec<String>,
|
||||
diagnostics: Option<String>,
|
||||
errors: Vec<String>,
|
||||
pub status: String,
|
||||
pub tesseract_installed: bool,
|
||||
pub available_languages: Vec<String>,
|
||||
pub diagnostics: Option<String>,
|
||||
pub errors: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[derive(Serialize, utoipa::ToSchema)]
|
||||
pub struct OcrErrorResponse {
|
||||
error: String,
|
||||
error_code: String,
|
||||
details: Option<String>,
|
||||
is_recoverable: bool,
|
||||
pub error: String,
|
||||
pub error_code: String,
|
||||
pub details: Option<String>,
|
||||
pub is_recoverable: bool,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[derive(Deserialize, utoipa::ToSchema)]
|
||||
pub struct OcrRequest {
|
||||
file_path: String,
|
||||
language: Option<String>,
|
||||
use_fallback: Option<bool>,
|
||||
pub file_path: String,
|
||||
pub language: Option<String>,
|
||||
pub use_fallback: Option<bool>,
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/api/ocr/health",
|
||||
tag = "ocr",
|
||||
responses(
|
||||
(status = 200, description = "OCR service health status", body = OcrHealthResponse),
|
||||
(status = 500, description = "OCR service is unhealthy", body = OcrErrorResponse)
|
||||
)
|
||||
)]
|
||||
pub async fn health_check(
|
||||
State(_state): State<AppState>,
|
||||
State(_state): State<Arc<AppState>>,
|
||||
) -> Result<Json<OcrHealthResponse>, (StatusCode, Json<OcrErrorResponse>)> {
|
||||
let service = EnhancedOcrService::new();
|
||||
let diagnostics = service.get_diagnostics().await;
|
||||
|
|
@ -72,8 +82,19 @@ pub async fn health_check(
|
|||
}
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "/api/ocr/perform",
|
||||
tag = "ocr",
|
||||
request_body = OcrRequest,
|
||||
responses(
|
||||
(status = 200, description = "OCR text extraction successful", body = serde_json::Value),
|
||||
(status = 400, description = "Bad request or invalid language", body = OcrErrorResponse),
|
||||
(status = 500, description = "OCR processing failed", body = OcrErrorResponse)
|
||||
)
|
||||
)]
|
||||
pub async fn perform_ocr(
|
||||
State(_state): State<AppState>,
|
||||
State(_state): State<Arc<AppState>>,
|
||||
Json(request): Json<OcrRequest>,
|
||||
) -> Result<Json<serde_json::Value>, (StatusCode, Json<OcrErrorResponse>)> {
|
||||
let service = EnhancedOcrService::new();
|
||||
|
|
|
|||
|
|
@ -73,11 +73,8 @@ impl OcrHealthChecker {
|
|||
})
|
||||
}
|
||||
|
||||
pub fn get_available_languages(&self) -> Vec<String> {
|
||||
let tessdata_path = match self.get_tessdata_path() {
|
||||
Ok(path) => path,
|
||||
Err(_) => return vec![],
|
||||
};
|
||||
pub fn get_available_languages(&self) -> Result<Vec<String>, OcrError> {
|
||||
let tessdata_path = self.get_tessdata_path()?;
|
||||
|
||||
let mut languages = vec![];
|
||||
if let Ok(entries) = std::fs::read_dir(&tessdata_path) {
|
||||
|
|
@ -92,7 +89,18 @@ impl OcrHealthChecker {
|
|||
}
|
||||
|
||||
languages.sort();
|
||||
languages
|
||||
Ok(languages)
|
||||
}
|
||||
|
||||
pub fn validate_language(&self, lang: &str) -> Result<(), OcrError> {
|
||||
// Check if language is supported
|
||||
let available_languages = self.get_available_languages()?;
|
||||
if !available_languages.contains(&lang.to_string()) {
|
||||
return Err(OcrError::LanguageDataNotFound {
|
||||
lang: lang.to_string(),
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn check_cpu_features(&self) -> CpuFeatures {
|
||||
|
|
@ -240,7 +248,7 @@ impl OcrHealthChecker {
|
|||
pub fn get_full_diagnostics(&self) -> OcrDiagnostics {
|
||||
OcrDiagnostics {
|
||||
tesseract_version: self.check_tesseract_installation().ok(),
|
||||
available_languages: self.get_available_languages(),
|
||||
available_languages: self.get_available_languages().unwrap_or_else(|_| vec![]),
|
||||
tessdata_path: self.get_tessdata_path().ok(),
|
||||
cpu_features: self.check_cpu_features(),
|
||||
memory_available_mb: self.check_memory_available(),
|
||||
|
|
|
|||
|
|
@ -26,6 +26,11 @@ struct PaginationQuery {
|
|||
ocr_status: Option<String>,
|
||||
}
|
||||
|
||||
// JSON body accepted by the OCR retry endpoint.
// `language` is optional: when present it is the OCR language code requested
// for the retry; when absent no language override is requested.
#[derive(Deserialize, ToSchema)]
|
||||
pub struct RetryOcrRequest {
|
||||
pub language: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, ToSchema)]
|
||||
struct FailedDocumentsQuery {
|
||||
limit: Option<i64>,
|
||||
|
|
@ -152,6 +157,7 @@ async fn upload_document(
|
|||
.unwrap_or_else(|| crate::models::Settings::default());
|
||||
|
||||
let mut label_ids: Option<Vec<uuid::Uuid>> = None;
|
||||
let mut ocr_language: Option<String> = None;
|
||||
|
||||
// First pass: collect all multipart fields
|
||||
while let Some(field) = multipart.next_field().await.map_err(|_| StatusCode::BAD_REQUEST)? {
|
||||
|
|
@ -172,6 +178,23 @@ async fn upload_document(
|
|||
tracing::warn!("Failed to parse label_ids from upload: {} - Error: {}", label_ids_text, e);
|
||||
}
|
||||
}
|
||||
} else if name == "ocr_language" {
|
||||
let language = field.text().await.map_err(|_| StatusCode::BAD_REQUEST)?;
|
||||
if !language.trim().is_empty() {
|
||||
// Validate that the language is available
|
||||
let health_checker = crate::ocr::health::OcrHealthChecker::new();
|
||||
match health_checker.validate_language(language.trim()) {
|
||||
Ok(_) => {
|
||||
ocr_language = Some(language.trim().to_string());
|
||||
tracing::info!("OCR language specified and validated: {}", language);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("Invalid OCR language specified '{}': {}", language, e);
|
||||
// Return early with bad request for invalid language
|
||||
return Err(StatusCode::BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if name == "file" {
|
||||
let filename = field
|
||||
.file_name()
|
||||
|
|
@ -214,6 +237,15 @@ async fn upload_document(
|
|||
let enable_background_ocr = settings.enable_background_ocr;
|
||||
|
||||
if enable_background_ocr && should_queue_ocr {
|
||||
// If a language was specified, update the user's OCR language setting for this session
|
||||
if let Some(lang) = &ocr_language {
|
||||
if let Err(e) = state.db.update_user_ocr_language(auth_user.user.id, lang).await {
|
||||
tracing::warn!("Failed to update user OCR language to {}: {}", lang, e);
|
||||
} else {
|
||||
tracing::info!("Updated user {} OCR language to: {}", auth_user.user.id, lang);
|
||||
}
|
||||
}
|
||||
|
||||
// Use the shared queue service from AppState instead of creating a new one
|
||||
// Calculate priority based on file size
|
||||
let priority = match saved_document.file_size {
|
||||
|
|
@ -550,6 +582,7 @@ async fn get_processed_image(
|
|||
params(
|
||||
("id" = uuid::Uuid, Path, description = "Document ID")
|
||||
),
|
||||
request_body(content = RetryOcrRequest, description = "OCR retry options"),
|
||||
responses(
|
||||
(status = 200, description = "OCR retry queued successfully", body = String),
|
||||
(status = 404, description = "Document not found"),
|
||||
|
|
@ -561,6 +594,7 @@ async fn retry_ocr(
|
|||
State(state): State<Arc<AppState>>,
|
||||
auth_user: AuthUser,
|
||||
Path(document_id): Path<uuid::Uuid>,
|
||||
Json(request): Json<RetryOcrRequest>,
|
||||
) -> Result<Json<serde_json::Value>, StatusCode> {
|
||||
// Check if document exists and belongs to user
|
||||
let document = state
|
||||
|
|
@ -617,12 +651,36 @@ async fn retry_ocr(
|
|||
_ => 6, // > 50MB: lowest priority
|
||||
};
|
||||
|
||||
// If a language was specified, validate and update the user's OCR language setting
|
||||
if let Some(lang) = &request.language {
|
||||
// Validate that the language is available
|
||||
let health_checker = crate::ocr::health::OcrHealthChecker::new();
|
||||
match health_checker.validate_language(lang) {
|
||||
Ok(_) => {
|
||||
if let Err(e) = state.db.update_user_ocr_language(auth_user.user.id, lang).await {
|
||||
tracing::warn!("Failed to update user OCR language to {}: {}", lang, e);
|
||||
} else {
|
||||
tracing::info!("Updated user {} OCR language to: {} for retry", auth_user.user.id, lang);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("Invalid OCR language specified '{}' for retry: {}", lang, e);
|
||||
return Ok(Json(serde_json::json!({
|
||||
"success": false,
|
||||
"message": format!("Invalid OCR language '{}': {}", lang, e),
|
||||
"error_code": "INVALID_LANGUAGE"
|
||||
})));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add to OCR queue with detailed logging
|
||||
match state.queue_service.enqueue_document(document_id, priority, document.file_size).await {
|
||||
Ok(queue_id) => {
|
||||
let language_info = request.language.as_ref().map(|l| format!(" with language: {}", l)).unwrap_or_default();
|
||||
tracing::info!(
|
||||
"OCR retry queued for document {} ({}): queue_id={}, priority={}, size={}",
|
||||
document_id, document.filename, queue_id, priority, document.file_size
|
||||
"OCR retry queued for document {} ({}): queue_id={}, priority={}, size={}{}",
|
||||
document_id, document.filename, queue_id, priority, document.file_size, language_info
|
||||
);
|
||||
|
||||
Ok(Json(serde_json::json!({
|
||||
|
|
@ -631,6 +689,7 @@ async fn retry_ocr(
|
|||
"queue_id": queue_id,
|
||||
"document_id": document_id,
|
||||
"priority": priority,
|
||||
"language": request.language,
|
||||
"estimated_wait_minutes": calculate_estimated_wait_time(priority).await
|
||||
})))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ pub mod ignored_files;
|
|||
pub mod labels;
|
||||
pub mod metrics;
|
||||
pub mod notifications;
|
||||
pub mod ocr;
|
||||
pub mod prometheus_metrics;
|
||||
pub mod queue;
|
||||
pub mod search;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,173 @@
|
|||
use axum::{
|
||||
extract::State,
|
||||
http::StatusCode,
|
||||
response::Json,
|
||||
routing::get,
|
||||
Router,
|
||||
};
|
||||
use serde::Serialize;
|
||||
use std::sync::Arc;
|
||||
use utoipa::ToSchema;
|
||||
|
||||
use crate::{
|
||||
auth::AuthUser,
|
||||
ocr::health::OcrHealthChecker,
|
||||
AppState,
|
||||
};
|
||||
|
||||
// Response body of `GET /api/ocr/languages`: the languages offered for OCR
// plus the language code currently configured for the requesting user.
#[derive(Serialize, ToSchema)]
|
||||
pub struct AvailableLanguagesResponse {
|
||||
pub available_languages: Vec<LanguageInfo>,
|
||||
pub current_user_language: String,
|
||||
}
|
||||
|
||||
// One OCR language entry: its code, a human-readable display name, and
// whether it is installed.
#[derive(Serialize, ToSchema)]
|
||||
pub struct LanguageInfo {
|
||||
pub code: String,
|
||||
pub name: String,
|
||||
pub installed: bool,
|
||||
}
|
||||
|
||||
pub fn router() -> Router<Arc<AppState>> {
|
||||
Router::new()
|
||||
.route("/health", get(crate::ocr::api::health_check))
|
||||
.route("/perform", axum::routing::post(crate::ocr::api::perform_ocr))
|
||||
.route("/languages", get(get_available_languages))
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/api/ocr/languages",
|
||||
tag = "ocr",
|
||||
security(
|
||||
("bearer_auth" = [])
|
||||
),
|
||||
responses(
|
||||
(status = 200, description = "Available OCR languages and user's current language", body = AvailableLanguagesResponse),
|
||||
(status = 401, description = "Unauthorized"),
|
||||
(status = 500, description = "Internal server error")
|
||||
)
|
||||
)]
|
||||
async fn get_available_languages(
|
||||
State(state): State<Arc<AppState>>,
|
||||
auth_user: AuthUser,
|
||||
) -> Result<Json<AvailableLanguagesResponse>, StatusCode> {
|
||||
// Get user's current OCR language setting
|
||||
let user_settings = state
|
||||
.db
|
||||
.get_user_settings(auth_user.user.id)
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
let current_language = user_settings
|
||||
.map(|s| s.ocr_language)
|
||||
.unwrap_or_else(|| "eng".to_string());
|
||||
|
||||
// Get available languages from Tesseract
|
||||
let health_checker = OcrHealthChecker::new();
|
||||
let available_languages = match health_checker.get_available_languages() {
|
||||
Ok(langs) => langs,
|
||||
Err(_) => {
|
||||
// Fallback to common languages if detection fails
|
||||
vec!["eng".to_string()]
|
||||
}
|
||||
};
|
||||
|
||||
// Create language info with display names
|
||||
let language_info: Vec<LanguageInfo> = available_languages
|
||||
.into_iter()
|
||||
.map(|code| LanguageInfo {
|
||||
name: get_language_display_name(&code),
|
||||
installed: true, // If it's returned by get_available_languages, it's installed
|
||||
code,
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(Json(AvailableLanguagesResponse {
|
||||
available_languages: language_info,
|
||||
current_user_language: current_language,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Convert language codes to human-readable names
|
||||
/// Maps an OCR language code to its human-readable English name.
///
/// Codes that are not in the table are returned unchanged.
fn get_language_display_name(code: &str) -> String {
    // (code, display name) pairs for every language with a known name.
    const DISPLAY_NAMES: &[(&str, &str)] = &[
        ("eng", "English"),
        ("spa", "Spanish"),
        ("fra", "French"),
        ("deu", "German"),
        ("ita", "Italian"),
        ("por", "Portuguese"),
        ("rus", "Russian"),
        ("jpn", "Japanese"),
        ("chi_sim", "Chinese (Simplified)"),
        ("chi_tra", "Chinese (Traditional)"),
        ("kor", "Korean"),
        ("ara", "Arabic"),
        ("hin", "Hindi"),
        ("tha", "Thai"),
        ("vie", "Vietnamese"),
        ("pol", "Polish"),
        ("nld", "Dutch"),
        ("dan", "Danish"),
        ("nor", "Norwegian"),
        ("swe", "Swedish"),
        ("fin", "Finnish"),
        ("ces", "Czech"),
        ("hun", "Hungarian"),
        ("tur", "Turkish"),
        ("heb", "Hebrew"),
        ("ukr", "Ukrainian"),
        ("bul", "Bulgarian"),
        ("ron", "Romanian"),
        ("hrv", "Croatian"),
        ("slk", "Slovak"),
        ("slv", "Slovenian"),
        ("est", "Estonian"),
        ("lav", "Latvian"),
        ("lit", "Lithuanian"),
        ("ell", "Greek"),
        ("cat", "Catalan"),
        ("eus", "Basque"),
        ("gla", "Scottish Gaelic"),
        ("gle", "Irish"),
        ("cym", "Welsh"),
        ("isl", "Icelandic"),
        ("mlt", "Maltese"),
        ("afr", "Afrikaans"),
        ("sqi", "Albanian"),
        ("aze", "Azerbaijani"),
        ("bel", "Belarusian"),
        ("ben", "Bengali"),
        ("bos", "Bosnian"),
        ("bre", "Breton"),
        ("kan", "Kannada"),
        ("kat", "Georgian"),
        ("kaz", "Kazakh"),
        ("kir", "Kyrgyz"),
        ("lao", "Lao"),
        ("lat", "Latin"),
        ("ltz", "Luxembourgish"),
        ("mkd", "Macedonian"),
        ("msa", "Malay"),
        ("mal", "Malayalam"),
        ("mar", "Marathi"),
        ("nep", "Nepali"),
        ("ori", "Odia"),
        ("pan", "Punjabi"),
        ("pus", "Pashto"),
        ("fas", "Persian"),
        ("san", "Sanskrit"),
        ("sin", "Sinhala"),
        ("srp", "Serbian"),
        ("tam", "Tamil"),
        ("tel", "Telugu"),
        ("tgk", "Tajik"),
        ("uzb", "Uzbek"),
        ("urd", "Urdu"),
    ];

    let display = DISPLAY_NAMES
        .iter()
        .find(|(known, _)| *known == code)
        .map(|(_, name)| *name)
        // For unknown codes, just return the code as-is.
        .unwrap_or(code);

    display.to_string()
}
|
||||
|
|
@ -109,6 +109,10 @@ use crate::{
|
|||
crate::routes::webdav::get_webdav_sync_status,
|
||||
crate::routes::webdav::test_webdav_connection,
|
||||
crate::routes::webdav::estimate_webdav_crawl,
|
||||
// OCR endpoints
|
||||
crate::routes::ocr::get_available_languages,
|
||||
crate::ocr::api::health_check,
|
||||
crate::ocr::api::perform_ocr,
|
||||
// Ignored files endpoints
|
||||
crate::routes::ignored_files::list_ignored_files,
|
||||
crate::routes::ignored_files::get_ignored_file,
|
||||
|
|
@ -136,7 +140,10 @@ use crate::{
|
|||
Label, CreateLabel, UpdateLabel, LabelAssignment, LabelQuery, LabelBulkUpdateRequest,
|
||||
// Document schemas
|
||||
BulkDeleteRequest, DocumentListResponse, DocumentOcrResponse, DocumentOperationResponse,
|
||||
BulkDeleteResponse, PaginationInfo, DocumentDuplicatesResponse
|
||||
BulkDeleteResponse, PaginationInfo, DocumentDuplicatesResponse, crate::routes::documents::RetryOcrRequest,
|
||||
// OCR schemas
|
||||
crate::routes::ocr::AvailableLanguagesResponse, crate::routes::ocr::LanguageInfo,
|
||||
crate::ocr::api::OcrHealthResponse, crate::ocr::api::OcrErrorResponse, crate::ocr::api::OcrRequest
|
||||
)
|
||||
),
|
||||
tags(
|
||||
|
|
@ -152,6 +159,7 @@ use crate::{
|
|||
(name = "sources", description = "Document source management endpoints"),
|
||||
(name = "webdav", description = "WebDAV synchronization endpoints"),
|
||||
(name = "ignored_files", description = "Ignored files management endpoints"),
|
||||
(name = "ocr", description = "OCR service management endpoints"),
|
||||
(name = "health", description = "Health check endpoint"),
|
||||
),
|
||||
modifiers(&SecurityAddon),
|
||||
|
|
|
|||
Loading…
Reference in New Issue