feat(ocr): extend support for multiple OCR languages

This commit is contained in:
perf3ct 2025-07-03 19:20:19 +00:00
parent a88f387aeb
commit bdf4f5f8fe
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
4 changed files with 85 additions and 28 deletions

View File

@ -5,15 +5,16 @@ import { ThemeProvider, createTheme } from '@mui/material/styles';
import OcrLanguageSelector from '../OcrLanguageSelector';
import { ocrService } from '../../../services/api';
// Create mock functions
const mockGetAvailableLanguages = vi.fn();
// Mock the API service
vi.mock('../../../services/api', () => ({
ocrService: {
getAvailableLanguages: vi.fn(),
getAvailableLanguages: mockGetAvailableLanguages,
},
}));
const mockOcrService = vi.mocked(ocrService);
const theme = createTheme();
const renderWithTheme = (component: React.ReactElement) => {
@ -33,11 +34,11 @@ describe('OcrLanguageSelector', () => {
const mockLanguagesResponse = {
data: {
languages: [
{ code: 'eng', name: 'English' },
{ code: 'spa', name: 'Spanish' },
{ code: 'fra', name: 'French' },
{ code: 'deu', name: 'German' },
available_languages: [
{ code: 'eng', name: 'English', installed: true },
{ code: 'spa', name: 'Spanish', installed: true },
{ code: 'fra', name: 'French', installed: true },
{ code: 'deu', name: 'German', installed: true },
],
current_user_language: 'eng',
},
@ -45,7 +46,7 @@ describe('OcrLanguageSelector', () => {
beforeEach(() => {
vi.clearAllMocks();
mockOcrService.getAvailableLanguages.mockResolvedValue(mockLanguagesResponse);
mockGetAvailableLanguages.mockResolvedValue(mockLanguagesResponse);
});
afterEach(() => {
@ -59,7 +60,7 @@ describe('OcrLanguageSelector', () => {
// Wait for languages to load
await waitFor(() => {
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
});
});
@ -73,7 +74,7 @@ describe('OcrLanguageSelector', () => {
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
await waitFor(() => {
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
});
// Open the select dropdown
@ -96,7 +97,7 @@ describe('OcrLanguageSelector', () => {
);
await waitFor(() => {
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
});
// Open the select dropdown
@ -117,7 +118,7 @@ describe('OcrLanguageSelector', () => {
);
await waitFor(() => {
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
});
// Open the select dropdown
@ -131,7 +132,7 @@ describe('OcrLanguageSelector', () => {
it('displays error state when API call fails', async () => {
const mockError = new Error('Failed to fetch languages');
mockOcrService.getAvailableLanguages.mockRejectedValue(mockError);
mockGetAvailableLanguages.mockRejectedValue(mockError);
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
@ -142,8 +143,8 @@ describe('OcrLanguageSelector', () => {
it('retries loading languages when retry button is clicked', async () => {
const mockError = new Error('Failed to fetch languages');
mockOcrService.getAvailableLanguages.mockRejectedValueOnce(mockError);
mockOcrService.getAvailableLanguages.mockResolvedValueOnce(mockLanguagesResponse);
mockGetAvailableLanguages.mockRejectedValueOnce(mockError);
mockGetAvailableLanguages.mockResolvedValueOnce(mockLanguagesResponse);
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
@ -157,7 +158,7 @@ describe('OcrLanguageSelector', () => {
// Should call API again
await waitFor(() => {
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(2);
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(2);
});
});
@ -208,9 +209,9 @@ describe('OcrLanguageSelector', () => {
});
it('handles empty language list gracefully', async () => {
mockOcrService.getAvailableLanguages.mockResolvedValue({
mockGetAvailableLanguages.mockResolvedValue({
data: {
languages: [],
available_languages: [],
current_user_language: null,
},
});
@ -218,7 +219,7 @@ describe('OcrLanguageSelector', () => {
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
await waitFor(() => {
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
});
// Open the select dropdown
@ -238,7 +239,7 @@ describe('OcrLanguageSelector', () => {
);
await waitFor(() => {
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
});
// The selected value should be displayed
@ -248,7 +249,7 @@ describe('OcrLanguageSelector', () => {
it('handles network errors gracefully', async () => {
const networkError = new Error('Network Error');
networkError.name = 'NetworkError';
mockOcrService.getAvailableLanguages.mockRejectedValue(networkError);
mockGetAvailableLanguages.mockRejectedValue(networkError);
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
@ -267,7 +268,7 @@ describe('OcrLanguageSelector', () => {
);
await waitFor(() => {
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
});
const select = screen.getByRole('combobox');

View File

@ -12,6 +12,9 @@ pub enum OcrError {
#[error("TESSDATA_PREFIX environment variable not set or invalid: {path}")]
TessdataPathInvalid { path: String },
#[error("Tessdata path not found: {path}")]
TessdataPathNotFound { path: String },
#[error("Insufficient memory for OCR operation. Required: {required}MB, Available: {available}MB")]
InsufficientMemory { required: u64, available: u64 },
@ -67,6 +70,7 @@ impl OcrError {
OcrError::TesseractNotInstalled
| OcrError::LanguageDataNotFound { .. }
| OcrError::TessdataPathInvalid { .. }
| OcrError::TessdataPathNotFound { .. }
| OcrError::MissingCpuInstruction { .. }
)
}
@ -76,6 +80,7 @@ impl OcrError {
OcrError::TesseractNotInstalled => "OCR_NOT_INSTALLED",
OcrError::LanguageDataNotFound { .. } => "OCR_LANG_MISSING",
OcrError::TessdataPathInvalid { .. } => "OCR_DATA_PATH_INVALID",
OcrError::TessdataPathNotFound { .. } => "OCR_DATA_PATH_NOT_FOUND",
OcrError::InsufficientMemory { .. } => "OCR_OUT_OF_MEMORY",
OcrError::MissingCpuInstruction { .. } => "OCR_CPU_UNSUPPORTED",
OcrError::ImageTooLarge { .. } => "OCR_IMAGE_TOO_LARGE",

View File

@ -4,11 +4,21 @@ use std::env;
use std::path::Path;
use sysinfo::System;
pub struct OcrHealthChecker;
pub struct OcrHealthChecker {
custom_tessdata_path: Option<String>,
}
impl OcrHealthChecker {
pub fn new() -> Self {
Self
Self {
custom_tessdata_path: None,
}
}
/// Creates a checker that uses an explicitly supplied tessdata directory.
///
/// The path is stored as a `String`; any non-UTF-8 sequences in it are
/// replaced lossily during the conversion.
pub fn new_with_path<P: AsRef<Path>>(custom_tessdata_path: P) -> Self {
    let tessdata_dir = custom_tessdata_path
        .as_ref()
        .to_string_lossy()
        .into_owned();

    Self {
        custom_tessdata_path: Some(tessdata_dir),
    }
}
pub fn check_tesseract_installation(&self) -> Result<String, OcrError> {
@ -46,6 +56,17 @@ impl OcrHealthChecker {
}
pub fn get_tessdata_path(&self) -> Result<String, OcrError> {
// Use custom tessdata path if provided
if let Some(ref custom_path) = self.custom_tessdata_path {
if Path::new(custom_path).exists() {
return Ok(custom_path.clone());
} else {
return Err(OcrError::TessdataPathNotFound {
path: custom_path.clone()
});
}
}
if let Ok(path) = env::var("TESSDATA_PREFIX") {
if Path::new(&path).exists() {
return Ok(path);
@ -103,6 +124,36 @@ impl OcrHealthChecker {
Ok(())
}
/// Returns a human-readable English name for an OCR language code such as `"eng"`.
///
/// Codes that are not present in the built-in table are returned unchanged.
pub fn get_language_display_name(&self, lang_code: &str) -> String {
    // (language code, display name) pairs known to this checker.
    const DISPLAY_NAMES: &[(&str, &str)] = &[
        ("eng", "English"),
        ("spa", "Spanish"),
        ("fra", "French"),
        ("deu", "German"),
        ("ita", "Italian"),
        ("por", "Portuguese"),
        ("rus", "Russian"),
        ("chi_sim", "Chinese (Simplified)"),
        ("chi_tra", "Chinese (Traditional)"),
        ("jpn", "Japanese"),
        ("kor", "Korean"),
        ("ara", "Arabic"),
        ("hin", "Hindi"),
        ("nld", "Dutch"),
        ("swe", "Swedish"),
        ("nor", "Norwegian"),
        ("dan", "Danish"),
        ("fin", "Finnish"),
        ("pol", "Polish"),
        ("ces", "Czech"),
        ("hun", "Hungarian"),
        ("tur", "Turkish"),
        ("tha", "Thai"),
        ("vie", "Vietnamese"),
    ];

    DISPLAY_NAMES
        .iter()
        .find(|(code, _)| *code == lang_code)
        // Unknown languages fall back to the code itself.
        .map_or(lang_code, |(_, name)| *name)
        .to_string()
}
pub fn check_cpu_features(&self) -> CpuFeatures {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{

View File

@ -134,7 +134,7 @@ mod tests {
env::set_var("TESSDATA_PREFIX", &tessdata_path);
let languages = checker.get_available_languages();
let languages = checker.get_available_languages().unwrap();
assert!(languages.contains(&"eng".to_string()));
assert!(languages.contains(&"fra".to_string()));
assert!(languages.contains(&"deu".to_string()));
@ -231,7 +231,7 @@ mod tests {
.expect("Failed to create mock language file");
}
let health_checker = OcrHealthChecker::new(tessdata_path);
let health_checker = OcrHealthChecker::new_with_path(tessdata_path);
(health_checker, temp_dir)
}
@ -331,7 +331,7 @@ mod tests {
.expect("Failed to create mock file");
}
let health_checker = OcrHealthChecker::new(tessdata_path);
let health_checker = OcrHealthChecker::new_with_path(tessdata_path);
let languages = health_checker.get_available_languages().unwrap();
// Should only include .traineddata files