From 44aaaca5c593e0ef7c944ca40d6d3793aa8cb528 Mon Sep 17 00:00:00 2001 From: perf3ct Date: Thu, 3 Jul 2025 19:20:19 +0000 Subject: [PATCH] feat(ocr): add even more about the multiple ocr languages --- .../__tests__/OcrLanguageSelector.test.tsx | 47 ++++++++-------- src/ocr/error.rs | 5 ++ src/ocr/health.rs | 55 ++++++++++++++++++- src/ocr/tests.rs | 6 +- 4 files changed, 85 insertions(+), 28 deletions(-) diff --git a/frontend/src/components/OcrLanguageSelector/__tests__/OcrLanguageSelector.test.tsx b/frontend/src/components/OcrLanguageSelector/__tests__/OcrLanguageSelector.test.tsx index 1036c1a..8d70a6c 100644 --- a/frontend/src/components/OcrLanguageSelector/__tests__/OcrLanguageSelector.test.tsx +++ b/frontend/src/components/OcrLanguageSelector/__tests__/OcrLanguageSelector.test.tsx @@ -5,15 +5,16 @@ import { ThemeProvider, createTheme } from '@mui/material/styles'; import OcrLanguageSelector from '../OcrLanguageSelector'; import { ocrService } from '../../../services/api'; +// Create mock functions +const mockGetAvailableLanguages = vi.fn(); + // Mock the API service vi.mock('../../../services/api', () => ({ ocrService: { - getAvailableLanguages: vi.fn(), + getAvailableLanguages: mockGetAvailableLanguages, }, })); -const mockOcrService = vi.mocked(ocrService); - const theme = createTheme(); const renderWithTheme = (component: React.ReactElement) => { @@ -33,11 +34,11 @@ describe('OcrLanguageSelector', () => { const mockLanguagesResponse = { data: { - languages: [ - { code: 'eng', name: 'English' }, - { code: 'spa', name: 'Spanish' }, - { code: 'fra', name: 'French' }, - { code: 'deu', name: 'German' }, + available_languages: [ + { code: 'eng', name: 'English', installed: true }, + { code: 'spa', name: 'Spanish', installed: true }, + { code: 'fra', name: 'French', installed: true }, + { code: 'deu', name: 'German', installed: true }, ], current_user_language: 'eng', }, @@ -45,7 +46,7 @@ describe('OcrLanguageSelector', () => { beforeEach(() => { vi.clearAllMocks(); - mockOcrService.getAvailableLanguages.mockResolvedValue(mockLanguagesResponse); + mockGetAvailableLanguages.mockResolvedValue(mockLanguagesResponse); }); afterEach(() => { @@ -59,7 +60,7 @@ describe('OcrLanguageSelector', () => { // Wait for languages to load await waitFor(() => { - expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1); + expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1); }); }); @@ -73,7 +74,7 @@ describe('OcrLanguageSelector', () => { renderWithTheme(); await waitFor(() => { - expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1); + expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1); }); // Open the select dropdown @@ -96,7 +97,7 @@ describe('OcrLanguageSelector', () => { ); await waitFor(() => { - expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1); + expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1); }); // Open the select dropdown @@ -117,7 +118,7 @@ describe('OcrLanguageSelector', () => { ); await waitFor(() => { - expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1); + expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1); }); // Open the select dropdown @@ -131,7 +132,7 @@ describe('OcrLanguageSelector', () => { it('displays error state when API call fails', async () => { const mockError = new Error('Failed to fetch languages'); - mockOcrService.getAvailableLanguages.mockRejectedValue(mockError); + mockGetAvailableLanguages.mockRejectedValue(mockError); renderWithTheme(); @@ -142,8 +143,8 @@ describe('OcrLanguageSelector', () => { it('retries loading languages when retry button is clicked', async () => { const mockError = new Error('Failed to fetch languages'); - mockOcrService.getAvailableLanguages.mockRejectedValueOnce(mockError); - mockOcrService.getAvailableLanguages.mockResolvedValueOnce(mockLanguagesResponse); + mockGetAvailableLanguages.mockRejectedValueOnce(mockError); + mockGetAvailableLanguages.mockResolvedValueOnce(mockLanguagesResponse); renderWithTheme(); @@ -157,7 +158,7 @@ describe('OcrLanguageSelector', () => { // Should call API again await waitFor(() => { - expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(2); + expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(2); }); }); @@ -208,9 +209,9 @@ describe('OcrLanguageSelector', () => { }); it('handles empty language list gracefully', async () => { - mockOcrService.getAvailableLanguages.mockResolvedValue({ + mockGetAvailableLanguages.mockResolvedValue({ data: { - languages: [], + available_languages: [], current_user_language: null, }, }); @@ -218,7 +219,7 @@ describe('OcrLanguageSelector', () => { renderWithTheme(); await waitFor(() => { - expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1); + expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1); }); // Open the select dropdown @@ -238,7 +239,7 @@ describe('OcrLanguageSelector', () => { ); await waitFor(() => { - expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1); + expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1); }); // The selected value should be displayed @@ -248,7 +249,7 @@ describe('OcrLanguageSelector', () => { it('handles network errors gracefully', async () => { const networkError = new Error('Network Error'); networkError.name = 'NetworkError'; - mockOcrService.getAvailableLanguages.mockRejectedValue(networkError); + mockGetAvailableLanguages.mockRejectedValue(networkError); renderWithTheme(); @@ -267,7 +268,7 @@ describe('OcrLanguageSelector', () => { ); await waitFor(() => { - expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1); + expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1); }); const select = screen.getByRole('combobox'); diff --git a/src/ocr/error.rs b/src/ocr/error.rs index 4023318..77d1b09 100644 --- a/src/ocr/error.rs +++ b/src/ocr/error.rs @@ -12,6 +12,9 @@ pub enum OcrError { #[error("TESSDATA_PREFIX environment variable not set or invalid: {path}")] TessdataPathInvalid { path: String }, + #[error("Tessdata path not found: {path}")] + TessdataPathNotFound { path: String }, + #[error("Insufficient memory for OCR operation. Required: {required}MB, Available: {available}MB")] InsufficientMemory { required: u64, available: u64 }, @@ -67,6 +70,7 @@ impl OcrError { OcrError::TesseractNotInstalled | OcrError::LanguageDataNotFound { .. } | OcrError::TessdataPathInvalid { .. } + | OcrError::TessdataPathNotFound { .. } | OcrError::MissingCpuInstruction { .. } ) } @@ -76,6 +80,7 @@ impl OcrError { OcrError::TesseractNotInstalled => "OCR_NOT_INSTALLED", OcrError::LanguageDataNotFound { .. } => "OCR_LANG_MISSING", OcrError::TessdataPathInvalid { .. } => "OCR_DATA_PATH_INVALID", + OcrError::TessdataPathNotFound { .. } => "OCR_DATA_PATH_NOT_FOUND", OcrError::InsufficientMemory { .. } => "OCR_OUT_OF_MEMORY", OcrError::MissingCpuInstruction { .. } => "OCR_CPU_UNSUPPORTED", OcrError::ImageTooLarge { .. } => "OCR_IMAGE_TOO_LARGE", diff --git a/src/ocr/health.rs b/src/ocr/health.rs index 77144de..0011055 100644 --- a/src/ocr/health.rs +++ b/src/ocr/health.rs @@ -4,11 +4,21 @@ use std::env; use std::path::Path; use sysinfo::System; -pub struct OcrHealthChecker; +pub struct OcrHealthChecker { + custom_tessdata_path: Option, +} impl OcrHealthChecker { pub fn new() -> Self { - Self + Self { + custom_tessdata_path: None, + } + } + + pub fn new_with_path>(custom_tessdata_path: P) -> Self { + Self { + custom_tessdata_path: Some(custom_tessdata_path.as_ref().to_string_lossy().to_string()), + } } pub fn check_tesseract_installation(&self) -> Result { @@ -46,6 +56,17 @@ impl OcrHealthChecker { } pub fn get_tessdata_path(&self) -> Result { + // Use custom tessdata path if provided + if let Some(ref custom_path) = self.custom_tessdata_path { + if Path::new(custom_path).exists() { + return Ok(custom_path.clone()); + } else { + return Err(OcrError::TessdataPathNotFound { + path: custom_path.clone() + }); + } + } + if let Ok(path) = env::var("TESSDATA_PREFIX") { if Path::new(&path).exists() { return Ok(path); @@ -103,6 +124,36 @@ impl OcrHealthChecker { Ok(()) } + pub fn get_language_display_name(&self, lang_code: &str) -> String { + match lang_code { + "eng" => "English".to_string(), + "spa" => "Spanish".to_string(), + "fra" => "French".to_string(), + "deu" => "German".to_string(), + "ita" => "Italian".to_string(), + "por" => "Portuguese".to_string(), + "rus" => "Russian".to_string(), + "chi_sim" => "Chinese (Simplified)".to_string(), + "chi_tra" => "Chinese (Traditional)".to_string(), + "jpn" => "Japanese".to_string(), + "kor" => "Korean".to_string(), + "ara" => "Arabic".to_string(), + "hin" => "Hindi".to_string(), + "nld" => "Dutch".to_string(), + "swe" => "Swedish".to_string(), + "nor" => "Norwegian".to_string(), + "dan" => "Danish".to_string(), + "fin" => "Finnish".to_string(), + "pol" => "Polish".to_string(), + "ces" => "Czech".to_string(), + "hun" => "Hungarian".to_string(), + "tur" => "Turkish".to_string(), + "tha" => "Thai".to_string(), + "vie" => "Vietnamese".to_string(), + _ => lang_code.to_string(), // Return the code itself for unknown languages + } + } + pub fn check_cpu_features(&self) -> CpuFeatures { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { diff --git a/src/ocr/tests.rs b/src/ocr/tests.rs index a9986aa..6dd607f 100644 --- a/src/ocr/tests.rs +++ b/src/ocr/tests.rs @@ -134,7 +134,7 @@ mod tests { env::set_var("TESSDATA_PREFIX", &tessdata_path); - let languages = checker.get_available_languages(); + let languages = checker.get_available_languages().unwrap(); assert!(languages.contains(&"eng".to_string())); assert!(languages.contains(&"fra".to_string())); assert!(languages.contains(&"deu".to_string())); @@ -231,7 +231,7 @@ mod tests { .expect("Failed to create mock language file"); } - let health_checker = OcrHealthChecker::new(tessdata_path); + let health_checker = OcrHealthChecker::new_with_path(tessdata_path); (health_checker, temp_dir) } @@ -331,7 +331,7 @@ mod tests { .expect("Failed to create mock file"); } - let health_checker = OcrHealthChecker::new(tessdata_path); + let health_checker = OcrHealthChecker::new_with_path(tessdata_path); let languages = health_checker.get_available_languages().unwrap(); // Should only include .traineddata files