feat(ocr): add even more about the multiple ocr languages
This commit is contained in:
parent
a88f387aeb
commit
bdf4f5f8fe
|
|
@ -5,15 +5,16 @@ import { ThemeProvider, createTheme } from '@mui/material/styles';
|
|||
import OcrLanguageSelector from '../OcrLanguageSelector';
|
||||
import { ocrService } from '../../../services/api';
|
||||
|
||||
// Create mock functions.
// Vitest hoists `vi.mock` calls above the imports, so anything the factory
// references must be created through `vi.hoisted`; a plain top-level `const`
// would still be uninitialized when the factory runs.
const { mockGetAvailableLanguages } = vi.hoisted(() => ({
  mockGetAvailableLanguages: vi.fn(),
}));

// Mock the API service: `ocrService.getAvailableLanguages` is replaced by the
// hoisted mock so each test can control its resolved/rejected value.
vi.mock('../../../services/api', () => ({
  ocrService: {
    getAvailableLanguages: mockGetAvailableLanguages,
  },
}));
|
||||
|
||||
const mockOcrService = vi.mocked(ocrService);
|
||||
|
||||
const theme = createTheme();
|
||||
|
||||
const renderWithTheme = (component: React.ReactElement) => {
|
||||
|
|
@ -33,11 +34,11 @@ describe('OcrLanguageSelector', () => {
|
|||
|
||||
const mockLanguagesResponse = {
|
||||
data: {
|
||||
languages: [
|
||||
{ code: 'eng', name: 'English' },
|
||||
{ code: 'spa', name: 'Spanish' },
|
||||
{ code: 'fra', name: 'French' },
|
||||
{ code: 'deu', name: 'German' },
|
||||
available_languages: [
|
||||
{ code: 'eng', name: 'English', installed: true },
|
||||
{ code: 'spa', name: 'Spanish', installed: true },
|
||||
{ code: 'fra', name: 'French', installed: true },
|
||||
{ code: 'deu', name: 'German', installed: true },
|
||||
],
|
||||
current_user_language: 'eng',
|
||||
},
|
||||
|
|
@ -45,7 +46,7 @@ describe('OcrLanguageSelector', () => {
|
|||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
mockOcrService.getAvailableLanguages.mockResolvedValue(mockLanguagesResponse);
|
||||
mockGetAvailableLanguages.mockResolvedValue(mockLanguagesResponse);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
|
|
@ -59,7 +60,7 @@ describe('OcrLanguageSelector', () => {
|
|||
|
||||
// Wait for languages to load
|
||||
await waitFor(() => {
|
||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -73,7 +74,7 @@ describe('OcrLanguageSelector', () => {
|
|||
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
// Open the select dropdown
|
||||
|
|
@ -96,7 +97,7 @@ describe('OcrLanguageSelector', () => {
|
|||
);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
// Open the select dropdown
|
||||
|
|
@ -117,7 +118,7 @@ describe('OcrLanguageSelector', () => {
|
|||
);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
// Open the select dropdown
|
||||
|
|
@ -131,7 +132,7 @@ describe('OcrLanguageSelector', () => {
|
|||
|
||||
it('displays error state when API call fails', async () => {
|
||||
const mockError = new Error('Failed to fetch languages');
|
||||
mockOcrService.getAvailableLanguages.mockRejectedValue(mockError);
|
||||
mockGetAvailableLanguages.mockRejectedValue(mockError);
|
||||
|
||||
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
|
||||
|
||||
|
|
@ -142,8 +143,8 @@ describe('OcrLanguageSelector', () => {
|
|||
|
||||
it('retries loading languages when retry button is clicked', async () => {
|
||||
const mockError = new Error('Failed to fetch languages');
|
||||
mockOcrService.getAvailableLanguages.mockRejectedValueOnce(mockError);
|
||||
mockOcrService.getAvailableLanguages.mockResolvedValueOnce(mockLanguagesResponse);
|
||||
mockGetAvailableLanguages.mockRejectedValueOnce(mockError);
|
||||
mockGetAvailableLanguages.mockResolvedValueOnce(mockLanguagesResponse);
|
||||
|
||||
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
|
||||
|
||||
|
|
@ -157,7 +158,7 @@ describe('OcrLanguageSelector', () => {
|
|||
|
||||
// Should call API again
|
||||
await waitFor(() => {
|
||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(2);
|
||||
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -208,9 +209,9 @@ describe('OcrLanguageSelector', () => {
|
|||
});
|
||||
|
||||
it('handles empty language list gracefully', async () => {
|
||||
mockOcrService.getAvailableLanguages.mockResolvedValue({
|
||||
mockGetAvailableLanguages.mockResolvedValue({
|
||||
data: {
|
||||
languages: [],
|
||||
available_languages: [],
|
||||
current_user_language: null,
|
||||
},
|
||||
});
|
||||
|
|
@ -218,7 +219,7 @@ describe('OcrLanguageSelector', () => {
|
|||
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
// Open the select dropdown
|
||||
|
|
@ -238,7 +239,7 @@ describe('OcrLanguageSelector', () => {
|
|||
);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
// The selected value should be displayed
|
||||
|
|
@ -248,7 +249,7 @@ describe('OcrLanguageSelector', () => {
|
|||
it('handles network errors gracefully', async () => {
|
||||
const networkError = new Error('Network Error');
|
||||
networkError.name = 'NetworkError';
|
||||
mockOcrService.getAvailableLanguages.mockRejectedValue(networkError);
|
||||
mockGetAvailableLanguages.mockRejectedValue(networkError);
|
||||
|
||||
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
|
||||
|
||||
|
|
@ -267,7 +268,7 @@ describe('OcrLanguageSelector', () => {
|
|||
);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
const select = screen.getByRole('combobox');
|
||||
|
|
|
|||
|
|
@ -12,6 +12,9 @@ pub enum OcrError {
|
|||
#[error("TESSDATA_PREFIX environment variable not set or invalid: {path}")]
|
||||
TessdataPathInvalid { path: String },
|
||||
|
||||
#[error("Tessdata path not found: {path}")]
|
||||
TessdataPathNotFound { path: String },
|
||||
|
||||
#[error("Insufficient memory for OCR operation. Required: {required}MB, Available: {available}MB")]
|
||||
InsufficientMemory { required: u64, available: u64 },
|
||||
|
||||
|
|
@ -67,6 +70,7 @@ impl OcrError {
|
|||
OcrError::TesseractNotInstalled
|
||||
| OcrError::LanguageDataNotFound { .. }
|
||||
| OcrError::TessdataPathInvalid { .. }
|
||||
| OcrError::TessdataPathNotFound { .. }
|
||||
| OcrError::MissingCpuInstruction { .. }
|
||||
)
|
||||
}
|
||||
|
|
@ -76,6 +80,7 @@ impl OcrError {
|
|||
OcrError::TesseractNotInstalled => "OCR_NOT_INSTALLED",
|
||||
OcrError::LanguageDataNotFound { .. } => "OCR_LANG_MISSING",
|
||||
OcrError::TessdataPathInvalid { .. } => "OCR_DATA_PATH_INVALID",
|
||||
OcrError::TessdataPathNotFound { .. } => "OCR_DATA_PATH_NOT_FOUND",
|
||||
OcrError::InsufficientMemory { .. } => "OCR_OUT_OF_MEMORY",
|
||||
OcrError::MissingCpuInstruction { .. } => "OCR_CPU_UNSUPPORTED",
|
||||
OcrError::ImageTooLarge { .. } => "OCR_IMAGE_TOO_LARGE",
|
||||
|
|
|
|||
|
|
@ -4,11 +4,21 @@ use std::env;
|
|||
use std::path::Path;
|
||||
use sysinfo::System;
|
||||
|
||||
/// OCR health checker state.
///
/// `custom_tessdata_path` holds an optional caller-supplied tessdata
/// directory; `None` means no custom directory was provided.
pub struct OcrHealthChecker {
    custom_tessdata_path: Option<String>,
}
|
||||
|
||||
impl OcrHealthChecker {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
Self {
|
||||
custom_tessdata_path: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_with_path<P: AsRef<Path>>(custom_tessdata_path: P) -> Self {
|
||||
Self {
|
||||
custom_tessdata_path: Some(custom_tessdata_path.as_ref().to_string_lossy().to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_tesseract_installation(&self) -> Result<String, OcrError> {
|
||||
|
|
@ -46,6 +56,17 @@ impl OcrHealthChecker {
|
|||
}
|
||||
|
||||
pub fn get_tessdata_path(&self) -> Result<String, OcrError> {
|
||||
// Use custom tessdata path if provided
|
||||
if let Some(ref custom_path) = self.custom_tessdata_path {
|
||||
if Path::new(custom_path).exists() {
|
||||
return Ok(custom_path.clone());
|
||||
} else {
|
||||
return Err(OcrError::TessdataPathNotFound {
|
||||
path: custom_path.clone()
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(path) = env::var("TESSDATA_PREFIX") {
|
||||
if Path::new(&path).exists() {
|
||||
return Ok(path);
|
||||
|
|
@ -103,6 +124,36 @@ impl OcrHealthChecker {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_language_display_name(&self, lang_code: &str) -> String {
|
||||
match lang_code {
|
||||
"eng" => "English".to_string(),
|
||||
"spa" => "Spanish".to_string(),
|
||||
"fra" => "French".to_string(),
|
||||
"deu" => "German".to_string(),
|
||||
"ita" => "Italian".to_string(),
|
||||
"por" => "Portuguese".to_string(),
|
||||
"rus" => "Russian".to_string(),
|
||||
"chi_sim" => "Chinese (Simplified)".to_string(),
|
||||
"chi_tra" => "Chinese (Traditional)".to_string(),
|
||||
"jpn" => "Japanese".to_string(),
|
||||
"kor" => "Korean".to_string(),
|
||||
"ara" => "Arabic".to_string(),
|
||||
"hin" => "Hindi".to_string(),
|
||||
"nld" => "Dutch".to_string(),
|
||||
"swe" => "Swedish".to_string(),
|
||||
"nor" => "Norwegian".to_string(),
|
||||
"dan" => "Danish".to_string(),
|
||||
"fin" => "Finnish".to_string(),
|
||||
"pol" => "Polish".to_string(),
|
||||
"ces" => "Czech".to_string(),
|
||||
"hun" => "Hungarian".to_string(),
|
||||
"tur" => "Turkish".to_string(),
|
||||
"tha" => "Thai".to_string(),
|
||||
"vie" => "Vietnamese".to_string(),
|
||||
_ => lang_code.to_string(), // Return the code itself for unknown languages
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_cpu_features(&self) -> CpuFeatures {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ mod tests {
|
|||
|
||||
env::set_var("TESSDATA_PREFIX", &tessdata_path);
|
||||
|
||||
let languages = checker.get_available_languages();
|
||||
let languages = checker.get_available_languages().unwrap();
|
||||
assert!(languages.contains(&"eng".to_string()));
|
||||
assert!(languages.contains(&"fra".to_string()));
|
||||
assert!(languages.contains(&"deu".to_string()));
|
||||
|
|
@ -231,7 +231,7 @@ mod tests {
|
|||
.expect("Failed to create mock language file");
|
||||
}
|
||||
|
||||
let health_checker = OcrHealthChecker::new(tessdata_path);
|
||||
let health_checker = OcrHealthChecker::new_with_path(tessdata_path);
|
||||
(health_checker, temp_dir)
|
||||
}
|
||||
|
||||
|
|
@ -331,7 +331,7 @@ mod tests {
|
|||
.expect("Failed to create mock file");
|
||||
}
|
||||
|
||||
let health_checker = OcrHealthChecker::new(tessdata_path);
|
||||
let health_checker = OcrHealthChecker::new_with_path(tessdata_path);
|
||||
let languages = health_checker.get_available_languages().unwrap();
|
||||
|
||||
// Should only include .traineddata files
|
||||
|
|
|
|||
Loading…
Reference in New Issue