feat(ocr): add even more about the multiple ocr languages
This commit is contained in:
parent
a88f387aeb
commit
bdf4f5f8fe
|
|
@ -5,15 +5,16 @@ import { ThemeProvider, createTheme } from '@mui/material/styles';
|
||||||
import OcrLanguageSelector from '../OcrLanguageSelector';
|
import OcrLanguageSelector from '../OcrLanguageSelector';
|
||||||
import { ocrService } from '../../../services/api';
|
import { ocrService } from '../../../services/api';
|
||||||
|
|
||||||
|
// Create mock functions
|
||||||
|
const mockGetAvailableLanguages = vi.fn();
|
||||||
|
|
||||||
// Mock the API service
|
// Mock the API service
|
||||||
vi.mock('../../../services/api', () => ({
|
vi.mock('../../../services/api', () => ({
|
||||||
ocrService: {
|
ocrService: {
|
||||||
getAvailableLanguages: vi.fn(),
|
getAvailableLanguages: mockGetAvailableLanguages,
|
||||||
},
|
},
|
||||||
}));
|
}));
|
||||||
|
|
||||||
const mockOcrService = vi.mocked(ocrService);
|
|
||||||
|
|
||||||
const theme = createTheme();
|
const theme = createTheme();
|
||||||
|
|
||||||
const renderWithTheme = (component: React.ReactElement) => {
|
const renderWithTheme = (component: React.ReactElement) => {
|
||||||
|
|
@ -33,11 +34,11 @@ describe('OcrLanguageSelector', () => {
|
||||||
|
|
||||||
const mockLanguagesResponse = {
|
const mockLanguagesResponse = {
|
||||||
data: {
|
data: {
|
||||||
languages: [
|
available_languages: [
|
||||||
{ code: 'eng', name: 'English' },
|
{ code: 'eng', name: 'English', installed: true },
|
||||||
{ code: 'spa', name: 'Spanish' },
|
{ code: 'spa', name: 'Spanish', installed: true },
|
||||||
{ code: 'fra', name: 'French' },
|
{ code: 'fra', name: 'French', installed: true },
|
||||||
{ code: 'deu', name: 'German' },
|
{ code: 'deu', name: 'German', installed: true },
|
||||||
],
|
],
|
||||||
current_user_language: 'eng',
|
current_user_language: 'eng',
|
||||||
},
|
},
|
||||||
|
|
@ -45,7 +46,7 @@ describe('OcrLanguageSelector', () => {
|
||||||
|
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
vi.clearAllMocks();
|
vi.clearAllMocks();
|
||||||
mockOcrService.getAvailableLanguages.mockResolvedValue(mockLanguagesResponse);
|
mockGetAvailableLanguages.mockResolvedValue(mockLanguagesResponse);
|
||||||
});
|
});
|
||||||
|
|
||||||
afterEach(() => {
|
afterEach(() => {
|
||||||
|
|
@ -59,7 +60,7 @@ describe('OcrLanguageSelector', () => {
|
||||||
|
|
||||||
// Wait for languages to load
|
// Wait for languages to load
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
|
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -73,7 +74,7 @@ describe('OcrLanguageSelector', () => {
|
||||||
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
|
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
|
||||||
|
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
|
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Open the select dropdown
|
// Open the select dropdown
|
||||||
|
|
@ -96,7 +97,7 @@ describe('OcrLanguageSelector', () => {
|
||||||
);
|
);
|
||||||
|
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
|
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Open the select dropdown
|
// Open the select dropdown
|
||||||
|
|
@ -117,7 +118,7 @@ describe('OcrLanguageSelector', () => {
|
||||||
);
|
);
|
||||||
|
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
|
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Open the select dropdown
|
// Open the select dropdown
|
||||||
|
|
@ -131,7 +132,7 @@ describe('OcrLanguageSelector', () => {
|
||||||
|
|
||||||
it('displays error state when API call fails', async () => {
|
it('displays error state when API call fails', async () => {
|
||||||
const mockError = new Error('Failed to fetch languages');
|
const mockError = new Error('Failed to fetch languages');
|
||||||
mockOcrService.getAvailableLanguages.mockRejectedValue(mockError);
|
mockGetAvailableLanguages.mockRejectedValue(mockError);
|
||||||
|
|
||||||
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
|
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
|
||||||
|
|
||||||
|
|
@ -142,8 +143,8 @@ describe('OcrLanguageSelector', () => {
|
||||||
|
|
||||||
it('retries loading languages when retry button is clicked', async () => {
|
it('retries loading languages when retry button is clicked', async () => {
|
||||||
const mockError = new Error('Failed to fetch languages');
|
const mockError = new Error('Failed to fetch languages');
|
||||||
mockOcrService.getAvailableLanguages.mockRejectedValueOnce(mockError);
|
mockGetAvailableLanguages.mockRejectedValueOnce(mockError);
|
||||||
mockOcrService.getAvailableLanguages.mockResolvedValueOnce(mockLanguagesResponse);
|
mockGetAvailableLanguages.mockResolvedValueOnce(mockLanguagesResponse);
|
||||||
|
|
||||||
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
|
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
|
||||||
|
|
||||||
|
|
@ -157,7 +158,7 @@ describe('OcrLanguageSelector', () => {
|
||||||
|
|
||||||
// Should call API again
|
// Should call API again
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(2);
|
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(2);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -208,9 +209,9 @@ describe('OcrLanguageSelector', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('handles empty language list gracefully', async () => {
|
it('handles empty language list gracefully', async () => {
|
||||||
mockOcrService.getAvailableLanguages.mockResolvedValue({
|
mockGetAvailableLanguages.mockResolvedValue({
|
||||||
data: {
|
data: {
|
||||||
languages: [],
|
available_languages: [],
|
||||||
current_user_language: null,
|
current_user_language: null,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
@ -218,7 +219,7 @@ describe('OcrLanguageSelector', () => {
|
||||||
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
|
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
|
||||||
|
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
|
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Open the select dropdown
|
// Open the select dropdown
|
||||||
|
|
@ -238,7 +239,7 @@ describe('OcrLanguageSelector', () => {
|
||||||
);
|
);
|
||||||
|
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
|
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
// The selected value should be displayed
|
// The selected value should be displayed
|
||||||
|
|
@ -248,7 +249,7 @@ describe('OcrLanguageSelector', () => {
|
||||||
it('handles network errors gracefully', async () => {
|
it('handles network errors gracefully', async () => {
|
||||||
const networkError = new Error('Network Error');
|
const networkError = new Error('Network Error');
|
||||||
networkError.name = 'NetworkError';
|
networkError.name = 'NetworkError';
|
||||||
mockOcrService.getAvailableLanguages.mockRejectedValue(networkError);
|
mockGetAvailableLanguages.mockRejectedValue(networkError);
|
||||||
|
|
||||||
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
|
renderWithTheme(<OcrLanguageSelector {...defaultProps} />);
|
||||||
|
|
||||||
|
|
@ -267,7 +268,7 @@ describe('OcrLanguageSelector', () => {
|
||||||
);
|
);
|
||||||
|
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
|
expect(mockGetAvailableLanguages).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
const select = screen.getByRole('combobox');
|
const select = screen.getByRole('combobox');
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,9 @@ pub enum OcrError {
|
||||||
#[error("TESSDATA_PREFIX environment variable not set or invalid: {path}")]
|
#[error("TESSDATA_PREFIX environment variable not set or invalid: {path}")]
|
||||||
TessdataPathInvalid { path: String },
|
TessdataPathInvalid { path: String },
|
||||||
|
|
||||||
|
#[error("Tessdata path not found: {path}")]
|
||||||
|
TessdataPathNotFound { path: String },
|
||||||
|
|
||||||
#[error("Insufficient memory for OCR operation. Required: {required}MB, Available: {available}MB")]
|
#[error("Insufficient memory for OCR operation. Required: {required}MB, Available: {available}MB")]
|
||||||
InsufficientMemory { required: u64, available: u64 },
|
InsufficientMemory { required: u64, available: u64 },
|
||||||
|
|
||||||
|
|
@ -67,6 +70,7 @@ impl OcrError {
|
||||||
OcrError::TesseractNotInstalled
|
OcrError::TesseractNotInstalled
|
||||||
| OcrError::LanguageDataNotFound { .. }
|
| OcrError::LanguageDataNotFound { .. }
|
||||||
| OcrError::TessdataPathInvalid { .. }
|
| OcrError::TessdataPathInvalid { .. }
|
||||||
|
| OcrError::TessdataPathNotFound { .. }
|
||||||
| OcrError::MissingCpuInstruction { .. }
|
| OcrError::MissingCpuInstruction { .. }
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
@ -76,6 +80,7 @@ impl OcrError {
|
||||||
OcrError::TesseractNotInstalled => "OCR_NOT_INSTALLED",
|
OcrError::TesseractNotInstalled => "OCR_NOT_INSTALLED",
|
||||||
OcrError::LanguageDataNotFound { .. } => "OCR_LANG_MISSING",
|
OcrError::LanguageDataNotFound { .. } => "OCR_LANG_MISSING",
|
||||||
OcrError::TessdataPathInvalid { .. } => "OCR_DATA_PATH_INVALID",
|
OcrError::TessdataPathInvalid { .. } => "OCR_DATA_PATH_INVALID",
|
||||||
|
OcrError::TessdataPathNotFound { .. } => "OCR_DATA_PATH_NOT_FOUND",
|
||||||
OcrError::InsufficientMemory { .. } => "OCR_OUT_OF_MEMORY",
|
OcrError::InsufficientMemory { .. } => "OCR_OUT_OF_MEMORY",
|
||||||
OcrError::MissingCpuInstruction { .. } => "OCR_CPU_UNSUPPORTED",
|
OcrError::MissingCpuInstruction { .. } => "OCR_CPU_UNSUPPORTED",
|
||||||
OcrError::ImageTooLarge { .. } => "OCR_IMAGE_TOO_LARGE",
|
OcrError::ImageTooLarge { .. } => "OCR_IMAGE_TOO_LARGE",
|
||||||
|
|
|
||||||
|
|
@ -4,11 +4,21 @@ use std::env;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use sysinfo::System;
|
use sysinfo::System;
|
||||||
|
|
||||||
pub struct OcrHealthChecker;
|
pub struct OcrHealthChecker {
|
||||||
|
custom_tessdata_path: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
impl OcrHealthChecker {
|
impl OcrHealthChecker {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self
|
Self {
|
||||||
|
custom_tessdata_path: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new_with_path<P: AsRef<Path>>(custom_tessdata_path: P) -> Self {
|
||||||
|
Self {
|
||||||
|
custom_tessdata_path: Some(custom_tessdata_path.as_ref().to_string_lossy().to_string()),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn check_tesseract_installation(&self) -> Result<String, OcrError> {
|
pub fn check_tesseract_installation(&self) -> Result<String, OcrError> {
|
||||||
|
|
@ -46,6 +56,17 @@ impl OcrHealthChecker {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_tessdata_path(&self) -> Result<String, OcrError> {
|
pub fn get_tessdata_path(&self) -> Result<String, OcrError> {
|
||||||
|
// Use custom tessdata path if provided
|
||||||
|
if let Some(ref custom_path) = self.custom_tessdata_path {
|
||||||
|
if Path::new(custom_path).exists() {
|
||||||
|
return Ok(custom_path.clone());
|
||||||
|
} else {
|
||||||
|
return Err(OcrError::TessdataPathNotFound {
|
||||||
|
path: custom_path.clone()
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if let Ok(path) = env::var("TESSDATA_PREFIX") {
|
if let Ok(path) = env::var("TESSDATA_PREFIX") {
|
||||||
if Path::new(&path).exists() {
|
if Path::new(&path).exists() {
|
||||||
return Ok(path);
|
return Ok(path);
|
||||||
|
|
@ -103,6 +124,36 @@ impl OcrHealthChecker {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get_language_display_name(&self, lang_code: &str) -> String {
|
||||||
|
match lang_code {
|
||||||
|
"eng" => "English".to_string(),
|
||||||
|
"spa" => "Spanish".to_string(),
|
||||||
|
"fra" => "French".to_string(),
|
||||||
|
"deu" => "German".to_string(),
|
||||||
|
"ita" => "Italian".to_string(),
|
||||||
|
"por" => "Portuguese".to_string(),
|
||||||
|
"rus" => "Russian".to_string(),
|
||||||
|
"chi_sim" => "Chinese (Simplified)".to_string(),
|
||||||
|
"chi_tra" => "Chinese (Traditional)".to_string(),
|
||||||
|
"jpn" => "Japanese".to_string(),
|
||||||
|
"kor" => "Korean".to_string(),
|
||||||
|
"ara" => "Arabic".to_string(),
|
||||||
|
"hin" => "Hindi".to_string(),
|
||||||
|
"nld" => "Dutch".to_string(),
|
||||||
|
"swe" => "Swedish".to_string(),
|
||||||
|
"nor" => "Norwegian".to_string(),
|
||||||
|
"dan" => "Danish".to_string(),
|
||||||
|
"fin" => "Finnish".to_string(),
|
||||||
|
"pol" => "Polish".to_string(),
|
||||||
|
"ces" => "Czech".to_string(),
|
||||||
|
"hun" => "Hungarian".to_string(),
|
||||||
|
"tur" => "Turkish".to_string(),
|
||||||
|
"tha" => "Thai".to_string(),
|
||||||
|
"vie" => "Vietnamese".to_string(),
|
||||||
|
_ => lang_code.to_string(), // Return the code itself for unknown languages
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn check_cpu_features(&self) -> CpuFeatures {
|
pub fn check_cpu_features(&self) -> CpuFeatures {
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -134,7 +134,7 @@ mod tests {
|
||||||
|
|
||||||
env::set_var("TESSDATA_PREFIX", &tessdata_path);
|
env::set_var("TESSDATA_PREFIX", &tessdata_path);
|
||||||
|
|
||||||
let languages = checker.get_available_languages();
|
let languages = checker.get_available_languages().unwrap();
|
||||||
assert!(languages.contains(&"eng".to_string()));
|
assert!(languages.contains(&"eng".to_string()));
|
||||||
assert!(languages.contains(&"fra".to_string()));
|
assert!(languages.contains(&"fra".to_string()));
|
||||||
assert!(languages.contains(&"deu".to_string()));
|
assert!(languages.contains(&"deu".to_string()));
|
||||||
|
|
@ -231,7 +231,7 @@ mod tests {
|
||||||
.expect("Failed to create mock language file");
|
.expect("Failed to create mock language file");
|
||||||
}
|
}
|
||||||
|
|
||||||
let health_checker = OcrHealthChecker::new(tessdata_path);
|
let health_checker = OcrHealthChecker::new_with_path(tessdata_path);
|
||||||
(health_checker, temp_dir)
|
(health_checker, temp_dir)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -331,7 +331,7 @@ mod tests {
|
||||||
.expect("Failed to create mock file");
|
.expect("Failed to create mock file");
|
||||||
}
|
}
|
||||||
|
|
||||||
let health_checker = OcrHealthChecker::new(tessdata_path);
|
let health_checker = OcrHealthChecker::new_with_path(tessdata_path);
|
||||||
let languages = health_checker.get_available_languages().unwrap();
|
let languages = health_checker.get_available_languages().unwrap();
|
||||||
|
|
||||||
// Should only include .traineddata files
|
// Should only include .traineddata files
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue