diff --git a/frontend/src/components/OcrLanguageSelector/__tests__/OcrLanguageSelector.test.tsx b/frontend/src/components/OcrLanguageSelector/__tests__/OcrLanguageSelector.test.tsx
new file mode 100644
index 0000000..1036c1a
--- /dev/null
+++ b/frontend/src/components/OcrLanguageSelector/__tests__/OcrLanguageSelector.test.tsx
@@ -0,0 +1,276 @@
+import React from 'react';
+import { render, screen, fireEvent, waitFor } from '@testing-library/react';
+import { vi, describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { ThemeProvider, createTheme } from '@mui/material/styles';
+import OcrLanguageSelector from '../OcrLanguageSelector';
+import { ocrService } from '../../../services/api';
+
+// Mock the API service
+vi.mock('../../../services/api', () => ({
+ ocrService: {
+ getAvailableLanguages: vi.fn(),
+ },
+}));
+
+const mockOcrService = vi.mocked(ocrService);
+
+const theme = createTheme();
+
+// Renders `component` inside the MUI ThemeProvider so themed components resolve
+// the default theme created above. Returns the Testing Library render result.
+const renderWithTheme = (component: React.ReactElement) => {
+  return render(
+    <ThemeProvider theme={theme}>
+      {component}
+    </ThemeProvider>
+  );
+};
+
+describe('OcrLanguageSelector', () => {
+ const defaultProps = {
+ value: 'eng',
+ onChange: vi.fn(),
+ label: 'OCR Language',
+ };
+
+ const mockLanguagesResponse = {
+ data: {
+ languages: [
+ { code: 'eng', name: 'English' },
+ { code: 'spa', name: 'Spanish' },
+ { code: 'fra', name: 'French' },
+ { code: 'deu', name: 'German' },
+ ],
+ current_user_language: 'eng',
+ },
+ };
+
+ beforeEach(() => {
+ vi.clearAllMocks();
+ mockOcrService.getAvailableLanguages.mockResolvedValue(mockLanguagesResponse);
+ });
+
+ afterEach(() => {
+ vi.clearAllMocks();
+ });
+
+ it('renders with default props', async () => {
+ renderWithTheme();
+
+ expect(screen.getByLabelText('OCR Language')).toBeInTheDocument();
+
+ // Wait for languages to load
+ await waitFor(() => {
+ expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
+ });
+ });
+
+ it('displays loading state initially', () => {
+ renderWithTheme();
+
+ expect(screen.getByTestId('loading-languages')).toBeInTheDocument();
+ });
+
+ it('loads and displays available languages', async () => {
+ renderWithTheme();
+
+ await waitFor(() => {
+ expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
+ });
+
+ // Open the select dropdown
+ fireEvent.mouseDown(screen.getByRole('combobox'));
+
+ await waitFor(() => {
+ expect(screen.getByText('English')).toBeInTheDocument();
+ expect(screen.getByText('Spanish')).toBeInTheDocument();
+ expect(screen.getByText('French')).toBeInTheDocument();
+ expect(screen.getByText('German')).toBeInTheDocument();
+ });
+ });
+
+ it('shows current language indicator when enabled', async () => {
+ renderWithTheme(
+
+ );
+
+ await waitFor(() => {
+ expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
+ });
+
+ // Open the select dropdown
+ fireEvent.mouseDown(screen.getByRole('combobox'));
+
+ await waitFor(() => {
+ expect(screen.getByText('(Current)')).toBeInTheDocument();
+ });
+ });
+
+ it('calls onChange when language is selected', async () => {
+ const mockOnChange = vi.fn();
+ renderWithTheme(
+
+ );
+
+ await waitFor(() => {
+ expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
+ });
+
+ // Open the select dropdown
+ fireEvent.mouseDown(screen.getByRole('combobox'));
+
+ // Select Spanish
+ fireEvent.click(screen.getByText('Spanish'));
+
+ expect(mockOnChange).toHaveBeenCalledWith('spa');
+ });
+
+ it('displays error state when API call fails', async () => {
+ const mockError = new Error('Failed to fetch languages');
+ mockOcrService.getAvailableLanguages.mockRejectedValue(mockError);
+
+ renderWithTheme();
+
+ await waitFor(() => {
+ expect(screen.getByText('Failed to load languages')).toBeInTheDocument();
+ });
+ });
+
+ it('retries loading languages when retry button is clicked', async () => {
+ const mockError = new Error('Failed to fetch languages');
+ mockOcrService.getAvailableLanguages.mockRejectedValueOnce(mockError);
+ mockOcrService.getAvailableLanguages.mockResolvedValueOnce(mockLanguagesResponse);
+
+ renderWithTheme();
+
+ // Wait for error state
+ await waitFor(() => {
+ expect(screen.getByText('Failed to load languages')).toBeInTheDocument();
+ });
+
+ // Click retry button
+ fireEvent.click(screen.getByText('Retry'));
+
+ // Should call API again
+ await waitFor(() => {
+ expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(2);
+ });
+ });
+
+ it('renders with custom label', () => {
+ renderWithTheme(
+
+ );
+
+ expect(screen.getByLabelText('Custom Language Label')).toBeInTheDocument();
+ });
+
+ it('renders with helper text', () => {
+ renderWithTheme(
+
+ );
+
+ expect(screen.getByText('Choose your preferred language')).toBeInTheDocument();
+ });
+
+ it('respects size prop', () => {
+ renderWithTheme(
+
+ );
+
+ const select = screen.getByRole('combobox');
+ expect(select).toHaveClass('MuiInputBase-sizeSmall');
+ });
+
+ it('respects disabled prop', () => {
+ renderWithTheme(
+
+ );
+
+ const select = screen.getByRole('combobox');
+ expect(select).toBeDisabled();
+ });
+
+ it('handles empty language list gracefully', async () => {
+ mockOcrService.getAvailableLanguages.mockResolvedValue({
+ data: {
+ languages: [],
+ current_user_language: null,
+ },
+ });
+
+ renderWithTheme();
+
+ await waitFor(() => {
+ expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
+ });
+
+ // Open the select dropdown
+ fireEvent.mouseDown(screen.getByRole('combobox'));
+
+ await waitFor(() => {
+ expect(screen.getByText('No languages available')).toBeInTheDocument();
+ });
+ });
+
+ it('displays selected language correctly', async () => {
+ renderWithTheme(
+
+ );
+
+ await waitFor(() => {
+ expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
+ });
+
+ // The selected value should be displayed
+ expect(screen.getByDisplayValue('spa')).toBeInTheDocument();
+ });
+
+ it('handles network errors gracefully', async () => {
+ const networkError = new Error('Network Error');
+ networkError.name = 'NetworkError';
+ mockOcrService.getAvailableLanguages.mockRejectedValue(networkError);
+
+ renderWithTheme();
+
+ await waitFor(() => {
+ expect(screen.getByText('Failed to load languages')).toBeInTheDocument();
+ expect(screen.getByText('Check your internet connection')).toBeInTheDocument();
+ });
+ });
+
+ it('clears selection when value is empty string', async () => {
+ renderWithTheme(
+
+ );
+
+ await waitFor(() => {
+ expect(mockOcrService.getAvailableLanguages).toHaveBeenCalledTimes(1);
+ });
+
+ const select = screen.getByRole('combobox');
+ expect(select).toHaveValue('');
+ });
+});
\ No newline at end of file
diff --git a/frontend/src/components/OcrRetryDialog/__tests__/OcrRetryDialog.test.tsx b/frontend/src/components/OcrRetryDialog/__tests__/OcrRetryDialog.test.tsx
new file mode 100644
index 0000000..4b42191
--- /dev/null
+++ b/frontend/src/components/OcrRetryDialog/__tests__/OcrRetryDialog.test.tsx
@@ -0,0 +1,374 @@
+import React from 'react';
+import { render, screen, fireEvent, waitFor } from '@testing-library/react';
+import { vi, describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { ThemeProvider, createTheme } from '@mui/material/styles';
+import OcrRetryDialog from '../OcrRetryDialog';
+import { ocrService } from '../../../services/api';
+
+// Mock the API service
+vi.mock('../../../services/api', () => ({
+ ocrService: {
+ getAvailableLanguages: vi.fn(),
+ retryWithLanguage: vi.fn(),
+ },
+}));
+
+// Mock the OcrLanguageSelector component
+vi.mock('../../OcrLanguageSelector', () => ({
+ default: ({ value, onChange, ...props }: any) => (
+
+
+
+ ),
+}));
+
+const mockOcrService = vi.mocked(ocrService);
+
+const theme = createTheme();
+
+// Renders `component` inside the MUI ThemeProvider so the dialog under test
+// receives the default theme created above. Returns the Testing Library render result.
+const renderWithTheme = (component: React.ReactElement) => {
+  return render(
+    <ThemeProvider theme={theme}>
+      {component}
+    </ThemeProvider>
+  );
+};
+
+describe('OcrRetryDialog', () => {
+ const mockDocument = {
+ id: 'doc-123',
+ filename: 'test-document.pdf',
+ original_filename: 'test-document.pdf',
+ failure_category: 'Language Detection Failed',
+ ocr_error: 'Unable to detect text language',
+ retry_count: 2,
+ };
+
+ const defaultProps = {
+ open: true,
+ onClose: vi.fn(),
+ document: mockDocument,
+ onRetrySuccess: vi.fn(),
+ onRetryError: vi.fn(),
+ };
+
+ const mockRetryResponse = {
+ data: {
+ success: true,
+ message: 'OCR retry queued successfully',
+ estimated_wait_minutes: 5,
+ },
+ };
+
+ beforeEach(() => {
+ vi.clearAllMocks();
+ mockOcrService.retryWithLanguage.mockResolvedValue(mockRetryResponse);
+ });
+
+ afterEach(() => {
+ vi.clearAllMocks();
+ });
+
+ it('renders dialog when open is true', () => {
+ renderWithTheme();
+
+ expect(screen.getByText('Retry OCR Processing')).toBeInTheDocument();
+ expect(screen.getByText('Document: test-document.pdf')).toBeInTheDocument();
+ expect(screen.getByText('Previous attempts: 2')).toBeInTheDocument();
+ });
+
+ it('does not render dialog when open is false', () => {
+ renderWithTheme();
+
+ expect(screen.queryByText('Retry OCR Processing')).not.toBeInTheDocument();
+ });
+
+ it('does not render when document is null', () => {
+ renderWithTheme();
+
+ expect(screen.queryByText('Retry OCR Processing')).not.toBeInTheDocument();
+ });
+
+ it('displays document information correctly', () => {
+ renderWithTheme();
+
+ expect(screen.getByText('Document: test-document.pdf')).toBeInTheDocument();
+ expect(screen.getByText('Previous attempts: 2')).toBeInTheDocument();
+ expect(screen.getByText('Previous failure: Language Detection Failed')).toBeInTheDocument();
+ expect(screen.getByText('Unable to detect text language')).toBeInTheDocument();
+ });
+
+ it('renders language selector', () => {
+ renderWithTheme();
+
+ expect(screen.getByTestId('ocr-language-selector')).toBeInTheDocument();
+ expect(screen.getByText('OCR Language Selection')).toBeInTheDocument();
+ });
+
+ it('handles language selection', () => {
+ renderWithTheme();
+
+ const languageSelect = screen.getByTestId('language-select');
+ fireEvent.change(languageSelect, { target: { value: 'spa' } });
+
+ expect(languageSelect).toHaveValue('spa');
+ });
+
+ it('calls onRetrySuccess when retry succeeds', async () => {
+ const mockOnRetrySuccess = vi.fn();
+ renderWithTheme(
+
+ );
+
+ // Select a language
+ const languageSelect = screen.getByTestId('language-select');
+ fireEvent.change(languageSelect, { target: { value: 'spa' } });
+
+ // Click retry button
+ fireEvent.click(screen.getByText('Retry OCR'));
+
+ await waitFor(() => {
+ expect(mockOcrService.retryWithLanguage).toHaveBeenCalledWith('doc-123', 'spa');
+ expect(mockOnRetrySuccess).toHaveBeenCalledWith(
+ 'OCR retry queued for "test-document.pdf" with language "Spanish". Estimated wait time: 5 minutes.'
+ );
+ });
+ });
+
+ it('calls onRetrySuccess without language info when no language selected', async () => {
+ const mockOnRetrySuccess = vi.fn();
+ renderWithTheme(
+
+ );
+
+ // Click retry button without selecting language
+ fireEvent.click(screen.getByText('Retry OCR'));
+
+ await waitFor(() => {
+ expect(mockOcrService.retryWithLanguage).toHaveBeenCalledWith('doc-123', undefined);
+ expect(mockOnRetrySuccess).toHaveBeenCalledWith(
+ 'OCR retry queued for "test-document.pdf". Estimated wait time: 5 minutes.'
+ );
+ });
+ });
+
+ it('handles retry failure', async () => {
+ const mockError = new Error('Retry failed');
+ mockOcrService.retryWithLanguage.mockRejectedValue(mockError);
+ const mockOnRetryError = vi.fn();
+
+ renderWithTheme(
+
+ );
+
+ fireEvent.click(screen.getByText('Retry OCR'));
+
+ await waitFor(() => {
+ expect(mockOnRetryError).toHaveBeenCalledWith('Failed to retry OCR processing');
+ });
+ });
+
+ it('handles API error response', async () => {
+ const mockErrorResponse = {
+ response: {
+ data: {
+ message: 'Document not found',
+ },
+ },
+ };
+ mockOcrService.retryWithLanguage.mockRejectedValue(mockErrorResponse);
+ const mockOnRetryError = vi.fn();
+
+ renderWithTheme(
+
+ );
+
+ fireEvent.click(screen.getByText('Retry OCR'));
+
+ await waitFor(() => {
+ expect(mockOnRetryError).toHaveBeenCalledWith('Document not found');
+ });
+ });
+
+ it('handles unsuccessful retry response', async () => {
+ mockOcrService.retryWithLanguage.mockResolvedValue({
+ data: {
+ success: false,
+ message: 'Queue is full',
+ },
+ });
+ const mockOnRetryError = vi.fn();
+
+ renderWithTheme(
+
+ );
+
+ fireEvent.click(screen.getByText('Retry OCR'));
+
+ await waitFor(() => {
+ expect(mockOnRetryError).toHaveBeenCalledWith('Queue is full');
+ });
+ });
+
+ it('shows loading state during retry', async () => {
+ // Make the API call hang
+ mockOcrService.retryWithLanguage.mockImplementation(() => new Promise(() => {}));
+
+ renderWithTheme();
+
+ fireEvent.click(screen.getByText('Retry OCR'));
+
+ await waitFor(() => {
+ expect(screen.getByText('Retrying...')).toBeInTheDocument();
+ });
+
+ // Buttons should be disabled during retry
+ expect(screen.getByText('Cancel')).toBeDisabled();
+ expect(screen.getByText('Retrying...')).toBeDisabled();
+ });
+
+ it('prevents closing dialog during retry', async () => {
+ // Make the API call hang
+ mockOcrService.retryWithLanguage.mockImplementation(() => new Promise(() => {}));
+ const mockOnClose = vi.fn();
+
+ renderWithTheme(
+
+ );
+
+ fireEvent.click(screen.getByText('Retry OCR'));
+
+ // Try to close via cancel button
+ fireEvent.click(screen.getByText('Cancel'));
+
+ // Should not call onClose during retry
+ expect(mockOnClose).not.toHaveBeenCalled();
+ });
+
+ it('calls onClose when cancel is clicked', () => {
+ const mockOnClose = vi.fn();
+ renderWithTheme(
+
+ );
+
+ fireEvent.click(screen.getByText('Cancel'));
+
+ expect(mockOnClose).toHaveBeenCalledTimes(1);
+ });
+
+ it('clears selected language when dialog closes', () => {
+ const mockOnClose = vi.fn();
+ renderWithTheme(
+
+ );
+
+ // Select a language
+ const languageSelect = screen.getByTestId('language-select');
+ fireEvent.change(languageSelect, { target: { value: 'spa' } });
+
+ // Close dialog
+ fireEvent.click(screen.getByText('Cancel'));
+
+ expect(mockOnClose).toHaveBeenCalled();
+ });
+
+ it('closes dialog after successful retry', async () => {
+ const mockOnClose = vi.fn();
+ renderWithTheme(
+
+ );
+
+ fireEvent.click(screen.getByText('Retry OCR'));
+
+ await waitFor(() => {
+ expect(mockOnClose).toHaveBeenCalledTimes(1);
+ });
+ });
+
+ it('displays informational message about retry process', () => {
+ renderWithTheme();
+
+ expect(screen.getByText(/The retry will use enhanced OCR processing/)).toBeInTheDocument();
+ });
+
+ it('handles document without failure category', () => {
+ const documentWithoutFailure = {
+ ...mockDocument,
+ failure_category: '',
+ ocr_error: '',
+ };
+
+ renderWithTheme(
+
+ );
+
+ expect(screen.getByText('Document: test-document.pdf')).toBeInTheDocument();
+ expect(screen.queryByText('Previous failure:')).not.toBeInTheDocument();
+ });
+
+ it('handles missing estimated wait time in response', async () => {
+ mockOcrService.retryWithLanguage.mockResolvedValue({
+ data: {
+ success: true,
+ message: 'OCR retry queued successfully',
+ // No estimated_wait_minutes
+ },
+ });
+
+ const mockOnRetrySuccess = vi.fn();
+ renderWithTheme(
+
+ );
+
+ fireEvent.click(screen.getByText('Retry OCR'));
+
+ await waitFor(() => {
+ expect(mockOnRetrySuccess).toHaveBeenCalledWith(
+ 'OCR retry queued for "test-document.pdf". Estimated wait time: Unknown minutes.'
+ );
+ });
+ });
+});
\ No newline at end of file
diff --git a/frontend/src/pages/__tests__/SettingsPage.integration.test.tsx b/frontend/src/pages/__tests__/SettingsPage.integration.test.tsx
new file mode 100644
index 0000000..6ad4ec5
--- /dev/null
+++ b/frontend/src/pages/__tests__/SettingsPage.integration.test.tsx
@@ -0,0 +1,368 @@
+import React from 'react';
+import { render, screen, fireEvent, waitFor } from '@testing-library/react';
+import { vi, describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { BrowserRouter } from 'react-router-dom';
+import { ThemeProvider, createTheme } from '@mui/material/styles';
+import SettingsPage from '../SettingsPage';
+import { AuthContext } from '../../contexts/AuthContext';
+import api, { ocrService, queueService } from '../../services/api';
+
+// Mock the API
+vi.mock('../../services/api', () => ({
+ default: {
+ get: vi.fn(),
+ post: vi.fn(),
+ put: vi.fn(),
+ },
+ ocrService: {
+ getAvailableLanguages: vi.fn(),
+ },
+ queueService: {
+ getQueueStats: vi.fn(),
+ },
+}));
+
+const mockedApi = vi.mocked(api);
+const mockedOcrService = vi.mocked(ocrService);
+
+const theme = createTheme();
+
+const mockAuthContext = {
+ user: {
+ id: 'user-123',
+ username: 'testuser',
+ email: 'test@example.com',
+ created_at: '2023-01-01T00:00:00Z',
+ },
+ login: vi.fn(),
+ logout: vi.fn(),
+ loading: false,
+};
+
+// Renders `component` with the three contexts the settings page is given in this
+// suite: router, MUI theme, and the mocked auth context.
+// NOTE(review): the original nesting order of the wrappers is not recoverable from
+// the patch; the order below is an assumption — confirm against the real file.
+const renderWithProviders = (component: React.ReactElement) => {
+  return render(
+    <BrowserRouter>
+      <ThemeProvider theme={theme}>
+        <AuthContext.Provider value={mockAuthContext}>
+          {component}
+        </AuthContext.Provider>
+      </ThemeProvider>
+    </BrowserRouter>
+  );
+};
+
+describe('Settings Page - OCR Language Integration', () => {
+ const mockSettingsResponse = {
+ data: {
+ ocrLanguage: 'eng',
+ concurrentOcrJobs: 2,
+ ocrTimeoutSeconds: 300,
+ maxFileSizeMb: 50,
+ allowedFileTypes: ['pdf', 'png', 'jpg'],
+ autoRotateImages: true,
+ enableImagePreprocessing: true,
+ searchResultsPerPage: 20,
+ searchSnippetLength: 200,
+ fuzzySearchThreshold: 0.7,
+ retentionDays: null,
+ enableAutoCleanup: false,
+ enableCompression: true,
+ memoryLimitMb: 1024,
+ cpuPriority: 'normal',
+ enableBackgroundOcr: true,
+ ocrPageSegmentationMode: 3,
+ ocrEngineMode: 3,
+ ocrMinConfidence: 30,
+ ocrDpi: 300,
+ ocrEnhanceContrast: true,
+ ocrRemoveNoise: true,
+ ocrDetectOrientation: true,
+ ocrWhitelistChars: '',
+ ocrBlacklistChars: '',
+ ocrBrightnessBoost: 0,
+ ocrContrastMultiplier: 1.0,
+ ocrNoiseReductionLevel: 1,
+ ocrSharpeningStrength: 0,
+ ocrMorphologicalOperations: false,
+ ocrAdaptiveThresholdWindowSize: 15,
+ ocrHistogramEqualization: false,
+ ocrUpscaleFactor: 1.0,
+ ocrMaxImageWidth: 4000,
+ ocrMaxImageHeight: 4000,
+ saveProcessedImages: false,
+ ocrQualityThresholdBrightness: 50,
+ ocrQualityThresholdContrast: 20,
+ ocrQualityThresholdNoise: 80,
+ ocrQualityThresholdSharpness: 30,
+ ocrSkipEnhancement: false,
+ },
+ };
+
+ const mockLanguagesResponse = {
+ data: {
+ languages: [
+ { code: 'eng', name: 'English' },
+ { code: 'spa', name: 'Spanish' },
+ { code: 'fra', name: 'French' },
+ { code: 'deu', name: 'German' },
+ { code: 'ita', name: 'Italian' },
+ ],
+ current_user_language: 'eng',
+ },
+ };
+
+ const mockQueueStatsResponse = {
+ data: {
+ total_jobs: 0,
+ pending_jobs: 0,
+ processing_jobs: 0,
+ completed_jobs: 0,
+ failed_jobs: 0,
+ },
+ };
+
+ // Resets all mocks and installs the default happy-path responses before each test:
+ // settings and labels through api.get, languages through ocrService, and queue
+ // statistics through queueService.
+ beforeEach(() => {
+   vi.clearAllMocks();
+   mockedApi.get.mockImplementation((url) => {
+     if (url === '/settings') return Promise.resolve(mockSettingsResponse);
+     if (url === '/labels?include_counts=true') return Promise.resolve({ data: [] });
+     // Any other GET is rejected so an unexpected request surfaces as a failure.
+     return Promise.reject(new Error(`Unexpected GET request to ${url}`));
+   });
+   mockedOcrService.getAvailableLanguages.mockResolvedValue(mockLanguagesResponse);
+   // Go through the ES import: vi.mock only intercepts `import`, so a CommonJS
+   // require() of the same path would not return the mocked queueService.
+   vi.mocked(queueService.getQueueStats).mockResolvedValue(mockQueueStatsResponse);
+ });
+
+ afterEach(() => {
+ vi.clearAllMocks();
+ });
+
+ it('loads and displays current OCR language in settings', async () => {
+ renderWithProviders();
+
+ await waitFor(() => {
+ expect(mockedApi.get).toHaveBeenCalledWith('/settings');
+ expect(mockedOcrService.getAvailableLanguages).toHaveBeenCalled();
+ });
+
+ // Should display the OCR language selector
+ expect(screen.getByText('OCR Language')).toBeInTheDocument();
+ });
+
+ it('successfully changes OCR language and saves settings', async () => {
+ const mockUpdateResponse = { data: { success: true } };
+ mockedApi.put.mockResolvedValueOnce(mockUpdateResponse);
+
+ renderWithProviders();
+
+ // Wait for page to load
+ await waitFor(() => {
+ expect(screen.getByText('OCR Language')).toBeInTheDocument();
+ });
+
+ // Find and open the language selector
+ const languageSelector = screen.getByLabelText('OCR Language');
+ fireEvent.mouseDown(languageSelector);
+
+ // Wait for dropdown options to appear
+ await waitFor(() => {
+ expect(screen.getByText('Spanish')).toBeInTheDocument();
+ });
+
+ // Select Spanish
+ fireEvent.click(screen.getByText('Spanish'));
+
+ // Find and click the save button
+ const saveButton = screen.getByText('Save Changes');
+ fireEvent.click(saveButton);
+
+ // Verify the API call was made with updated settings
+ await waitFor(() => {
+ expect(mockedApi.put).toHaveBeenCalledWith('/settings', {
+ ...mockSettingsResponse.data,
+ ocrLanguage: 'spa',
+ });
+ });
+
+ // Should show success message
+ await waitFor(() => {
+ expect(screen.getByText('Settings saved successfully')).toBeInTheDocument();
+ });
+ });
+
+ it('handles OCR language loading errors gracefully', async () => {
+ mockedOcrService.getAvailableLanguages.mockRejectedValueOnce(new Error('Failed to load languages'));
+
+ renderWithProviders();
+
+ await waitFor(() => {
+ expect(mockedOcrService.getAvailableLanguages).toHaveBeenCalled();
+ });
+
+ // Should still render the page but with error state in language selector
+ expect(screen.getByText('OCR Language')).toBeInTheDocument();
+ });
+
+ it('handles settings save errors appropriately', async () => {
+ const mockError = new Error('Failed to save settings');
+ mockedApi.put.mockRejectedValueOnce(mockError);
+
+ renderWithProviders();
+
+ // Wait for page to load
+ await waitFor(() => {
+ expect(screen.getByText('OCR Language')).toBeInTheDocument();
+ });
+
+ // Change a setting
+ const languageSelector = screen.getByLabelText('OCR Language');
+ fireEvent.mouseDown(languageSelector);
+
+ await waitFor(() => {
+ expect(screen.getByText('French')).toBeInTheDocument();
+ });
+
+ fireEvent.click(screen.getByText('French'));
+
+ // Try to save
+ const saveButton = screen.getByText('Save Changes');
+ fireEvent.click(saveButton);
+
+ // Should show error message
+ await waitFor(() => {
+ expect(screen.getByText(/Failed to save settings/)).toBeInTheDocument();
+ });
+ });
+
+ it('preserves other settings when changing OCR language', async () => {
+ const mockUpdateResponse = { data: { success: true } };
+ mockedApi.put.mockResolvedValueOnce(mockUpdateResponse);
+
+ renderWithProviders();
+
+ // Wait for page to load
+ await waitFor(() => {
+ expect(screen.getByText('OCR Language')).toBeInTheDocument();
+ });
+
+ // Change OCR language
+ const languageSelector = screen.getByLabelText('OCR Language');
+ fireEvent.mouseDown(languageSelector);
+
+ await waitFor(() => {
+ expect(screen.getByText('German')).toBeInTheDocument();
+ });
+
+ fireEvent.click(screen.getByText('German'));
+
+ // Save settings
+ const saveButton = screen.getByText('Save Changes');
+ fireEvent.click(saveButton);
+
+ // Verify all original settings are preserved except OCR language
+ await waitFor(() => {
+ expect(mockedApi.put).toHaveBeenCalledWith('/settings', {
+ ...mockSettingsResponse.data,
+ ocrLanguage: 'deu',
+ });
+ });
+ });
+
+ it('shows loading state while fetching languages', async () => {
+ // Make the language fetch hang
+ mockedOcrService.getAvailableLanguages.mockImplementation(() => new Promise(() => {}));
+
+ renderWithProviders();
+
+ await waitFor(() => {
+ expect(screen.getByText('OCR Language')).toBeInTheDocument();
+ });
+
+ // Should show loading indicator in the language selector
+ expect(screen.getByTestId('loading-languages')).toBeInTheDocument();
+ });
+
+ it('handles empty language list', async () => {
+ mockedOcrService.getAvailableLanguages.mockResolvedValueOnce({
+ data: {
+ languages: [],
+ current_user_language: null,
+ },
+ });
+
+ renderWithProviders();
+
+ await waitFor(() => {
+ expect(mockedOcrService.getAvailableLanguages).toHaveBeenCalled();
+ });
+
+ // Should still render the language selector
+ expect(screen.getByText('OCR Language')).toBeInTheDocument();
+
+ // Open the dropdown
+ const languageSelector = screen.getByLabelText('OCR Language');
+ fireEvent.mouseDown(languageSelector);
+
+ // Should show "No languages available"
+ await waitFor(() => {
+ expect(screen.getByText('No languages available')).toBeInTheDocument();
+ });
+ });
+
+ it('indicates current user language in the dropdown', async () => {
+ renderWithProviders();
+
+ await waitFor(() => {
+ expect(screen.getByText('OCR Language')).toBeInTheDocument();
+ });
+
+ // Open the language selector
+ const languageSelector = screen.getByLabelText('OCR Language');
+ fireEvent.mouseDown(languageSelector);
+
+ // Should show current language indicator
+ await waitFor(() => {
+ expect(screen.getByText('(Current)')).toBeInTheDocument();
+ });
+ });
+
+ it('updates language selector when settings are reloaded', async () => {
+ const { rerender } = renderWithProviders();
+
+ // Initial load
+ await waitFor(() => {
+ expect(screen.getByText('OCR Language')).toBeInTheDocument();
+ });
+
+ // Update mock to return different language
+ const updatedSettingsResponse = {
+ ...mockSettingsResponse,
+ data: {
+ ...mockSettingsResponse.data,
+ ocrLanguage: 'spa',
+ },
+ };
+
+ mockedApi.get.mockImplementation((url) => {
+ if (url === '/settings') return Promise.resolve(updatedSettingsResponse);
+ if (url === '/labels?include_counts=true') return Promise.resolve({ data: [] });
+ return Promise.reject(new Error(`Unexpected GET request to ${url}`));
+ });
+
+ // Rerender component
+ rerender(
+
+
+
+
+
+
+
+ );
+
+ // Should reflect the updated language
+ await waitFor(() => {
+ const languageSelector = screen.getByLabelText('OCR Language');
+ expect(languageSelector).toHaveValue('spa');
+ });
+ });
+});
\ No newline at end of file
diff --git a/frontend/src/services/__tests__/api-ocr.test.ts b/frontend/src/services/__tests__/api-ocr.test.ts
new file mode 100644
index 0000000..434c099
--- /dev/null
+++ b/frontend/src/services/__tests__/api-ocr.test.ts
@@ -0,0 +1,335 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import axios from 'axios';
+import { ocrService } from '../api';
+
+// Mock axios
+vi.mock('axios');
+const mockedAxios = vi.mocked(axios);
+
+describe('OCR API Service', () => {
+ beforeEach(() => {
+ vi.clearAllMocks();
+ });
+
+ afterEach(() => {
+ vi.clearAllMocks();
+ });
+
+ describe('getAvailableLanguages', () => {
+ it('should fetch available languages successfully', async () => {
+ const mockResponse = {
+ data: {
+ languages: [
+ { code: 'eng', name: 'English' },
+ { code: 'spa', name: 'Spanish' },
+ { code: 'fra', name: 'French' },
+ ],
+ current_user_language: 'eng',
+ },
+ status: 200,
+ };
+
+ mockedAxios.get.mockResolvedValueOnce(mockResponse);
+
+ const result = await ocrService.getAvailableLanguages();
+
+ expect(mockedAxios.get).toHaveBeenCalledWith('/ocr/languages');
+ expect(result).toEqual(mockResponse);
+ });
+
+ it('should handle network errors', async () => {
+ const networkError = new Error('Network Error');
+ mockedAxios.get.mockRejectedValueOnce(networkError);
+
+ await expect(ocrService.getAvailableLanguages()).rejects.toThrow('Network Error');
+ expect(mockedAxios.get).toHaveBeenCalledWith('/ocr/languages');
+ });
+
+ it('should handle empty language list', async () => {
+ const mockResponse = {
+ data: {
+ languages: [],
+ current_user_language: null,
+ },
+ status: 200,
+ };
+
+ mockedAxios.get.mockResolvedValueOnce(mockResponse);
+
+ const result = await ocrService.getAvailableLanguages();
+
+ expect(result.data.languages).toEqual([]);
+ expect(result.data.current_user_language).toBeNull();
+ });
+ });
+
+ describe('getHealthStatus', () => {
+ it('should fetch OCR health status successfully', async () => {
+ const mockResponse = {
+ data: {
+ status: 'healthy',
+ tesseract_version: '5.3.0',
+ available_languages: ['eng', 'spa', 'fra'],
+ },
+ status: 200,
+ };
+
+ mockedAxios.get.mockResolvedValueOnce(mockResponse);
+
+ const result = await ocrService.getHealthStatus();
+
+ expect(mockedAxios.get).toHaveBeenCalledWith('/ocr/health');
+ expect(result).toEqual(mockResponse);
+ });
+
+ it('should handle unhealthy OCR service', async () => {
+ const mockResponse = {
+ data: {
+ status: 'unhealthy',
+ error: 'Tesseract not found',
+ },
+ status: 503,
+ };
+
+ mockedAxios.get.mockResolvedValueOnce(mockResponse);
+
+ const result = await ocrService.getHealthStatus();
+
+ expect(result.data.status).toBe('unhealthy');
+ expect(result.data.error).toBe('Tesseract not found');
+ });
+ });
+
+ describe('retryWithLanguage', () => {
+ const documentId = 'doc-123';
+
+ it('should retry OCR without language parameter', async () => {
+ const mockResponse = {
+ data: {
+ success: true,
+ message: 'OCR retry queued successfully',
+ queue_id: 'queue-456',
+ estimated_wait_minutes: 5,
+ },
+ status: 200,
+ };
+
+ mockedAxios.post.mockResolvedValueOnce(mockResponse);
+
+ const result = await ocrService.retryWithLanguage(documentId);
+
+ expect(mockedAxios.post).toHaveBeenCalledWith(
+ `/documents/${documentId}/retry-ocr`,
+ {}
+ );
+ expect(result).toEqual(mockResponse);
+ });
+
+ it('should retry OCR with language parameter', async () => {
+ const language = 'spa';
+ const mockResponse = {
+ data: {
+ success: true,
+ message: 'OCR retry queued successfully',
+ queue_id: 'queue-456',
+ estimated_wait_minutes: 3,
+ },
+ status: 200,
+ };
+
+ mockedAxios.post.mockResolvedValueOnce(mockResponse);
+
+ const result = await ocrService.retryWithLanguage(documentId, language);
+
+ expect(mockedAxios.post).toHaveBeenCalledWith(
+ `/documents/${documentId}/retry-ocr`,
+ { language: 'spa' }
+ );
+ expect(result).toEqual(mockResponse);
+ });
+
+ it('should handle retry failure', async () => {
+ const errorResponse = {
+ response: {
+ data: {
+ success: false,
+ message: 'Document not found',
+ },
+ status: 404,
+ },
+ };
+
+ mockedAxios.post.mockRejectedValueOnce(errorResponse);
+
+ await expect(ocrService.retryWithLanguage(documentId)).rejects.toEqual(errorResponse);
+ });
+
+ it('should handle queue full error', async () => {
+ const errorResponse = {
+ response: {
+ data: {
+ success: false,
+ message: 'OCR queue is currently full. Please try again later.',
+ },
+ status: 429,
+ },
+ };
+
+ mockedAxios.post.mockRejectedValueOnce(errorResponse);
+
+ await expect(ocrService.retryWithLanguage(documentId, 'eng')).rejects.toEqual(errorResponse);
+ });
+
+ it('should handle invalid language error', async () => {
+ const errorResponse = {
+ response: {
+ data: {
+ success: false,
+ message: 'Language "invalid" is not supported',
+ },
+ status: 400,
+ },
+ };
+
+ mockedAxios.post.mockRejectedValueOnce(errorResponse);
+
+ await expect(ocrService.retryWithLanguage(documentId, 'invalid')).rejects.toEqual(errorResponse);
+ });
+
+ it('should handle network timeout', async () => {
+ const timeoutError = new Error('timeout of 10000ms exceeded');
+ timeoutError.name = 'TimeoutError';
+
+ mockedAxios.post.mockRejectedValueOnce(timeoutError);
+
+ await expect(ocrService.retryWithLanguage(documentId)).rejects.toThrow('timeout of 10000ms exceeded');
+ });
+
+ it('should handle empty string language as undefined', async () => {
+ const mockResponse = {
+ data: {
+ success: true,
+ message: 'OCR retry queued successfully',
+ },
+ status: 200,
+ };
+
+ mockedAxios.post.mockResolvedValueOnce(mockResponse);
+
+ await ocrService.retryWithLanguage(documentId, '');
+
+ expect(mockedAxios.post).toHaveBeenCalledWith(
+ `/documents/${documentId}/retry-ocr`,
+ {}
+ );
+ });
+
+ // The only input exercised here is a Tesseract-style code containing an underscore
+ // ("chi_sim"); the title is worded to match what is actually asserted.
+ it('should pass language codes containing underscores through unchanged', async () => {
+   const language = 'chi_sim'; // Chinese Simplified
+   const mockResponse = {
+     data: {
+       success: true,
+       message: 'OCR retry queued successfully',
+     },
+     status: 200,
+   };
+
+   mockedAxios.post.mockResolvedValueOnce(mockResponse);
+
+   await ocrService.retryWithLanguage(documentId, language);
+
+   // The code must reach the request body exactly as supplied.
+   expect(mockedAxios.post).toHaveBeenCalledWith(
+     `/documents/${documentId}/retry-ocr`,
+     { language: 'chi_sim' }
+   );
+ });
+ });
+
+ describe('Error Handling', () => {
+ it('should handle 401 unauthorized errors', async () => {
+ const unauthorizedError = {
+ response: {
+ status: 401,
+ data: {
+ message: 'Unauthorized',
+ },
+ },
+ };
+
+ mockedAxios.get.mockRejectedValueOnce(unauthorizedError);
+
+ await expect(ocrService.getAvailableLanguages()).rejects.toEqual(unauthorizedError);
+ });
+
+ it('should handle 403 forbidden errors', async () => {
+ const forbiddenError = {
+ response: {
+ status: 403,
+ data: {
+ message: 'Insufficient permissions',
+ },
+ },
+ };
+
+ mockedAxios.get.mockRejectedValueOnce(forbiddenError);
+
+ await expect(ocrService.getHealthStatus()).rejects.toEqual(forbiddenError);
+ });
+
+ it('should handle 500 internal server errors', async () => {
+ const serverError = {
+ response: {
+ status: 500,
+ data: {
+ message: 'Internal server error',
+ },
+ },
+ };
+
+ mockedAxios.post.mockRejectedValueOnce(serverError);
+
+ await expect(ocrService.retryWithLanguage('doc-123')).rejects.toEqual(serverError);
+ });
+
+ it('should handle malformed response data', async () => {
+ const malformedResponse = {
+ data: null,
+ status: 200,
+ };
+
+ mockedAxios.get.mockResolvedValueOnce(malformedResponse);
+
+ const result = await ocrService.getAvailableLanguages();
+ expect(result.data).toBeNull();
+ });
+ });
+
+ describe('Request Configuration', () => {
+ it('should use correct base URL', async () => {
+ const mockResponse = { data: {}, status: 200 };
+ mockedAxios.get.mockResolvedValueOnce(mockResponse);
+
+ await ocrService.getAvailableLanguages();
+
+ expect(mockedAxios.get).toHaveBeenCalledWith('/ocr/languages');
+ });
+
+ it('should handle concurrent requests', async () => {
+ const mockResponse = { data: {}, status: 200 };
+ mockedAxios.get.mockResolvedValue(mockResponse);
+ mockedAxios.post.mockResolvedValue(mockResponse);
+
+ const requests = [
+ ocrService.getAvailableLanguages(),
+ ocrService.getHealthStatus(),
+ ocrService.retryWithLanguage('doc-1', 'eng'),
+ ocrService.retryWithLanguage('doc-2', 'spa'),
+ ];
+
+ await Promise.all(requests);
+
+ expect(mockedAxios.get).toHaveBeenCalledTimes(2);
+ expect(mockedAxios.post).toHaveBeenCalledTimes(2);
+ });
+ });
+});
\ No newline at end of file
diff --git a/src/ocr/tests.rs b/src/ocr/tests.rs
index 13f3920..5ae4738 100644
--- a/src/ocr/tests.rs
+++ b/src/ocr/tests.rs
@@ -8,6 +8,9 @@ mod tests {
use tempfile::TempDir;
use std::fs;
+ // Include language validation tests
+ mod language_validation_tests;
+
#[test]
fn test_ocr_error_types() {
// Test error creation and properties
diff --git a/src/ocr/tests/language_validation_tests.rs b/src/ocr/tests/language_validation_tests.rs
new file mode 100644
index 0000000..d8e766b
--- /dev/null
+++ b/src/ocr/tests/language_validation_tests.rs
@@ -0,0 +1,298 @@
+#[cfg(test)]
+mod language_validation_tests {
+ use super::super::health::{OcrHealthChecker, OcrError};
+ use std::path::Path;
+ use tempfile::TempDir;
+ use std::fs;
+
+    /// Builds an `OcrHealthChecker` over a temporary `tessdata` directory that holds
+    /// five mock `.traineddata` files. The `TempDir` is returned together with the
+    /// checker so the caller decides how long the directory stays around.
+    fn create_test_health_checker() -> (OcrHealthChecker, TempDir) {
+        const MOCK_LANGUAGES: [&str; 5] = ["eng", "spa", "fra", "deu", "chi_sim"];
+
+        let workspace = TempDir::new().expect("Failed to create temp directory");
+        let tessdata_dir = workspace.path().join("tessdata");
+        fs::create_dir_all(&tessdata_dir).expect("Failed to create tessdata directory");
+
+        // One placeholder model file per language code, written in array order.
+        MOCK_LANGUAGES
+            .iter()
+            .map(|code| tessdata_dir.join(format!("{}.traineddata", code)))
+            .for_each(|model_file| {
+                fs::write(model_file, "mock data")
+                    .expect("Failed to create mock language file");
+            });
+
+        let checker = OcrHealthChecker::new(tessdata_dir);
+        (checker, workspace)
+    }
+
+    #[test]
+    fn test_get_available_languages_success() {
+        // Every mock `.traineddata` file written by the helper should be listed.
+        let (checker, _guard) = create_test_health_checker();
+
+        let outcome = checker.get_available_languages();
+        assert!(outcome.is_ok());
+        let found = outcome.unwrap();
+
+        // Exactly the five helper languages, each present by code.
+        assert_eq!(found.len(), 5);
+        for code in &["eng", "spa", "fra", "deu", "chi_sim"] {
+            assert!(found.contains(&code.to_string()));
+        }
+    }
+
+    #[test]
+    fn test_get_available_languages_empty_directory() {
+        // An existing but empty tessdata directory must yield an empty list, not an error.
+        let scratch = TempDir::new().expect("Failed to create temp directory");
+        let empty_tessdata = scratch.path().join("tessdata");
+        fs::create_dir_all(&empty_tessdata).expect("Failed to create tessdata directory");
+
+        let checker = OcrHealthChecker::new(empty_tessdata);
+        let listing = checker.get_available_languages();
+
+        assert!(listing.is_ok());
+        assert!(listing.unwrap().is_empty());
+    }
+
+    #[test]
+    fn test_get_available_languages_nonexistent_directory() {
+        // The path is never created, so listing must fail with `TessdataPathNotFound`.
+        let scratch = TempDir::new().expect("Failed to create temp directory");
+        let missing_dir = scratch.path().join("nonexistent");
+
+        let checker = OcrHealthChecker::new(missing_dir);
+        let outcome = checker.get_available_languages();
+
+        assert!(outcome.is_err());
+        if !matches!(outcome.unwrap_err(), OcrError::TessdataPathNotFound { .. }) {
+            panic!("Expected TessdataPathNotFound error");
+        }
+    }
+
+    #[test]
+    fn test_validate_language_success() {
+        let (checker, _guard) = create_test_health_checker();
+
+        // Each code below has a matching mock `.traineddata` file.
+        let installed = ["eng", "spa", "fra", "deu", "chi_sim"];
+
+        for code in installed.iter() {
+            assert!(checker.validate_language(code).is_ok());
+        }
+    }
+
+    #[test]
+    fn test_validate_language_invalid() {
+        let (checker, _guard) = create_test_health_checker();
+
+        // No `invalid.traineddata` exists, so the code must be rejected.
+        let outcome = checker.validate_language("invalid");
+        assert!(outcome.is_err());
+
+        if let OcrError::LanguageDataNotFound { lang } = outcome.unwrap_err() {
+            assert_eq!(lang, "invalid");
+        } else {
+            panic!("Expected LanguageDataNotFound error");
+        }
+    }
+
+    #[test]
+    fn test_validate_language_empty_string() {
+        let (checker, _guard) = create_test_health_checker();
+
+        // The empty code must be rejected and echoed back in the error.
+        let outcome = checker.validate_language("");
+        assert!(outcome.is_err());
+        if let OcrError::LanguageDataNotFound { lang } = outcome.unwrap_err() {
+            assert_eq!(lang, "");
+        } else {
+            panic!("Expected LanguageDataNotFound error");
+        }
+    }
+
+    #[test]
+    fn test_validate_language_case_sensitive() {
+        let (checker, _guard) = create_test_health_checker();
+
+        // The lowercase code matches the mock `eng.traineddata` file ...
+        assert!(checker.validate_language("eng").is_ok());
+
+        // ... while the uppercase spelling must be rejected and echoed back in the error.
+        let outcome = checker.validate_language("ENG");
+        assert!(outcome.is_err());
+        if let OcrError::LanguageDataNotFound { lang } = outcome.unwrap_err() {
+            assert_eq!(lang, "ENG");
+        } else {
+            panic!("Expected LanguageDataNotFound error");
+        }
+    }
+
+    #[test]
+    fn test_validate_language_with_special_characters() {
+        let (checker, _guard) = create_test_health_checker();
+
+        // The underscore form matches the mock `chi_sim.traineddata` file.
+        let underscored = checker.validate_language("chi_sim");
+        assert!(underscored.is_ok());
+
+        // The hyphenated form has no matching file.
+        assert!(checker.validate_language("chi-sim").is_err());
+    }
+
+    #[test]
+    fn test_validate_language_whitespace() {
+        let (checker, _guard) = create_test_health_checker();
+
+        // Padded on both sides, trailing only, then leading only.
+        // None of these may be accepted.
+        let padded_codes = [" eng ", "eng ", " eng"];
+
+        for padded in padded_codes.iter() {
+            let outcome = checker.validate_language(padded);
+
+            assert!(outcome.is_err());
+        }
+    }
+
+    #[test]
+    fn test_get_language_display_name() {
+        let (checker, _guard) = create_test_health_checker();
+
+        // (code, expected display name); the unmapped code is expected back unchanged.
+        let expectations = [
+            ("eng", "English"), ("spa", "Spanish"),
+            ("fra", "French"), ("deu", "German"),
+            ("chi_sim", "Chinese (Simplified)"), ("unknown", "unknown"),
+        ];
+        for (code, name) in expectations.iter() {
+            assert_eq!(checker.get_language_display_name(code), *name);
+        }
+    }
+
+    #[test]
+    fn test_concurrent_language_validation() {
+        use std::sync::Arc;
+        use std::thread;
+
+        let (checker, _guard) = create_test_health_checker();
+        let shared = Arc::new(checker);
+
+        // Spawn one validation thread per mock language; all threads are
+        // started before any of them is joined.
+        let workers: Vec<_> = ["eng", "spa", "fra", "deu", "chi_sim"]
+            .iter()
+            .map(|code| {
+                let checker = Arc::clone(&shared);
+                let code = code.to_string();
+                thread::spawn(move || checker.validate_language(&code))
+            })
+            .collect();
+
+        // Every thread must finish without panicking and report success.
+        for worker in workers {
+            let outcome = worker.join().expect("Thread panicked");
+
+            assert!(outcome.is_ok());
+        }
+    }
+
+    #[test]
+    fn test_languages_alphabetically_sorted() {
+        let (checker, _guard) = create_test_health_checker();
+        let listed = checker.get_available_languages().unwrap();
+
+        // Sorting a copy must be a no-op if the returned list is already ordered.
+        let mut expected = listed.clone();
+        expected.sort();
+        assert_eq!(listed, expected, "Languages should be sorted alphabetically");
+    }
+
+    #[test]
+    fn test_ignore_non_traineddata_files() {
+        let scratch = TempDir::new().expect("Failed to create temp directory");
+        let tessdata_dir = scratch.path().join("tessdata");
+        fs::create_dir_all(&tessdata_dir).expect("Failed to create tessdata directory");
+
+        // Three model files interleaved with three files that lack the
+        // `.traineddata` extension.
+        let entries = [
+            "eng.traineddata",
+            "readme.txt",
+            "spa.traineddata",
+            "config.json",
+            "fra.backup",
+            "deu.traineddata",
+        ];
+
+        entries.iter().for_each(|name| {
+            fs::write(tessdata_dir.join(name), "mock data").expect("Failed to create mock file");
+        });
+
+        let checker = OcrHealthChecker::new(tessdata_dir);
+        let listed = checker.get_available_languages().unwrap();
+
+        // Only the three `.traineddata` stems may be reported.
+        assert_eq!(listed.len(), 3);
+        for expected in &["eng", "spa", "deu"] {
+            assert!(listed.contains(&expected.to_string()));
+        }
+    }
+
+    #[test]
+    fn test_handle_permission_errors() {
+        // Unix-only: strips all permissions from the tessdata directory and expects listing
+        // to fail. NOTE(review): root presumably bypasses the mode bits — confirm for CI.
+        #[cfg(unix)]
+        {
+            use std::os::unix::fs::PermissionsExt;
+
+            let temp_dir = TempDir::new().expect("Failed to create temp directory");
+            let tessdata_path = temp_dir.path().join("tessdata");
+            fs::create_dir_all(&tessdata_path).expect("Failed to create tessdata directory");
+
+            // Remove every permission bit from the directory.
+            let mut perms = fs::metadata(&tessdata_path).unwrap().permissions();
+            perms.set_mode(0o000);
+            fs::set_permissions(&tessdata_path, perms).unwrap();
+
+            let health_checker = OcrHealthChecker::new(tessdata_path.clone()); // owned path, like the other tests
+            let result = health_checker.get_available_languages();
+
+            // Restore permissions BEFORE asserting, so a failed assertion cannot
+            // leave the directory unreadable for later cleanup.
+            let mut perms = fs::metadata(&tessdata_path).unwrap().permissions();
+            perms.set_mode(0o755);
+            fs::set_permissions(&tessdata_path, perms).unwrap();
+
+            assert!(result.is_err(), "listing an unreadable directory should fail");
+        }
+    }
+
+    #[test]
+    fn test_validate_multiple_languages_batch() {
+        let (checker, _guard) = create_test_health_checker();
+
+        // (code, whether validation is expected to succeed)
+        let cases = [
+            ("eng", true),
+            ("spa", true),
+            ("fra", true),
+            ("invalid", false),
+            ("", false),
+            ("ENG", false),
+            ("chi_sim", true),
+        ];
+
+        for &(code, expect_ok) in cases.iter() {
+            let outcome = checker.validate_language(code);
+            match expect_ok {
+                true => assert!(outcome.is_ok(), "Language '{}' should be valid", code),
+                false => assert!(outcome.is_err(), "Language '{}' should be invalid", code),
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/ocr/tests/mod.rs b/src/ocr/tests/mod.rs
new file mode 100644
index 0000000..a026801
--- /dev/null
+++ b/src/ocr/tests/mod.rs
@@ -0,0 +1 @@
+pub mod language_validation_tests;
\ No newline at end of file
diff --git a/tests/integration_ocr_language_endpoints.rs b/tests/integration_ocr_language_endpoints.rs
new file mode 100644
index 0000000..9cccf0d
--- /dev/null
+++ b/tests/integration_ocr_language_endpoints.rs
@@ -0,0 +1,529 @@
+use readur2::app::AppState;
+use readur2::config::Config;
+use readur2::db::Database;
+use readur2::ocr::health::OcrHealthChecker;
+use axum::http::StatusCode;
+use axum_test::TestServer;
+use serde_json::json;
+use std::sync::Arc;
+use tempfile::TempDir;
+use std::fs;
+use uuid::Uuid;
+
+struct TestHarness { // per-test fixture built by `TestHarness::new`
+ server: TestServer, // in-process test server wrapping the application router
+ _temp_dir: TempDir, // held so the temporary tessdata directory stays with the harness
+ user_id: Uuid, // id of the row inserted into `users` for this test
+ token: String, // string of the form `test_token_<user_id>`
+}
+
+impl TestHarness {
+    /// Builds the fixture: mock tessdata directory, test user with settings, app state and server.
+    async fn new() -> Self {
+        let temp_dir = TempDir::new().expect("Failed to create temp directory");
+        let tessdata_path = temp_dir.path().join("tessdata");
+        fs::create_dir_all(&tessdata_path).expect("Failed to create tessdata directory");
+
+        // Create six mock language files
+        let language_files = vec![
+            "eng.traineddata",
+            "spa.traineddata",
+            "fra.traineddata",
+            "deu.traineddata",
+            "ita.traineddata",
+            "por.traineddata",
+        ];
+
+        for file in language_files {
+            fs::write(tessdata_path.join(file), "mock language data")
+                .expect("Failed to create mock language file");
+        }
+
+        // Process-wide: tests in this binary that run in parallel overwrite each other's value.
+        std::env::set_var("TESSDATA_PREFIX", &tessdata_path);
+
+        // Connect to the database named by the environment-derived config
+        let config = Config::from_env().expect("Failed to load config");
+        let db = Database::new(&config.database_url)
+            .await
+            .expect("Failed to connect to database");
+
+        // Create a test user whose name and email are derived from a fresh UUID
+        let user_id = Uuid::new_v4();
+        let username = format!("testuser_{}", user_id);
+        let email = format!("{}@test.com", username);
+
+        sqlx::query(
+            "INSERT INTO users (id, username, email, password_hash) VALUES ($1, $2, $3, $4)"
+        )
+        .bind(user_id)
+        .bind(&username)
+        .bind(&email)
+        .bind("dummy_hash")
+        .execute(&db.pool)
+        .await
+        .expect("Failed to create test user");
+
+        // Create user settings with "eng" as the OCR language
+        sqlx::query(
+            "INSERT INTO settings (user_id, ocr_language) VALUES ($1, $2)"
+        )
+        .bind(user_id)
+        .bind("eng")
+        .execute(&db.pool)
+        .await
+        .expect("Failed to create user settings");
+
+        // Create app state
+        let app_state = Arc::new(AppState {
+            db,
+            config,
+            ocr_health_checker: OcrHealthChecker::new(tessdata_path),
+        });
+
+        // Create test server
+        let app = readur2::app::create_app(app_state);
+        let server = TestServer::new(app).expect("Failed to create test server");
+
+        // Generate a test token (simplified for testing)
+        let token = format!("test_token_{}", user_id);
+
+        Self {
+            server,
+            _temp_dir: temp_dir,
+            user_id,
+            token,
+        }
+    }
+
+    /// Deletes this harness's `users` row. NOTE(review): confirm `into_inner` works through `&self`.
+    async fn cleanup(&self) {
+        sqlx::query("DELETE FROM users WHERE id = $1")
+            .bind(self.user_id)
+            .execute(&self.server.into_inner().extract::<Arc<AppState>>().unwrap().db.pool)
+            .await
+            .expect("Failed to cleanup test user");
+    }
+}
+
+#[tokio::test]
+async fn test_get_available_languages_success() {
+    let harness = TestHarness::new().await;
+    let bearer = format!("Bearer {}", harness.token);
+    let response = harness
+        .server
+        .get("/api/ocr/languages")
+        .add_header("Authorization", &bearer)
+        .await;
+    assert_eq!(response.status_code(), StatusCode::OK);
+
+    let body = response.json::<serde_json::Value>();
+    assert!(body.get("languages").is_some());
+
+    // The harness wrote six mock language files, so at least six entries are expected.
+    let entries = body["languages"].as_array().unwrap();
+    assert!(entries.len() >= 6);
+
+    // Every entry must expose both a `code` and a `name` key.
+    let well_formed = |entry: &serde_json::Value| {
+        entry.get("code").is_some() && entry.get("name").is_some()
+    };
+    assert!(entries.iter().all(well_formed));
+
+    // English must be among the reported codes.
+    let is_english = |entry: &serde_json::Value| {
+        entry.get("code").unwrap().as_str().unwrap() == "eng"
+    };
+    assert!(entries.iter().any(is_english));
+
+    harness.cleanup().await;
+}
+
+#[tokio::test]
+async fn test_get_available_languages_unauthorized() {
+    let harness = TestHarness::new().await;
+
+    // No `Authorization` header is attached to this request.
+    let anonymous = harness.server.get("/api/ocr/languages").await;
+    let status = anonymous.status_code();
+
+    assert_eq!(status, StatusCode::UNAUTHORIZED);
+
+    // Remove the user row created by the harness.
+    harness.cleanup().await;
+}
+
+#[tokio::test]
+async fn test_get_available_languages_includes_current_user_language() {
+    let harness = TestHarness::new().await;
+    let bearer = format!("Bearer {}", harness.token);
+    let response = harness
+        .server
+        .get("/api/ocr/languages")
+        .add_header("Authorization", &bearer)
+        .await;
+    assert_eq!(response.status_code(), StatusCode::OK);
+
+    // The harness stored "eng" as this user's `ocr_language` setting.
+    let body = response.json::<serde_json::Value>();
+    let current = body["current_user_language"].as_str().unwrap();
+    assert_eq!(current, "eng");
+    harness.cleanup().await;
+}
+
+#[tokio::test]
+async fn test_retry_ocr_with_language_success() {
+    let harness = TestHarness::new().await;
+
+    // Seed a document in the `failed` OCR state, owned by the harness user.
+    let document_id = Uuid::new_v4();
+    sqlx::query(
+        "INSERT INTO documents (id, user_id, filename, original_filename, file_size, mime_type, ocr_status, created_at, updated_at)
+         VALUES ($1, $2, $3, $4, $5, $6, $7, NOW(), NOW())"
+    )
+    .bind(document_id)
+    .bind(harness.user_id)
+    .bind("test.pdf")
+    .bind("test.pdf")
+    .bind(1024i64)
+    .bind("application/pdf")
+    .bind("failed")
+    .execute(&harness.server.into_inner().extract::<Arc<AppState>>().unwrap().db.pool)
+    .await
+    .expect("Failed to create test document");
+    // NOTE(review): `into_inner()` above is assumed not to consume `harness.server`, reused below — confirm.
+    let retry_request = json!({
+        "language": "spa"
+    });
+
+    let response = harness
+        .server
+        .post(&format!("/documents/{}/retry-ocr", document_id))
+        .add_header("Authorization", &format!("Bearer {}", harness.token))
+        .add_header("Content-Type", "application/json")
+        .json(&retry_request)
+        .await;
+
+    assert_eq!(response.status_code(), StatusCode::OK);
+
+    let body: serde_json::Value = response.json();
+    assert_eq!(body["success"].as_bool().unwrap(), true);
+    assert!(body.get("message").is_some());
+
+    harness.cleanup().await;
+}
+
+#[tokio::test]
+async fn test_retry_ocr_without_language_uses_default() {
+    let harness = TestHarness::new().await;
+
+    // Seed a document in the `failed` OCR state, owned by the harness user.
+    let document_id = Uuid::new_v4();
+    sqlx::query(
+        "INSERT INTO documents (id, user_id, filename, original_filename, file_size, mime_type, ocr_status, created_at, updated_at)
+         VALUES ($1, $2, $3, $4, $5, $6, $7, NOW(), NOW())"
+    )
+    .bind(document_id)
+    .bind(harness.user_id)
+    .bind("test.pdf")
+    .bind("test.pdf")
+    .bind(1024i64)
+    .bind("application/pdf")
+    .bind("failed")
+    .execute(&harness.server.into_inner().extract::<Arc<AppState>>().unwrap().db.pool)
+    .await
+    .expect("Failed to create test document");
+    // NOTE(review): `into_inner()` above is assumed not to consume `harness.server`, reused below — confirm.
+    let retry_request = json!({}); // empty body: no `language` key is sent
+
+    let response = harness
+        .server
+        .post(&format!("/documents/{}/retry-ocr", document_id))
+        .add_header("Authorization", &format!("Bearer {}", harness.token))
+        .add_header("Content-Type", "application/json")
+        .json(&retry_request)
+        .await;
+
+    assert_eq!(response.status_code(), StatusCode::OK);
+
+    let body: serde_json::Value = response.json();
+    assert_eq!(body["success"].as_bool().unwrap(), true);
+
+    harness.cleanup().await;
+}
+
+#[tokio::test]
+async fn test_retry_ocr_with_invalid_language() {
+    let harness = TestHarness::new().await;
+
+    // Seed a document in the `failed` OCR state, owned by the harness user.
+    let document_id = Uuid::new_v4();
+    sqlx::query(
+        "INSERT INTO documents (id, user_id, filename, original_filename, file_size, mime_type, ocr_status, created_at, updated_at)
+         VALUES ($1, $2, $3, $4, $5, $6, $7, NOW(), NOW())"
+    )
+    .bind(document_id)
+    .bind(harness.user_id)
+    .bind("test.pdf")
+    .bind("test.pdf")
+    .bind(1024i64)
+    .bind("application/pdf")
+    .bind("failed")
+    .execute(&harness.server.into_inner().extract::<Arc<AppState>>().unwrap().db.pool)
+    .await
+    .expect("Failed to create test document");
+    // NOTE(review): `into_inner()` above is assumed not to consume `harness.server`, reused below — confirm.
+    let retry_request = json!({
+        "language": "invalid_lang"
+    });
+
+    let response = harness
+        .server
+        .post(&format!("/documents/{}/retry-ocr", document_id))
+        .add_header("Authorization", &format!("Bearer {}", harness.token))
+        .add_header("Content-Type", "application/json")
+        .json(&retry_request)
+        .await;
+
+    assert_eq!(response.status_code(), StatusCode::BAD_REQUEST);
+
+    let body: serde_json::Value = response.json();
+    assert!(body.get("error").is_some());
+
+    harness.cleanup().await;
+}
+
+#[tokio::test]
+async fn test_retry_ocr_nonexistent_document() {
+    let harness = TestHarness::new().await;
+
+    // A freshly generated id that this test never inserts into `documents`.
+    let missing_id = Uuid::new_v4();
+    let endpoint = format!("/documents/{}/retry-ocr", missing_id);
+    let bearer = format!("Bearer {}", harness.token);
+    let payload = json!({ "language": "spa" });
+
+    let response = harness
+        .server
+        .post(&endpoint)
+        .add_header("Authorization", &bearer)
+        .add_header("Content-Type", "application/json")
+        .json(&payload)
+        .await;
+
+    assert_eq!(response.status_code(), StatusCode::NOT_FOUND);
+    harness.cleanup().await;
+}
+
+#[tokio::test]
+async fn test_retry_ocr_unauthorized_user() {
+    let harness = TestHarness::new().await;
+
+    // Create a second user who owns the document; name and email are id-derived so reruns cannot collide.
+    let other_user_id = Uuid::new_v4();
+    let document_id = Uuid::new_v4();
+
+    sqlx::query(
+        "INSERT INTO users (id, username, email, password_hash) VALUES ($1, $2, $3, $4)"
+    )
+    .bind(other_user_id)
+    .bind(format!("otheruser_{}", other_user_id))
+    .bind(format!("other_{}@test.com", other_user_id))
+    .bind("dummy_hash")
+    .execute(&harness.server.into_inner().extract::<Arc<AppState>>().unwrap().db.pool)
+    .await
+    .expect("Failed to create other user");
+
+    sqlx::query(
+        "INSERT INTO documents (id, user_id, filename, original_filename, file_size, mime_type, ocr_status, created_at, updated_at)
+         VALUES ($1, $2, $3, $4, $5, $6, $7, NOW(), NOW())"
+    )
+    .bind(document_id)
+    .bind(other_user_id)
+    .bind("test.pdf")
+    .bind("test.pdf")
+    .bind(1024i64)
+    .bind("application/pdf")
+    .bind("failed")
+    .execute(&harness.server.into_inner().extract::<Arc<AppState>>().unwrap().db.pool)
+    .await
+    .expect("Failed to create test document");
+
+    let retry_request = json!({
+        "language": "spa"
+    });
+
+    let response = harness
+        .server
+        .post(&format!("/documents/{}/retry-ocr", document_id))
+        .add_header("Authorization", &format!("Bearer {}", harness.token))
+        .add_header("Content-Type", "application/json")
+        .json(&retry_request)
+        .await;
+
+    // Clean up both users before asserting, so a failed assertion leaves no rows behind.
+    sqlx::query("DELETE FROM users WHERE id = $1")
+        .bind(other_user_id)
+        .execute(&harness.server.into_inner().extract::<Arc<AppState>>().unwrap().db.pool)
+        .await
+        .expect("Failed to cleanup other user");
+    harness.cleanup().await;
+
+    // The harness user does not own the document, so the retry must be refused.
+    assert_eq!(response.status_code(), StatusCode::FORBIDDEN);
+}
+
+#[tokio::test]
+async fn test_document_upload_with_language_validation() {
+    let harness = TestHarness::new().await;
+    let bearer = format!("Bearer {}", harness.token);
+    // Multipart body: a mock PDF part named "file" plus a "language" text part.
+    let pdf_part = reqwest::multipart::Part::bytes(b"Mock PDF content".to_vec())
+        .file_name("test.pdf")
+        .mime_str("application/pdf").unwrap();
+    let language_part = reqwest::multipart::Part::text("spa");
+    let form = reqwest::multipart::Form::new()
+        .part("file", pdf_part)
+        .part("language", language_part);
+
+    let response = harness
+        .server
+        .post("/documents")
+        .add_header("Authorization", &bearer)
+        .multipart(form)
+        .await;
+
+    // "spa" is one of the harness's mock languages, so the upload is expected to succeed.
+    assert_eq!(response.status_code(), StatusCode::OK);
+    harness.cleanup().await;
+}
+
+#[tokio::test]
+async fn test_document_upload_with_invalid_language() {
+    let harness = TestHarness::new().await;
+    let bearer = format!("Bearer {}", harness.token);
+    // Multipart body: a mock PDF part named "file" plus an unknown "language" value.
+    let pdf_part = reqwest::multipart::Part::bytes(b"Mock PDF content".to_vec())
+        .file_name("test.pdf")
+        .mime_str("application/pdf").unwrap();
+    let language_part = reqwest::multipart::Part::text("invalid_lang");
+    let form = reqwest::multipart::Form::new()
+        .part("file", pdf_part)
+        .part("language", language_part);
+
+    let response = harness
+        .server
+        .post("/documents")
+        .add_header("Authorization", &bearer)
+        .multipart(form)
+        .await;
+
+    // "invalid_lang" is not among the harness's mock languages, so the upload must be rejected.
+    assert_eq!(response.status_code(), StatusCode::BAD_REQUEST);
+    harness.cleanup().await;
+}
+
+#[tokio::test]
+async fn test_settings_update_with_ocr_language() {
+    let harness = TestHarness::new().await;
+    let bearer = format!("Bearer {}", harness.token);
+
+    // Switch the OCR language to French alongside two other settings.
+    let new_settings = json!({
+        "ocrLanguage": "fra",
+        "concurrentOcrJobs": 2,
+        "ocrTimeoutSeconds": 300
+    });
+    let put_response = harness
+        .server
+        .put("/settings")
+        .add_header("Authorization", &bearer)
+        .add_header("Content-Type", "application/json")
+        .json(&new_settings)
+        .await;
+    assert_eq!(put_response.status_code(), StatusCode::OK);
+
+    // Read the settings back; the new language must be reported.
+    let get_response = harness
+        .server
+        .get("/settings")
+        .add_header("Authorization", &bearer)
+        .await;
+    assert_eq!(get_response.status_code(), StatusCode::OK);
+
+    let stored = get_response.json::<serde_json::Value>();
+    let stored_language = stored["ocrLanguage"].as_str().unwrap();
+    assert_eq!(stored_language, "fra");
+
+    harness.cleanup().await;
+}
+
+#[tokio::test]
+async fn test_settings_update_with_invalid_ocr_language() {
+    let harness = TestHarness::new().await;
+    let bearer = format!("Bearer {}", harness.token);
+
+    // "invalid_lang" is not among the harness's mock language files.
+    let rejected_settings = json!({
+        "ocrLanguage": "invalid_lang",
+        "concurrentOcrJobs": 2
+    });
+    let response = harness
+        .server
+        .put("/settings")
+        .add_header("Authorization", &bearer)
+        .add_header("Content-Type", "application/json")
+        .json(&rejected_settings)
+        .await;
+
+    let status = response.status_code();
+    assert_eq!(status, StatusCode::BAD_REQUEST);
+    harness.cleanup().await;
+}
+
+#[tokio::test]
+async fn test_ocr_health_endpoint() {
+    let harness = TestHarness::new().await;
+    let bearer = format!("Bearer {}", harness.token);
+    let response = harness
+        .server
+        .get("/api/ocr/health")
+        .add_header("Authorization", &bearer)
+        .await;
+    assert_eq!(response.status_code(), StatusCode::OK);
+
+    // The payload must carry both top-level keys.
+    let body = response.json::<serde_json::Value>();
+    for key in &["status", "available_languages"] {
+        assert!(body.get(*key).is_some());
+    }
+    harness.cleanup().await;
+}
+
+#[tokio::test]
+async fn test_concurrent_language_requests() {
+    let harness = TestHarness::new().await;
+
+    // Spawn five tasks up front; each one issues its own request to the
+    // languages endpoint with the harness token.
+    let tasks: Vec<_> = (0..5)
+        .map(|_| {
+            let server = harness.server.clone();
+            let token = harness.token.clone();
+            tokio::spawn(async move {
+                server
+                    .get("/api/ocr/languages")
+                    .add_header("Authorization", &format!("Bearer {}", token))
+                    .await
+            })
+        })
+        .collect();
+
+    // Await the tasks in spawn order; each must complete with 200 OK.
+    for task in tasks {
+        let response = task.await.expect("Task panicked");
+        assert_eq!(response.status_code(), StatusCode::OK);
+    }
+
+    harness.cleanup().await;
+}
\ No newline at end of file