From afd01e607577b3a07ddfda8250c548cdb512e005 Mon Sep 17 00:00:00 2001 From: perf3ct Date: Fri, 13 Jun 2025 20:11:22 +0000 Subject: [PATCH] fix(server): at least the watch folder doesn't blow up now --- .../src/components/__tests__/Login.test.tsx | 55 ++----------- frontend/src/test/test-utils.tsx | 80 +++++++++++++++++++ src/db.rs | 10 +++ src/enhanced_ocr.rs | 54 ++++++++++++- src/watcher.rs | 47 ++++++++++- 5 files changed, 193 insertions(+), 53 deletions(-) create mode 100644 frontend/src/test/test-utils.tsx diff --git a/frontend/src/components/__tests__/Login.test.tsx b/frontend/src/components/__tests__/Login.test.tsx index 9066e2b..b5b743d 100644 --- a/frontend/src/components/__tests__/Login.test.tsx +++ b/frontend/src/components/__tests__/Login.test.tsx @@ -1,22 +1,7 @@ -import { render, screen, fireEvent, waitFor } from '@testing-library/react' -import { vi } from 'vitest' -import { BrowserRouter } from 'react-router-dom' +import { screen, fireEvent, waitFor, vi } from '@testing-library/react' +import { renderWithMockAuth } from '../../test/test-utils' import Login from '../Login' -// Mock the auth context -const mockLogin = vi.fn() - -vi.mock('../../contexts/AuthContext', () => ({ - useAuth: () => ({ - login: mockLogin, - user: null, - loading: false, - register: vi.fn(), - logout: vi.fn(), - }), - AuthProvider: ({ children }: any) => <>{children}, -})) - // Mock the API service vi.mock('../../services/api', () => ({ api: { @@ -26,11 +11,7 @@ vi.mock('../../services/api', () => ({ }, })) -const LoginWrapper = ({ children }: { children: React.ReactNode }) => ( - - {children} - -) +const mockLogin = vi.fn() describe('Login', () => { beforeEach(() => { @@ -38,11 +19,7 @@ describe('Login', () => { }) test('renders login form', () => { - render( - - - - ) + renderWithMockAuth(, { login: mockLogin }) expect(screen.getByText('Sign in to Readur')).toBeInTheDocument() expect(screen.getByPlaceholderText('Username')).toBeInTheDocument() @@ -54,11 +31,7 @@ describe('Login', () => { test('handles form submission with valid credentials', async () => { mockLogin.mockResolvedValue(undefined) - render( - - - - ) + renderWithMockAuth(, { login: mockLogin }) const usernameInput = screen.getByPlaceholderText('Username') const passwordInput = screen.getByPlaceholderText('Password') @@ -79,11 +52,7 @@ describe('Login', () => { response: { data: { message: errorMessage } }, }) - render( - - - - ) + renderWithMockAuth(, { login: mockLogin }) const usernameInput = screen.getByPlaceholderText('Username') const passwordInput = screen.getByPlaceholderText('Password') @@ -101,11 +70,7 @@ describe('Login', () => { test('shows loading state during submission', async () => { mockLogin.mockImplementation(() => new Promise(() => {})) // Never resolves - render( - - - - ) + renderWithMockAuth(, { login: mockLogin }) const usernameInput = screen.getByPlaceholderText('Username') const passwordInput = screen.getByPlaceholderText('Password') @@ -122,11 +87,7 @@ describe('Login', () => { }) test('requires username and password', () => { - render( - - - - ) + renderWithMockAuth(, { login: mockLogin }) const usernameInput = screen.getByPlaceholderText('Username') const passwordInput = screen.getByPlaceholderText('Password') diff --git a/frontend/src/test/test-utils.tsx b/frontend/src/test/test-utils.tsx new file mode 100644 index 0000000..992b1ee --- /dev/null +++ b/frontend/src/test/test-utils.tsx @@ -0,0 +1,80 @@ +import React from 'react' +import { render, RenderOptions } from '@testing-library/react' +import { BrowserRouter } from 'react-router-dom' +import { vi } from 'vitest' + +interface User { + id: string + username: string + email: string +} + +interface MockAuthContextType { + user: User | null + loading: boolean + login: (username: string, password: string) => Promise + register: (username: string, email: string, password: string) => Promise + logout: () => void +} + +// Create a mock AuthProvider for testing +export const MockAuthProvider = ({ + children, + mockValues = {} +}: { + children: React.ReactNode + mockValues?: Partial +}) => { + const defaultMocks = { + user: null, + loading: false, + login: vi.fn(), + register: vi.fn(), + logout: vi.fn(), + ...mockValues + } + + // Mock the useAuth hook + const AuthContext = React.createContext(defaultMocks) + + return ( + + {children} + + ) +} + +// Create a custom render function that includes providers +const AllTheProviders = ({ children }: { children: React.ReactNode }) => { + return ( + + + {children} + + + ) +} + +export const renderWithProviders = ( + ui: React.ReactElement, + options?: Omit +) => render(ui, { wrapper: AllTheProviders, ...options }) + +export const renderWithMockAuth = ( + ui: React.ReactElement, + mockAuthValues?: Partial, + options?: Omit +) => { + const Wrapper = ({ children }: { children: React.ReactNode }) => ( + + + {children} + + + ) + + return render(ui, { wrapper: Wrapper, ...options }) +} + +// re-export everything +export * from '@testing-library/react' \ No newline at end of file diff --git a/src/db.rs b/src/db.rs index a845813..66d13ec 100644 --- a/src/db.rs +++ b/src/db.rs @@ -125,6 +125,14 @@ impl Database { ocr_detect_orientation BOOLEAN DEFAULT TRUE, ocr_whitelist_chars TEXT, ocr_blacklist_chars TEXT, + webdav_enabled BOOLEAN DEFAULT FALSE, + webdav_server_url TEXT, + webdav_username TEXT, + webdav_password TEXT, + webdav_watch_folders TEXT[] DEFAULT ARRAY['/Documents']::TEXT[], + webdav_file_extensions TEXT[] DEFAULT ARRAY['pdf', 'png', 'jpg', 'jpeg', 'tiff', 'bmp', 'txt']::TEXT[], + webdav_auto_sync BOOLEAN DEFAULT FALSE, + webdav_sync_interval_minutes INTEGER DEFAULT 60, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW() ) @@ -939,6 +947,8 @@ impl Database { cpu_priority, enable_background_ocr, ocr_page_segmentation_mode, ocr_engine_mode, ocr_min_confidence, ocr_dpi, ocr_enhance_contrast, ocr_remove_noise, ocr_detect_orientation, ocr_whitelist_chars, ocr_blacklist_chars, + webdav_enabled, webdav_server_url, webdav_username, webdav_password, + webdav_watch_folders, webdav_file_extensions, webdav_auto_sync, webdav_sync_interval_minutes, created_at, updated_at FROM settings WHERE user_id = $1"# ) diff --git a/src/enhanced_ocr.rs b/src/enhanced_ocr.rs index c30ec7f..effdf9c 100644 --- a/src/enhanced_ocr.rs +++ b/src/enhanced_ocr.rs @@ -504,16 +504,21 @@ impl EnhancedOcrService { let bytes = std::fs::read(file_path)?; - // Validate PDF header - if bytes.len() < 5 || !bytes.starts_with(b"%PDF-") { + // Check if it's a valid PDF (handles leading null bytes) + if !is_valid_pdf(&bytes) { return Err(anyhow!( "Invalid PDF file: Missing or corrupted PDF header. File size: {} bytes, Header: {:?}", bytes.len(), - bytes.get(0..20).unwrap_or(&[]).iter().map(|&b| b as char).collect::() + bytes.get(0..50).unwrap_or(&[]).iter().map(|&b| { + if b >= 32 && b <= 126 { b as char } else { '.' } + }).collect::() )); } - let text = match pdf_extract::extract_text_from_mem(&bytes) { + // Clean the PDF data (remove leading null bytes) + let clean_bytes = clean_pdf_data(&bytes); + + let text = match pdf_extract::extract_text_from_mem(&clean_bytes) { Ok(text) => text, Err(e) => { // Provide more detailed error information @@ -631,4 +636,45 @@ impl EnhancedOcrService { pub fn validate_ocr_quality(&self, _result: &OcrResult, _settings: &Settings) -> bool { false } +} + +/// Check if the given bytes represent a valid PDF file +/// Handles PDFs with leading null bytes or whitespace +fn is_valid_pdf(data: &[u8]) -> bool { + if data.len() < 5 { + return false; + } + + // Find the first occurrence of "%PDF-" in the first 1KB of the file + // Some PDFs have leading null bytes or other metadata + let search_limit = data.len().min(1024); + let search_data = &data[0..search_limit]; + + for i in 0..=search_limit.saturating_sub(5) { + if &search_data[i..i+5] == b"%PDF-" { + return true; + } + } + + false +} + +/// Remove leading null bytes and return clean PDF data +/// Returns the original data if no PDF header is found +fn clean_pdf_data(data: &[u8]) -> Vec { + if data.len() < 5 { + return data.to_vec(); + } + + // Find the first occurrence of "%PDF-" in the first 1KB + let search_limit = data.len().min(1024); + + for i in 0..=search_limit.saturating_sub(5) { + if &data[i..i+5] == b"%PDF-" { + return data[i..].to_vec(); + } + } + + // If no PDF header found, return original data + data.to_vec() } \ No newline at end of file diff --git a/src/watcher.rs b/src/watcher.rs index 147ac0a..b733d69 100644 --- a/src/watcher.rs +++ b/src/watcher.rs @@ -297,12 +297,14 @@ async fn process_file( // Validate PDF files before processing if mime_type == "application/pdf" { - if file_data.len() < 5 || !file_data.starts_with(b"%PDF-") { + if !is_valid_pdf(&file_data) { warn!( "Skipping invalid PDF file: {} (size: {} bytes, header: {:?})", filename, file_data.len(), - file_data.get(0..20).unwrap_or(&[]).iter().map(|&b| b as char).collect::() + file_data.get(0..50).unwrap_or(&[]).iter().map(|&b| { + if b >= 32 && b <= 126 { b as char } else { '.' } + }).collect::() ); return Ok(()); } @@ -369,4 +371,45 @@ fn calculate_priority(file_size: i64, mime_type: &str) -> i32 { }; (base_priority + type_boost).min(10) +} + +/// Check if the given bytes represent a valid PDF file +/// Handles PDFs with leading null bytes or whitespace +fn is_valid_pdf(data: &[u8]) -> bool { + if data.len() < 5 { + return false; + } + + // Find the first occurrence of "%PDF-" in the first 1KB of the file + // Some PDFs have leading null bytes or other metadata + let search_limit = data.len().min(1024); + let search_data = &data[0..search_limit]; + + for i in 0..=search_limit.saturating_sub(5) { + if &search_data[i..i+5] == b"%PDF-" { + return true; + } + } + + false +} + +/// Remove leading null bytes and return clean PDF data +/// Returns the original data if no PDF header is found +fn clean_pdf_data(data: &[u8]) -> &[u8] { + if data.len() < 5 { + return data; + } + + // Find the first occurrence of "%PDF-" in the first 1KB + let search_limit = data.len().min(1024); + + for i in 0..=search_limit.saturating_sub(5) { + if &data[i..i+5] == b"%PDF-" { + return &data[i..]; + } + } + + // If no PDF header found, return original data + data } \ No newline at end of file