diff --git a/frontend/e2e/search.spec.ts b/frontend/e2e/search.spec.ts index a308e81..07bdb3f 100644 --- a/frontend/e2e/search.spec.ts +++ b/frontend/e2e/search.spec.ts @@ -19,8 +19,8 @@ test.describe('Search Functionality', () => { test('should perform basic search', async ({ authenticatedPage: page }) => { const searchInput = page.locator('input[type="search"], input[placeholder*="search" i], [data-testid="search-input"]').first(); - // Enter search query - await searchInput.fill(SEARCH_QUERIES.simple); + // Search for known OCR content from test images + await searchInput.fill(SEARCH_QUERIES.simple); // "Test 1" // Wait for search API call const searchResponse = helpers.waitForApiCall(API_ENDPOINTS.search); @@ -40,8 +40,8 @@ test.describe('Search Functionality', () => { test('should show search suggestions', async ({ authenticatedPage: page }) => { const searchInput = page.locator('input[type="search"], input[placeholder*="search" i], [data-testid="search-input"]').first(); - // Start typing to trigger suggestions - await searchInput.type('test', { delay: 100 }); + // Start typing "Test" to trigger suggestions based on OCR content + await searchInput.type('Test', { delay: 100 }); // Should show suggestion dropdown await expect(page.locator('[data-testid="search-suggestions"], .suggestions, .autocomplete')).toBeVisible({ @@ -52,8 +52,8 @@ test.describe('Search Functionality', () => { test('should filter search results', async ({ authenticatedPage: page }) => { const searchInput = page.locator('input[type="search"], input[placeholder*="search" i], [data-testid="search-input"]').first(); - // Perform initial search - await searchInput.fill(SEARCH_QUERIES.simple); + // Search for content that should match multiple test images + await searchInput.fill(SEARCH_QUERIES.content); // "some text from text" await searchInput.press('Enter'); await helpers.waitForLoadingToComplete(); @@ -63,10 +63,10 @@ test.describe('Search Functionality', () => { if (await 
filterButton.isVisible()) { await filterButton.click(); - // Select document type filter - const pdfFilter = page.locator('input[type="checkbox"][value="pdf"], label:has-text("PDF")'); - if (await pdfFilter.isVisible()) { - await pdfFilter.check(); + // Select image type filter (since our test files are images) + const imageFilter = page.locator('input[type="checkbox"][value="image"], input[type="checkbox"][value="png"], label:has-text("Image")'); + if (await imageFilter.isVisible()) { + await imageFilter.check(); // Should update search results await helpers.waitForApiCall(API_ENDPOINTS.search); diff --git a/frontend/e2e/upload.spec.ts b/frontend/e2e/upload.spec.ts index e035efb..0ab90f0 100644 --- a/frontend/e2e/upload.spec.ts +++ b/frontend/e2e/upload.spec.ts @@ -1,5 +1,5 @@ import { test, expect } from './fixtures/auth'; -import { TEST_FILES, TIMEOUTS, API_ENDPOINTS } from './utils/test-data'; +import { TEST_FILES, TIMEOUTS, API_ENDPOINTS, EXPECTED_OCR_CONTENT } from './utils/test-data'; import { TestHelpers } from './utils/test-helpers'; test.describe('Document Upload', () => { @@ -20,8 +20,8 @@ test.describe('Document Upload', () => { // Find file input - try multiple selectors const fileInput = page.locator('input[type="file"]').first(); - // Upload a test file - await fileInput.setInputFiles(TEST_FILES.image); + // Upload test1.png with known OCR content + await fileInput.setInputFiles(TEST_FILES.test1); // Wait for upload API call const uploadResponse = helpers.waitForApiCall(API_ENDPOINTS.upload, TIMEOUTS.upload); @@ -45,25 +45,25 @@ test.describe('Document Upload', () => { test('should upload multiple documents', async ({ authenticatedPage: page }) => { const fileInput = page.locator('input[type="file"]').first(); - // Upload multiple files - await fileInput.setInputFiles([TEST_FILES.image, TEST_FILES.multiline]); + // Upload multiple test images with different formats + await fileInput.setInputFiles([TEST_FILES.test1, TEST_FILES.test2, 
TEST_FILES.test3]); const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]'); if (await uploadButton.isVisible()) { await uploadButton.click(); } - // Wait for both uploads to complete + // Wait for all uploads to complete await helpers.waitForLoadingToComplete(); // Should show multiple uploaded documents const uploadedFiles = page.locator('[data-testid="uploaded-files"] > *, .uploaded-file'); - await expect(uploadedFiles).toHaveCount(2, { timeout: TIMEOUTS.medium }); + await expect(uploadedFiles).toHaveCount(3, { timeout: TIMEOUTS.medium }); }); test('should show upload progress', async ({ authenticatedPage: page }) => { const fileInput = page.locator('input[type="file"]').first(); - await fileInput.setInputFiles(TEST_FILES.image); + await fileInput.setInputFiles(TEST_FILES.test4); const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]'); if (await uploadButton.isVisible()) { @@ -140,7 +140,7 @@ test.describe('Document Upload', () => { test('should show OCR processing status', async ({ authenticatedPage: page }) => { const fileInput = page.locator('input[type="file"]').first(); - await fileInput.setInputFiles(TEST_FILES.image); + await fileInput.setInputFiles(TEST_FILES.test5); const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]'); if (await uploadButton.isVisible()) { @@ -155,6 +155,42 @@ test.describe('Document Upload', () => { }); }); + test('should process OCR and extract correct text content', async ({ authenticatedPage: page }) => { + const fileInput = page.locator('input[type="file"]').first(); + + // Upload test6.jpeg with known content + await fileInput.setInputFiles(TEST_FILES.test6); + + const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]'); + if (await uploadButton.isVisible()) { + await uploadButton.click(); + } + + await helpers.waitForLoadingToComplete(); + + // Wait for OCR to complete + await 
expect(page.locator(':has-text("OCR Complete"), :has-text("Processed"), [data-testid="ocr-complete"]')).toBeVisible({ + timeout: TIMEOUTS.ocr + }); + + // Navigate to document details to verify OCR content + const uploadedDocument = page.locator('[data-testid="uploaded-files"] > *, .uploaded-file').first(); + if (await uploadedDocument.isVisible()) { + await uploadedDocument.click(); + + // Should navigate to document details page + await page.waitForURL(/\/documents\/[^\/]+/, { timeout: TIMEOUTS.medium }); + + // Check that OCR content is visible and contains expected text + const documentContent = page.locator('[data-testid="document-content"], .document-text, .ocr-content'); + if (await documentContent.isVisible()) { + const content = await documentContent.textContent(); + expect(content).toContain('Test 6'); + expect(content).toContain('This is some text from text 6'); + } + } + }); + test('should allow drag and drop upload', async ({ authenticatedPage: page }) => { // Look for dropzone const dropzone = page.locator('[data-testid="dropzone"], .dropzone, .upload-area'); diff --git a/frontend/e2e/utils/test-data.ts b/frontend/e2e/utils/test-data.ts index f1dd6f8..df71c00 100644 --- a/frontend/e2e/utils/test-data.ts +++ b/frontend/e2e/utils/test-data.ts @@ -10,18 +10,30 @@ export const TEST_USERS = { }; export const TEST_FILES = { - pdf: 'test_data/sample.pdf', - image: 'test_data/hello_ocr.png', - text: 'test_data/sample.txt', - multiline: 'test_data/multiline.png', - numbers: 'test_data/numbers.png' + // Real test images with known OCR content + test1: '../tests/test_images/test1.png', // "Test 1\nThis is some text from text 1" + test2: '../tests/test_images/test2.jpg', // "Test 2\nThis is some text from text 2" + test3: '../tests/test_images/test3.jpeg', // "Test 3\nThis is some text from text 3" + test4: '../tests/test_images/test4.png', // "Test 4\nThis is some text from text 4" + test5: '../tests/test_images/test5.jpg', // "Test 5\nThis is some text from 
text 5" + test6: '../tests/test_images/test6.jpeg', // "Test 6\nThis is some text from text 6" + test7: '../tests/test_images/test7.png', // "Test 7\nThis is some text from text 7" + test8: '../tests/test_images/test8.jpeg', // "Test 8\nThis is some text from text 8" + test9: '../tests/test_images/test9.png', // "Test 9\nThis is some text from text 9" + + // Backwards compatibility + image: '../tests/test_images/test1.png', + multiline: '../tests/test_images/test2.jpg', + text: 'test_data/sample.txt' }; export const SEARCH_QUERIES = { - simple: 'test document', + simple: 'Test 1', // Will match test1.png OCR content + content: 'some text from text', // Will match multiple test images + specific: 'Test 3', // Will match test3.jpeg specifically advanced: { - title: 'important', - content: 'contract', + title: 'Test', + content: 'some text', dateFrom: '2024-01-01', dateTo: '2024-12-31' }, @@ -29,6 +41,19 @@ export const SEARCH_QUERIES = { noResults: 'xyzabc123nonexistent' }; +// Expected OCR content for test images +export const EXPECTED_OCR_CONTENT = { + test1: 'Test 1\nThis is some text from text 1', + test2: 'Test 2\nThis is some text from text 2', + test3: 'Test 3\nThis is some text from text 3', + test4: 'Test 4\nThis is some text from text 4', + test5: 'Test 5\nThis is some text from text 5', + test6: 'Test 6\nThis is some text from text 6', + test7: 'Test 7\nThis is some text from text 7', + test8: 'Test 8\nThis is some text from text 8', + test9: 'Test 9\nThis is some text from text 9' +}; + export const API_ENDPOINTS = { login: '/api/auth/login', upload: '/api/documents/upload', diff --git a/src/lib.rs b/src/lib.rs index 094e133..2230738 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,9 @@ pub mod webdav_xml_parser; #[cfg(test)] mod tests; +#[cfg(test)] +pub mod test_utils; + use axum::{http::StatusCode, Json}; use config::Config; use db::Database; diff --git a/src/test_utils.rs b/src/test_utils.rs new file mode 100644 index 0000000..cda6248 --- 
/dev/null +++ b/src/test_utils.rs @@ -0,0 +1,137 @@ +//! Test utilities for loading and working with test images and data +//! +//! This module provides utilities for loading test images from the tests/test_images/ +//! directory and working with them in unit and integration tests. + +use std::path::Path; + +/// Test image information with expected OCR content +#[derive(Debug, Clone)] +pub struct TestImage { + pub filename: &'static str, + pub path: String, + pub mime_type: &'static str, + pub expected_content: &'static str, +} + +impl TestImage { + pub fn new(filename: &'static str, mime_type: &'static str, expected_content: &'static str) -> Self { + Self { + filename, + path: format!("tests/test_images/{}", filename), + mime_type, + expected_content, + } + } + + pub fn exists(&self) -> bool { + Path::new(&self.path).exists() + } + + pub async fn load_data(&self) -> Result<Vec<u8>, std::io::Error> { + tokio::fs::read(&self.path).await + } +} + +/// Get all available test images with their expected OCR content +pub fn get_test_images() -> Vec<TestImage> { + vec![ + TestImage::new("test1.png", "image/png", "Test 1\nThis is some text from text 1"), + TestImage::new("test2.jpg", "image/jpeg", "Test 2\nThis is some text from text 2"), + TestImage::new("test3.jpeg", "image/jpeg", "Test 3\nThis is some text from text 3"), + TestImage::new("test4.png", "image/png", "Test 4\nThis is some text from text 4"), + TestImage::new("test5.jpg", "image/jpeg", "Test 5\nThis is some text from text 5"), + TestImage::new("test6.jpeg", "image/jpeg", "Test 6\nThis is some text from text 6"), + TestImage::new("test7.png", "image/png", "Test 7\nThis is some text from text 7"), + TestImage::new("test8.jpeg", "image/jpeg", "Test 8\nThis is some text from text 8"), + TestImage::new("test9.png", "image/png", "Test 9\nThis is some text from text 9"), + ] +} + +/// Get a specific test image by number (1-9) +pub fn get_test_image(number: u8) -> Option<TestImage> { + if number < 1 || number > 9 { + return None; + } + +
get_test_images().into_iter().nth((number - 1) as usize) +} + +/// Load test image data by filename +pub async fn load_test_image(filename: &str) -> Result<Vec<u8>, std::io::Error> { + let path = format!("tests/test_images/{}", filename); + tokio::fs::read(path).await +} + +/// Check if test images directory exists and is accessible +pub fn test_images_available() -> bool { + Path::new("tests/test_images").exists() +} + +/// Get available test images (only those that exist on filesystem) +pub fn get_available_test_images() -> Vec<TestImage> { + get_test_images() + .into_iter() + .filter(|img| img.exists()) + .collect() +} + +/// Skip test macro for conditional testing based on test image availability +macro_rules! skip_if_no_test_images { + () => { + if !crate::test_utils::test_images_available() { + println!("Skipping test: test images directory not available"); + return; + } + }; +} + +/// Skip test macro for specific test image +macro_rules! skip_if_test_image_missing { + ($image:expr) => { + if !$image.exists() { + println!("Skipping test: {} not found", $image.filename); + return; + } + }; +} + +pub use skip_if_no_test_images; +pub use skip_if_test_image_missing; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_image_paths_are_valid() { + let images = get_test_images(); + assert_eq!(images.len(), 9); + + for (i, image) in images.iter().enumerate() { + assert_eq!(image.filename, format!("test{}.{}", i + 1, + if image.mime_type == "image/png" { "png" } + else if image.filename.ends_with(".jpg") { "jpg" } + else { "jpeg" } + )); + assert!(image.expected_content.starts_with(&format!("Test {}", i + 1))); + } + } + + #[test] + fn test_get_specific_image() { + let image1 = get_test_image(1).unwrap(); + assert_eq!(image1.filename, "test1.png"); + assert_eq!(image1.mime_type, "image/png"); + assert!(image1.expected_content.contains("Test 1")); + + let image5 = get_test_image(5).unwrap(); + assert_eq!(image5.filename, "test5.jpg"); + assert_eq!(image5.mime_type,
"image/jpeg"); + assert!(image5.expected_content.contains("Test 5")); + + // Invalid numbers should return None + assert!(get_test_image(0).is_none()); + assert!(get_test_image(10).is_none()); + } +} \ No newline at end of file diff --git a/tests/file_processing_pipeline_tests.rs b/tests/file_processing_pipeline_tests.rs index 5e040fa..b656f15 100644 --- a/tests/file_processing_pipeline_tests.rs +++ b/tests/file_processing_pipeline_tests.rs @@ -985,4 +985,235 @@ async fn test_concurrent_file_processing() { assert!(success_rate >= 0.8, "At least 80% of files should complete processing (not timeout)"); println!("🎉 Concurrent file processing test passed!"); +} + +#[tokio::test] +async fn test_real_test_images_processing() { + println!("🖼️ Testing real test images processing..."); + + // Check if test images are available + if !readur::test_utils::test_images_available() { + println!("⚠️ Test images not available - skipping real image processing test"); + return; + } + + let mut client = FileProcessingTestClient::new(); + client.setup_user().await + .expect("Failed to setup test user"); + + println!("✅ User setup complete"); + + let available_images = readur::test_utils::get_available_test_images(); + + if available_images.is_empty() { + println!("⚠️ No test images found - skipping test"); + return; + } + + println!("📋 Found {} test images to process", available_images.len()); + + let mut processed_results = Vec::new(); + + // Process each available test image + for test_image in available_images.iter().take(3) { // Limit to first 3 for faster testing + println!("📤 Processing test image: {}", test_image.filename); + + // Load the image data + let image_data = match test_image.load_data().await { + Ok(data) => data, + Err(e) => { + println!("⚠️ Failed to load {}: {}", test_image.filename, e); + continue; + } + }; + + println!("✅ Loaded {} ({} bytes, {})", + test_image.filename, image_data.len(), test_image.mime_type); + + // Upload the
image + let upload_start = std::time::Instant::now(); + let document = match client.upload_binary_file( + image_data, + test_image.filename, + test_image.mime_type + ).await { + Ok(doc) => doc, + Err(e) => { + println!("⚠️ Failed to upload {}: {}", test_image.filename, e); + continue; + } + }; + + let upload_time = upload_start.elapsed(); + println!("✅ {} uploaded in {:?}: {}", test_image.filename, upload_time, document.id); + + // Wait for OCR processing + let processing_start = std::time::Instant::now(); + match client.wait_for_processing(&document.id.to_string()).await { + Ok(processed_doc) => { + let processing_time = processing_start.elapsed(); + println!("✅ {} processed in {:?}: status = {:?}", + test_image.filename, processing_time, processed_doc.ocr_status); + + // Get OCR results and verify content + if let Ok(ocr_results) = client.get_ocr_results(&document.id.to_string()).await { + if let Some(ocr_text) = ocr_results["ocr_text"].as_str() { + let normalized_ocr = ocr_text.trim().to_lowercase(); + let normalized_expected = test_image.expected_content.trim().to_lowercase(); + + println!("🔍 OCR extracted: '{}'", ocr_text); + println!("🎯 Expected: '{}'", test_image.expected_content); + + // Check if OCR content matches expectations + let test_number = test_image.filename.chars() + .filter(|c| c.is_numeric()) + .collect::<String>(); + + let content_matches = if !test_number.is_empty() { + normalized_ocr.contains(&format!("test {}", test_number)) || + normalized_ocr.contains(&test_number) + } else { + false + }; + + let has_text_content = normalized_ocr.contains("text") || + normalized_ocr.contains("some"); + + processed_results.push(( + test_image.filename.to_string(), + upload_time, + processing_time, + processed_doc.ocr_status.clone(), + ocr_text.to_string(), + content_matches, + has_text_content, + )); + + if content_matches && has_text_content { + println!("✅ OCR content verification PASSED for {}", test_image.filename); + } else { + println!("⚠️
OCR content verification PARTIAL for {} (number: {}, text: {})", + test_image.filename, content_matches, has_text_content); + } + } else { + println!("⚠️ No OCR text found for {}", test_image.filename); + processed_results.push(( + test_image.filename.to_string(), + upload_time, + processing_time, + processed_doc.ocr_status.clone(), + "".to_string(), + false, + false, + )); + } + } else { + println!("⚠️ Failed to get OCR results for {}", test_image.filename); + processed_results.push(( + test_image.filename.to_string(), + upload_time, + processing_time, + processed_doc.ocr_status.clone(), + "".to_string(), + false, + false, + )); + } + } + Err(e) => { + println!("⚠️ Processing failed for {}: {}", test_image.filename, e); + processed_results.push(( + test_image.filename.to_string(), + upload_time, + Duration::ZERO, + Some("failed".to_string()), + "".to_string(), + false, + false, + )); + } + } + + // Add small delay between uploads to avoid overwhelming the system + tokio::time::sleep(Duration::from_millis(500)).await; + } + + // Analyze results + println!("📊 Real Test Images Processing Results:"); + println!(" {:<12} {:<10} {:<12} {:<10} {:<8} {:<8} {}", + "Image", "Upload", "Processing", "Status", "Number", "Text", "OCR Content"); + println!(" {}", "-".repeat(80)); + + let mut successful_ocr = 0; + let mut failed_ocr = 0; + let mut partial_matches = 0; + + for (filename, upload_time, processing_time, status, ocr_text, number_match, text_match) in &processed_results { + let status_str = status.as_deref().unwrap_or("unknown"); + let ocr_preview = if ocr_text.len() > 30 { + format!("{}...", &ocr_text[..30]) + } else { + ocr_text.clone() + }; + + println!(" {:<12} {:<10?} {:<12?} {:<10} {:<8} {:<8} {}", + filename, upload_time, processing_time, status_str, + if *number_match { "✅" } else { "❌" }, + if *text_match { "✅" } else { "❌" }, + ocr_preview); + + if status_str == "completed" { + if *number_match && *text_match { + successful_ocr +=
1; + } else if *number_match || *text_match { + partial_matches += 1; + } else { + failed_ocr += 1; + } + } + } + + let total_processed = processed_results.len(); + + println!("\n📈 Summary:"); + println!(" Total processed: {}", total_processed); + println!(" Successful OCR: {}", successful_ocr); + println!(" Partial matches: {}", partial_matches); + println!(" Failed OCR: {}", failed_ocr); + + if total_processed > 0 { + let success_rate = (successful_ocr + partial_matches) as f64 / total_processed as f64 * 100.0; + println!(" Success rate: {:.1}%", success_rate); + + // Calculate average processing time for successful cases + let successful_processing_times: Vec<_> = processed_results.iter() + .filter(|(_, _, _, status, _, number, text)| { + status.as_deref() == Some("completed") && (*number || *text) + }) + .map(|(_, _, processing_time, _, _, _, _)| *processing_time) + .collect(); + + if !successful_processing_times.is_empty() { + let avg_processing_time = successful_processing_times.iter().sum::<Duration>() + / successful_processing_times.len() as u32; + println!(" Average processing time: {:?}", avg_processing_time); + } + } + + // Test assertions + assert!(!processed_results.is_empty(), "At least some test images should be processed"); + + // At least 50% should have some level of OCR success (either partial or full) + let success_count = successful_ocr + partial_matches; + assert!(success_count > 0, "At least some test images should have successful OCR"); + + if total_processed >= 2 { + let min_success_rate = 0.5; // 50% minimum success rate + let actual_success_rate = success_count as f64 / total_processed as f64; + assert!(actual_success_rate >= min_success_rate, + "OCR success rate should be at least {}% but was {:.1}%", + min_success_rate * 100.0, actual_success_rate * 100.0); + } + + println!("🎉 Real test images processing test completed!"); } \ No newline at end of file diff --git a/tests/test_image_ocr_tests.rs b/tests/test_image_ocr_tests.rs new file mode
100644 index 0000000..5e4d6ba --- /dev/null +++ b/tests/test_image_ocr_tests.rs @@ -0,0 +1,279 @@ +//! Integration tests for OCR processing using real test images +//! +//! This test suite uses the actual test images from tests/test_images/ +//! to verify OCR functionality with known content. + +use readur::test_utils::{get_test_images, get_available_test_images, get_test_image, skip_if_no_test_images}; +use readur::ocr::OcrService; +use std::path::Path; + +#[tokio::test] +async fn test_ocr_with_all_available_test_images() { + skip_if_no_test_images!(); + + let available_images = get_available_test_images(); + + if available_images.is_empty() { + println!("No test images found - skipping OCR tests"); + return; + } + + println!("Testing OCR with {} available test images", available_images.len()); + + for test_image in available_images { + println!("Testing OCR with {}", test_image.filename); + + // Load the image data + let image_data = match test_image.load_data().await { + Ok(data) => data, + Err(e) => { + println!("Failed to load {}: {}", test_image.filename, e); + continue; + } + }; + + // Create a temporary file for OCR processing + let temp_path = format!("./temp_test_{}", test_image.filename); + if let Err(e) = tokio::fs::write(&temp_path, &image_data).await { + println!("Failed to write temp file for {}: {}", test_image.filename, e); + continue; + } + + // Test OCR processing + let ocr_service = OcrService::new(); + let result = ocr_service.extract_text(&temp_path, test_image.mime_type).await; + + // Clean up temp file + let _ = tokio::fs::remove_file(&temp_path).await; + + match result { + Ok(extracted_text) => { + println!("✅ OCR Success for {}: '{}'", test_image.filename, extracted_text); + + // Verify the extracted text contains expected content + let normalized_extracted = extracted_text.trim().to_lowercase(); + let normalized_expected = test_image.expected_content.trim().to_lowercase(); + + // Check for key parts of expected content + let test_number
= test_image.filename.chars() + .filter(|c| c.is_numeric()) + .collect::<String>(); + + if !test_number.is_empty() { + assert!( + normalized_extracted.contains(&format!("test {}", test_number)) || + normalized_extracted.contains(&test_number), + "OCR result '{}' should contain test number '{}' for image {}", + extracted_text, test_number, test_image.filename + ); + } + + // Check for presence of "text" keyword + assert!( + normalized_extracted.contains("text") || normalized_extracted.contains("some"), + "OCR result '{}' should contain expected text content for image {}", + extracted_text, test_image.filename + ); + } + Err(e) => { + println!("⚠️ OCR Failed for {}: {}", test_image.filename, e); + // Don't fail the test immediately - log the error but continue + // This allows us to see which images work and which don't + } + } + } +} + +#[tokio::test] +async fn test_ocr_with_specific_test_images() { + skip_if_no_test_images!(); + + // Test specific images that should definitely work + let test_cases = vec![1, 2, 3]; // Test with first 3 images + + for test_num in test_cases { + let test_image = match get_test_image(test_num) { + Some(img) => img, + None => continue, + }; + + if !test_image.exists() { + println!("Skipping test{}: file not found", test_num); + continue; + } + + println!("Running OCR test for {}", test_image.filename); + + // Load image data + let image_data = test_image.load_data().await + .expect("Should be able to load test image"); + + assert!(!image_data.is_empty(), "Test image should not be empty"); + + // Verify file format based on MIME type + match test_image.mime_type { + "image/png" => { + assert!(image_data.starts_with(&[0x89, 0x50, 0x4E, 0x47]), + "PNG file should start with PNG signature"); + } + "image/jpeg" => { + assert!(image_data.starts_with(&[0xFF, 0xD8, 0xFF]), + "JPEG file should start with JPEG signature"); + } + _ => {} + } + + println!("Image {} loaded successfully: {} bytes, type: {}", + test_image.filename, image_data.len(),
test_image.mime_type); + } +} + +#[tokio::test] +async fn test_ocr_error_handling_with_corrupted_image() { + skip_if_no_test_images!(); + + // Create a corrupted image file + let corrupted_data = vec![0xFF; 100]; // Invalid image data + let temp_path = "./temp_corrupted_test.png"; + + tokio::fs::write(temp_path, &corrupted_data).await + .expect("Should be able to write corrupted test file"); + + let ocr_service = OcrService::new(); + let result = ocr_service.extract_text(temp_path, "image/png").await; + + // Clean up + let _ = tokio::fs::remove_file(temp_path).await; + + // Should handle the error gracefully + match result { + Ok(text) => { + println!("Unexpected success with corrupted image: '{}'", text); + // Some OCR systems might return empty text instead of error + } + Err(e) => { + println!("Expected error with corrupted image: {}", e); + // This is the expected behavior + } + } +} + +#[tokio::test] +async fn test_multiple_image_formats() { + skip_if_no_test_images!(); + + let images = get_available_test_images(); + let mut png_count = 0; + let mut jpeg_count = 0; + + for image in &images { + match image.mime_type { + "image/png" => png_count += 1, + "image/jpeg" => jpeg_count += 1, + _ => {} + } + } + + println!("Available test images: {} PNG, {} JPEG", png_count, jpeg_count); + + // Ensure we have at least one of each format for comprehensive testing + if png_count > 0 && jpeg_count > 0 { + println!("✅ Both PNG and JPEG formats available for testing"); + } else { + println!("⚠️ Limited format coverage: PNG={}, JPEG={}", png_count, jpeg_count); + } + + // Test at least one of each format if available + for image in images.iter().take(2) { + if image.exists() { + println!("Testing format: {} ({})", image.mime_type, image.filename); + + let image_data = image.load_data().await + .expect("Should load test image"); + + assert!(!image_data.is_empty(), "Image data should not be empty"); + assert!(image_data.len() > 100, "Image should be reasonably sized"); +
} + } +} + +#[tokio::test] +#[ignore = "Long running test - run with: cargo test test_ocr_performance -- --ignored"] +async fn test_ocr_performance_with_test_images() { + skip_if_no_test_images!(); + + let available_images = get_available_test_images(); + + if available_images.is_empty() { + println!("No test images available for performance testing"); + return; + } + + let start_time = std::time::Instant::now(); + let mut successful_ocr = 0; + let mut failed_ocr = 0; + + for test_image in available_images { + let image_start = std::time::Instant::now(); + + // Load image + let image_data = match test_image.load_data().await { + Ok(data) => data, + Err(_) => { + failed_ocr += 1; + continue; + } + }; + + // Write to temp file + let temp_path = format!("./temp_perf_{}", test_image.filename); + if tokio::fs::write(&temp_path, &image_data).await.is_err() { + failed_ocr += 1; + continue; + } + + // Run OCR + let ocr_service = OcrService::new(); + let result = ocr_service.extract_text(&temp_path, test_image.mime_type).await; + + // Clean up + let _ = tokio::fs::remove_file(&temp_path).await; + + let duration = image_start.elapsed(); + + match result { + Ok(text) => { + successful_ocr += 1; + println!("✅ {} processed in {:?}: '{}'", + test_image.filename, duration, text.chars().take(50).collect::<String>()); + } + Err(e) => { + failed_ocr += 1; + println!("❌ {} failed in {:?}: {}", + test_image.filename, duration, e); + } + } + } + + let total_duration = start_time.elapsed(); + let total_images = successful_ocr + failed_ocr; + + println!("\n📊 OCR Performance Summary:"); + println!("Total images: {}", total_images); + println!("Successful: {}", successful_ocr); + println!("Failed: {}", failed_ocr); + println!("Total time: {:?}", total_duration); + + if total_images > 0 { + println!("Average time per image: {:?}", total_duration / total_images); + let success_rate = (successful_ocr as f64 / total_images as f64) * 100.0; + println!("Success rate: {:.1}%", success_rate); + } +
+ // Performance assertions + if successful_ocr > 0 { + let avg_time_per_image = total_duration / successful_ocr; + assert!(avg_time_per_image.as_secs() < 30, + "OCR should complete within 30 seconds per image on average"); + } +} \ No newline at end of file