feat(tests): add actual images as part of e2e and testing

2025-06-17 21:26:39 +00:00 · 2025-06-17 21:26:39 +00:00 · f905c220e0
parent efbd15774a
commit f905c220e0
7 changed files with 738 additions and 27 deletions
--- a/frontend/e2e/search.spec.ts
+++ b/frontend/e2e/search.spec.ts
@ -19,8 +19,8 @@ test.describe('Search Functionality', () => {
  test('should perform basic search', async ({ authenticatedPage: page }) => {
    const searchInput = page.locator('input[type="search"], input[placeholder*="search" i], [data-testid="search-input"]').first();
    
-    // Enter search query
-    await searchInput.fill(SEARCH_QUERIES.simple);
+    // Search for known OCR content from test images
+    await searchInput.fill(SEARCH_QUERIES.simple);  // "Test 1"
    
    // Wait for search API call
    const searchResponse = helpers.waitForApiCall(API_ENDPOINTS.search);
@ -40,8 +40,8 @@ test.describe('Search Functionality', () => {
  test('should show search suggestions', async ({ authenticatedPage: page }) => {
    const searchInput = page.locator('input[type="search"], input[placeholder*="search" i], [data-testid="search-input"]').first();
    
-    // Start typing to trigger suggestions
-    await searchInput.type('test', { delay: 100 });
+    // Start typing "Test" to trigger suggestions based on OCR content
+    await searchInput.type('Test', { delay: 100 });
    
    // Should show suggestion dropdown
    await expect(page.locator('[data-testid="search-suggestions"], .suggestions, .autocomplete')).toBeVisible({ 
@ -52,8 +52,8 @@ test.describe('Search Functionality', () => {
  test('should filter search results', async ({ authenticatedPage: page }) => {
    const searchInput = page.locator('input[type="search"], input[placeholder*="search" i], [data-testid="search-input"]').first();
    
-    // Perform initial search
-    await searchInput.fill(SEARCH_QUERIES.simple);
+    // Search for content that should match multiple test images
+    await searchInput.fill(SEARCH_QUERIES.content);  // "some text from text"
    await searchInput.press('Enter');
    
    await helpers.waitForLoadingToComplete();
@ -63,10 +63,10 @@ test.describe('Search Functionality', () => {
    if (await filterButton.isVisible()) {
      await filterButton.click();
      
-      // Select document type filter
-      const pdfFilter = page.locator('input[type="checkbox"][value="pdf"], label:has-text("PDF")');
-      if (await pdfFilter.isVisible()) {
-        await pdfFilter.check();
+      // Select image type filter (since our test files are images)
+      const imageFilter = page.locator('input[type="checkbox"][value="image"], input[type="checkbox"][value="png"], label:has-text("Image")');
+      if (await imageFilter.isVisible()) {
+        await imageFilter.check();
        
        // Should update search results
        await helpers.waitForApiCall(API_ENDPOINTS.search);
--- a/frontend/e2e/upload.spec.ts
+++ b/frontend/e2e/upload.spec.ts
@ -1,5 +1,5 @@
 import { test, expect } from './fixtures/auth';
-import { TEST_FILES, TIMEOUTS, API_ENDPOINTS } from './utils/test-data';
+import { TEST_FILES, TIMEOUTS, API_ENDPOINTS, EXPECTED_OCR_CONTENT } from './utils/test-data';
 import { TestHelpers } from './utils/test-helpers';

 test.describe('Document Upload', () => {
@ -20,8 +20,8 @@ test.describe('Document Upload', () => {
    // Find file input - try multiple selectors
    const fileInput = page.locator('input[type="file"]').first();
    
-    // Upload a test file
-    await fileInput.setInputFiles(TEST_FILES.image);
+    // Upload test1.png with known OCR content
+    await fileInput.setInputFiles(TEST_FILES.test1);
    
    // Wait for upload API call
    const uploadResponse = helpers.waitForApiCall(API_ENDPOINTS.upload, TIMEOUTS.upload);
@ -45,25 +45,25 @@ test.describe('Document Upload', () => {
  test('should upload multiple documents', async ({ authenticatedPage: page }) => {
    const fileInput = page.locator('input[type="file"]').first();
    
-    // Upload multiple files
-    await fileInput.setInputFiles([TEST_FILES.image, TEST_FILES.multiline]);
+    // Upload multiple test images with different formats
+    await fileInput.setInputFiles([TEST_FILES.test1, TEST_FILES.test2, TEST_FILES.test3]);
    
    const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]');
    if (await uploadButton.isVisible()) {
      await uploadButton.click();
    }
    
-    // Wait for both uploads to complete
+    // Wait for all uploads to complete
    await helpers.waitForLoadingToComplete();
    
    // Should show multiple uploaded documents
    const uploadedFiles = page.locator('[data-testid="uploaded-files"] > *, .uploaded-file');
-    await expect(uploadedFiles).toHaveCount(2, { timeout: TIMEOUTS.medium });
+    await expect(uploadedFiles).toHaveCount(3, { timeout: TIMEOUTS.medium });
  });

  test('should show upload progress', async ({ authenticatedPage: page }) => {
    const fileInput = page.locator('input[type="file"]').first();
-    await fileInput.setInputFiles(TEST_FILES.image);
+    await fileInput.setInputFiles(TEST_FILES.test4);
    
    const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]');
    if (await uploadButton.isVisible()) {
@ -140,7 +140,7 @@ test.describe('Document Upload', () => {

  test('should show OCR processing status', async ({ authenticatedPage: page }) => {
    const fileInput = page.locator('input[type="file"]').first();
-    await fileInput.setInputFiles(TEST_FILES.image);
+    await fileInput.setInputFiles(TEST_FILES.test5);
    
    const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]');
    if (await uploadButton.isVisible()) {
@ -155,6 +155,42 @@ test.describe('Document Upload', () => {
    });
  });

+  test('should process OCR and extract correct text content', async ({ authenticatedPage: page }) => {
+    const fileInput = page.locator('input[type="file"]').first();
+    
+    // Upload test6.jpeg with known content
+    await fileInput.setInputFiles(TEST_FILES.test6);
+    
+    const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]');
+    if (await uploadButton.isVisible()) {
+      await uploadButton.click();
+    }
+    
+    await helpers.waitForLoadingToComplete();
+    
+    // Wait for OCR to complete
+    await expect(page.locator(':has-text("OCR Complete"), :has-text("Processed"), [data-testid="ocr-complete"]')).toBeVisible({ 
+      timeout: TIMEOUTS.ocr 
+    });
+    
+    // Navigate to document details to verify OCR content
+    const uploadedDocument = page.locator('[data-testid="uploaded-files"] > *, .uploaded-file').first();
+    if (await uploadedDocument.isVisible()) {
+      await uploadedDocument.click();
+      
+      // Should navigate to document details page
+      await page.waitForURL(/\/documents\/[^\/]+/, { timeout: TIMEOUTS.medium });
+      
+      // Check that OCR content is visible and contains expected text
+      const documentContent = page.locator('[data-testid="document-content"], .document-text, .ocr-content');
+      if (await documentContent.isVisible()) {
+        const content = await documentContent.textContent();
+        expect(content).toContain('Test 6');
+        expect(content).toContain('This is some text from text 6');
+      }
+    }
+  });
+
  test('should allow drag and drop upload', async ({ authenticatedPage: page }) => {
    // Look for dropzone
    const dropzone = page.locator('[data-testid="dropzone"], .dropzone, .upload-area');
--- a/frontend/e2e/utils/test-data.ts
+++ b/frontend/e2e/utils/test-data.ts
@ -10,18 +10,30 @@ export const TEST_USERS = {
 };

 export const TEST_FILES = {
-  pdf: 'test_data/sample.pdf',
-  image: 'test_data/hello_ocr.png',
-  text: 'test_data/sample.txt',
-  multiline: 'test_data/multiline.png',
-  numbers: 'test_data/numbers.png'
+  // Real test images with known OCR content
+  test1: '../tests/test_images/test1.png',     // "Test 1\nThis is some text from text 1"
+  test2: '../tests/test_images/test2.jpg',     // "Test 2\nThis is some text from text 2"
+  test3: '../tests/test_images/test3.jpeg',    // "Test 3\nThis is some text from text 3"
+  test4: '../tests/test_images/test4.png',     // "Test 4\nThis is some text from text 4"
+  test5: '../tests/test_images/test5.jpg',     // "Test 5\nThis is some text from text 5"
+  test6: '../tests/test_images/test6.jpeg',    // "Test 6\nThis is some text from text 6"
+  test7: '../tests/test_images/test7.png',     // "Test 7\nThis is some text from text 7"
+  test8: '../tests/test_images/test8.jpeg',    // "Test 8\nThis is some text from text 8"
+  test9: '../tests/test_images/test9.png',     // "Test 9\nThis is some text from text 9"
+  
+  // Backwards compatibility
+  image: '../tests/test_images/test1.png',
+  multiline: '../tests/test_images/test2.jpg',
+  text: 'test_data/sample.txt'
 };

 export const SEARCH_QUERIES = {
-  simple: 'test document',
+  simple: 'Test 1',  // Will match test1.png OCR content
+  content: 'some text from text',  // Will match multiple test images
+  specific: 'Test 3',  // Will match test3.jpeg specifically
  advanced: {
-    title: 'important',
-    content: 'contract',
+    title: 'Test',
+    content: 'some text',
    dateFrom: '2024-01-01',
    dateTo: '2024-12-31'
  },
@ -29,6 +41,19 @@ export const SEARCH_QUERIES = {
  noResults: 'xyzabc123nonexistent'
 };

+// Expected OCR content for test images
+export const EXPECTED_OCR_CONTENT = {
+  test1: 'Test 1\nThis is some text from text 1',
+  test2: 'Test 2\nThis is some text from text 2',
+  test3: 'Test 3\nThis is some text from text 3',
+  test4: 'Test 4\nThis is some text from text 4',
+  test5: 'Test 5\nThis is some text from text 5',
+  test6: 'Test 6\nThis is some text from text 6',
+  test7: 'Test 7\nThis is some text from text 7',
+  test8: 'Test 8\nThis is some text from text 8',
+  test9: 'Test 9\nThis is some text from text 9'
+};
+
 export const API_ENDPOINTS = {
  login: '/api/auth/login',
  upload: '/api/documents/upload',
--- a/src/lib.rs
+++ b/src/lib.rs
@ -30,6 +30,9 @@ pub mod webdav_xml_parser;
 #[cfg(test)]
 mod tests;

+#[cfg(test)]
+pub mod test_utils;
+
 use axum::{http::StatusCode, Json};
 use config::Config;
 use db::Database;
--- a/src/test_utils.rs
+++ b/src/test_utils.rs
@ -0,0 +1,137 @@
+//! Test utilities for loading and working with test images and data
+//! 
+//! This module provides utilities for loading test images from the tests/test_images/
+//! directory and working with them in unit and integration tests.
+
+use std::path::Path;
+
+/// Test image information with expected OCR content
+#[derive(Debug, Clone)]
+pub struct TestImage {
+    pub filename: &'static str,
+    pub path: String,
+    pub mime_type: &'static str,
+    pub expected_content: &'static str,
+}
+
+impl TestImage {
+    pub fn new(filename: &'static str, mime_type: &'static str, expected_content: &'static str) -> Self {
+        Self {
+            filename,
+            path: format!("tests/test_images/{}", filename),
+            mime_type,
+            expected_content,
+        }
+    }
+    
+    pub fn exists(&self) -> bool {
+        Path::new(&self.path).exists()
+    }
+    
+    pub async fn load_data(&self) -> Result<Vec<u8>, std::io::Error> {
+        tokio::fs::read(&self.path).await
+    }
+}
+
+/// Get all available test images with their expected OCR content
+pub fn get_test_images() -> Vec<TestImage> {
+    vec![
+        TestImage::new("test1.png", "image/png", "Test 1\nThis is some text from text 1"),
+        TestImage::new("test2.jpg", "image/jpeg", "Test 2\nThis is some text from text 2"),
+        TestImage::new("test3.jpeg", "image/jpeg", "Test 3\nThis is some text from text 3"),
+        TestImage::new("test4.png", "image/png", "Test 4\nThis is some text from text 4"),
+        TestImage::new("test5.jpg", "image/jpeg", "Test 5\nThis is some text from text 5"),
+        TestImage::new("test6.jpeg", "image/jpeg", "Test 6\nThis is some text from text 6"),
+        TestImage::new("test7.png", "image/png", "Test 7\nThis is some text from text 7"),
+        TestImage::new("test8.jpeg", "image/jpeg", "Test 8\nThis is some text from text 8"),
+        TestImage::new("test9.png", "image/png", "Test 9\nThis is some text from text 9"),
+    ]
+}
+
+/// Get a specific test image by number (1-9)
+pub fn get_test_image(number: u8) -> Option<TestImage> {
+    if number < 1 || number > 9 {
+        return None;
+    }
+    
+    get_test_images().into_iter().nth((number - 1) as usize)
+}
+
+/// Load test image data by filename
+pub async fn load_test_image(filename: &str) -> Result<Vec<u8>, std::io::Error> {
+    let path = format!("tests/test_images/{}", filename);
+    tokio::fs::read(path).await
+}
+
+/// Check if test images directory exists and is accessible
+pub fn test_images_available() -> bool {
+    Path::new("tests/test_images").exists()
+}
+
+/// Get available test images (only those that exist on filesystem)
+pub fn get_available_test_images() -> Vec<TestImage> {
+    get_test_images()
+        .into_iter()
+        .filter(|img| img.exists())
+        .collect()
+}
+
+/// Skip test macro for conditional testing based on test image availability
+macro_rules! skip_if_no_test_images {
+    () => {
+        if !crate::test_utils::test_images_available() {
+            println!("Skipping test: test images directory not available");
+            return;
+        }
+    };
+}
+
+/// Skip test macro for specific test image
+macro_rules! skip_if_test_image_missing {
+    ($image:expr) => {
+        if !$image.exists() {
+            println!("Skipping test: {} not found", $image.filename);
+            return;
+        }
+    };
+}
+
+pub use skip_if_no_test_images;
+pub use skip_if_test_image_missing;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_image_paths_are_valid() {
+        let images = get_test_images();
+        assert_eq!(images.len(), 9);
+        
+        for (i, image) in images.iter().enumerate() {
+            assert_eq!(image.filename, format!("test{}.{}", i + 1, 
+                if image.mime_type == "image/png" { "png" } 
+                else if image.filename.ends_with(".jpg") { "jpg" }
+                else { "jpeg" }
+            ));
+            assert!(image.expected_content.starts_with(&format!("Test {}", i + 1)));
+        }
+    }
+
+    #[test]
+    fn test_get_specific_image() {
+        let image1 = get_test_image(1).unwrap();
+        assert_eq!(image1.filename, "test1.png");
+        assert_eq!(image1.mime_type, "image/png");
+        assert!(image1.expected_content.contains("Test 1"));
+
+        let image5 = get_test_image(5).unwrap();
+        assert_eq!(image5.filename, "test5.jpg");
+        assert_eq!(image5.mime_type, "image/jpeg");
+        assert!(image5.expected_content.contains("Test 5"));
+
+        // Invalid numbers should return None
+        assert!(get_test_image(0).is_none());
+        assert!(get_test_image(10).is_none());
+    }
+}
--- a/tests/file_processing_pipeline_tests.rs
+++ b/tests/file_processing_pipeline_tests.rs
@ -985,4 +985,235 @@ async fn test_concurrent_file_processing() {
    assert!(success_rate >= 0.8, "At least 80% of files should complete processing (not timeout)");
    
    println!("🎉 Concurrent file processing test passed!");
+}
+
+#[tokio::test]
+async fn test_real_test_images_processing() {
+    println!("🖼️  Testing real test images processing...");
+    
+    // Check if test images are available
+    if !readur::test_utils::test_images_available() {
+        println!("⚠️  Test images not available - skipping real image processing test");
+        return;
+    }
+    
+    let mut client = FileProcessingTestClient::new();
+    client.setup_user().await
+        .expect("Failed to setup test user");
+    
+    println!("✅ User setup complete");
+    
+    let available_images = readur::test_utils::get_available_test_images();
+    
+    if available_images.is_empty() {
+        println!("⚠️  No test images found - skipping test");
+        return;
+    }
+    
+    println!("📋 Found {} test images to process", available_images.len());
+    
+    let mut processed_results = Vec::new();
+    
+    // Process each available test image
+    for test_image in available_images.iter().take(3) { // Limit to first 3 for faster testing
+        println!("📤 Processing test image: {}", test_image.filename);
+        
+        // Load the image data
+        let image_data = match test_image.load_data().await {
+            Ok(data) => data,
+            Err(e) => {
+                println!("⚠️  Failed to load {}: {}", test_image.filename, e);
+                continue;
+            }
+        };
+        
+        println!("✅ Loaded {} ({} bytes, {})", 
+            test_image.filename, image_data.len(), test_image.mime_type);
+        
+        // Upload the image
+        let upload_start = std::time::Instant::now();
+        let document = match client.upload_binary_file(
+            image_data, 
+            test_image.filename, 
+            test_image.mime_type
+        ).await {
+            Ok(doc) => doc,
+            Err(e) => {
+                println!("⚠️  Failed to upload {}: {}", test_image.filename, e);
+                continue;
+            }
+        };
+        
+        let upload_time = upload_start.elapsed();
+        println!("✅ {} uploaded in {:?}: {}", test_image.filename, upload_time, document.id);
+        
+        // Wait for OCR processing
+        let processing_start = std::time::Instant::now();
+        match client.wait_for_processing(&document.id.to_string()).await {
+            Ok(processed_doc) => {
+                let processing_time = processing_start.elapsed();
+                println!("✅ {} processed in {:?}: status = {:?}", 
+                    test_image.filename, processing_time, processed_doc.ocr_status);
+                
+                // Get OCR results and verify content
+                if let Ok(ocr_results) = client.get_ocr_results(&document.id.to_string()).await {
+                    if let Some(ocr_text) = ocr_results["ocr_text"].as_str() {
+                        let normalized_ocr = ocr_text.trim().to_lowercase();
+                        let normalized_expected = test_image.expected_content.trim().to_lowercase();
+                        
+                        println!("🔍 OCR extracted: '{}'", ocr_text);
+                        println!("🎯 Expected: '{}'", test_image.expected_content);
+                        
+                        // Check if OCR content matches expectations
+                        let test_number = test_image.filename.chars()
+                            .filter(|c| c.is_numeric())
+                            .collect::<String>();
+                        
+                        let content_matches = if !test_number.is_empty() {
+                            normalized_ocr.contains(&format!("test {}", test_number)) ||
+                            normalized_ocr.contains(&test_number)
+                        } else {
+                            false
+                        };
+                        
+                        let has_text_content = normalized_ocr.contains("text") || 
+                                             normalized_ocr.contains("some");
+                        
+                        processed_results.push((
+                            test_image.filename.to_string(),
+                            upload_time,
+                            processing_time,
+                            processed_doc.ocr_status.clone(),
+                            ocr_text.to_string(),
+                            content_matches,
+                            has_text_content,
+                        ));
+                        
+                        if content_matches && has_text_content {
+                            println!("✅ OCR content verification PASSED for {}", test_image.filename);
+                        } else {
+                            println!("⚠️  OCR content verification PARTIAL for {} (number: {}, text: {})", 
+                                test_image.filename, content_matches, has_text_content);
+                        }
+                    } else {
+                        println!("⚠️  No OCR text found for {}", test_image.filename);
+                        processed_results.push((
+                            test_image.filename.to_string(),
+                            upload_time,
+                            processing_time,
+                            processed_doc.ocr_status.clone(),
+                            "".to_string(),
+                            false,
+                            false,
+                        ));
+                    }
+                } else {
+                    println!("⚠️  Failed to get OCR results for {}", test_image.filename);
+                    processed_results.push((
+                        test_image.filename.to_string(),
+                        upload_time,
+                        processing_time,
+                        processed_doc.ocr_status.clone(),
+                        "".to_string(),
+                        false,
+                        false,
+                    ));
+                }
+            }
+            Err(e) => {
+                println!("⚠️  Processing failed for {}: {}", test_image.filename, e);
+                processed_results.push((
+                    test_image.filename.to_string(),
+                    upload_time,
+                    Duration::ZERO,
+                    Some("failed".to_string()),
+                    "".to_string(),
+                    false,
+                    false,
+                ));
+            }
+        }
+        
+        // Add small delay between uploads to avoid overwhelming the system
+        tokio::time::sleep(Duration::from_millis(500)).await;
+    }
+    
+    // Analyze results
+    println!("📊 Real Test Images Processing Results:");
+    println!("  {:<12} {:<10} {:<12} {:<10} {:<8} {:<8} {}", 
+        "Image", "Upload", "Processing", "Status", "Number", "Text", "OCR Content");
+    println!("  {}", "-".repeat(80));
+    
+    let mut successful_ocr = 0;
+    let mut failed_ocr = 0;
+    let mut partial_matches = 0;
+    
+    for (filename, upload_time, processing_time, status, ocr_text, number_match, text_match) in &processed_results {
+        let status_str = status.as_deref().unwrap_or("unknown");
+        let ocr_preview = if ocr_text.len() > 30 {
+            format!("{}...", &ocr_text[..30])
+        } else {
+            ocr_text.clone()
+        };
+        
+        println!("  {:<12} {:<10?} {:<12?} {:<10} {:<8} {:<8} {}", 
+            filename, upload_time, processing_time, status_str, 
+            if *number_match { "✅" } else { "❌" },
+            if *text_match { "✅" } else { "❌" },
+            ocr_preview);
+        
+        if status_str == "completed" {
+            if *number_match && *text_match {
+                successful_ocr += 1;
+            } else if *number_match || *text_match {
+                partial_matches += 1;
+            } else {
+                failed_ocr += 1;
+            }
+        }
+    }
+    
+    let total_processed = processed_results.len();
+    
+    println!("\n📈 Summary:");
+    println!("  Total processed: {}", total_processed);
+    println!("  Successful OCR: {}", successful_ocr);
+    println!("  Partial matches: {}", partial_matches);
+    println!("  Failed OCR: {}", failed_ocr);
+    
+    if total_processed > 0 {
+        let success_rate = (successful_ocr + partial_matches) as f64 / total_processed as f64 * 100.0;
+        println!("  Success rate: {:.1}%", success_rate);
+        
+        // Calculate average processing time for successful cases
+        let successful_processing_times: Vec<_> = processed_results.iter()
+            .filter(|(_, _, _, status, _, number, text)| {
+                status.as_deref() == Some("completed") && (*number || *text)
+            })
+            .map(|(_, _, processing_time, _, _, _, _)| *processing_time)
+            .collect();
+        
+        if !successful_processing_times.is_empty() {
+            let avg_processing_time = successful_processing_times.iter().sum::<Duration>() 
+                / successful_processing_times.len() as u32;
+            println!("  Average processing time: {:?}", avg_processing_time);
+        }
+    }
+    
+    // Test assertions
+    assert!(!processed_results.is_empty(), "At least some test images should be processed");
+    
+    // At least 50% should have some level of OCR success (either partial or full)
+    let success_count = successful_ocr + partial_matches;
+    assert!(success_count > 0, "At least some test images should have successful OCR");
+    
+    if total_processed >= 2 {
+        let min_success_rate = 0.5; // 50% minimum success rate
+        let actual_success_rate = success_count as f64 / total_processed as f64;
+        assert!(actual_success_rate >= min_success_rate, 
+            "OCR success rate should be at least {}% but was {:.1}%", 
+            min_success_rate * 100.0, actual_success_rate * 100.0);
+    }
+    
+    println!("🎉 Real test images processing test completed!");
 }
--- a/tests/test_image_ocr_tests.rs
+++ b/tests/test_image_ocr_tests.rs
@ -0,0 +1,279 @@
+//! Integration tests for OCR processing using real test images
+//! 
+//! This test suite uses the actual test images from tests/test_images/
+//! to verify OCR functionality with known content.
+
+use readur::test_utils::{get_test_images, get_available_test_images, get_test_image, skip_if_no_test_images};
+use readur::ocr::OcrService;
+use std::path::Path;
+
+#[tokio::test]
+async fn test_ocr_with_all_available_test_images() {
+    skip_if_no_test_images!();
+    
+    let available_images = get_available_test_images();
+    
+    if available_images.is_empty() {
+        println!("No test images found - skipping OCR tests");
+        return;
+    }
+    
+    println!("Testing OCR with {} available test images", available_images.len());
+    
+    for test_image in available_images {
+        println!("Testing OCR with {}", test_image.filename);
+        
+        // Load the image data
+        let image_data = match test_image.load_data().await {
+            Ok(data) => data,
+            Err(e) => {
+                println!("Failed to load {}: {}", test_image.filename, e);
+                continue;
+            }
+        };
+        
+        // Create a temporary file for OCR processing
+        let temp_path = format!("./temp_test_{}", test_image.filename);
+        if let Err(e) = tokio::fs::write(&temp_path, &image_data).await {
+            println!("Failed to write temp file for {}: {}", test_image.filename, e);
+            continue;
+        }
+        
+        // Test OCR processing
+        let ocr_service = OcrService::new();
+        let result = ocr_service.extract_text(&temp_path, test_image.mime_type).await;
+        
+        // Clean up temp file
+        let _ = tokio::fs::remove_file(&temp_path).await;
+        
+        match result {
+            Ok(extracted_text) => {
+                println!("✅ OCR Success for {}: '{}'", test_image.filename, extracted_text);
+                
+                // Verify the extracted text contains expected content
+                let normalized_extracted = extracted_text.trim().to_lowercase();
+                let normalized_expected = test_image.expected_content.trim().to_lowercase();
+                
+                // Check for key parts of expected content
+                let test_number = test_image.filename.chars()
+                    .filter(|c| c.is_numeric())
+                    .collect::<String>();
+                
+                if !test_number.is_empty() {
+                    assert!(
+                        normalized_extracted.contains(&format!("test {}", test_number)) ||
+                        normalized_extracted.contains(&test_number),
+                        "OCR result '{}' should contain test number '{}' for image {}",
+                        extracted_text, test_number, test_image.filename
+                    );
+                }
+                
+                // Check for presence of "text" keyword
+                assert!(
+                    normalized_extracted.contains("text") || normalized_extracted.contains("some"),
+                    "OCR result '{}' should contain expected text content for image {}",
+                    extracted_text, test_image.filename
+                );
+            }
+            Err(e) => {
+                println!("⚠️  OCR Failed for {}: {}", test_image.filename, e);
+                // Don't fail the test immediately - log the error but continue
+                // This allows us to see which images work and which don't
+            }
+        }
+    }
+}
+
+#[tokio::test]
+async fn test_ocr_with_specific_test_images() {
+    skip_if_no_test_images!();
+    
+    // Test specific images that should definitely work
+    let test_cases = vec![1, 2, 3]; // Test with first 3 images
+    
+    for test_num in test_cases {
+        let test_image = match get_test_image(test_num) {
+            Some(img) => img,
+            None => continue,
+        };
+        
+        if !test_image.exists() {
+            println!("Skipping test{}: file not found", test_num);
+            continue;
+        }
+        
+        println!("Running OCR test for {}", test_image.filename);
+        
+        // Load image data
+        let image_data = test_image.load_data().await
+            .expect("Should be able to load test image");
+        
+        assert!(!image_data.is_empty(), "Test image should not be empty");
+        
+        // Verify file format based on MIME type
+        match test_image.mime_type {
+            "image/png" => {
+                assert!(image_data.starts_with(&[0x89, 0x50, 0x4E, 0x47]), 
+                    "PNG file should start with PNG signature");
+            }
+            "image/jpeg" => {
+                assert!(image_data.starts_with(&[0xFF, 0xD8, 0xFF]), 
+                    "JPEG file should start with JPEG signature");
+            }
+            _ => {}
+        }
+        
+        println!("Image {} loaded successfully: {} bytes, type: {}", 
+            test_image.filename, image_data.len(), test_image.mime_type);
+    }
+}
+
+#[tokio::test]
+async fn test_ocr_error_handling_with_corrupted_image() {
+    skip_if_no_test_images!();
+    
+    // Create a corrupted image file
+    let corrupted_data = vec![0xFF; 100]; // Invalid image data
+    let temp_path = "./temp_corrupted_test.png";
+    
+    tokio::fs::write(temp_path, &corrupted_data).await
+        .expect("Should be able to write corrupted test file");
+    
+    let ocr_service = OcrService::new();
+    let result = ocr_service.extract_text(temp_path, "image/png").await;
+    
+    // Clean up
+    let _ = tokio::fs::remove_file(temp_path).await;
+    
+    // Should handle the error gracefully
+    match result {
+        Ok(text) => {
+            println!("Unexpected success with corrupted image: '{}'", text);
+            // Some OCR systems might return empty text instead of error
+        }
+        Err(e) => {
+            println!("Expected error with corrupted image: {}", e);
+            // This is the expected behavior
+        }
+    }
+}
+
+#[tokio::test]
+async fn test_multiple_image_formats() {
+    skip_if_no_test_images!();
+    
+    let images = get_available_test_images();
+    let mut png_count = 0;
+    let mut jpeg_count = 0;
+    
+    for image in &images {
+        match image.mime_type {
+            "image/png" => png_count += 1,
+            "image/jpeg" => jpeg_count += 1,
+            _ => {}
+        }
+    }
+    
+    println!("Available test images: {} PNG, {} JPEG", png_count, jpeg_count);
+    
+    // Ensure we have at least one of each format for comprehensive testing
+    if png_count > 0 && jpeg_count > 0 {
+        println!("✅ Both PNG and JPEG formats available for testing");
+    } else {
+        println!("⚠️  Limited format coverage: PNG={}, JPEG={}", png_count, jpeg_count);
+    }
+    
+    // Test at least one of each format if available
+    for image in images.iter().take(2) {
+        if image.exists() {
+            println!("Testing format: {} ({})", image.mime_type, image.filename);
+            
+            let image_data = image.load_data().await
+                .expect("Should load test image");
+            
+            assert!(!image_data.is_empty(), "Image data should not be empty");
+            assert!(image_data.len() > 100, "Image should be reasonably sized");
+        }
+    }
+}
+
+#[tokio::test]
+#[ignore = "Long running test - run with: cargo test test_ocr_performance -- --ignored"]
+async fn test_ocr_performance_with_test_images() {
+    skip_if_no_test_images!();
+    
+    let available_images = get_available_test_images();
+    
+    if available_images.is_empty() {
+        println!("No test images available for performance testing");
+        return;
+    }
+    
+    let start_time = std::time::Instant::now();
+    let mut successful_ocr = 0;
+    let mut failed_ocr = 0;
+    
+    for test_image in available_images {
+        let image_start = std::time::Instant::now();
+        
+        // Load image
+        let image_data = match test_image.load_data().await {
+            Ok(data) => data,
+            Err(_) => {
+                failed_ocr += 1;
+                continue;
+            }
+        };
+        
+        // Write to temp file
+        let temp_path = format!("./temp_perf_{}", test_image.filename);
+        if tokio::fs::write(&temp_path, &image_data).await.is_err() {
+            failed_ocr += 1;
+            continue;
+        }
+        
+        // Run OCR
+        let ocr_service = OcrService::new();
+        let result = ocr_service.extract_text(&temp_path, test_image.mime_type).await;
+        
+        // Clean up
+        let _ = tokio::fs::remove_file(&temp_path).await;
+        
+        let duration = image_start.elapsed();
+        
+        match result {
+            Ok(text) => {
+                successful_ocr += 1;
+                println!("✅ {} processed in {:?}: '{}'", 
+                    test_image.filename, duration, text.chars().take(50).collect::<String>());
+            }
+            Err(e) => {
+                failed_ocr += 1;
+                println!("❌ {} failed in {:?}: {}", 
+                    test_image.filename, duration, e);
+            }
+        }
+    }
+    
+    let total_duration = start_time.elapsed();
+    let total_images = successful_ocr + failed_ocr;
+    
+    println!("\n📊 OCR Performance Summary:");
+    println!("Total images: {}", total_images);
+    println!("Successful: {}", successful_ocr);
+    println!("Failed: {}", failed_ocr);
+    println!("Total time: {:?}", total_duration);
+    
+    if total_images > 0 {
+        println!("Average time per image: {:?}", total_duration / total_images);
+        let success_rate = (successful_ocr as f64 / total_images as f64) * 100.0;
+        println!("Success rate: {:.1}%", success_rate);
+    }
+    
+    // Performance assertions
+    if successful_ocr > 0 {
+        let avg_time_per_image = total_duration / successful_ocr;
+        assert!(avg_time_per_image.as_secs() < 30, 
+            "OCR should complete within 30 seconds per image on average");
+    }
+}