feat(tests): add actual images as part of e2e and testing

This commit is contained in:
perf3ct 2025-06-17 21:26:39 +00:00
parent efbd15774a
commit f905c220e0
7 changed files with 738 additions and 27 deletions

View File

@ -19,8 +19,8 @@ test.describe('Search Functionality', () => {
test('should perform basic search', async ({ authenticatedPage: page }) => {
const searchInput = page.locator('input[type="search"], input[placeholder*="search" i], [data-testid="search-input"]').first();
// Enter search query
await searchInput.fill(SEARCH_QUERIES.simple);
// Search for known OCR content from test images
await searchInput.fill(SEARCH_QUERIES.simple); // "Test 1"
// Wait for search API call
const searchResponse = helpers.waitForApiCall(API_ENDPOINTS.search);
@ -40,8 +40,8 @@ test.describe('Search Functionality', () => {
test('should show search suggestions', async ({ authenticatedPage: page }) => {
const searchInput = page.locator('input[type="search"], input[placeholder*="search" i], [data-testid="search-input"]').first();
// Start typing to trigger suggestions
await searchInput.type('test', { delay: 100 });
// Start typing "Test" to trigger suggestions based on OCR content
await searchInput.type('Test', { delay: 100 });
// Should show suggestion dropdown
await expect(page.locator('[data-testid="search-suggestions"], .suggestions, .autocomplete')).toBeVisible({
@ -52,8 +52,8 @@ test.describe('Search Functionality', () => {
test('should filter search results', async ({ authenticatedPage: page }) => {
const searchInput = page.locator('input[type="search"], input[placeholder*="search" i], [data-testid="search-input"]').first();
// Perform initial search
await searchInput.fill(SEARCH_QUERIES.simple);
// Search for content that should match multiple test images
await searchInput.fill(SEARCH_QUERIES.content); // "some text from text"
await searchInput.press('Enter');
await helpers.waitForLoadingToComplete();
@ -63,10 +63,10 @@ test.describe('Search Functionality', () => {
if (await filterButton.isVisible()) {
await filterButton.click();
// Select document type filter
const pdfFilter = page.locator('input[type="checkbox"][value="pdf"], label:has-text("PDF")');
if (await pdfFilter.isVisible()) {
await pdfFilter.check();
// Select image type filter (since our test files are images)
const imageFilter = page.locator('input[type="checkbox"][value="image"], input[type="checkbox"][value="png"], label:has-text("Image")');
if (await imageFilter.isVisible()) {
await imageFilter.check();
// Should update search results
await helpers.waitForApiCall(API_ENDPOINTS.search);

View File

@ -1,5 +1,5 @@
import { test, expect } from './fixtures/auth';
import { TEST_FILES, TIMEOUTS, API_ENDPOINTS } from './utils/test-data';
import { TEST_FILES, TIMEOUTS, API_ENDPOINTS, EXPECTED_OCR_CONTENT } from './utils/test-data';
import { TestHelpers } from './utils/test-helpers';
test.describe('Document Upload', () => {
@ -20,8 +20,8 @@ test.describe('Document Upload', () => {
// Find file input - try multiple selectors
const fileInput = page.locator('input[type="file"]').first();
// Upload a test file
await fileInput.setInputFiles(TEST_FILES.image);
// Upload test1.png with known OCR content
await fileInput.setInputFiles(TEST_FILES.test1);
// Wait for upload API call
const uploadResponse = helpers.waitForApiCall(API_ENDPOINTS.upload, TIMEOUTS.upload);
@ -45,25 +45,25 @@ test.describe('Document Upload', () => {
test('should upload multiple documents', async ({ authenticatedPage: page }) => {
const fileInput = page.locator('input[type="file"]').first();
// Upload multiple files
await fileInput.setInputFiles([TEST_FILES.image, TEST_FILES.multiline]);
// Upload multiple test images with different formats
await fileInput.setInputFiles([TEST_FILES.test1, TEST_FILES.test2, TEST_FILES.test3]);
const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]');
if (await uploadButton.isVisible()) {
await uploadButton.click();
}
// Wait for both uploads to complete
// Wait for all uploads to complete
await helpers.waitForLoadingToComplete();
// Should show multiple uploaded documents
const uploadedFiles = page.locator('[data-testid="uploaded-files"] > *, .uploaded-file');
await expect(uploadedFiles).toHaveCount(2, { timeout: TIMEOUTS.medium });
await expect(uploadedFiles).toHaveCount(3, { timeout: TIMEOUTS.medium });
});
test('should show upload progress', async ({ authenticatedPage: page }) => {
const fileInput = page.locator('input[type="file"]').first();
await fileInput.setInputFiles(TEST_FILES.image);
await fileInput.setInputFiles(TEST_FILES.test4);
const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]');
if (await uploadButton.isVisible()) {
@ -140,7 +140,7 @@ test.describe('Document Upload', () => {
test('should show OCR processing status', async ({ authenticatedPage: page }) => {
const fileInput = page.locator('input[type="file"]').first();
await fileInput.setInputFiles(TEST_FILES.image);
await fileInput.setInputFiles(TEST_FILES.test5);
const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]');
if (await uploadButton.isVisible()) {
@ -155,6 +155,42 @@ test.describe('Document Upload', () => {
});
});
// Uploads test6.jpeg, waits for OCR to finish, then opens the document details
// page and checks that the displayed OCR text contains every expected line.
// The UI lookups are best-effort: when the uploaded-file entry or the content
// panel is not visible, the corresponding checks are skipped rather than failed.
test('should process OCR and extract correct text content', async ({ authenticatedPage: page }) => {
  const fileInput = page.locator('input[type="file"]').first();

  // Upload test6.jpeg with known content
  await fileInput.setInputFiles(TEST_FILES.test6);

  // Some layouts upload on selection; click the explicit button only if present
  const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]');
  if (await uploadButton.isVisible()) {
    await uploadButton.click();
  }

  await helpers.waitForLoadingToComplete();

  // Wait for OCR to complete
  await expect(page.locator(':has-text("OCR Complete"), :has-text("Processed"), [data-testid="ocr-complete"]')).toBeVisible({
    timeout: TIMEOUTS.ocr
  });

  // Navigate to document details to verify OCR content
  const uploadedDocument = page.locator('[data-testid="uploaded-files"] > *, .uploaded-file').first();
  if (await uploadedDocument.isVisible()) {
    await uploadedDocument.click();

    // Should navigate to document details page
    await page.waitForURL(/\/documents\/[^\/]+/, { timeout: TIMEOUTS.medium });

    // Check that OCR content is visible and contains expected text
    const documentContent = page.locator('[data-testid="document-content"], .document-text, .ocr-content');
    if (await documentContent.isVisible()) {
      const content = await documentContent.textContent();

      // Take the expectation from the shared fixture table instead of repeating
      // the literals here. Lines are checked one by one because the rendered
      // text may not preserve the newline between them.
      for (const expectedLine of EXPECTED_OCR_CONTENT.test6.split('\n')) {
        expect(content).toContain(expectedLine);
      }
    }
  }
});
test('should allow drag and drop upload', async ({ authenticatedPage: page }) => {
// Look for dropzone
const dropzone = page.locator('[data-testid="dropzone"], .dropzone, .upload-area');

View File

@ -10,18 +10,30 @@ export const TEST_USERS = {
};
export const TEST_FILES = {
pdf: 'test_data/sample.pdf',
image: 'test_data/hello_ocr.png',
text: 'test_data/sample.txt',
multiline: 'test_data/multiline.png',
numbers: 'test_data/numbers.png'
// Real test images with known OCR content
test1: '../tests/test_images/test1.png', // "Test 1\nThis is some text from text 1"
test2: '../tests/test_images/test2.jpg', // "Test 2\nThis is some text from text 2"
test3: '../tests/test_images/test3.jpeg', // "Test 3\nThis is some text from text 3"
test4: '../tests/test_images/test4.png', // "Test 4\nThis is some text from text 4"
test5: '../tests/test_images/test5.jpg', // "Test 5\nThis is some text from text 5"
test6: '../tests/test_images/test6.jpeg', // "Test 6\nThis is some text from text 6"
test7: '../tests/test_images/test7.png', // "Test 7\nThis is some text from text 7"
test8: '../tests/test_images/test8.jpeg', // "Test 8\nThis is some text from text 8"
test9: '../tests/test_images/test9.png', // "Test 9\nThis is some text from text 9"
// Backwards compatibility
image: '../tests/test_images/test1.png',
multiline: '../tests/test_images/test2.jpg',
text: 'test_data/sample.txt'
};
export const SEARCH_QUERIES = {
simple: 'test document',
simple: 'Test 1', // Will match test1.png OCR content
content: 'some text from text', // Will match multiple test images
specific: 'Test 3', // Will match test3.jpeg specifically
advanced: {
title: 'important',
content: 'contract',
title: 'Test',
content: 'some text',
dateFrom: '2024-01-01',
dateTo: '2024-12-31'
},
@ -29,6 +41,19 @@ export const SEARCH_QUERIES = {
noResults: 'xyzabc123nonexistent'
};
// Expected OCR content for test images
// Keys mirror the `test1`..`test9` entries of TEST_FILES. Each value is the
// text expected from that image: a "Test N" heading line, a newline, then the
// sentence "This is some text from text N".
export const EXPECTED_OCR_CONTENT = {
test1: 'Test 1\nThis is some text from text 1',
test2: 'Test 2\nThis is some text from text 2',
test3: 'Test 3\nThis is some text from text 3',
test4: 'Test 4\nThis is some text from text 4',
test5: 'Test 5\nThis is some text from text 5',
test6: 'Test 6\nThis is some text from text 6',
test7: 'Test 7\nThis is some text from text 7',
test8: 'Test 8\nThis is some text from text 8',
test9: 'Test 9\nThis is some text from text 9'
};
export const API_ENDPOINTS = {
login: '/api/auth/login',
upload: '/api/documents/upload',

View File

@ -30,6 +30,9 @@ pub mod webdav_xml_parser;
#[cfg(test)]
mod tests;
// Shared helpers for locating and loading the images in tests/test_images/.
// NOTE(review): this module is only compiled when the library itself is built
// with `cfg(test)`. The integration tests in this change import
// `readur::test_utils::...`; integration tests link against the regular
// (non-test) build of the library, where this module would be compiled out —
// confirm, and consider gating on a feature (or dropping the gate) instead.
#[cfg(test)]
pub mod test_utils;
use axum::{http::StatusCode, Json};
use config::Config;
use db::Database;

137
src/test_utils.rs Normal file
View File

@ -0,0 +1,137 @@
//! Test utilities for loading and working with test images and data
//!
//! This module provides utilities for loading test images from the tests/test_images/
//! directory and working with them in unit and integration tests.
use std::path::Path;
/// Test image information with expected OCR content
///
/// Describes one fixture image: where it lives, what format it is, and the
/// text OCR is expected to produce for it.
#[derive(Debug, Clone)]
pub struct TestImage {
/// File name of the image, e.g. `test1.png`.
pub filename: &'static str,
/// Location of the image file; `TestImage::new` sets this to
/// `tests/test_images/<filename>` (a relative path).
pub path: String,
/// MIME type of the image, e.g. `image/png` or `image/jpeg`.
pub mime_type: &'static str,
/// Text the image is expected to yield when run through OCR.
pub expected_content: &'static str,
}
impl TestImage {
    /// Creates a descriptor for the fixture `filename`, deriving its relative
    /// path under `tests/test_images/`.
    pub fn new(filename: &'static str, mime_type: &'static str, expected_content: &'static str) -> Self {
        let path = format!("tests/test_images/{}", filename);
        Self {
            filename,
            path,
            mime_type,
            expected_content,
        }
    }

    /// Returns `true` when something exists at this image's path.
    pub fn exists(&self) -> bool {
        let location = Path::new(self.path.as_str());
        location.exists()
    }

    /// Reads the whole image file into memory.
    ///
    /// # Errors
    /// Propagates the I/O error from the read (for example when the file is
    /// missing).
    pub async fn load_data(&self) -> Result<Vec<u8>, std::io::Error> {
        let bytes = tokio::fs::read(&self.path).await?;
        Ok(bytes)
    }
}
/// Returns the full catalogue of fixture images (test1 through test9), in
/// numeric order, each paired with its MIME type and expected OCR text.
///
/// The list is static; it does not check whether the files exist on disk.
pub fn get_test_images() -> Vec<TestImage> {
    // (file name, MIME type, expected OCR text)
    const CATALOGUE: [(&str, &str, &str); 9] = [
        ("test1.png", "image/png", "Test 1\nThis is some text from text 1"),
        ("test2.jpg", "image/jpeg", "Test 2\nThis is some text from text 2"),
        ("test3.jpeg", "image/jpeg", "Test 3\nThis is some text from text 3"),
        ("test4.png", "image/png", "Test 4\nThis is some text from text 4"),
        ("test5.jpg", "image/jpeg", "Test 5\nThis is some text from text 5"),
        ("test6.jpeg", "image/jpeg", "Test 6\nThis is some text from text 6"),
        ("test7.png", "image/png", "Test 7\nThis is some text from text 7"),
        ("test8.jpeg", "image/jpeg", "Test 8\nThis is some text from text 8"),
        ("test9.png", "image/png", "Test 9\nThis is some text from text 9"),
    ];

    CATALOGUE
        .iter()
        .map(|&(filename, mime_type, expected_content)| {
            TestImage::new(filename, mime_type, expected_content)
        })
        .collect()
}
/// Get a specific test image by its 1-based number (currently 1-9).
///
/// Returns `None` for `0` and for any number beyond the end of the catalogue
/// returned by `get_test_images`. The upper bound comes from the catalogue
/// itself, so it cannot drift if images are added or removed.
pub fn get_test_image(number: u8) -> Option<TestImage> {
    // `checked_sub` rejects 0 without a separate range check.
    let index = usize::from(number.checked_sub(1)?);
    get_test_images().into_iter().nth(index)
}
/// Reads the fixture image `filename` from `tests/test_images/` into memory.
///
/// # Errors
/// Propagates the I/O error from the read (for example when the file does
/// not exist).
pub async fn load_test_image(filename: &str) -> Result<Vec<u8>, std::io::Error> {
    let bytes = tokio::fs::read(format!("tests/test_images/{}", filename)).await?;
    Ok(bytes)
}
/// Reports whether the relative path `tests/test_images` can be found.
///
/// Any error while querying the path (missing, permission denied, ...) is
/// treated as "not available".
pub fn test_images_available() -> bool {
    std::fs::metadata("tests/test_images").is_ok()
}
/// Returns the catalogue entries whose files are actually present on disk,
/// keeping the catalogue's order.
pub fn get_available_test_images() -> Vec<TestImage> {
    let mut present = get_test_images();
    present.retain(|image| image.exists());
    present
}
/// Skip test macro for conditional testing based on test image availability
macro_rules! skip_if_no_test_images {
() => {
if !crate::test_utils::test_images_available() {
println!("Skipping test: test images directory not available");
return;
}
};
}
/// Skip test macro for specific test image
macro_rules! skip_if_test_image_missing {
($image:expr) => {
if !$image.exists() {
println!("Skipping test: {} not found", $image.filename);
return;
}
};
}
pub use skip_if_no_test_images;
pub use skip_if_test_image_missing;
#[cfg(test)]
mod tests {
    use super::*;

    /// The catalogue holds nine entries named `test<N>.<ext>` in order, with
    /// an extension consistent with the MIME type and expected text that
    /// starts with `Test <N>`.
    #[test]
    fn test_image_paths_are_valid() {
        let images = get_test_images();
        assert_eq!(images.len(), 9);

        for (index, image) in images.iter().enumerate() {
            let number = index + 1;
            let extension = match image.mime_type {
                "image/png" => "png",
                _ if image.filename.ends_with(".jpg") => "jpg",
                _ => "jpeg",
            };

            assert_eq!(image.filename, format!("test{}.{}", number, extension));

            let expected_prefix = format!("Test {}", number);
            assert!(image.expected_content.starts_with(&expected_prefix));
        }
    }

    /// Lookup by number returns the matching entry and rejects numbers
    /// outside the catalogue.
    #[test]
    fn test_get_specific_image() {
        let first = get_test_image(1).unwrap();
        assert_eq!(first.filename, "test1.png");
        assert_eq!(first.mime_type, "image/png");
        assert!(first.expected_content.contains("Test 1"));

        let fifth = get_test_image(5).unwrap();
        assert_eq!(fifth.filename, "test5.jpg");
        assert_eq!(fifth.mime_type, "image/jpeg");
        assert!(fifth.expected_content.contains("Test 5"));

        // Invalid numbers should return None
        for invalid in [0u8, 10u8].iter().copied() {
            assert!(get_test_image(invalid).is_none());
        }
    }
}

View File

@ -985,4 +985,235 @@ async fn test_concurrent_file_processing() {
assert!(success_rate >= 0.8, "At least 80% of files should complete processing (not timeout)");
println!("🎉 Concurrent file processing test passed!");
}
/// Uploads up to three of the real fixture images, waits for each to be
/// processed, and checks the OCR text against loose expectations.
///
/// An image counts as a full match when the OCR text mentions its number and
/// contains "text" or "some"; one of the two is a partial match. The test
/// requires at least one full or partial match, and at least a 50% match rate
/// when two or more images were processed. It returns early (passing) when the
/// fixture directory or all fixture files are missing.
#[tokio::test]
async fn test_real_test_images_processing() {
    /// What happened to one uploaded image.
    struct ImageOutcome {
        filename: String,
        upload_time: Duration,
        processing_time: Duration,
        status: Option<String>,
        ocr_text: String,
        number_match: bool,
        text_match: bool,
    }

    println!("🖼️ Testing real test images processing...");

    // Check if test images are available
    if !readur::test_utils::test_images_available() {
        println!("⚠️ Test images not available - skipping real image processing test");
        return;
    }

    let mut client = FileProcessingTestClient::new();
    client.setup_user().await
        .expect("Failed to setup test user");
    println!("✅ User setup complete");

    let available_images = readur::test_utils::get_available_test_images();
    if available_images.is_empty() {
        println!("⚠️ No test images found - skipping test");
        return;
    }
    println!("📋 Found {} test images to process", available_images.len());

    let mut processed_results: Vec<ImageOutcome> = Vec::new();

    // Process each available test image
    for test_image in available_images.iter().take(3) { // Limit to first 3 for faster testing
        println!("📤 Processing test image: {}", test_image.filename);

        // Load the image data
        let image_data = match test_image.load_data().await {
            Ok(data) => data,
            Err(e) => {
                println!("⚠️ Failed to load {}: {}", test_image.filename, e);
                continue;
            }
        };
        println!("✅ Loaded {} ({} bytes, {})",
            test_image.filename, image_data.len(), test_image.mime_type);

        // Upload the image
        let upload_start = std::time::Instant::now();
        let document = match client.upload_binary_file(
            image_data,
            test_image.filename,
            test_image.mime_type
        ).await {
            Ok(doc) => doc,
            Err(e) => {
                println!("⚠️ Failed to upload {}: {}", test_image.filename, e);
                continue;
            }
        };
        let upload_time = upload_start.elapsed();
        println!("{} uploaded in {:?}: {}", test_image.filename, upload_time, document.id);

        // Wait for OCR processing
        let document_id = document.id.to_string();
        let processing_start = std::time::Instant::now();
        let outcome = match client.wait_for_processing(&document_id).await {
            Ok(processed_doc) => {
                let processing_time = processing_start.elapsed();
                println!("{} processed in {:?}: status = {:?}",
                    test_image.filename, processing_time, processed_doc.ocr_status);

                // Get OCR results and verify content. Every failure path
                // yields an empty text with both match flags cleared.
                let (ocr_text, number_match, text_match) =
                    match client.get_ocr_results(&document_id).await {
                        Ok(ocr_results) => match ocr_results["ocr_text"].as_str() {
                            Some(ocr_text) => {
                                let normalized_ocr = ocr_text.trim().to_lowercase();

                                println!("🔍 OCR extracted: '{}'", ocr_text);
                                println!("🎯 Expected: '{}'", test_image.expected_content);

                                // The digits in the file name identify the image
                                // (e.g. "test3.jpeg" -> "3").
                                let test_number = test_image.filename.chars()
                                    .filter(|c| c.is_numeric())
                                    .collect::<String>();

                                let content_matches = !test_number.is_empty()
                                    && (normalized_ocr.contains(&format!("test {}", test_number))
                                        || normalized_ocr.contains(&test_number));
                                let has_text_content = normalized_ocr.contains("text")
                                    || normalized_ocr.contains("some");

                                if content_matches && has_text_content {
                                    println!("✅ OCR content verification PASSED for {}", test_image.filename);
                                } else {
                                    println!("⚠️ OCR content verification PARTIAL for {} (number: {}, text: {})",
                                        test_image.filename, content_matches, has_text_content);
                                }

                                (ocr_text.to_string(), content_matches, has_text_content)
                            }
                            None => {
                                println!("⚠️ No OCR text found for {}", test_image.filename);
                                (String::new(), false, false)
                            }
                        },
                        Err(_) => {
                            println!("⚠️ Failed to get OCR results for {}", test_image.filename);
                            (String::new(), false, false)
                        }
                    };

                ImageOutcome {
                    filename: test_image.filename.to_string(),
                    upload_time,
                    processing_time,
                    status: processed_doc.ocr_status.clone(),
                    ocr_text,
                    number_match,
                    text_match,
                }
            }
            Err(e) => {
                println!("⚠️ Processing failed for {}: {}", test_image.filename, e);
                ImageOutcome {
                    filename: test_image.filename.to_string(),
                    upload_time,
                    processing_time: Duration::ZERO,
                    status: Some("failed".to_string()),
                    ocr_text: String::new(),
                    number_match: false,
                    text_match: false,
                }
            }
        };
        processed_results.push(outcome);

        // Add small delay between uploads to avoid overwhelming the system
        tokio::time::sleep(Duration::from_millis(500)).await;
    }

    // Analyze results
    println!("📊 Real Test Images Processing Results:");
    println!(" {:<12} {:<10} {:<12} {:<10} {:<8} {:<8} {}",
        "Image", "Upload", "Processing", "Status", "Number", "Text", "OCR Content");
    println!(" {}", "-".repeat(80));

    let mut successful_ocr = 0;
    let mut failed_ocr = 0;
    let mut partial_matches = 0;

    for outcome in &processed_results {
        let status_str = outcome.status.as_deref().unwrap_or("unknown");

        // Truncate by characters, not bytes: slicing a `String` at a byte
        // offset panics when the offset falls inside a multi-byte character.
        let ocr_preview = if outcome.ocr_text.chars().count() > 30 {
            format!("{}...", outcome.ocr_text.chars().take(30).collect::<String>())
        } else {
            outcome.ocr_text.clone()
        };

        // Distinct markers so the Number/Text columns actually show the flags.
        let number_flag = if outcome.number_match { "yes" } else { "no" };
        let text_flag = if outcome.text_match { "yes" } else { "no" };

        println!(" {:<12} {:<10?} {:<12?} {:<10} {:<8} {:<8} {}",
            outcome.filename, outcome.upload_time, outcome.processing_time, status_str,
            number_flag, text_flag, ocr_preview);

        // Only documents that finished processing are classified.
        if status_str == "completed" {
            match (outcome.number_match, outcome.text_match) {
                (true, true) => successful_ocr += 1,
                (false, false) => failed_ocr += 1,
                _ => partial_matches += 1,
            }
        }
    }

    let total_processed = processed_results.len();
    println!("\n📈 Summary:");
    println!(" Total processed: {}", total_processed);
    println!(" Successful OCR: {}", successful_ocr);
    println!(" Partial matches: {}", partial_matches);
    println!(" Failed OCR: {}", failed_ocr);

    if total_processed > 0 {
        let success_rate = (successful_ocr + partial_matches) as f64 / total_processed as f64 * 100.0;
        println!(" Success rate: {:.1}%", success_rate);

        // Calculate average processing time for successful cases
        let successful_processing_times: Vec<Duration> = processed_results.iter()
            .filter(|outcome| {
                outcome.status.as_deref() == Some("completed")
                    && (outcome.number_match || outcome.text_match)
            })
            .map(|outcome| outcome.processing_time)
            .collect();

        if !successful_processing_times.is_empty() {
            let avg_processing_time = successful_processing_times.iter().sum::<Duration>()
                / successful_processing_times.len() as u32;
            println!(" Average processing time: {:?}", avg_processing_time);
        }
    }

    // Test assertions
    assert!(!processed_results.is_empty(), "At least some test images should be processed");

    let success_count = successful_ocr + partial_matches;
    assert!(success_count > 0, "At least some test images should have successful OCR");

    // At least 50% should have some level of OCR success (either partial or full)
    if total_processed >= 2 {
        let min_success_rate = 0.5; // 50% minimum success rate
        let actual_success_rate = success_count as f64 / total_processed as f64;
        assert!(actual_success_rate >= min_success_rate,
            "OCR success rate should be at least {}% but was {:.1}%",
            min_success_rate * 100.0, actual_success_rate * 100.0);
    }

    println!("🎉 Real test images processing test completed!");
}

View File

@ -0,0 +1,279 @@
//! Integration tests for OCR processing using real test images
//!
//! This test suite uses the actual test images from tests/test_images/
//! to verify OCR functionality with known content.
use readur::test_utils::{get_test_images, get_available_test_images, get_test_image, skip_if_no_test_images};
use readur::ocr::OcrService;
use std::path::Path;
/// Runs OCR on every fixture image present on disk and checks the extracted
/// text loosely: it must mention the image's number and contain "text" or
/// "some".
///
/// Images that cannot be loaded, copied, or OCR'd are logged and skipped
/// rather than failing the test, so the output shows which images work.
#[tokio::test]
async fn test_ocr_with_all_available_test_images() {
    skip_if_no_test_images!();

    let available_images = get_available_test_images();
    if available_images.is_empty() {
        println!("No test images found - skipping OCR tests");
        return;
    }

    println!("Testing OCR with {} available test images", available_images.len());

    for test_image in available_images {
        println!("Testing OCR with {}", test_image.filename);

        // Load the image data
        let image_data = match test_image.load_data().await {
            Ok(data) => data,
            Err(e) => {
                println!("Failed to load {}: {}", test_image.filename, e);
                continue;
            }
        };

        // Create a temporary file for OCR processing
        let temp_path = format!("./temp_test_{}", test_image.filename);
        if let Err(e) = tokio::fs::write(&temp_path, &image_data).await {
            println!("Failed to write temp file for {}: {}", test_image.filename, e);
            continue;
        }

        // Test OCR processing
        let ocr_service = OcrService::new();
        let result = ocr_service.extract_text(&temp_path, test_image.mime_type).await;

        // Clean up temp file before asserting, so a failed assertion does not
        // leave it behind.
        let _ = tokio::fs::remove_file(&temp_path).await;

        let extracted_text = match result {
            Ok(text) => text,
            Err(e) => {
                println!("⚠️ OCR Failed for {}: {}", test_image.filename, e);
                // Don't fail the test immediately - log the error but continue
                // This allows us to see which images work and which don't
                continue;
            }
        };
        println!("✅ OCR Success for {}: '{}'", test_image.filename, extracted_text);

        // Verify the extracted text contains expected content
        let normalized_extracted = extracted_text.trim().to_lowercase();

        // The digits in the file name identify the image ("test3.jpeg" -> "3")
        let test_number = test_image.filename.chars()
            .filter(|c| c.is_numeric())
            .collect::<String>();

        if !test_number.is_empty() {
            assert!(
                normalized_extracted.contains(&format!("test {}", test_number)) ||
                normalized_extracted.contains(&test_number),
                "OCR result '{}' should contain test number '{}' for image {}",
                extracted_text, test_number, test_image.filename
            );
        }

        // Check for presence of "text" keyword
        assert!(
            normalized_extracted.contains("text") || normalized_extracted.contains("some"),
            "OCR result '{}' should contain expected text content for image {}",
            extracted_text, test_image.filename
        );
    }
}
/// Loads fixture images 1-3 and sanity-checks the raw bytes: non-empty, and
/// starting with the PNG or JPEG file signature that matches the declared
/// MIME type. Images missing from disk are skipped. No OCR is run here.
#[tokio::test]
async fn test_ocr_with_specific_test_images() {
    skip_if_no_test_images!();

    // Test specific images that should definitely work: the first 3
    for test_num in 1..=3 {
        let test_image = if let Some(img) = get_test_image(test_num) {
            img
        } else {
            continue;
        };

        if !test_image.exists() {
            println!("Skipping test{}: file not found", test_num);
            continue;
        }

        println!("Running OCR test for {}", test_image.filename);

        // Load image data
        let image_data = test_image.load_data().await
            .expect("Should be able to load test image");
        assert!(!image_data.is_empty(), "Test image should not be empty");

        // Verify file format based on MIME type
        if test_image.mime_type == "image/png" {
            // 0x89 'P' 'N' 'G'
            assert!(image_data.starts_with(b"\x89PNG"),
                "PNG file should start with PNG signature");
        } else if test_image.mime_type == "image/jpeg" {
            assert!(image_data.starts_with(&[0xFF, 0xD8, 0xFF]),
                "JPEG file should start with JPEG signature");
        }

        println!("Image {} loaded successfully: {} bytes, type: {}",
            test_image.filename, image_data.len(), test_image.mime_type);
    }
}
/// Feeds 100 bytes of `0xFF` (not a valid image) to the OCR service under a
/// `.png` name and checks only that the call returns without panicking.
/// Either outcome is accepted and logged: an error, or text.
#[tokio::test]
async fn test_ocr_error_handling_with_corrupted_image() {
    skip_if_no_test_images!();

    // Create a corrupted image file
    let temp_path = "./temp_corrupted_test.png";
    let corrupted_data = vec![0xFF_u8; 100]; // Invalid image data
    tokio::fs::write(temp_path, &corrupted_data).await
        .expect("Should be able to write corrupted test file");

    let result = OcrService::new().extract_text(temp_path, "image/png").await;

    // Clean up
    let _ = tokio::fs::remove_file(temp_path).await;

    // Should handle the error gracefully
    let report = match result {
        // Some OCR systems might return empty text instead of error
        Ok(text) => format!("Unexpected success with corrupted image: '{}'", text),
        // This is the expected behavior
        Err(e) => format!("Expected error with corrupted image: {}", e),
    };
    println!("{}", report);
}
/// Reports how many PNG and JPEG fixtures are present, then loads the first
/// two available images and checks that each holds more than 100 bytes.
#[tokio::test]
async fn test_multiple_image_formats() {
    skip_if_no_test_images!();

    let images = get_available_test_images();

    let count_of = |mime: &str| images.iter().filter(|img| img.mime_type == mime).count();
    let png_count = count_of("image/png");
    let jpeg_count = count_of("image/jpeg");

    println!("Available test images: {} PNG, {} JPEG", png_count, jpeg_count);

    // Ensure we have at least one of each format for comprehensive testing
    let both_formats_present = png_count > 0 && jpeg_count > 0;
    if both_formats_present {
        println!("✅ Both PNG and JPEG formats available for testing");
    } else {
        println!("⚠️ Limited format coverage: PNG={}, JPEG={}", png_count, jpeg_count);
    }

    // Test at least one of each format if available
    for image in images.iter().take(2).filter(|img| img.exists()) {
        println!("Testing format: {} ({})", image.mime_type, image.filename);

        let image_data = image.load_data().await
            .expect("Should load test image");

        assert!(!image_data.is_empty(), "Image data should not be empty");
        assert!(image_data.len() > 100, "Image should be reasonably sized");
    }
}
/// Times OCR over every available fixture image and prints a summary.
///
/// Load, temp-file and OCR failures are all counted as failed images. The
/// only assertion is that total elapsed time divided by the number of
/// successful images stays under 30 seconds. Ignored by default because it
/// is slow.
#[tokio::test]
#[ignore = "Long running test - run with: cargo test test_ocr_performance -- --ignored"]
async fn test_ocr_performance_with_test_images() {
    skip_if_no_test_images!();

    let available_images = get_available_test_images();
    if available_images.is_empty() {
        println!("No test images available for performance testing");
        return;
    }

    let start_time = std::time::Instant::now();
    let mut successful_ocr: u32 = 0;
    let mut failed_ocr: u32 = 0;

    for test_image in available_images {
        let image_start = std::time::Instant::now();

        // Load image
        let image_data = if let Ok(data) = test_image.load_data().await {
            data
        } else {
            failed_ocr += 1;
            continue;
        };

        // Write to temp file
        let temp_path = format!("./temp_perf_{}", test_image.filename);
        let written = tokio::fs::write(&temp_path, &image_data).await;
        if written.is_err() {
            failed_ocr += 1;
            continue;
        }

        // Run OCR
        let result = OcrService::new()
            .extract_text(&temp_path, test_image.mime_type)
            .await;

        // Clean up
        let _ = tokio::fs::remove_file(&temp_path).await;

        let duration = image_start.elapsed();

        match result {
            Err(e) => {
                failed_ocr += 1;
                println!("{} failed in {:?}: {}",
                    test_image.filename, duration, e);
            }
            Ok(text) => {
                successful_ocr += 1;
                // Show only the first 50 characters of the extracted text
                let preview: String = text.chars().take(50).collect();
                println!("{} processed in {:?}: '{}'",
                    test_image.filename, duration, preview);
            }
        }
    }

    let total_duration = start_time.elapsed();
    let total_images = successful_ocr + failed_ocr;

    println!("\n📊 OCR Performance Summary:");
    println!("Total images: {}", total_images);
    println!("Successful: {}", successful_ocr);
    println!("Failed: {}", failed_ocr);
    println!("Total time: {:?}", total_duration);

    if total_images > 0 {
        println!("Average time per image: {:?}", total_duration / total_images);
        let success_rate = (successful_ocr as f64 / total_images as f64) * 100.0;
        println!("Success rate: {:.1}%", success_rate);
    }

    // Performance assertions
    if successful_ocr > 0 {
        let avg_time_per_image = total_duration / successful_ocr;
        assert!(avg_time_per_image.as_secs() < 30,
            "OCR should complete within 30 seconds per image on average");
    }
}