feat(tests): add actual images as part of e2e and testing
This commit is contained in:
parent
efbd15774a
commit
f905c220e0
|
|
@ -19,8 +19,8 @@ test.describe('Search Functionality', () => {
|
|||
test('should perform basic search', async ({ authenticatedPage: page }) => {
|
||||
const searchInput = page.locator('input[type="search"], input[placeholder*="search" i], [data-testid="search-input"]').first();
|
||||
|
||||
// Enter search query
|
||||
await searchInput.fill(SEARCH_QUERIES.simple);
|
||||
// Search for known OCR content from test images
|
||||
await searchInput.fill(SEARCH_QUERIES.simple); // "Test 1"
|
||||
|
||||
// Wait for search API call
|
||||
const searchResponse = helpers.waitForApiCall(API_ENDPOINTS.search);
|
||||
|
|
@ -40,8 +40,8 @@ test.describe('Search Functionality', () => {
|
|||
test('should show search suggestions', async ({ authenticatedPage: page }) => {
|
||||
const searchInput = page.locator('input[type="search"], input[placeholder*="search" i], [data-testid="search-input"]').first();
|
||||
|
||||
// Start typing to trigger suggestions
|
||||
await searchInput.type('test', { delay: 100 });
|
||||
// Start typing "Test" to trigger suggestions based on OCR content
|
||||
await searchInput.type('Test', { delay: 100 });
|
||||
|
||||
// Should show suggestion dropdown
|
||||
await expect(page.locator('[data-testid="search-suggestions"], .suggestions, .autocomplete')).toBeVisible({
|
||||
|
|
@ -52,8 +52,8 @@ test.describe('Search Functionality', () => {
|
|||
test('should filter search results', async ({ authenticatedPage: page }) => {
|
||||
const searchInput = page.locator('input[type="search"], input[placeholder*="search" i], [data-testid="search-input"]').first();
|
||||
|
||||
// Perform initial search
|
||||
await searchInput.fill(SEARCH_QUERIES.simple);
|
||||
// Search for content that should match multiple test images
|
||||
await searchInput.fill(SEARCH_QUERIES.content); // "some text from text"
|
||||
await searchInput.press('Enter');
|
||||
|
||||
await helpers.waitForLoadingToComplete();
|
||||
|
|
@ -63,10 +63,10 @@ test.describe('Search Functionality', () => {
|
|||
if (await filterButton.isVisible()) {
|
||||
await filterButton.click();
|
||||
|
||||
// Select document type filter
|
||||
const pdfFilter = page.locator('input[type="checkbox"][value="pdf"], label:has-text("PDF")');
|
||||
if (await pdfFilter.isVisible()) {
|
||||
await pdfFilter.check();
|
||||
// Select image type filter (since our test files are images)
|
||||
const imageFilter = page.locator('input[type="checkbox"][value="image"], input[type="checkbox"][value="png"], label:has-text("Image")');
|
||||
if (await imageFilter.isVisible()) {
|
||||
await imageFilter.check();
|
||||
|
||||
// Should update search results
|
||||
await helpers.waitForApiCall(API_ENDPOINTS.search);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { test, expect } from './fixtures/auth';
|
||||
import { TEST_FILES, TIMEOUTS, API_ENDPOINTS } from './utils/test-data';
|
||||
import { TEST_FILES, TIMEOUTS, API_ENDPOINTS, EXPECTED_OCR_CONTENT } from './utils/test-data';
|
||||
import { TestHelpers } from './utils/test-helpers';
|
||||
|
||||
test.describe('Document Upload', () => {
|
||||
|
|
@ -20,8 +20,8 @@ test.describe('Document Upload', () => {
|
|||
// Find file input - try multiple selectors
|
||||
const fileInput = page.locator('input[type="file"]').first();
|
||||
|
||||
// Upload a test file
|
||||
await fileInput.setInputFiles(TEST_FILES.image);
|
||||
// Upload test1.png with known OCR content
|
||||
await fileInput.setInputFiles(TEST_FILES.test1);
|
||||
|
||||
// Wait for upload API call
|
||||
const uploadResponse = helpers.waitForApiCall(API_ENDPOINTS.upload, TIMEOUTS.upload);
|
||||
|
|
@ -45,25 +45,25 @@ test.describe('Document Upload', () => {
|
|||
test('should upload multiple documents', async ({ authenticatedPage: page }) => {
|
||||
const fileInput = page.locator('input[type="file"]').first();
|
||||
|
||||
// Upload multiple files
|
||||
await fileInput.setInputFiles([TEST_FILES.image, TEST_FILES.multiline]);
|
||||
// Upload multiple test images with different formats
|
||||
await fileInput.setInputFiles([TEST_FILES.test1, TEST_FILES.test2, TEST_FILES.test3]);
|
||||
|
||||
const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]');
|
||||
if (await uploadButton.isVisible()) {
|
||||
await uploadButton.click();
|
||||
}
|
||||
|
||||
// Wait for both uploads to complete
|
||||
// Wait for all uploads to complete
|
||||
await helpers.waitForLoadingToComplete();
|
||||
|
||||
// Should show multiple uploaded documents
|
||||
const uploadedFiles = page.locator('[data-testid="uploaded-files"] > *, .uploaded-file');
|
||||
await expect(uploadedFiles).toHaveCount(2, { timeout: TIMEOUTS.medium });
|
||||
await expect(uploadedFiles).toHaveCount(3, { timeout: TIMEOUTS.medium });
|
||||
});
|
||||
|
||||
test('should show upload progress', async ({ authenticatedPage: page }) => {
|
||||
const fileInput = page.locator('input[type="file"]').first();
|
||||
await fileInput.setInputFiles(TEST_FILES.image);
|
||||
await fileInput.setInputFiles(TEST_FILES.test4);
|
||||
|
||||
const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]');
|
||||
if (await uploadButton.isVisible()) {
|
||||
|
|
@ -140,7 +140,7 @@ test.describe('Document Upload', () => {
|
|||
|
||||
test('should show OCR processing status', async ({ authenticatedPage: page }) => {
|
||||
const fileInput = page.locator('input[type="file"]').first();
|
||||
await fileInput.setInputFiles(TEST_FILES.image);
|
||||
await fileInput.setInputFiles(TEST_FILES.test5);
|
||||
|
||||
const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]');
|
||||
if (await uploadButton.isVisible()) {
|
||||
|
|
@ -155,6 +155,42 @@ test.describe('Document Upload', () => {
|
|||
});
|
||||
});
|
||||
|
||||
// Uploads test6.jpeg, waits for OCR to finish, then opens the document and checks that the
// extracted text matches the shared EXPECTED_OCR_CONTENT fixture (kept in sync with the images).
test('should process OCR and extract correct text content', async ({ authenticatedPage: page }) => {
  const fileInput = page.locator('input[type="file"]').first();

  // Upload test6.jpeg with known content
  await fileInput.setInputFiles(TEST_FILES.test6);

  // Only click when an explicit upload button is rendered.
  const uploadButton = page.locator('button:has-text("Upload"), [data-testid="upload-button"]');
  if (await uploadButton.isVisible()) {
    await uploadButton.click();
  }

  await helpers.waitForLoadingToComplete();

  // Wait for OCR to complete
  await expect(page.locator(':has-text("OCR Complete"), :has-text("Processed"), [data-testid="ocr-complete"]')).toBeVisible({
    timeout: TIMEOUTS.ocr
  });

  // Navigate to document details to verify OCR content
  const uploadedDocument = page.locator('[data-testid="uploaded-files"] > *, .uploaded-file').first();
  if (await uploadedDocument.isVisible()) {
    await uploadedDocument.click();

    // Should navigate to document details page
    await page.waitForURL(/\/documents\/[^\/]+/, { timeout: TIMEOUTS.medium });

    // Check that OCR content is visible and contains expected text
    const documentContent = page.locator('[data-testid="document-content"], .document-text, .ocr-content');
    if (await documentContent.isVisible()) {
      const content = await documentContent.textContent();
      // Each expected line is asserted on its own so that the way the page renders the
      // line break between title and body does not affect the check.
      for (const expectedLine of EXPECTED_OCR_CONTENT.test6.split('\n')) {
        expect(content).toContain(expectedLine);
      }
    }
  }
});
|
||||
|
||||
test('should allow drag and drop upload', async ({ authenticatedPage: page }) => {
|
||||
// Look for dropzone
|
||||
const dropzone = page.locator('[data-testid="dropzone"], .dropzone, .upload-area');
|
||||
|
|
|
|||
|
|
@ -10,18 +10,30 @@ export const TEST_USERS = {
|
|||
};
|
||||
|
||||
export const TEST_FILES = {
|
||||
pdf: 'test_data/sample.pdf',
|
||||
image: 'test_data/hello_ocr.png',
|
||||
text: 'test_data/sample.txt',
|
||||
multiline: 'test_data/multiline.png',
|
||||
numbers: 'test_data/numbers.png'
|
||||
// Real test images with known OCR content
|
||||
test1: '../tests/test_images/test1.png', // "Test 1\nThis is some text from text 1"
|
||||
test2: '../tests/test_images/test2.jpg', // "Test 2\nThis is some text from text 2"
|
||||
test3: '../tests/test_images/test3.jpeg', // "Test 3\nThis is some text from text 3"
|
||||
test4: '../tests/test_images/test4.png', // "Test 4\nThis is some text from text 4"
|
||||
test5: '../tests/test_images/test5.jpg', // "Test 5\nThis is some text from text 5"
|
||||
test6: '../tests/test_images/test6.jpeg', // "Test 6\nThis is some text from text 6"
|
||||
test7: '../tests/test_images/test7.png', // "Test 7\nThis is some text from text 7"
|
||||
test8: '../tests/test_images/test8.jpeg', // "Test 8\nThis is some text from text 8"
|
||||
test9: '../tests/test_images/test9.png', // "Test 9\nThis is some text from text 9"
|
||||
|
||||
// Backwards compatibility
|
||||
image: '../tests/test_images/test1.png',
|
||||
multiline: '../tests/test_images/test2.jpg',
|
||||
text: 'test_data/sample.txt'
|
||||
};
|
||||
|
||||
export const SEARCH_QUERIES = {
|
||||
simple: 'test document',
|
||||
simple: 'Test 1', // Will match test1.png OCR content
|
||||
content: 'some text from text', // Will match multiple test images
|
||||
specific: 'Test 3', // Will match test3.jpeg specifically
|
||||
advanced: {
|
||||
title: 'important',
|
||||
content: 'contract',
|
||||
title: 'Test',
|
||||
content: 'some text',
|
||||
dateFrom: '2024-01-01',
|
||||
dateTo: '2024-12-31'
|
||||
},
|
||||
|
|
@ -29,6 +41,19 @@ export const SEARCH_QUERIES = {
|
|||
noResults: 'xyzabc123nonexistent'
|
||||
};
|
||||
|
||||
// Expected OCR content for test images
|
||||
export const EXPECTED_OCR_CONTENT = {
|
||||
test1: 'Test 1\nThis is some text from text 1',
|
||||
test2: 'Test 2\nThis is some text from text 2',
|
||||
test3: 'Test 3\nThis is some text from text 3',
|
||||
test4: 'Test 4\nThis is some text from text 4',
|
||||
test5: 'Test 5\nThis is some text from text 5',
|
||||
test6: 'Test 6\nThis is some text from text 6',
|
||||
test7: 'Test 7\nThis is some text from text 7',
|
||||
test8: 'Test 8\nThis is some text from text 8',
|
||||
test9: 'Test 9\nThis is some text from text 9'
|
||||
};
|
||||
|
||||
export const API_ENDPOINTS = {
|
||||
login: '/api/auth/login',
|
||||
upload: '/api/documents/upload',
|
||||
|
|
|
|||
|
|
@ -30,6 +30,9 @@ pub mod webdav_xml_parser;
|
|||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test_utils;
|
||||
|
||||
use axum::{http::StatusCode, Json};
|
||||
use config::Config;
|
||||
use db::Database;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,137 @@
|
|||
//! Test utilities for loading and working with test images and data
|
||||
//!
|
||||
//! This module provides utilities for loading test images from the tests/test_images/
|
||||
//! directory and working with them in unit and integration tests.
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
/// A fixture image together with the text OCR is expected to read from it.
///
/// `PartialEq`/`Eq` are derived so fixtures can be compared directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TestImage {
    /// File name only, e.g. `test1.png`.
    pub filename: &'static str,
    /// Location of the file; `TestImage::new` sets this to `tests/test_images/<filename>`.
    pub path: String,
    /// MIME type of the image, e.g. `image/png`.
    pub mime_type: &'static str,
    /// Text the image is known to contain.
    pub expected_content: &'static str,
}
|
||||
|
||||
impl TestImage {
|
||||
pub fn new(filename: &'static str, mime_type: &'static str, expected_content: &'static str) -> Self {
|
||||
Self {
|
||||
filename,
|
||||
path: format!("tests/test_images/{}", filename),
|
||||
mime_type,
|
||||
expected_content,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn exists(&self) -> bool {
|
||||
Path::new(&self.path).exists()
|
||||
}
|
||||
|
||||
pub async fn load_data(&self) -> Result<Vec<u8>, std::io::Error> {
|
||||
tokio::fs::read(&self.path).await
|
||||
}
|
||||
}
|
||||
|
||||
/// Get all available test images with their expected OCR content
|
||||
pub fn get_test_images() -> Vec<TestImage> {
|
||||
vec![
|
||||
TestImage::new("test1.png", "image/png", "Test 1\nThis is some text from text 1"),
|
||||
TestImage::new("test2.jpg", "image/jpeg", "Test 2\nThis is some text from text 2"),
|
||||
TestImage::new("test3.jpeg", "image/jpeg", "Test 3\nThis is some text from text 3"),
|
||||
TestImage::new("test4.png", "image/png", "Test 4\nThis is some text from text 4"),
|
||||
TestImage::new("test5.jpg", "image/jpeg", "Test 5\nThis is some text from text 5"),
|
||||
TestImage::new("test6.jpeg", "image/jpeg", "Test 6\nThis is some text from text 6"),
|
||||
TestImage::new("test7.png", "image/png", "Test 7\nThis is some text from text 7"),
|
||||
TestImage::new("test8.jpeg", "image/jpeg", "Test 8\nThis is some text from text 8"),
|
||||
TestImage::new("test9.png", "image/png", "Test 9\nThis is some text from text 9"),
|
||||
]
|
||||
}
|
||||
|
||||
/// Get a specific test image by number (1-9)
|
||||
pub fn get_test_image(number: u8) -> Option<TestImage> {
|
||||
if number < 1 || number > 9 {
|
||||
return None;
|
||||
}
|
||||
|
||||
get_test_images().into_iter().nth((number - 1) as usize)
|
||||
}
|
||||
|
||||
/// Load test image data by filename
|
||||
pub async fn load_test_image(filename: &str) -> Result<Vec<u8>, std::io::Error> {
|
||||
let path = format!("tests/test_images/{}", filename);
|
||||
tokio::fs::read(path).await
|
||||
}
|
||||
|
||||
/// Reports whether the `tests/test_images` path exists.
pub fn test_images_available() -> bool {
    let fixtures_dir: &Path = "tests/test_images".as_ref();
    fixtures_dir.exists()
}
|
||||
|
||||
/// Get available test images (only those that exist on filesystem)
|
||||
pub fn get_available_test_images() -> Vec<TestImage> {
|
||||
get_test_images()
|
||||
.into_iter()
|
||||
.filter(|img| img.exists())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Skip test macro for conditional testing based on test image availability
|
||||
macro_rules! skip_if_no_test_images {
|
||||
() => {
|
||||
if !crate::test_utils::test_images_available() {
|
||||
println!("Skipping test: test images directory not available");
|
||||
return;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Skip test macro for specific test image
|
||||
macro_rules! skip_if_test_image_missing {
|
||||
($image:expr) => {
|
||||
if !$image.exists() {
|
||||
println!("Skipping test: {} not found", $image.filename);
|
||||
return;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub use skip_if_no_test_images;
|
||||
pub use skip_if_test_image_missing;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Every fixture is named `testN.<ext>` and its expected text starts with `Test N`.
    #[test]
    fn test_image_paths_are_valid() {
        let images = get_test_images();
        assert_eq!(images.len(), 9);

        for (number, image) in (1usize..).zip(images.iter()) {
            // PNGs are identified by MIME type; the two JPEG spellings by file name.
            let extension = match image.mime_type {
                "image/png" => "png",
                _ if image.filename.ends_with(".jpg") => "jpg",
                _ => "jpeg",
            };
            let expected_name = format!("test{}.{}", number, extension);
            assert_eq!(image.filename, expected_name);

            let expected_prefix = format!("Test {}", number);
            assert!(image.expected_content.starts_with(expected_prefix.as_str()));
        }
    }

    /// Lookup by number returns the matching fixture and rejects out-of-range numbers.
    #[test]
    fn test_get_specific_image() {
        let cases = [
            (1u8, "test1.png", "image/png", "Test 1"),
            (5u8, "test5.jpg", "image/jpeg", "Test 5"),
        ];
        for &(number, filename, mime_type, marker) in cases.iter() {
            let image = get_test_image(number).unwrap();
            assert_eq!(image.filename, filename);
            assert_eq!(image.mime_type, mime_type);
            assert!(image.expected_content.contains(marker));
        }

        // Invalid numbers should return None
        for out_of_range in [0u8, 10].iter().copied() {
            assert!(get_test_image(out_of_range).is_none());
        }
    }
}
|
||||
|
|
@ -985,4 +985,235 @@ async fn test_concurrent_file_processing() {
|
|||
assert!(success_rate >= 0.8, "At least 80% of files should complete processing (not timeout)");
|
||||
|
||||
println!("🎉 Concurrent file processing test passed!");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_real_test_images_processing() {
|
||||
println!("🖼️ Testing real test images processing...");
|
||||
|
||||
// Check if test images are available
|
||||
if !readur::test_utils::test_images_available() {
|
||||
println!("⚠️ Test images not available - skipping real image processing test");
|
||||
return;
|
||||
}
|
||||
|
||||
let mut client = FileProcessingTestClient::new();
|
||||
client.setup_user().await
|
||||
.expect("Failed to setup test user");
|
||||
|
||||
println!("✅ User setup complete");
|
||||
|
||||
let available_images = readur::test_utils::get_available_test_images();
|
||||
|
||||
if available_images.is_empty() {
|
||||
println!("⚠️ No test images found - skipping test");
|
||||
return;
|
||||
}
|
||||
|
||||
println!("📋 Found {} test images to process", available_images.len());
|
||||
|
||||
let mut processed_results = Vec::new();
|
||||
|
||||
// Process each available test image
|
||||
for test_image in available_images.iter().take(3) { // Limit to first 3 for faster testing
|
||||
println!("📤 Processing test image: {}", test_image.filename);
|
||||
|
||||
// Load the image data
|
||||
let image_data = match test_image.load_data().await {
|
||||
Ok(data) => data,
|
||||
Err(e) => {
|
||||
println!("⚠️ Failed to load {}: {}", test_image.filename, e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
println!("✅ Loaded {} ({} bytes, {})",
|
||||
test_image.filename, image_data.len(), test_image.mime_type);
|
||||
|
||||
// Upload the image
|
||||
let upload_start = std::time::Instant::now();
|
||||
let document = match client.upload_binary_file(
|
||||
image_data,
|
||||
test_image.filename,
|
||||
test_image.mime_type
|
||||
).await {
|
||||
Ok(doc) => doc,
|
||||
Err(e) => {
|
||||
println!("⚠️ Failed to upload {}: {}", test_image.filename, e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let upload_time = upload_start.elapsed();
|
||||
println!("✅ {} uploaded in {:?}: {}", test_image.filename, upload_time, document.id);
|
||||
|
||||
// Wait for OCR processing
|
||||
let processing_start = std::time::Instant::now();
|
||||
match client.wait_for_processing(&document.id.to_string()).await {
|
||||
Ok(processed_doc) => {
|
||||
let processing_time = processing_start.elapsed();
|
||||
println!("✅ {} processed in {:?}: status = {:?}",
|
||||
test_image.filename, processing_time, processed_doc.ocr_status);
|
||||
|
||||
// Get OCR results and verify content
|
||||
if let Ok(ocr_results) = client.get_ocr_results(&document.id.to_string()).await {
|
||||
if let Some(ocr_text) = ocr_results["ocr_text"].as_str() {
|
||||
let normalized_ocr = ocr_text.trim().to_lowercase();
|
||||
let normalized_expected = test_image.expected_content.trim().to_lowercase();
|
||||
|
||||
println!("🔍 OCR extracted: '{}'", ocr_text);
|
||||
println!("🎯 Expected: '{}'", test_image.expected_content);
|
||||
|
||||
// Check if OCR content matches expectations
|
||||
let test_number = test_image.filename.chars()
|
||||
.filter(|c| c.is_numeric())
|
||||
.collect::<String>();
|
||||
|
||||
let content_matches = if !test_number.is_empty() {
|
||||
normalized_ocr.contains(&format!("test {}", test_number)) ||
|
||||
normalized_ocr.contains(&test_number)
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
let has_text_content = normalized_ocr.contains("text") ||
|
||||
normalized_ocr.contains("some");
|
||||
|
||||
processed_results.push((
|
||||
test_image.filename.to_string(),
|
||||
upload_time,
|
||||
processing_time,
|
||||
processed_doc.ocr_status.clone(),
|
||||
ocr_text.to_string(),
|
||||
content_matches,
|
||||
has_text_content,
|
||||
));
|
||||
|
||||
if content_matches && has_text_content {
|
||||
println!("✅ OCR content verification PASSED for {}", test_image.filename);
|
||||
} else {
|
||||
println!("⚠️ OCR content verification PARTIAL for {} (number: {}, text: {})",
|
||||
test_image.filename, content_matches, has_text_content);
|
||||
}
|
||||
} else {
|
||||
println!("⚠️ No OCR text found for {}", test_image.filename);
|
||||
processed_results.push((
|
||||
test_image.filename.to_string(),
|
||||
upload_time,
|
||||
processing_time,
|
||||
processed_doc.ocr_status.clone(),
|
||||
"".to_string(),
|
||||
false,
|
||||
false,
|
||||
));
|
||||
}
|
||||
} else {
|
||||
println!("⚠️ Failed to get OCR results for {}", test_image.filename);
|
||||
processed_results.push((
|
||||
test_image.filename.to_string(),
|
||||
upload_time,
|
||||
processing_time,
|
||||
processed_doc.ocr_status.clone(),
|
||||
"".to_string(),
|
||||
false,
|
||||
false,
|
||||
));
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
println!("⚠️ Processing failed for {}: {}", test_image.filename, e);
|
||||
processed_results.push((
|
||||
test_image.filename.to_string(),
|
||||
upload_time,
|
||||
Duration::ZERO,
|
||||
Some("failed".to_string()),
|
||||
"".to_string(),
|
||||
false,
|
||||
false,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Add small delay between uploads to avoid overwhelming the system
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
}
|
||||
|
||||
// Analyze results
|
||||
println!("📊 Real Test Images Processing Results:");
|
||||
println!(" {:<12} {:<10} {:<12} {:<10} {:<8} {:<8} {}",
|
||||
"Image", "Upload", "Processing", "Status", "Number", "Text", "OCR Content");
|
||||
println!(" {}", "-".repeat(80));
|
||||
|
||||
let mut successful_ocr = 0;
|
||||
let mut failed_ocr = 0;
|
||||
let mut partial_matches = 0;
|
||||
|
||||
for (filename, upload_time, processing_time, status, ocr_text, number_match, text_match) in &processed_results {
|
||||
let status_str = status.as_deref().unwrap_or("unknown");
|
||||
let ocr_preview = if ocr_text.len() > 30 {
|
||||
format!("{}...", &ocr_text[..30])
|
||||
} else {
|
||||
ocr_text.clone()
|
||||
};
|
||||
|
||||
println!(" {:<12} {:<10?} {:<12?} {:<10} {:<8} {:<8} {}",
|
||||
filename, upload_time, processing_time, status_str,
|
||||
if *number_match { "✅" } else { "❌" },
|
||||
if *text_match { "✅" } else { "❌" },
|
||||
ocr_preview);
|
||||
|
||||
if status_str == "completed" {
|
||||
if *number_match && *text_match {
|
||||
successful_ocr += 1;
|
||||
} else if *number_match || *text_match {
|
||||
partial_matches += 1;
|
||||
} else {
|
||||
failed_ocr += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let total_processed = processed_results.len();
|
||||
|
||||
println!("\n📈 Summary:");
|
||||
println!(" Total processed: {}", total_processed);
|
||||
println!(" Successful OCR: {}", successful_ocr);
|
||||
println!(" Partial matches: {}", partial_matches);
|
||||
println!(" Failed OCR: {}", failed_ocr);
|
||||
|
||||
if total_processed > 0 {
|
||||
let success_rate = (successful_ocr + partial_matches) as f64 / total_processed as f64 * 100.0;
|
||||
println!(" Success rate: {:.1}%", success_rate);
|
||||
|
||||
// Calculate average processing time for successful cases
|
||||
let successful_processing_times: Vec<_> = processed_results.iter()
|
||||
.filter(|(_, _, _, status, _, number, text)| {
|
||||
status.as_deref() == Some("completed") && (*number || *text)
|
||||
})
|
||||
.map(|(_, _, processing_time, _, _, _, _)| *processing_time)
|
||||
.collect();
|
||||
|
||||
if !successful_processing_times.is_empty() {
|
||||
let avg_processing_time = successful_processing_times.iter().sum::<Duration>()
|
||||
/ successful_processing_times.len() as u32;
|
||||
println!(" Average processing time: {:?}", avg_processing_time);
|
||||
}
|
||||
}
|
||||
|
||||
// Test assertions
|
||||
assert!(!processed_results.is_empty(), "At least some test images should be processed");
|
||||
|
||||
// At least 50% should have some level of OCR success (either partial or full)
|
||||
let success_count = successful_ocr + partial_matches;
|
||||
assert!(success_count > 0, "At least some test images should have successful OCR");
|
||||
|
||||
if total_processed >= 2 {
|
||||
let min_success_rate = 0.5; // 50% minimum success rate
|
||||
let actual_success_rate = success_count as f64 / total_processed as f64;
|
||||
assert!(actual_success_rate >= min_success_rate,
|
||||
"OCR success rate should be at least {}% but was {:.1}%",
|
||||
min_success_rate * 100.0, actual_success_rate * 100.0);
|
||||
}
|
||||
|
||||
println!("🎉 Real test images processing test completed!");
|
||||
}
|
||||
|
|
@ -0,0 +1,279 @@
|
|||
//! Integration tests for OCR processing using real test images
|
||||
//!
|
||||
//! This test suite uses the actual test images from tests/test_images/
|
||||
//! to verify OCR functionality with known content.
|
||||
|
||||
use readur::test_utils::{get_test_images, get_available_test_images, get_test_image, skip_if_no_test_images};
|
||||
use readur::ocr::OcrService;
|
||||
use std::path::Path;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_ocr_with_all_available_test_images() {
|
||||
skip_if_no_test_images!();
|
||||
|
||||
let available_images = get_available_test_images();
|
||||
|
||||
if available_images.is_empty() {
|
||||
println!("No test images found - skipping OCR tests");
|
||||
return;
|
||||
}
|
||||
|
||||
println!("Testing OCR with {} available test images", available_images.len());
|
||||
|
||||
for test_image in available_images {
|
||||
println!("Testing OCR with {}", test_image.filename);
|
||||
|
||||
// Load the image data
|
||||
let image_data = match test_image.load_data().await {
|
||||
Ok(data) => data,
|
||||
Err(e) => {
|
||||
println!("Failed to load {}: {}", test_image.filename, e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Create a temporary file for OCR processing
|
||||
let temp_path = format!("./temp_test_{}", test_image.filename);
|
||||
if let Err(e) = tokio::fs::write(&temp_path, &image_data).await {
|
||||
println!("Failed to write temp file for {}: {}", test_image.filename, e);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Test OCR processing
|
||||
let ocr_service = OcrService::new();
|
||||
let result = ocr_service.extract_text(&temp_path, test_image.mime_type).await;
|
||||
|
||||
// Clean up temp file
|
||||
let _ = tokio::fs::remove_file(&temp_path).await;
|
||||
|
||||
match result {
|
||||
Ok(extracted_text) => {
|
||||
println!("✅ OCR Success for {}: '{}'", test_image.filename, extracted_text);
|
||||
|
||||
// Verify the extracted text contains expected content
|
||||
let normalized_extracted = extracted_text.trim().to_lowercase();
|
||||
let normalized_expected = test_image.expected_content.trim().to_lowercase();
|
||||
|
||||
// Check for key parts of expected content
|
||||
let test_number = test_image.filename.chars()
|
||||
.filter(|c| c.is_numeric())
|
||||
.collect::<String>();
|
||||
|
||||
if !test_number.is_empty() {
|
||||
assert!(
|
||||
normalized_extracted.contains(&format!("test {}", test_number)) ||
|
||||
normalized_extracted.contains(&test_number),
|
||||
"OCR result '{}' should contain test number '{}' for image {}",
|
||||
extracted_text, test_number, test_image.filename
|
||||
);
|
||||
}
|
||||
|
||||
// Check for presence of "text" keyword
|
||||
assert!(
|
||||
normalized_extracted.contains("text") || normalized_extracted.contains("some"),
|
||||
"OCR result '{}' should contain expected text content for image {}",
|
||||
extracted_text, test_image.filename
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
println!("⚠️ OCR Failed for {}: {}", test_image.filename, e);
|
||||
// Don't fail the test immediately - log the error but continue
|
||||
// This allows us to see which images work and which don't
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_ocr_with_specific_test_images() {
|
||||
skip_if_no_test_images!();
|
||||
|
||||
// Test specific images that should definitely work
|
||||
let test_cases = vec![1, 2, 3]; // Test with first 3 images
|
||||
|
||||
for test_num in test_cases {
|
||||
let test_image = match get_test_image(test_num) {
|
||||
Some(img) => img,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
if !test_image.exists() {
|
||||
println!("Skipping test{}: file not found", test_num);
|
||||
continue;
|
||||
}
|
||||
|
||||
println!("Running OCR test for {}", test_image.filename);
|
||||
|
||||
// Load image data
|
||||
let image_data = test_image.load_data().await
|
||||
.expect("Should be able to load test image");
|
||||
|
||||
assert!(!image_data.is_empty(), "Test image should not be empty");
|
||||
|
||||
// Verify file format based on MIME type
|
||||
match test_image.mime_type {
|
||||
"image/png" => {
|
||||
assert!(image_data.starts_with(&[0x89, 0x50, 0x4E, 0x47]),
|
||||
"PNG file should start with PNG signature");
|
||||
}
|
||||
"image/jpeg" => {
|
||||
assert!(image_data.starts_with(&[0xFF, 0xD8, 0xFF]),
|
||||
"JPEG file should start with JPEG signature");
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
println!("Image {} loaded successfully: {} bytes, type: {}",
|
||||
test_image.filename, image_data.len(), test_image.mime_type);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_ocr_error_handling_with_corrupted_image() {
|
||||
skip_if_no_test_images!();
|
||||
|
||||
// Create a corrupted image file
|
||||
let corrupted_data = vec![0xFF; 100]; // Invalid image data
|
||||
let temp_path = "./temp_corrupted_test.png";
|
||||
|
||||
tokio::fs::write(temp_path, &corrupted_data).await
|
||||
.expect("Should be able to write corrupted test file");
|
||||
|
||||
let ocr_service = OcrService::new();
|
||||
let result = ocr_service.extract_text(temp_path, "image/png").await;
|
||||
|
||||
// Clean up
|
||||
let _ = tokio::fs::remove_file(temp_path).await;
|
||||
|
||||
// Should handle the error gracefully
|
||||
match result {
|
||||
Ok(text) => {
|
||||
println!("Unexpected success with corrupted image: '{}'", text);
|
||||
// Some OCR systems might return empty text instead of error
|
||||
}
|
||||
Err(e) => {
|
||||
println!("Expected error with corrupted image: {}", e);
|
||||
// This is the expected behavior
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_multiple_image_formats() {
|
||||
skip_if_no_test_images!();
|
||||
|
||||
let images = get_available_test_images();
|
||||
let mut png_count = 0;
|
||||
let mut jpeg_count = 0;
|
||||
|
||||
for image in &images {
|
||||
match image.mime_type {
|
||||
"image/png" => png_count += 1,
|
||||
"image/jpeg" => jpeg_count += 1,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
println!("Available test images: {} PNG, {} JPEG", png_count, jpeg_count);
|
||||
|
||||
// Ensure we have at least one of each format for comprehensive testing
|
||||
if png_count > 0 && jpeg_count > 0 {
|
||||
println!("✅ Both PNG and JPEG formats available for testing");
|
||||
} else {
|
||||
println!("⚠️ Limited format coverage: PNG={}, JPEG={}", png_count, jpeg_count);
|
||||
}
|
||||
|
||||
// Test at least one of each format if available
|
||||
for image in images.iter().take(2) {
|
||||
if image.exists() {
|
||||
println!("Testing format: {} ({})", image.mime_type, image.filename);
|
||||
|
||||
let image_data = image.load_data().await
|
||||
.expect("Should load test image");
|
||||
|
||||
assert!(!image_data.is_empty(), "Image data should not be empty");
|
||||
assert!(image_data.len() > 100, "Image should be reasonably sized");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[ignore = "Long running test - run with: cargo test test_ocr_performance -- --ignored"]
|
||||
async fn test_ocr_performance_with_test_images() {
|
||||
skip_if_no_test_images!();
|
||||
|
||||
let available_images = get_available_test_images();
|
||||
|
||||
if available_images.is_empty() {
|
||||
println!("No test images available for performance testing");
|
||||
return;
|
||||
}
|
||||
|
||||
let start_time = std::time::Instant::now();
|
||||
let mut successful_ocr = 0;
|
||||
let mut failed_ocr = 0;
|
||||
|
||||
for test_image in available_images {
|
||||
let image_start = std::time::Instant::now();
|
||||
|
||||
// Load image
|
||||
let image_data = match test_image.load_data().await {
|
||||
Ok(data) => data,
|
||||
Err(_) => {
|
||||
failed_ocr += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Write to temp file
|
||||
let temp_path = format!("./temp_perf_{}", test_image.filename);
|
||||
if tokio::fs::write(&temp_path, &image_data).await.is_err() {
|
||||
failed_ocr += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Run OCR
|
||||
let ocr_service = OcrService::new();
|
||||
let result = ocr_service.extract_text(&temp_path, test_image.mime_type).await;
|
||||
|
||||
// Clean up
|
||||
let _ = tokio::fs::remove_file(&temp_path).await;
|
||||
|
||||
let duration = image_start.elapsed();
|
||||
|
||||
match result {
|
||||
Ok(text) => {
|
||||
successful_ocr += 1;
|
||||
println!("✅ {} processed in {:?}: '{}'",
|
||||
test_image.filename, duration, text.chars().take(50).collect::<String>());
|
||||
}
|
||||
Err(e) => {
|
||||
failed_ocr += 1;
|
||||
println!("❌ {} failed in {:?}: {}",
|
||||
test_image.filename, duration, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let total_duration = start_time.elapsed();
|
||||
let total_images = successful_ocr + failed_ocr;
|
||||
|
||||
println!("\n📊 OCR Performance Summary:");
|
||||
println!("Total images: {}", total_images);
|
||||
println!("Successful: {}", successful_ocr);
|
||||
println!("Failed: {}", failed_ocr);
|
||||
println!("Total time: {:?}", total_duration);
|
||||
|
||||
if total_images > 0 {
|
||||
println!("Average time per image: {:?}", total_duration / total_images);
|
||||
let success_rate = (successful_ocr as f64 / total_images as f64) * 100.0;
|
||||
println!("Success rate: {:.1}%", success_rate);
|
||||
}
|
||||
|
||||
// Performance assertions
|
||||
if successful_ocr > 0 {
|
||||
let avg_time_per_image = total_duration / successful_ocr;
|
||||
assert!(avg_time_per_image.as_secs() < 30,
|
||||
"OCR should complete within 30 seconds per image on average");
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue