// Readur/src/tests/ocr_tests.rs
//
// 100 lines
// 3.5 KiB
// Rust

#[cfg(test)]
mod tests {
    use super::super::ocr::OcrService;
    use std::fs;
    use tempfile::NamedTempFile;

    /// `is_image_file` accepts the listed image file names (including one
    /// with an upper-case extension) and rejects non-image names as well as a
    /// name with no extension at all.
    #[test]
    fn test_is_image_file() {
        let ocr_service = OcrService::new();

        for name in [
            "image.png",
            "photo.jpg",
            "picture.JPEG",
            "scan.tiff",
            "bitmap.bmp",
            "animation.gif",
        ] {
            assert!(
                ocr_service.is_image_file(name),
                "expected `{}` to be recognised as an image",
                name
            );
        }

        for name in ["document.pdf", "text.txt", "archive.zip", "noextension"] {
            assert!(
                !ocr_service.is_image_file(name),
                "expected `{}` not to be recognised as an image",
                name
            );
        }
    }

    /// Extracting a file as `text/plain` returns exactly the bytes that were
    /// written to it, line break included.
    #[tokio::test]
    async fn test_extract_text_from_plain_text() {
        let ocr_service = OcrService::new();
        let temp_file = NamedTempFile::new().unwrap();
        let test_content = "This is a test text file.\nWith multiple lines.";
        fs::write(temp_file.path(), test_content).unwrap();

        // `expect` surfaces the underlying error if extraction fails, instead
        // of a bare "assertion failed: result.is_ok()".
        let extracted_text = ocr_service
            .extract_text(temp_file.path().to_str().unwrap(), "text/plain")
            .await
            .expect("plain-text extraction should succeed");

        assert_eq!(extracted_text, test_content);
    }

    /// Requesting extraction with the `application/zip` type fails with an
    /// error whose message contains "Unsupported file type".
    #[tokio::test]
    async fn test_extract_text_unsupported_type() {
        let ocr_service = OcrService::new();
        let temp_file = NamedTempFile::new().unwrap();
        fs::write(temp_file.path(), "some content").unwrap();

        let error = ocr_service
            .extract_text(temp_file.path().to_str().unwrap(), "application/zip")
            .await
            .expect_err("extraction with an unsupported type should fail");

        let message = error.to_string();
        assert!(
            message.contains("Unsupported file type"),
            "unexpected error message: {}",
            message
        );
    }

    /// Extracting from a path that does not exist yields an error.
    #[tokio::test]
    async fn test_extract_text_from_nonexistent_file() {
        let ocr_service = OcrService::new();

        let result = ocr_service
            .extract_text("/path/to/nonexistent/file.txt", "text/plain")
            .await;

        assert!(
            result.is_err(),
            "extraction from a missing file should fail"
        );
    }

    // Note: fully testing PDF and image extraction would require real PDF and
    // image fixtures. For now the tests below only cover error handling and
    // basic functionality.

    /// An empty file is not a valid PDF, so PDF extraction must report an error.
    #[tokio::test]
    async fn test_extract_text_from_pdf_empty_file() {
        let ocr_service = OcrService::new();
        let temp_file = NamedTempFile::new().unwrap();
        fs::write(temp_file.path(), "").unwrap(); // Empty file, not a valid PDF

        let result = ocr_service
            .extract_text_from_pdf(temp_file.path().to_str().unwrap())
            .await;

        assert!(
            result.is_err(),
            "an empty file should not be accepted as a PDF"
        );
    }

    /// A `.png` file holding non-image bytes, submitted with an unrecognised
    /// type string, must produce an error.
    ///
    /// The original author's intent is that the `.png` suffix routes the file
    /// to image processing, which then fails on the invalid data. The test can
    /// only observe that an error is returned, not which code path produced it.
    #[tokio::test]
    async fn test_extract_text_with_image_extension_fallback() {
        let ocr_service = OcrService::new();
        let temp_file = NamedTempFile::with_suffix(".png").unwrap();
        fs::write(temp_file.path(), "fake image data").unwrap();

        let result = ocr_service
            .extract_text(temp_file.path().to_str().unwrap(), "unknown/type")
            .await;

        assert!(
            result.is_err(),
            "invalid image data should not be extracted successfully"
        );
    }
}