320 lines
13 KiB
Rust
320 lines
13 KiB
Rust
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use std::fs;
|
|
use serde_json::Value;
|
|
|
|
#[tokio::test]
|
|
async fn test_image_metadata_extraction_portrait() {
|
|
let image_data = fs::read("test_files/portrait_100x200.png").expect("Failed to read portrait test image");
|
|
|
|
let metadata = extract_content_metadata(&image_data, "image/png", "portrait_100x200.png")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
// Check basic image properties
|
|
assert_eq!(metadata["image_width"], Value::Number(100.into()));
|
|
assert_eq!(metadata["image_height"], Value::Number(200.into()));
|
|
assert_eq!(metadata["orientation"], Value::String("portrait".to_string()));
|
|
assert_eq!(metadata["file_extension"], Value::String("png".to_string()));
|
|
|
|
// Check calculated values
|
|
assert_eq!(metadata["aspect_ratio"], Value::String("0.50".to_string()));
|
|
assert_eq!(metadata["megapixels"], Value::String("0.0 MP".to_string()));
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_image_metadata_extraction_landscape() {
|
|
let image_data = fs::read("test_files/landscape_300x200.png").expect("Failed to read landscape test image");
|
|
|
|
let metadata = extract_content_metadata(&image_data, "image/png", "landscape_300x200.png")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
assert_eq!(metadata["image_width"], Value::Number(300.into()));
|
|
assert_eq!(metadata["image_height"], Value::Number(200.into()));
|
|
assert_eq!(metadata["orientation"], Value::String("landscape".to_string()));
|
|
assert_eq!(metadata["aspect_ratio"], Value::String("1.50".to_string()));
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_image_metadata_extraction_square() {
|
|
let image_data = fs::read("test_files/square_150x150.png").expect("Failed to read square test image");
|
|
|
|
let metadata = extract_content_metadata(&image_data, "image/png", "square_150x150.png")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
assert_eq!(metadata["image_width"], Value::Number(150.into()));
|
|
assert_eq!(metadata["image_height"], Value::Number(150.into()));
|
|
assert_eq!(metadata["orientation"], Value::String("square".to_string()));
|
|
assert_eq!(metadata["aspect_ratio"], Value::String("1.00".to_string()));
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_image_metadata_extraction_high_resolution() {
|
|
let image_data = fs::read("test_files/hires_1920x1080.png").expect("Failed to read high-res test image");
|
|
|
|
let metadata = extract_content_metadata(&image_data, "image/png", "hires_1920x1080.png")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
assert_eq!(metadata["image_width"], Value::Number(1920.into()));
|
|
assert_eq!(metadata["image_height"], Value::Number(1080.into()));
|
|
assert_eq!(metadata["orientation"], Value::String("landscape".to_string()));
|
|
assert_eq!(metadata["megapixels"], Value::String("2.1 MP".to_string()));
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_jpeg_metadata_extraction() {
|
|
let image_data = fs::read("test_files/test_image.jpg").expect("Failed to read JPEG test image");
|
|
|
|
let metadata = extract_content_metadata(&image_data, "image/jpeg", "test_image.jpg")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
assert_eq!(metadata["file_extension"], Value::String("jpg".to_string()));
|
|
assert!(metadata.contains_key("image_width"));
|
|
assert!(metadata.contains_key("image_height"));
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_pdf_metadata_extraction_single_page() {
|
|
let pdf_data = fs::read("test_files/single_page_v14.pdf").expect("Failed to read single page PDF");
|
|
|
|
let metadata = extract_content_metadata(&pdf_data, "application/pdf", "single_page_v14.pdf")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
assert_eq!(metadata["file_extension"], Value::String("pdf".to_string()));
|
|
// Note: PDF version detection might vary depending on how reportlab creates the file
|
|
assert!(metadata.contains_key("pdf_version") || metadata.contains_key("file_type"));
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_pdf_metadata_extraction_multipage() {
|
|
let pdf_data = fs::read("test_files/multipage_test.pdf").expect("Failed to read multipage PDF");
|
|
|
|
let metadata = extract_content_metadata(&pdf_data, "application/pdf", "multipage_test.pdf")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
assert_eq!(metadata["file_extension"], Value::String("pdf".to_string()));
|
|
// Should detect multiple pages if our page counting works
|
|
if let Some(page_count) = metadata.get("page_count") {
|
|
if let Value::Number(count) = page_count {
|
|
assert!(count.as_u64().unwrap() > 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_pdf_metadata_with_fonts_and_images() {
|
|
let pdf_data = fs::read("test_files/complex_content.pdf").expect("Failed to read complex PDF");
|
|
|
|
let metadata = extract_content_metadata(&pdf_data, "application/pdf", "complex_content.pdf")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
// Should detect fonts and potentially images/objects
|
|
if let Some(Value::Bool(has_fonts)) = metadata.get("contains_fonts") {
|
|
// Font detection might work depending on PDF structure
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_text_metadata_extraction_comprehensive() {
|
|
let text_data = fs::read("test_files/comprehensive_text.txt").expect("Failed to read comprehensive text");
|
|
|
|
let metadata = extract_content_metadata(&text_data, "text/plain", "comprehensive_text.txt")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
assert_eq!(metadata["file_extension"], Value::String("txt".to_string()));
|
|
|
|
// Check text statistics
|
|
if let Value::Number(char_count) = &metadata["character_count"] {
|
|
assert!(char_count.as_u64().unwrap() > 500); // Should be substantial
|
|
}
|
|
|
|
if let Value::Number(word_count) = &metadata["word_count"] {
|
|
assert!(word_count.as_u64().unwrap() > 80); // Should have many words
|
|
}
|
|
|
|
if let Value::Number(line_count) = &metadata["line_count"] {
|
|
assert!(line_count.as_u64().unwrap() > 15); // Should have multiple lines
|
|
}
|
|
|
|
// Should detect Unicode content
|
|
assert_eq!(metadata["contains_unicode"], Value::Bool(true));
|
|
|
|
// Should detect likely English
|
|
if let Some(Value::String(lang)) = metadata.get("likely_language") {
|
|
assert_eq!(lang, "english");
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_text_metadata_extraction_ascii_only() {
|
|
let text_data = fs::read("test_files/ascii_only.txt").expect("Failed to read ASCII text");
|
|
|
|
let metadata = extract_content_metadata(&text_data, "text/plain", "ascii_only.txt")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
// Should NOT contain Unicode
|
|
assert!(metadata.get("contains_unicode").is_none() || metadata["contains_unicode"] == Value::Bool(false));
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_text_metadata_extraction_large_file() {
|
|
let text_data = fs::read("test_files/large_text.txt").expect("Failed to read large text");
|
|
|
|
let metadata = extract_content_metadata(&text_data, "text/plain", "large_text.txt")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
// Should handle large files properly
|
|
if let Value::Number(char_count) = &metadata["character_count"] {
|
|
assert!(char_count.as_u64().unwrap() > 50000); // Should be large
|
|
}
|
|
|
|
if let Value::Number(word_count) = &metadata["word_count"] {
|
|
assert!(word_count.as_u64().unwrap() > 10000); // Should have many words
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_json_format_detection() {
|
|
let text_data = fs::read("test_files/test_format.json").expect("Failed to read JSON text");
|
|
|
|
let metadata = extract_content_metadata(&text_data, "text/plain", "test_format.json")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
assert_eq!(metadata["file_extension"], Value::String("json".to_string()));
|
|
|
|
// Should detect JSON format
|
|
if let Some(Value::String(format)) = metadata.get("text_format") {
|
|
assert_eq!(format, "json");
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_xml_format_detection() {
|
|
let text_data = fs::read("test_files/test_format.xml").expect("Failed to read XML text");
|
|
|
|
let metadata = extract_content_metadata(&text_data, "text/plain", "test_format.xml")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
assert_eq!(metadata["file_extension"], Value::String("xml".to_string()));
|
|
|
|
// Should detect XML format
|
|
if let Some(Value::String(format)) = metadata.get("text_format") {
|
|
assert_eq!(format, "xml");
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_html_format_detection() {
|
|
let text_data = fs::read("test_files/test_format.html").expect("Failed to read HTML text");
|
|
|
|
let metadata = extract_content_metadata(&text_data, "text/plain", "test_format.html")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
assert_eq!(metadata["file_extension"], Value::String("html".to_string()));
|
|
|
|
// Should detect HTML format
|
|
if let Some(Value::String(format)) = metadata.get("text_format") {
|
|
assert_eq!(format, "html");
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_unknown_file_type() {
|
|
let dummy_data = b"This is some random binary data that doesn't match any known format.";
|
|
|
|
let metadata = extract_content_metadata(dummy_data, "application/octet-stream", "unknown.bin")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
assert_eq!(metadata["file_type"], Value::String("application/octet-stream".to_string()));
|
|
assert_eq!(metadata["file_extension"], Value::String("bin".to_string()));
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_empty_file() {
|
|
let empty_data = b"";
|
|
|
|
let metadata = extract_content_metadata(empty_data, "text/plain", "empty.txt")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
// Should still return some metadata (at least file extension)
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
assert_eq!(metadata["file_extension"], Value::String("txt".to_string()));
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_file_without_extension() {
|
|
let text_data = b"Some text content without file extension";
|
|
|
|
let metadata = extract_content_metadata(text_data, "text/plain", "no_extension")
|
|
.await
|
|
.expect("Failed to extract metadata");
|
|
|
|
assert!(metadata.is_some());
|
|
let metadata = metadata.unwrap();
|
|
|
|
// Should not have file_extension field
|
|
assert!(!metadata.contains_key("file_extension"));
|
|
}
|
|
} |