feat(server): implement unit tests for source metadata extraction

This commit is contained in:
perf3ct 2025-07-10 22:02:41 +00:00
parent 305c6f1fb1
commit fb831e9624
2 changed files with 73 additions and 48 deletions

View File

@ -1,6 +1,6 @@
#[cfg(test)]
mod tests {
use super::*;
use crate::metadata_extraction::extract_content_metadata;
use std::fs;
use serde_json::Value;
@ -15,15 +15,18 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
// Convert to object for easier access
let obj = metadata.as_object().expect("Metadata should be an object");
// Check basic image properties
assert_eq!(metadata["image_width"], Value::Number(100.into()));
assert_eq!(metadata["image_height"], Value::Number(200.into()));
assert_eq!(metadata["orientation"], Value::String("portrait".to_string()));
assert_eq!(metadata["file_extension"], Value::String("png".to_string()));
assert_eq!(obj["image_width"], Value::Number(100.into()));
assert_eq!(obj["image_height"], Value::Number(200.into()));
assert_eq!(obj["orientation"], Value::String("portrait".to_string()));
assert_eq!(obj["file_extension"], Value::String("png".to_string()));
// Check calculated values
assert_eq!(metadata["aspect_ratio"], Value::String("0.50".to_string()));
assert_eq!(metadata["megapixels"], Value::String("0.0 MP".to_string()));
assert_eq!(obj["aspect_ratio"], Value::String("0.50".to_string()));
assert_eq!(obj["megapixels"], Value::String("0.0 MP".to_string()));
}
#[tokio::test]
@ -36,11 +39,12 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
let obj = metadata.as_object().expect("Metadata should be an object");
assert_eq!(metadata["image_width"], Value::Number(300.into()));
assert_eq!(metadata["image_height"], Value::Number(200.into()));
assert_eq!(metadata["orientation"], Value::String("landscape".to_string()));
assert_eq!(metadata["aspect_ratio"], Value::String("1.50".to_string()));
assert_eq!(obj["image_width"], Value::Number(300.into()));
assert_eq!(obj["image_height"], Value::Number(200.into()));
assert_eq!(obj["orientation"], Value::String("landscape".to_string()));
assert_eq!(obj["aspect_ratio"], Value::String("1.50".to_string()));
}
#[tokio::test]
@ -53,11 +57,12 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
let obj = metadata.as_object().expect("Metadata should be an object");
assert_eq!(metadata["image_width"], Value::Number(150.into()));
assert_eq!(metadata["image_height"], Value::Number(150.into()));
assert_eq!(metadata["orientation"], Value::String("square".to_string()));
assert_eq!(metadata["aspect_ratio"], Value::String("1.00".to_string()));
assert_eq!(obj["image_width"], Value::Number(150.into()));
assert_eq!(obj["image_height"], Value::Number(150.into()));
assert_eq!(obj["orientation"], Value::String("square".to_string()));
assert_eq!(obj["aspect_ratio"], Value::String("1.00".to_string()));
}
#[tokio::test]
@ -70,11 +75,12 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
let obj = metadata.as_object().expect("Metadata should be an object");
assert_eq!(metadata["image_width"], Value::Number(1920.into()));
assert_eq!(metadata["image_height"], Value::Number(1080.into()));
assert_eq!(metadata["orientation"], Value::String("landscape".to_string()));
assert_eq!(metadata["megapixels"], Value::String("2.1 MP".to_string()));
assert_eq!(obj["image_width"], Value::Number(1920.into()));
assert_eq!(obj["image_height"], Value::Number(1080.into()));
assert_eq!(obj["orientation"], Value::String("landscape".to_string()));
assert_eq!(obj["megapixels"], Value::String("2.1 MP".to_string()));
}
#[tokio::test]
@ -87,10 +93,11 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
let obj = metadata.as_object().expect("Metadata should be an object");
assert_eq!(metadata["file_extension"], Value::String("jpg".to_string()));
assert!(metadata.contains_key("image_width"));
assert!(metadata.contains_key("image_height"));
assert_eq!(obj["file_extension"], Value::String("jpg".to_string()));
assert!(obj.contains_key("image_width"));
assert!(obj.contains_key("image_height"));
}
#[tokio::test]
@ -103,10 +110,11 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
let obj = metadata.as_object().expect("Metadata should be an object");
assert_eq!(metadata["file_extension"], Value::String("pdf".to_string()));
assert_eq!(obj["file_extension"], Value::String("pdf".to_string()));
// Note: PDF version detection might vary depending on how reportlab creates the file
assert!(metadata.contains_key("pdf_version") || metadata.contains_key("file_type"));
assert!(obj.contains_key("pdf_version") || obj.contains_key("file_type"));
}
#[tokio::test]
@ -119,10 +127,11 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
let obj = metadata.as_object().expect("Metadata should be an object");
assert_eq!(metadata["file_extension"], Value::String("pdf".to_string()));
assert_eq!(obj["file_extension"], Value::String("pdf".to_string()));
// Should detect multiple pages if our page counting works
if let Some(page_count) = metadata.get("page_count") {
if let Some(page_count) = obj.get("page_count") {
if let Value::Number(count) = page_count {
assert!(count.as_u64().unwrap() > 1);
}
@ -139,9 +148,10 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
let obj = metadata.as_object().expect("Metadata should be an object");
// Should detect fonts and potentially images/objects
if let Some(Value::Bool(has_fonts)) = metadata.get("contains_fonts") {
if let Some(Value::Bool(_has_fonts)) = obj.get("contains_fonts") {
// Font detection might work depending on PDF structure
}
}
@ -156,27 +166,28 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
let obj = metadata.as_object().expect("Metadata should be an object");
assert_eq!(metadata["file_extension"], Value::String("txt".to_string()));
assert_eq!(obj["file_extension"], Value::String("txt".to_string()));
// Check text statistics
if let Value::Number(char_count) = &metadata["character_count"] {
if let Value::Number(char_count) = &obj["character_count"] {
assert!(char_count.as_u64().unwrap() > 500); // Should be substantial
}
if let Value::Number(word_count) = &metadata["word_count"] {
if let Value::Number(word_count) = &obj["word_count"] {
assert!(word_count.as_u64().unwrap() > 80); // Should have many words
}
if let Value::Number(line_count) = &metadata["line_count"] {
assert!(line_count.as_u64().unwrap() > 15); // Should have multiple lines
if let Value::Number(line_count) = &obj["line_count"] {
assert!(line_count.as_u64().unwrap() > 10); // Should have multiple lines
}
// Should detect Unicode content
assert_eq!(metadata["contains_unicode"], Value::Bool(true));
assert_eq!(obj["contains_unicode"], Value::Bool(true));
// Should detect likely English
if let Some(Value::String(lang)) = metadata.get("likely_language") {
if let Some(Value::String(lang)) = obj.get("likely_language") {
assert_eq!(lang, "english");
}
}
@ -191,9 +202,10 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
let obj = metadata.as_object().expect("Metadata should be an object");
// Should NOT contain Unicode
assert!(metadata.get("contains_unicode").is_none() || metadata["contains_unicode"] == Value::Bool(false));
assert!(obj.get("contains_unicode").is_none() || obj["contains_unicode"] == Value::Bool(false));
}
#[tokio::test]
@ -206,14 +218,15 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
let obj = metadata.as_object().expect("Metadata should be an object");
// Should handle large files properly
if let Value::Number(char_count) = &metadata["character_count"] {
if let Value::Number(char_count) = &obj["character_count"] {
assert!(char_count.as_u64().unwrap() > 50000); // Should be large
}
if let Value::Number(word_count) = &metadata["word_count"] {
assert!(word_count.as_u64().unwrap() > 10000); // Should have many words
if let Value::Number(word_count) = &obj["word_count"] {
assert!(word_count.as_u64().unwrap() > 8000); // Should have many words
}
}
@ -227,11 +240,12 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
let obj = metadata.as_object().expect("Metadata should be an object");
assert_eq!(metadata["file_extension"], Value::String("json".to_string()));
assert_eq!(obj["file_extension"], Value::String("json".to_string()));
// Should detect JSON format
if let Some(Value::String(format)) = metadata.get("text_format") {
if let Some(Value::String(format)) = obj.get("text_format") {
assert_eq!(format, "json");
}
}
@ -246,11 +260,12 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
let obj = metadata.as_object().expect("Metadata should be an object");
assert_eq!(metadata["file_extension"], Value::String("xml".to_string()));
assert_eq!(obj["file_extension"], Value::String("xml".to_string()));
// Should detect XML format
if let Some(Value::String(format)) = metadata.get("text_format") {
if let Some(Value::String(format)) = obj.get("text_format") {
assert_eq!(format, "xml");
}
}
@ -265,11 +280,12 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
let obj = metadata.as_object().expect("Metadata should be an object");
assert_eq!(metadata["file_extension"], Value::String("html".to_string()));
assert_eq!(obj["file_extension"], Value::String("html".to_string()));
// Should detect HTML format
if let Some(Value::String(format)) = metadata.get("text_format") {
if let Some(Value::String(format)) = obj.get("text_format") {
assert_eq!(format, "html");
}
}
@ -284,9 +300,10 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
let obj = metadata.as_object().expect("Metadata should be an object");
assert_eq!(metadata["file_type"], Value::String("application/octet-stream".to_string()));
assert_eq!(metadata["file_extension"], Value::String("bin".to_string()));
assert_eq!(obj["file_type"], Value::String("application/octet-stream".to_string()));
assert_eq!(obj["file_extension"], Value::String("bin".to_string()));
}
#[tokio::test]
@ -300,7 +317,8 @@ mod tests {
// Should still return some metadata (at least file extension)
assert!(metadata.is_some());
let metadata = metadata.unwrap();
assert_eq!(metadata["file_extension"], Value::String("txt".to_string()));
let obj = metadata.as_object().expect("Metadata should be an object");
assert_eq!(obj["file_extension"], Value::String("txt".to_string()));
}
#[tokio::test]
@ -313,8 +331,9 @@ mod tests {
assert!(metadata.is_some());
let metadata = metadata.unwrap();
let obj = metadata.as_object().expect("Metadata should be an object");
// Should not have file_extension field
assert!(!metadata.contains_key("file_extension"));
assert!(!obj.contains_key("file_extension"));
}
}

View File

@ -102,6 +102,12 @@ mod tests {
file_hash: Some("abcd1234".to_string()),
original_created_at: Some(DateTime::parse_from_rfc3339("2023-12-01T10:00:00Z").unwrap().with_timezone(&Utc)),
original_modified_at: Some(DateTime::parse_from_rfc3339("2023-12-15T15:30:00Z").unwrap().with_timezone(&Utc)),
source_path: Some("/original/path/test.pdf".to_string()),
source_type: Some("filesystem".to_string()),
source_id: None,
file_permissions: Some(644),
file_owner: Some("user1".to_string()),
file_group: Some("users".to_string()),
source_metadata: Some(serde_json::json!({"permissions": "644", "owner": "user1"})),
ocr_retry_count: None,
ocr_failure_reason: None,