From 59b4eb170cb6d85f579e2b9934f79788f9bba6b5 Mon Sep 17 00:00:00 2001 From: perf3ct Date: Thu, 10 Jul 2025 21:51:30 +0000 Subject: [PATCH] feat(server): show source metadata EVEN better --- src/ingestion/batch_ingest.rs | 9 +- src/metadata_extraction.rs | 6 +- src/metadata_extraction/tests.rs | 320 +++++++++++++++++++++++ src/routes/documents/crud.rs | 9 +- src/scheduling/watcher.rs | 9 +- test_files/ascii_only.txt | 5 + test_files/complex_content.pdf | 80 ++++++ test_files/comprehensive_text.txt | 15 ++ test_files/create_metadata_test_files.py | 266 +++++++++++++++++++ test_files/create_test_image.rs | 42 +++ test_files/hires_1920x1080.png | Bin 0 -> 25539 bytes test_files/landscape_300x200.png | Bin 0 -> 2000 bytes test_files/large_text.txt | 2 + test_files/multipage_test.pdf | 106 ++++++++ test_files/portrait_100x200.png | Bin 0 -> 1423 bytes test_files/sample.html | 13 + test_files/sample.json | 12 + test_files/sample.txt | 8 + test_files/sample.xml | 12 + test_files/single_page_v14.pdf | 68 +++++ test_files/small_50x50.png | Bin 0 -> 135 bytes test_files/square_150x150.png | Bin 0 -> 1525 bytes test_files/test_format.html | 16 ++ test_files/test_format.json | 15 ++ test_files/test_format.xml | 12 + test_files/test_image.jpg | Bin 0 -> 1748 bytes 26 files changed, 1017 insertions(+), 8 deletions(-) create mode 100644 src/metadata_extraction/tests.rs create mode 100644 test_files/ascii_only.txt create mode 100644 test_files/complex_content.pdf create mode 100644 test_files/comprehensive_text.txt create mode 100644 test_files/create_metadata_test_files.py create mode 100644 test_files/create_test_image.rs create mode 100644 test_files/hires_1920x1080.png create mode 100644 test_files/landscape_300x200.png create mode 100644 test_files/large_text.txt create mode 100644 test_files/multipage_test.pdf create mode 100644 test_files/portrait_100x200.png create mode 100644 test_files/sample.html create mode 100644 test_files/sample.json create mode 100644 test_files/sample.txt create mode 100644 test_files/sample.xml create mode 100644 test_files/single_page_v14.pdf create mode 100644 test_files/small_50x50.png create mode 100644 test_files/square_150x150.png create mode 100644 test_files/test_format.html create mode 100644 test_files/test_format.json create mode 100644 test_files/test_format.xml create mode 100644 test_files/test_image.jpg diff --git a/src/ingestion/batch_ingest.rs b/src/ingestion/batch_ingest.rs index 62c20d6..b865dab 100644 --- a/src/ingestion/batch_ingest.rs +++ b/src/ingestion/batch_ingest.rs @@ -230,8 +230,8 @@ async fn process_single_file( user_id: Uuid, db: Database, ) -> Result> { - // Extract file info with metadata - let file_info = extract_file_info_from_path(&path).await?; + // Extract basic file info first + let mut file_info = extract_file_info_from_path(&path).await?; // Skip very large files (> 100MB) if file_info.size > 100 * 1024 * 1024 { @@ -242,6 +242,11 @@ async fn process_single_file( // Read file data let file_data = fs::read(&path).await?; + // Extract content-based metadata + if let Ok(Some(content_metadata)) = crate::metadata_extraction::extract_content_metadata(&file_data, &file_info.mime_type, &file_info.name).await { + file_info.metadata = Some(content_metadata); + } + // Use the unified ingestion service with full metadata support let ingestion_service = DocumentIngestionService::new(db, file_service); diff --git a/src/metadata_extraction.rs b/src/metadata_extraction.rs index 7008410..1397464 100644 --- a/src/metadata_extraction.rs +++ b/src/metadata_extraction.rs @@ -1,6 +1,5 @@ use anyhow::Result; use serde_json::{Map, Value}; -use std::collections::HashMap; /// Extract metadata from file content based on file type pub async fn extract_content_metadata(file_data: &[u8], mime_type: &str, filename: &str) -> Result> { @@ -176,4 +175,7 @@ async fn extract_text_metadata(file_data: &[u8]) -> Result> { } Ok(metadata) -} \ No newline at end of file +} + +#[cfg(test)] +mod tests; \ No newline at end of file diff --git a/src/metadata_extraction/tests.rs b/src/metadata_extraction/tests.rs new file mode 100644 index 0000000..5b016a8 --- /dev/null +++ b/src/metadata_extraction/tests.rs @@ -0,0 +1,320 @@ +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use serde_json::Value; + + #[tokio::test] + async fn test_image_metadata_extraction_portrait() { + let image_data = fs::read("test_files/portrait_100x200.png").expect("Failed to read portrait test image"); + + let metadata = extract_content_metadata(&image_data, "image/png", "portrait_100x200.png") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + // Check basic image properties + assert_eq!(metadata["image_width"], Value::Number(100.into())); + assert_eq!(metadata["image_height"], Value::Number(200.into())); + assert_eq!(metadata["orientation"], Value::String("portrait".to_string())); + assert_eq!(metadata["file_extension"], Value::String("png".to_string())); + + // Check calculated values + assert_eq!(metadata["aspect_ratio"], Value::String("0.50".to_string())); + assert_eq!(metadata["megapixels"], Value::String("0.0 MP".to_string())); + } + + #[tokio::test] + async fn test_image_metadata_extraction_landscape() { + let image_data = fs::read("test_files/landscape_300x200.png").expect("Failed to read landscape test image"); + + let metadata = extract_content_metadata(&image_data, "image/png", "landscape_300x200.png") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + assert_eq!(metadata["image_width"], Value::Number(300.into())); + assert_eq!(metadata["image_height"], Value::Number(200.into())); + assert_eq!(metadata["orientation"], Value::String("landscape".to_string())); + assert_eq!(metadata["aspect_ratio"], Value::String("1.50".to_string())); + } + + #[tokio::test] + async fn test_image_metadata_extraction_square() { + let image_data = fs::read("test_files/square_150x150.png").expect("Failed to read square test image"); + + let metadata = extract_content_metadata(&image_data, "image/png", "square_150x150.png") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + assert_eq!(metadata["image_width"], Value::Number(150.into())); + assert_eq!(metadata["image_height"], Value::Number(150.into())); + assert_eq!(metadata["orientation"], Value::String("square".to_string())); + assert_eq!(metadata["aspect_ratio"], Value::String("1.00".to_string())); + } + + #[tokio::test] + async fn test_image_metadata_extraction_high_resolution() { + let image_data = fs::read("test_files/hires_1920x1080.png").expect("Failed to read high-res test image"); + + let metadata = extract_content_metadata(&image_data, "image/png", "hires_1920x1080.png") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + assert_eq!(metadata["image_width"], Value::Number(1920.into())); + assert_eq!(metadata["image_height"], Value::Number(1080.into())); + assert_eq!(metadata["orientation"], Value::String("landscape".to_string())); + assert_eq!(metadata["megapixels"], Value::String("2.1 MP".to_string())); + } + + #[tokio::test] + async fn test_jpeg_metadata_extraction() { + let image_data = fs::read("test_files/test_image.jpg").expect("Failed to read JPEG test image"); + + let metadata = extract_content_metadata(&image_data, "image/jpeg", "test_image.jpg") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + assert_eq!(metadata["file_extension"], Value::String("jpg".to_string())); + assert!(metadata.contains_key("image_width")); + assert!(metadata.contains_key("image_height")); + } + + #[tokio::test] + async fn test_pdf_metadata_extraction_single_page() { + let pdf_data = fs::read("test_files/single_page_v14.pdf").expect("Failed to read single page PDF"); + + let metadata = extract_content_metadata(&pdf_data, "application/pdf", "single_page_v14.pdf") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + assert_eq!(metadata["file_extension"], Value::String("pdf".to_string())); + // Note: PDF version detection might vary depending on how reportlab creates the file + assert!(metadata.contains_key("pdf_version") || metadata.contains_key("file_type")); + } + + #[tokio::test] + async fn test_pdf_metadata_extraction_multipage() { + let pdf_data = fs::read("test_files/multipage_test.pdf").expect("Failed to read multipage PDF"); + + let metadata = extract_content_metadata(&pdf_data, "application/pdf", "multipage_test.pdf") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + assert_eq!(metadata["file_extension"], Value::String("pdf".to_string())); + // Should detect multiple pages if our page counting works + if let Some(page_count) = metadata.get("page_count") { + if let Value::Number(count) = page_count { + assert!(count.as_u64().unwrap() > 1); + } + } + } + + #[tokio::test] + async fn test_pdf_metadata_with_fonts_and_images() { + let pdf_data = fs::read("test_files/complex_content.pdf").expect("Failed to read complex PDF"); + + let metadata = extract_content_metadata(&pdf_data, "application/pdf", "complex_content.pdf") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + // Should detect fonts and potentially images/objects + if let Some(Value::Bool(has_fonts)) = metadata.get("contains_fonts") { + // Font detection might work depending on PDF structure + } + } + + #[tokio::test] + async fn test_text_metadata_extraction_comprehensive() { + let text_data = fs::read("test_files/comprehensive_text.txt").expect("Failed to read comprehensive text"); + + let metadata = extract_content_metadata(&text_data, "text/plain", "comprehensive_text.txt") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + assert_eq!(metadata["file_extension"], Value::String("txt".to_string())); + + // Check text statistics + if let Value::Number(char_count) = &metadata["character_count"] { + assert!(char_count.as_u64().unwrap() > 500); // Should be substantial + } + + if let Value::Number(word_count) = &metadata["word_count"] { + assert!(word_count.as_u64().unwrap() > 80); // Should have many words + } + + if let Value::Number(line_count) = &metadata["line_count"] { + assert!(line_count.as_u64().unwrap() > 15); // Should have multiple lines + } + + // Should detect Unicode content + assert_eq!(metadata["contains_unicode"], Value::Bool(true)); + + // Should detect likely English + if let Some(Value::String(lang)) = metadata.get("likely_language") { + assert_eq!(lang, "english"); + } + } + + #[tokio::test] + async fn test_text_metadata_extraction_ascii_only() { + let text_data = fs::read("test_files/ascii_only.txt").expect("Failed to read ASCII text"); + + let metadata = extract_content_metadata(&text_data, "text/plain", "ascii_only.txt") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + // Should NOT contain Unicode + assert!(metadata.get("contains_unicode").is_none() || metadata["contains_unicode"] == Value::Bool(false)); + } + + #[tokio::test] + async fn test_text_metadata_extraction_large_file() { + let text_data = fs::read("test_files/large_text.txt").expect("Failed to read large text"); + + let metadata = extract_content_metadata(&text_data, "text/plain", "large_text.txt") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + // Should handle large files properly + if let Value::Number(char_count) = &metadata["character_count"] { + assert!(char_count.as_u64().unwrap() > 50000); // Should be large + } + + if let Value::Number(word_count) = &metadata["word_count"] { + assert!(word_count.as_u64().unwrap() > 10000); // Should have many words + } + } + + #[tokio::test] + async fn test_json_format_detection() { + let text_data = fs::read("test_files/test_format.json").expect("Failed to read JSON text"); + + let metadata = extract_content_metadata(&text_data, "text/plain", "test_format.json") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + assert_eq!(metadata["file_extension"], Value::String("json".to_string())); + + // Should detect JSON format + if let Some(Value::String(format)) = metadata.get("text_format") { + assert_eq!(format, "json"); + } + } + + #[tokio::test] + async fn test_xml_format_detection() { + let text_data = fs::read("test_files/test_format.xml").expect("Failed to read XML text"); + + let metadata = extract_content_metadata(&text_data, "text/plain", "test_format.xml") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + assert_eq!(metadata["file_extension"], Value::String("xml".to_string())); + + // Should detect XML format + if let Some(Value::String(format)) = metadata.get("text_format") { + assert_eq!(format, "xml"); + } + } + + #[tokio::test] + async fn test_html_format_detection() { + let text_data = fs::read("test_files/test_format.html").expect("Failed to read HTML text"); + + let metadata = extract_content_metadata(&text_data, "text/plain", "test_format.html") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + assert_eq!(metadata["file_extension"], Value::String("html".to_string())); + + // Should detect HTML format + if let Some(Value::String(format)) = metadata.get("text_format") { + assert_eq!(format, "html"); + } + } + + #[tokio::test] + async fn test_unknown_file_type() { + let dummy_data = b"This is some random binary data that doesn't match any known format."; + + let metadata = extract_content_metadata(dummy_data, "application/octet-stream", "unknown.bin") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + assert_eq!(metadata["file_type"], Value::String("application/octet-stream".to_string())); + assert_eq!(metadata["file_extension"], Value::String("bin".to_string())); + } + + #[tokio::test] + async fn test_empty_file() { + let empty_data = b""; + + let metadata = extract_content_metadata(empty_data, "text/plain", "empty.txt") + .await + .expect("Failed to extract metadata"); + + // Should still return some metadata (at least file extension) + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + assert_eq!(metadata["file_extension"], Value::String("txt".to_string())); + } + + #[tokio::test] + async fn test_file_without_extension() { + let text_data = b"Some text content without file extension"; + + let metadata = extract_content_metadata(text_data, "text/plain", "no_extension") + .await + .expect("Failed to extract metadata"); + + assert!(metadata.is_some()); + let metadata = metadata.unwrap(); + + // Should not have file_extension field + assert!(!metadata.contains_key("file_extension")); + } +} \ No newline at end of file diff --git a/src/routes/documents/crud.rs b/src/routes/documents/crud.rs index a211e2f..3fd1551 100644 --- a/src/routes/documents/crud.rs +++ b/src/routes/documents/crud.rs @@ -78,7 +78,7 @@ pub async fn upload_document( use crate::models::FileIngestionInfo; use chrono::Utc; - let file_info = FileIngestionInfo { + let mut file_info = FileIngestionInfo { path: format!("upload/{}", filename), // Virtual path for web uploads name: filename.clone(), size: data.len() as i64, @@ -90,9 +90,14 @@ pub async fn upload_document( permissions: None, // Web uploads don't have filesystem permissions owner: Some(auth_user.user.username.clone()), // Uploader as owner group: None, // Web uploads don't have filesystem groups - metadata: None, // Could extract EXIF/PDF metadata in the future + metadata: None, // Will be populated with extracted metadata below }; + // Extract content-based metadata from uploaded file + if let Ok(Some(content_metadata)) = crate::metadata_extraction::extract_content_metadata(&data, &content_type, &filename).await { + file_info.metadata = Some(content_metadata); + } + // Create ingestion service let file_service = FileService::new(state.config.upload_path.clone()); let ingestion_service = DocumentIngestionService::new( diff --git a/src/scheduling/watcher.rs b/src/scheduling/watcher.rs index 61b6e40..00ac8ae 100644 --- a/src/scheduling/watcher.rs +++ b/src/scheduling/watcher.rs @@ -337,8 +337,13 @@ async fn process_file( } } - // Extract file info with metadata - let file_info = extract_file_info_from_path(path).await?; + // Extract basic file info first + let mut file_info = extract_file_info_from_path(path).await?; + + // Extract content-based metadata + if let Ok(Some(content_metadata)) = crate::metadata_extraction::extract_content_metadata(&file_data, &file_info.mime_type, &file_info.name).await { + file_info.metadata = Some(content_metadata); + } // Use the unified ingestion service for consistent deduplication let ingestion_service = DocumentIngestionService::new(db.clone(), file_service.clone()); diff --git a/test_files/ascii_only.txt b/test_files/ascii_only.txt new file mode 100644 index 0000000..d9f3e53 --- /dev/null +++ b/test_files/ascii_only.txt @@ -0,0 +1,5 @@ +Pure ASCII text document without any Unicode characters. +This file contains only standard ASCII characters from the basic set. +Numbers: 0123456789 +Punctuation: .,;:!?'"()-[]{} +All characters should be ASCII-only for testing encoding detection. \ No newline at end of file diff --git a/test_files/complex_content.pdf b/test_files/complex_content.pdf new file mode 100644 index 0000000..71042c3 --- /dev/null +++ b/test_files/complex_content.pdf @@ -0,0 +1,80 @@ +%PDF-1.3 +% ReportLab Generated PDF document http://www.reportlab.com +1 0 obj +<< +/F1 2 0 R /F2 3 0 R /F3 4 0 R +>> +endobj +2 0 obj +<< +/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font +>> +endobj +3 0 obj +<< +/BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font +>> +endobj +4 0 obj +<< +/BaseFont /Courier /Encoding /WinAnsiEncoding /Name /F3 /Subtype /Type1 /Type /Font +>> +endobj +5 0 obj +<< +/Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 8 0 R /Resources << +/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] +>> /Rotate 0 /Trans << + +>> + /Type /Page +>> +endobj +6 0 obj +<< +/PageMode /UseNone /Pages 8 0 R /Type /Catalog +>> +endobj +7 0 obj +<< +/Author (anonymous) /CreationDate (D:20250710214218+00'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20250710214218+00'00') /Producer (ReportLab PDF Library - www.reportlab.com) + /Subject (unspecified) /Title (Complex PDF with Fonts) /Trapped /False +>> +endobj +8 0 obj +<< +/Count 1 /Kids [ 5 0 R ] /Type /Pages +>> +endobj +9 0 obj +<< +/Filter [ /ASCII85Decode /FlateDecode ] /Length 246 +>> +stream +Garp%aUZ01'Euj3^Z$lr70RKbDN26UQ2En"QspCZ"fK-0V7'.H&;JtnEutX^`S4!djY>/-1Ll7*9[\kUD]$a]-(W-gT#W[42^Y:qOp/#=7!qndZ#V1iAYW[K/"S#OYOFENi#\m$3$pO%_hg)82^%7pMPXa0S88Np"d23mBn#h"bUhu6endstream +endobj +xref +0 10 +0000000000 65535 f +0000000073 00000 n +0000000124 00000 n +0000000231 00000 n +0000000343 00000 n +0000000448 00000 n +0000000641 00000 n +0000000709 00000 n +0000001019 00000 n +0000001078 00000 n +trailer +<< +/ID +[<915ab5b109826181e9414e12bf7351a6><915ab5b109826181e9414e12bf7351a6>] +% ReportLab generated PDF document -- digest (http://www.reportlab.com) + +/Info 7 0 R +/Root 6 0 R +/Size 10 +>> +startxref +1414 +%%EOF diff --git a/test_files/comprehensive_text.txt b/test_files/comprehensive_text.txt new file mode 100644 index 0000000..d68bf22 --- /dev/null +++ b/test_files/comprehensive_text.txt @@ -0,0 +1,15 @@ +This is a comprehensive test document for text metadata extraction. + +It contains multiple paragraphs, various types of content, and different characteristics. +Word count: This sentence has exactly seven words counting properly. +Line counting: Each line should be counted separately for accurate statistics. + +Unicode content: café, naïve, résumé, piñata, Zürich, москва, 東京, 🎉✨🔥 +Numbers and mixed content: 123 ABC def456 GHI789 test@example.com + +Special formatting: +- Bulleted lists +- Multiple items +- With various content + +The document ends here with a final paragraph. \ No newline at end of file diff --git a/test_files/create_metadata_test_files.py b/test_files/create_metadata_test_files.py new file mode 100644 index 0000000..085d222 --- /dev/null +++ b/test_files/create_metadata_test_files.py @@ -0,0 +1,266 @@ +#!/usr/bin/env python3 +""" +Create test files for metadata extraction testing. +""" + +import os +import sys +from pathlib import Path + +# Try to import PIL for image creation +try: + from PIL import Image, ImageDraw, ImageFont + from PIL.ExifTags import TAGS + from PIL.ExifTags import GPSTAGS + PIL_AVAILABLE = True +except ImportError: + print("PIL not available, skipping image creation with EXIF") + PIL_AVAILABLE = False + +# Try to import reportlab for PDF creation +try: + from reportlab.pdfgen import canvas + from reportlab.lib.pagesizes import letter, A4 + from reportlab.pdfbase import pdfmetrics + from reportlab.pdfbase.ttfonts import TTFont + REPORTLAB_AVAILABLE = True +except ImportError: + print("reportlab not available, creating simple PDF-like files") + REPORTLAB_AVAILABLE = False + +def create_test_images(): + """Create test images with various properties.""" + if not PIL_AVAILABLE: + print("Skipping image creation - PIL not available") + return + + print("Creating test images...") + + # 1. Portrait image (100x200) + img = Image.new('RGB', (100, 200), color='lightblue') + draw = ImageDraw.Draw(img) + draw.text((10, 50), "Portrait\n100x200", fill='black') + img.save('test_files/portrait_100x200.png') + + # 2. Landscape image (300x200) + img = Image.new('RGB', (300, 200), color='lightgreen') + draw = ImageDraw.Draw(img) + draw.text((50, 50), "Landscape 300x200", fill='black') + img.save('test_files/landscape_300x200.png') + + # 3. Square image (150x150) + img = Image.new('RGB', (150, 150), color='lightyellow') + draw = ImageDraw.Draw(img) + draw.text((25, 50), "Square\n150x150", fill='black') + img.save('test_files/square_150x150.png') + + # 4. High resolution image (1920x1080) + img = Image.new('RGB', (1920, 1080), color='lightcoral') + draw = ImageDraw.Draw(img) + try: + font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 40) + except: + font = ImageFont.load_default() + draw.text((100, 500), "High Resolution\n1920x1080\n2.07 Megapixels", fill='black', font=font) + img.save('test_files/hires_1920x1080.png') + + # 5. Small image (50x50) + img = Image.new('RGB', (50, 50), color='lightgray') + img.save('test_files/small_50x50.png') + + # 6. JPEG with different color mode + img = Image.new('RGB', (200, 200), color='purple') + draw = ImageDraw.Draw(img) + draw.text((50, 50), "JPEG\nTest", fill='white') + img.save('test_files/test_image.jpg', 'JPEG') + + print("Created test images") + +def create_test_pdfs(): + """Create test PDFs with various properties.""" + if not REPORTLAB_AVAILABLE: + print("Creating simple PDF-like files...") + # Create simple files that look like PDF headers + simple_pdfs = [ + ("%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R]\n/Count 1\n>>\nendobj\n3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n/MediaBox [0 0 612 792]\n>>\nendobj\nxref\n0 4\n0000000000 65535 f \n0000000009 00000 n \n0000000074 00000 n \n0000000120 00000 n \ntrailer\n<<\n/Size 4\n/Root 1 0 R\n>>\nstartxref\n179\n%%EOF", "simple_v14.pdf"), + ("%PDF-1.7\n1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R 4 0 R]\n/Count 2\n>>\nendobj\n3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n>>\nendobj\n4 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n>>\nendobj\nxref\n0 5\ntrailer\n<<\n/Size 5\n/Root 1 0 R\n>>\n%%EOF", "multipage_v17.pdf"), + ("%PDF-1.5\n1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n/Linearized true\n>>\nendobj\n2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R]\n/Count 1\n>>\nendobj\n3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n/Font 4 0 R\n/Image 5 0 R\n>>\nendobj\nxref\n0 4\ntrailer\n<<\n/Size 4\n/Root 1 0 R\n>>\n%%EOF", "with_fonts_images.pdf"), + ] + + for content, filename in simple_pdfs: + with open(f'test_files/{filename}', 'wb') as f: + f.write(content.encode('latin1')) + print("Created simple PDF-like files") + return + + print("Creating test PDFs with reportlab...") + + # 1. Single page PDF v1.4 + c = canvas.Canvas('test_files/single_page_v14.pdf', pagesize=letter) + c.setTitle("Single Page Test Document") + c.setAuthor("Test Author") + c.setSubject("Test Subject") + c.setCreator("Python reportlab") + c.setFont("Helvetica", 12) + c.drawString(100, 750, "Single Page PDF Document") + c.drawString(100, 700, "This is a test PDF for metadata extraction.") + c.drawString(100, 650, "It should be detected as PDF version 1.4") + c.save() + + # 2. Multi-page PDF + c = canvas.Canvas('test_files/multipage_test.pdf', pagesize=A4) + c.setTitle("Multi-page Test Document") + # Page 1 + c.setFont("Helvetica", 14) + c.drawString(100, 800, "Page 1 of Multi-page Document") + c.drawString(100, 750, "This document has multiple pages.") + c.showPage() + # Page 2 + c.drawString(100, 800, "Page 2 of Multi-page Document") + c.drawString(100, 750, "Second page content here.") + c.showPage() + # Page 3 + c.drawString(100, 800, "Page 3 - Final Page") + c.drawString(100, 750, "Third and final page.") + c.save() + + # 3. PDF with fonts and complex content + c = canvas.Canvas('test_files/complex_content.pdf', pagesize=letter) + c.setTitle("Complex PDF with Fonts") + c.setFont("Helvetica-Bold", 16) + c.drawString(100, 750, "Document with Multiple Fonts") + c.setFont("Helvetica", 12) + c.drawString(100, 700, "This document contains multiple font types.") + c.setFont("Courier", 10) + c.drawString(100, 650, "Some monospace text for variety.") + # Add some graphics/lines + c.line(100, 600, 500, 600) + c.rect(100, 550, 200, 30) + c.save() + + print("Created test PDFs") + +def create_text_files(): + """Create various text files for testing.""" + print("Creating test text files...") + + # 1. Plain text with various content + content = """This is a comprehensive test document for text metadata extraction. + +It contains multiple paragraphs, various types of content, and different characteristics. +Word count: This sentence has exactly seven words counting properly. +Line counting: Each line should be counted separately for accurate statistics. + +Unicode content: café, naïve, résumé, piñata, Zürich, москва, 東京, 🎉✨🔥 +Numbers and mixed content: 123 ABC def456 GHI789 test@example.com + +Special formatting: +- Bulleted lists +- Multiple items +- With various content + +The document ends here with a final paragraph.""" + + with open('test_files/comprehensive_text.txt', 'w', encoding='utf-8') as f: + f.write(content) + + # 2. JSON format text + json_content = """{ + "document": { + "title": "Test JSON Document", + "type": "metadata_test", + "properties": { + "word_count": 25, + "format": "json", + "encoding": "utf-8" + }, + "content": [ + "This JSON should be detected as JSON format", + "It contains structured data in JSON format" + ] + } +}""" + + with open('test_files/test_format.json', 'w') as f: + f.write(json_content) + + # 3. XML format text + xml_content = """ + + + XML Test Document + xml + 15 + + +
This XML document should be detected as XML format.
+
It contains structured markup for testing.
+
+
""" + + with open('test_files/test_format.xml', 'w') as f: + f.write(xml_content) + + # 4. HTML format text + html_content = """ + + + + HTML Test Document + + +

HTML Test Page

+

This document should be detected as HTML format.

+

It contains HTML markup and structure.

+
    +
  • List item one
  • +
  • List item two
  • +
+ +""" + + with open('test_files/test_format.html', 'w') as f: + f.write(html_content) + + # 5. Large text file for performance testing + large_content = "This is a large text file for testing performance. " * 1000 + large_content += "\nEnd of large file with final line." + + with open('test_files/large_text.txt', 'w') as f: + f.write(large_content) + + # 6. ASCII-only text + ascii_content = """Pure ASCII text document without any Unicode characters. +This file contains only standard ASCII characters from the basic set. +Numbers: 0123456789 +Punctuation: .,;:!?'"()-[]{} +All characters should be ASCII-only for testing encoding detection.""" + + with open('test_files/ascii_only.txt', 'w') as f: + f.write(ascii_content) + + print("Created test text files") + +def main(): + """Create all test files.""" + # Ensure test_files directory exists + os.makedirs('test_files', exist_ok=True) + + print("Creating test files for metadata extraction testing...") + + create_text_files() + create_test_images() + create_test_pdfs() + + print("\nAll test files created successfully!") + print("Files created in test_files/ directory:") + + # List all created files + test_files = sorted(Path('test_files').glob('*')) + for file_path in test_files: + if file_path.is_file() and not file_path.name.endswith('.py'): + size = file_path.stat().st_size + print(f" {file_path.name} ({size} bytes)") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_files/create_test_image.rs b/test_files/create_test_image.rs new file mode 100644 index 0000000..031b6bd --- /dev/null +++ b/test_files/create_test_image.rs @@ -0,0 +1,42 @@ +// This is a helper script to create test images +use image::{ImageBuffer, Rgb, DynamicImage}; +use std::path::Path; + +pub fn create_test_images() -> Result<(), Box> { + // Create a simple 100x200 RGB image (portrait) + let mut img = ImageBuffer::new(100, 200); + for (x, y, pixel) in img.enumerate_pixels_mut() { + let r = (x * 255 / 100) as u8; + let g = (y * 255 / 200) as u8; + let b = 128; + *pixel = Rgb([r, g, b]); + } + + let dynamic_img = DynamicImage::ImageRgb8(img); + dynamic_img.save("test_files/sample_portrait.png")?; + + // Create a simple 300x200 RGB image (landscape) + let mut img2 = ImageBuffer::new(300, 200); + for (x, y, pixel) in img2.enumerate_pixels_mut() { + let r = 255 - (x * 255 / 300) as u8; + let g = (y * 255 / 200) as u8; + let b = (x + y) as u8 % 255; + *pixel = Rgb([r, g, b]); + } + + let dynamic_img2 = DynamicImage::ImageRgb8(img2); + dynamic_img2.save("test_files/sample_landscape.png")?; + + // Create a square image 150x150 + let mut img3 = ImageBuffer::new(150, 150); + for (x, y, pixel) in img3.enumerate_pixels_mut() { + let distance = ((x as i32 - 75).pow(2) + (y as i32 - 75).pow(2)) as f32; + let intensity = (255.0 * (1.0 - distance / (75.0 * 75.0))).max(0.0) as u8; + *pixel = Rgb([intensity, 0, 255 - intensity]); + } + + let dynamic_img3 = DynamicImage::ImageRgb8(img3); + dynamic_img3.save("test_files/sample_square.png")?; + + Ok(()) +} \ No newline at end of file diff --git a/test_files/hires_1920x1080.png b/test_files/hires_1920x1080.png new file mode 100644 index 0000000000000000000000000000000000000000..613ff9f8db613a1b93db8c4479cf4e15ef67ce86 GIT binary patch literal 25539 zcmeIacT|&Uv@VY0jAb0a5tLCt#$jk8pweq}9Bc@Ph)B&S0#YNrCb6R^pn#&ZFbE=D zdQTi_QX{>F&^v?xArMmT{&dbdcb$9IZ~fN&^B&h?7L1VO``&lIdq2;!pZz_)Xmp1E z#{)m|@$vDWJNxG)KECa@`S`xq*zq0wi(}puVLm>)&~tyDyzC!8Pw|U?(RF8u?)anf zhoh@QDY8|TC!X58x%%6?q+YA*7cLClKo>MDN>{k}Mp0ZN!y2U9eh~`m-MdTndcEwA z$$!UenK|qDN$6N8}h@iJGLA^zTf`!`I~U*b z!?!&7tzLX92jAeqH<*M}!*6i$8*E16;2VAT#*&aY_{Ng{hqI(haY0*xTkk4ZN$at1 zxw41N%x$!tFMo~+a_Gy?B`*Ep%5KaqN$kr-e~uY@7QQU;;hN^l9t2T-$UAr2{ptM2 zk9SZUC)dF=?B)cO>${cUzevNAZfvgg`j4m+kG=Dn+TEo6QVQp-TIAf8>=qapQS5ex zIql)g{RG8&l zt*Rn-N_4$MM3_X?WbC=>hpMzdAAB z=`i^5yg|w00CSPWAlbbC%f;4qJ|sfHQl-+Wq@peY#ghO2%Qf8R!@6o#VW^csG`B@F zmAI#cg@ns>sFu-rh3{8B+^OHEgu^cMX9olX)Yghy79Hp=nv=S>of^E{Fq3|UCiphU zVN(Czwie5x!)v#OCR<_z#VvWJR~Gsi=dW~Eh^o0#N-)MJPoBSYY3k|IzildPkKP(C zdULwStuLRO7IWotCDC_bJ}WC#+jm~)*6@jQ=bH8@XcaoE!ivzbtPA^(B^z(9g;@KV zSoAfUWe%;ccG$z8%E`(qD$#n&eF(k_t)fT&YJY#Ny4=U5w*s}Ox-oU@Dcb^Bh+9|h z$JsUg*O1EYI4^$~x87|fc;xCu3$wkvk@ER@pT53a2Ydd-_KD|mrVoAQd|4c(pmUY4S6#p>^wa--8-Ut+0`=QM?AHj)xMp_q8&DnEtni z^6+?FW%a2^Zc`klH7ilf{Gw&KraH4o)rD)fIEdGwRchCVWzPL5;o83%W>KIvIsQBM zC@JN4Qpyb%+aVX1ZuY=FUX*>KLL-#nFcYp$C> zWDdbnayIovd>THzo2u%h(QE@Io*U~u4igOcweAy|d)zmA?FX5}b#|C~9bRuZ4b>ud zk0UPl@?8x?wUHehK3SC;A~;ro9#Y}@$u1xdfJ;Cd*u&aeg7BUgPu*+HMc!eklFPH zztQB?6m!PJoBQDo@&YF;%ll^6;fkc;7N0p}n-aDQ{Am(xb$K}4l7Up z39A^xV>%KN{2$zHZ^typ@2AC~4`{8jj@g~|p$%zDO3KN}rM*AuLQdR>Q%Aujye+>s zgP9`pYBy>15=LS=>+)#{YHuCMkOtr!78K;;HvZWjps#Nt?DnbQwF;KC9lhC)p3vPJ z$M(|L#yf(foSPb3SQ-V8{pFG|@ z-?xInki_xp1qMo-^B*1kFHN;l^;Vjl-kd>m(@@z>t>MWxP2Cq!Ra{msaW(ZJUf9s3 z+K(iv-)1$}Z5Y6&_ZlDknxn->>`w&?)DIJ+kg(yO+7Uk1tTfq@`7Y~e7~U7|oT2G{ zIpUz8ZGgR`A~$TOkh88-nR$gTN`uEo{e;p~ivfE`hNx{GDr--%9NP=+0=RZWMhj08E}v14Y^c2wjqZoj_y%&yA&z7Nx0$pB-GZr?6}%uZ#e^% z;Ad;2yy-K&)Us0uw>A`HB5Zc~ir#XA3@`YfA3`=g>$6S}s*f=ocft;Q7|Gwt@?ZYv zt)YF_nZ0Nz_LnYQ(v8ZMP@a6R$W8tG)SQ;1W@s`5{mHH=(X4?A=KGw%Tl)6)E^G4} zlrs_u^bcECkUDr&L29BWO1kSp^^;<^0b6WZh3J7L2c?55vTCwN6l@}Q%MV;8&GBn$ zYIYqfAJH4E)I8XjdqA&}D-?#&+ZbOjQS+;QSdGoLvq&P3TwVW~^V*21+L z;`8ZUGV!;rilDOnXe^mk=N|Ws)`Yex8o&SV2l4|dy-wFjt*fayr{`_===|mBSYBSq$+9Nz4x8k z_;^kIsS4k0yeD;S!743v@5`5JOQRuKIrM~&8~nt{=Vy%#IhzG!qSs()v4T~%*0qTo z8#-O&p#QIWIT;x&>d7!d&4l#4H_>^036EWA!PeHA2xChFR^Fkxo})Gf7GpQJ`ObQON)r=7!nOb3aMg!(tohAJSbrux zogv||Ti*n>Ipj+AfrCi{VzYM}x*Vl1cTblcijIy!a-WE?mXmkH!6KuxI*O7wySi=X zpT2OjDh^(1l*i!K`r1Q7WkaEUJ7im`x^!j8h6Wb7sQ-2*CS~Ay;bkBvJ3+QIZDx`GMr~o5t4e_kxb~k2UcVuQ_}TZK)^L8r@2!JM$gPI3*$^q zva8FcaOHk9c&|TAR{Mxp-ciTad*(JipX1kG|MJ4j4AN5%$*x`!Pk}25U+7Z~O@1g|^62jtDWd)_{gQom#y#*85$;s>f@7FBaJYNoy_%^+y9rke`E(7% z9iQu?l;Usw!A==-e5sI`k&)rJ9A)rQEKK#m>V(G_B#*kfT_x^VD9u86Z6b!cE++=8bo#a;Q2=5SVzPwA`d|!9P92>yMD78Oe>vpy4QjaT!V_d1Wn$9*MPLzgML&un-h7cgA<>j?>CEyo)d#2}5@7P<788pL#~u4Mbn4GomiZ)VF! zzf4c3YHrT2S4)sqjIMh@(PJ@o3SA#-%*fD2`~Q81>EL=Fe%>l2mFH0zSYL?gnr$4Z zwi}E8_>mXPq?9Y>ODWOtdhDZWM(2!(3(=+r@*8crVdScs0y0wQ=1@wT+>I6H zWto1Z)j7Y<7~W#D@>m=muZ-ssxXUrI=Dh*KAAtBNW#|=cyv?%{^;@DpQ?%LUH?v2S zR2j&KS9Ni2?Td_2Ur0|A2>5ROL%8@D-arD!wSHcw8T+4+rQzk7YwH&-Dk3WK;)M-& z`VBQs*|DwIZ9_`xL29bJ?_wo0yQCFA{r6yUGBCgRtKyCkXeT*2JmLLL7HYCJ`$F&c z3C~}y)f76pwRhMFin#V2pEM`2hT=87dUB}=1oRaXldG2Hc04Xq-JQL_-hlg-V6PB( z(+PgQc)d%xklYgc3bu1s(|S;x)l<7f6bH+F{BSrYeSOH~Kr*JXEWD~ajO*ikPFwcJ z7;7&i=?efoLMxU2s)p2ywI(I%(rO2M99izj5jiC^A%TOE2tGQ(-5d+KzdHAMq+SF) zI87d(OCubVpf1lBpn~VJvvh;bH#V&poPW94OMl}xq&aqHJCbMtcDy4k6dM^lp{3#M z8M8bbk7HO?#*h%TA^pe3&wJ5w$H*kKciY>+!Jh7%u;h2i`OE)A=mjpli4chGD7J4x z872hIJ>8!dlHO2HC#hJY9GbB!lWxrokCSgQ7AfT1tH*rCKdoazyl0kYvrC4Snw70q zmXl=*_4|?02P@q005SI{x|2nKqXkP2S%jv!u?1u{sK4 z^a%i>D`uR{0SpiRli{w#Z`tZF6ufdRd*LQd010og&G0eB4fRa@^NnwMti5_>((1{^ z8lRv#R0YmupDOnalkk4#-%s<9QPtcCnu-^~uLHEsewTG2{wRQYKU(EVtH;~-KHOQweHa(1x;jh%Rl7LboofK^&f#m-u1?9pj`TQDJhUzIz{~kpZiPx zbNcN0^MRiIg@94BT|Sm=-BtqvRU=X8Vo7gg%N;u3 z0sOTpa5NUypDQRJSeCB>h82?WcBZh>Vr&D+eU9YTM%D^ku`jJv@yHRQ_^fIR<;Sm>$vv5cc;XTZ*}TQ zt{v;K-JIeVO@wQFlWpZUF(spKLi1gyTbmE)=*?3#PS2^w+jurW^(I)9uPO}`yEV47 zC@Cmt-!;{^9^OBNZ0(0*igL7}ptvCZ@ADM3H#^ZhEj1$Z<1P|*@^wqaLF7+{F=3J# zT;~?+qQiQ6sph#3)(BGOTPitsn>$wsa?IuwIU-Ek@y1hNn*lvvDixa_3OH&mzksyX zZZN>4<+^}^M144ZYSj^Wb&@93q|D{UMvKfVE$20+ z%ib=&ejQ;Gm}o5N&bF$=ZVuhO8@ap;Ct}r!ii4{&=a(4Dw5fQA zKJ3C=RPV{QDs&qNwW;u(ni6^WqR7yg7Ww=bFm5jvlLXZ1rGlkccR&1+^xks<)GCF5 zfC>Szn^B2FlFDAz1U=I;;W{AKRR_PRPktMC4?;gT%T|1I55$RttQ3y@(TGRB-- z7@lo~ha9jdrmiR_mww14=;p{rJZ`0BG(X=GzgZh)#A-=7N9iwg`ji@eF$zC_%#g7S zZC$o3G1=S)WwPf!;7ym)VXTIl#|u_PTCuy3kh|@0=!AN^x?@YAYg=-lwAhuMoHlJ5 zn_a~e&|GN7aS}=Ny46lCW_AR7LaC%XE>Xx(Oywr4sON2$RUsVwpvh*LeM&iaBOsAO zrDNehlvI{dMYEkyYp-DWkU9a0!R3?u8m-r()vd&b+^nPZ^^+$@cZ5$eKYx7im=OS+ z1WKqnoa=5Ur-8MFAdnT{bxm?do+jTMdTWRdnn?{wyWsT6-gto$1e-qf(=VS(u$uze zmJ@sTa_k3+Q6Nu%Oi(LbS?s$2($~oIbkn)APh@J4($ER3l$2n|cA|ApXL|mwCXS8< z#Hn%E9A<$p4KxQo99Pv?hu$4S8icx2huzVVnm#lP2<`XoDbpY(mo<}SJUvk2b^5eN zz{V`4f`HB&9WmCE%Y64J`KFywYhhYq;FLW2Tu#t-7Mdd?2+R6``ki)?=!|xOBQIdA z?vA)Na{D0vZPoG>>&+X(lX%?KSxy=MU}(iL8OK2kUUV+fM3^&G^BAqoS-m%j2&e`T zdaN11nIRpyWs{-Ts-3I)K?Yndz;3>M`ilps40eHQSw5}RgkVqde4kmD;}soExBgV0 zEHA9@+B%h6T$z77G#Q{B*&5FQf$(=DpwJ0xou)Lm!C!*b!<1Xs+w?`mmlY%)zh(8> z#m@4-jE;#Yr4e<1_#rQ(B;Kadg&q&GOnzCKjxk0rV{zlvo5-~WrwsM=p}H_SEBKEP0TgMq-LgcDFjPVlY(?<^ z;U5fIXNT_f`C-~_DGK2ww@=@^QTFCswD}d{`|J9yATX{PPbFIQG#>(KTi5f3YbEH7 z*~X}BpMHt73+W`i`a!R53;Mkj5qHXgC$F~mSzTOeZ+wyG0ctp)ii>lp<;6~3<~}M) zib@1q+Lo63XzAd4kQ^f-(!#|o$YH;wg&hD)bj;CqaTf zI8fk7SX-ET9Xzo#LZ>%(Wk!tqD0g(!uTK9gYJSDc4Fpq7$JSWY7hUz<)}XQIG7nb< z%=h|V#!AW0o!#fFqBNmL2A8hW+v5z-BodF_H%|=u=Sk4bdQjl1p#&IFWs2gw&I21)`Wt! zdB9z@tqE{A=elxi*d@Rjquv_hNvy(AJaPDLP^(sEVkYI7UENAo#YfU{_d&8^)#_)= zTN4PZ(dgElveDmYslq%uAIN^Zlt+$0@^yoijResLqm%f2mF3l zsv7%nXT22r$J1~tqL-uVv*>7VOWF_v?G{WW@_3Ice5JUDHRWdh&diyOe+~khgx8c} zZ+>dd+y!)@tS#Y;H|^kRT_9w@H?jx%i(G;$6-OlYIyXW-!OuJ<|5$hw`ShE%-+6mw zWGsCDaGPaSkl!0Zoaxol4AxA;xpN#XZ;hLOcv$p7{p~GvIemKIrNSKPuhB9g7v{X? zbO}XSS)y{eu@|=x+SsU_h$nXI_E=Q-;W4f8y&94V3&y5tZzWn=N2}WlF-yr&MxmIy ziFv^RZ0;RZ-70Sc8iC=jWDm z%txE;F$=vWyzzU|o#l^94SZ)qTLEGI$;H>;Rg`)CN8By$#@OcS z<1n;U!6XnaA^avq( zmW9|Lj{5VsGH0{VYIb{jUDb368mCEo9e6fIQT?g7m6y>;hS9iesAbVjk54Yri7!kp z)_=Sd6(x~)e4?4kQS81-!_H;zl!-rD>`0akO+@EiXC>?BIm!4evZ?>L>#Xxc`Z_U3 z0h>b~sa0d-K7q4A4*K!OeeRTUP?cgkA|oS-D&Et^(nj-r#S3i~Uyrn;HtZ%qAVr;- zl-^QcH6$*3s@wq)U!G1h3E?Brm)akhvs*$Wh?RjpuJMua?74GMm!e!YM9*G$&9C{W z%&hwO&0ODwMS_l|nj&zJg|aDu=xEpGhK=qfrFvZNaa9Ifk!4z{I4eQy^=`~jIk_1i z3S15Q#wB-(bHcO4SXPx+>qg67^G-9u)_)KF6Fy4wsEp(p6BTS*GE_^C7R`#46-?k- zA26jF327NA@kN&^D4rxD;|hBO^xq+;svwDF3;uxDDch7LBmcQa)8ef5`a=*=lpTlE z>`v>v<*koCfwMOB!9eu|l=~*%c@5x5IsU6{FDiVywEj-1WXsTq+HB(HVw`$z@}lTo zwT-W_R;F?H&Fe5)#}r&Sj&$qV{D^LWBboAkqzM`%upv;l000vT99lrwAF#RzVq?Wi z1!zFkI~2S0Ex)&&2JnsvS}gNt#;XOc@^n|4%cV<Jic z5yh?+mDnI*{enHMd&{o(zSnlHP(tr8P&#oc+#yAK%5|bAXo;GsoK&_0Z-hw{dc>!ly7jRN1vbuNW8ABtlnz#E4jE- zR`k<9?`ioFH^!szOLd}oAt8H{%oqB77NU&n>?WY;%5!K>2~;B&pF8{d@#6x;$LXuj z_N##S4XB08qR<3>kN>S5@C_R6QF{peE?s3o)S8fh-2^_&ijR$Ff9^6zTVPLpgy>F5 zdAitU2Rds>0`$sc2<)a}HxuEi*cS}oqi=wfvtL6 zXyfD#`C6I$;g&TeHgXRjXW4RIJ!IkQg5*Ui53*Qy;HsDP=9TQxF@Oaj!H=PlUZhu| z2>Ee+c}iUyxsPf}2ln^aHGVD#5gD_l>>6cq_!H^Tk*x6Gw+yAx&+9Gvz_Ivj#NyVRNAOAPiKtIXuEL8Oh znG2;sAMck-2N(LdxPF$_#a@h245=JWynFXtM;fN3>4~lwP#G`v!Ozd`9k49E*#=^M zOIewT@IoUQb@9?3YKc`Qqpy!nA3mj!1!rOHzNlcGPL=lzaPbSGx~yj|E_)2ux0%!W zgitJm1~XjkA~fG%s8ofkyG4}-@+1epo|eA8E!e@UF$v-bOiWP!>x3soMx%aw?_R@1 zJ*&_lsqPCov+jvsEn>?)yD zIIhk6?9@~(gfqLDiP^PZ-(+)JaHaSjfyQ3u#*fs(e-}dLgoOHAB}-umYG9)r!&p&P zAz`^jNZxNDRCgYLbaNbLG5&emQcRr2&5OZ0(0U1USnMU?4H5v?uIqB{!t(TLO4|L&T+H(T>an$Jv%;lF9>t^I zA5cg%A3*)Cvhjs&@U`gQcE=%o2Dts~tf9uhXOxYN9u%Wr(ZCml50 z3fo|MQIU(Cpmw1%quo#)8WpBv`_?!stp+{@_}7tg>C$yUTB4>GAdoPtm1}^&{Z|@f>g#!$ev41!&AVBi zP-D_`H*T;c_H6T6)FB!Z{xb!rliW6$R8Z3FaEwes<>n&TJ3N2f)pYo<@5B`Ty;BFW zGjb#@1L1WJ(34mZiLtptP9=g+45id=ta)`+#J$|9_vgV^?m(`n+RVY;($dP{Ss`Qg ziYO*)sH&ng%XR9`RuBt4xDwpmm^YA=xr=>{JwEAv>aOb(l>vDkU>>3obT*O<2h&z( z8A>A7yG5t=?nO2S3;Y z3VUZ3^Yp1Yi9Lbqs=q0A18D4P(k5k*&6W+l6dC1630@IlF)}r?p~2BFLkNcUf}J4A zYpoCIg5R4|8L2<$H;y?Pnhf>`>*Y_9qF$Z{@>4)0KwINB4$*$^`~BZ6a07I~By8pt z6mfkt#+_J}$P#PC$QZI_Qe%oiHOSd1lITCW=|?d$-wL0_nth^FH|r&=Pt~1@x156? zz>dH)P81_qj4BP`Nt10thr~uipUxz{-WW0`1x_u--B?_Qd9{Y zc`)R}f#8R9%_3hENhx7K3O{XW8PVJfE+lJZ=T5Kzb+4?Vo8v@KA6TGqaEU|Jzd?UQ z$GZ!mG`U$B8Ce-Sn`W|P5w>%O`616*J)M{$*60sY>yWGRQu(|pQzQ9TZE*~5(OdYB4pzOp%M|a-O z<$~D5V^79GA*K_AV`3mTUWEiKf#R;iCa%Vviz+X2G9~XttR^Kr*)BB(H1PB|!Ox&oMQeHOMKLvgapkf~mjPARYm!Q0IbX->eTpOC0=sFr5;^sfblYQ! zQkt5nGv@5y$8#Ag%%LY~z5C?k(BddA7-?8O)9*nj1yu`$=W((q z6>ntw6A1VgA>&F0Xl!nM%b(CHi+AsA1(c~W@`D7ra{X}*YR@?u(x+*3r;19 z=8`?KJKDLo7+qjh4Ao^_sW}#{%#;=J7d6vdnUD`k=1Se5afmj(yS5 zML7B;vDDMvHJI_}XTy9xaASNmUxsn&bveko?f4TcrYNJ=p3t@niT5R2IHsCY^D!gc z+0bPO!_`*(@Pm?>UoXH;G~wvq-A>4o&aHOXjREwSd9qKHwf7CVX9W5u>LVMXz%xZf zbqc(lDl>X!!JGD@fx#QuMt~Qx?~cRs$;r=2#yri2jv8;bz=ijf<4e= zJe-+veaz)d*!j$gAE-b;jEAu5zN_`}78ZzoDl7=Opxj&=QM|vY&vExXJz)nqV}3(m z;TwEG;7z6xY>)fjvQd`Ebe}do9qvj=uDw>(m;nk_Lt`KSvh2;Z1v%NX=gxyna;(s$ zXV*4?O5e_R4<(*eIrrqq#3*7NIjn_EW=wPp(|N!b@NG}TL0ylf;m+~ub(wfLHGLKq zD{5}gf29E-NCYL(Eq1#ZnS1GYub8t5*FPz&%}URGfNyiet4BIG2` z*n-gQ*+EgsKc~{Aekle#A3Z%e2?;NXyz9fE?ug0CIrSy$gNNjy-<%qt13AglAnOz5 zhQ2rl_@z#Syto=40TpPDfIA@ZMUqkuDs^_qhIBd7p*UQ^&!0qaQ5BlBSJ7w3nD z_WFa4f3L#ZsYZXcT~~SKW0X;#TT85L=s{?O1GA5x48b84J{;?_7 zId0zw{Usw%b$b^ijQ=g_0SVxq;5u;G(1sM~UrhLt8cV@!DHC&Kqojn&-LQ|b@oqoI z$eMa>-$aCjkw=#=8PbAPptZhU`6NU5y|phou0l@TZ!*ISG?swjM)~dD^H=9m_d$=C zJ`!R#wmrA}FkLtta2*g563N4EeguWS^$+wA%*Yvwua1pDOkN+$eJaWXt3|eES~~H0 zU@#ynb%&PzS5Z;x)qq~$3#^E_*%pXB58LozRd+($uWlRE^fbnz4R0Kpc3hs!Vt-eu`2`*P*raqh=da|r%#zEbJdiNhY|E*d z5|rRvclII2#F`@@C0B+V%;?h+n%K7&%j_~IX}Vl7`@=%-EI=y-z|jL6EAn%%3cgxW z_qcEv71I&;_#$UBPDq$*LsQ(^coaWMH5DLD@qT524Dw^W{&GfSh=eU6f%0Kjos^7} zE>!UZ<>S`{dEoqnl-a2o0J099rIQ@Qw4?>~U8bJZ{qK#ck~xTUe`2h6i}*#OYapa4y`Tl) zs3VYh5M)p|XTZAGgoT6IeU2R>3b7S9?3a8*eXSehN=}rl@I#Gv;h1e_z<-h~d!Q$c z45j4)G(T*bUTO=7`Gb8NUCHAD@|6t*kdEf8kF5YezY-DD8-yr?DI=p`A$Qug__XI5 zQytY*K2a=gbHz;Z^bN4mkN(C0R+EV6L%~ZY=O>)EO@rBh@BRpUioEI4@8bx?)Yv&lRK=*In@R^1VEWE5j~hvr?$Xc)zS{l>9JU?EgkmmY_l z@pJ2#e6~^{)!d-XzL)l-sc|&q@Sld*&42!+%Yzfe59;W4jv9_z2Veub|K*27*wytC zxtRisq}M7lN!|NJM_SjeM*F zY5U&&o810bRVJB0SRa5~DilHM3EPjfYh%rCe#E34BwUsd9{SrX6LaC!#hyZlw|`x{ znpsZ}xN__F@24*kIm=Cb4<6(prXY8XyHBkyRyC-ECO?c%edhnuA5bMmh=b6hVTy>< z*Fmw_Z;sK_-2>XO7%eVBYQ7BmiuGI$CKoLsO6`3gkyDw!?AI0ak_q#aMk z+~}U|Ql;_#f(o{~*^|RKGCX?np0is=*wTVh@S&HH;1wH6$I%#@bbS%2u6N{P(01S% z`?M)4D&YfX+D{*m19ckwr=!|TM9T(4aD5$c-Jel8EHHTUDvCt~)nMaE>-aY4O+`h{ z<2#UJ6n^Ns;cn8#Qgw`ZZa9)i@VZWyW`Fk1t9fL-@{|&5DQ^jf;{5sN(ow+9Q{23EEUxPD6A=QdD`g_>7-XtPBrNAEzz`$F$?KIUN?e|5ZF z@005ZKVW7)9#+QPu~sZI3!dWPSuYuQjqj%oP=_SC-ki}Dau?H9BQ7Ige=q}zxF2u4 zAi6#yr}wrMm+sYq9}MgJH^PFH``jWMtts+>LL37Fd2YM|^#!)e68w*A6XOmtyM5!9St}T+%&i}f9w*?PBX5P0ZK&{Nw-^o~cCK^w=^>HV^h3%;tvU#61 zmfR2+Pa)wsLV?9@qNwN%C4Sj{9|VI}7@>64hN9zwKzJ%?dG#E9Y0qXugc8yJPw|oq zt`LPd=-)o&Z#;b1J$UlH=w7veIGhzMcAYrdKA~ z z3d|S#_e$VL?ra|&TOUZzphuMsk}+|VXBSZ+z6+MTxiwH|eJm}nZS{?v+PAkVKajB< zRC_@6AfW)q-9t+MlP-C2_89x(t|!SiulD31!fenCBR+7)}A{cf0SbFiMb>h2t2V?cHldswogTEHL%`2>5VOgb1b?lM2cH6z(>_7+@C} zs1xAb8u<0j z`iosRhF_QuvF8iGF=8SN9J3k5sP;7m{imln>z|VRFEcnhHSfM@_-~~4gAHwDkA+Oy zcOv?2ATgh6Mcj>ZUV<|eeo-);Dyc0(FZKt1?cj|a&^VF7tila$83EDTPJNyz)>p$__N61TI=rp+1m7!`< z@@XZ+&~*Fs`>o|puHcDnU!9>ZYlD7}mQ#^)Nc|_*N8o#awo+~K4V~RRk{(8E;o*m8 zD>=hZOHx$ybuo`b>%%!y>O&AG>8S=4jt=``Ug(>2=GZqW@r(B_J*m0p>UpU>M}nQC zfByWN9wF39%AxIvf0|srNGo2Yk+`fgfUjvUI@)s%9vhwKazcD(0PYtUT3T8HNBnlVezIM!!S;m&{nB(2oH(dedi`TmfM-J_i~P4coWn!l{Mx7!_d7B~!~)Pp zDh!a58n>Q_7cU^0(r05;@s+Gu@V)IU|1xzq`u(&tcnJT38@9^Ch^glXF{j*1M|K3E zlc9ToxRSiw-G%ilmRAI$qi?)T>rqB|#5o3g(GP)FrKPnoG%PAI5)xl=-kh#$$LR>} zh!y6A`ISZRd{C(YGHcaPAk=Pc^}N8o{Q!_X89MINJUdtbs+#R3TidIa{j7^<`{02f zLmH9 zC&{l4Ew((}pwqCJmPj`fif`av{j_^hD>+o5PyJKhJ>c6%d zR>Z%y8@zwd%e^95?(>8{BO_zq6--(-GNI?JM}L-RL)5{YuCd^-7}}u^atuz_)!y4y zTSspUD3!S=cy>BYLVQ*-13emqu5vWyM3szBba|E6iUP+YRS)5IS#<7|Un0>OLlkUA2$<5-M%-p^Tb+v%!v%UWEi44DhztPB(7`Pf@l-vo&tC&{0R_;! zr>`4uve@5XrPlMJ_|OOLNOO065e!f6{2XR(ZL0^~WW9~~g5K&xw}IC3iI?EGIb=Hr zt=ch}A5h>k01Gf^16Gw9#EG@#Z3Lf+>r<`vT~^kD%%urwdaaQ01@`m-557<9MC_QO zUUO@srIpQy=m^Zn>_)v`2ey(iTP1~9p*mLvYNBF0W_7>cfT2Xqr&2hsd}bF|L~G1( z|2&pC*j+nT{NFZN0PO0lRU>iLO3O*#NqR2n2jR-eEFnyFV{af&M&mOyxb2<-O1c#R zyT+NddH0`f6v0(b*c54V84`#f0O!zih0xf5Vb9&7(8a4t|7Sll@BRY0=jj2Tt07A2 zkE6%Ovw{{Iz=qe0Cu#v~wf)=PdA3L9!lN+x%#1}CLEJX<$NRUS?a&pbGJO{UooCw1 z9f;8Hb1-{VvwO={{;5WZjBa8oP0Yop;1h>=(O=b!j18+mJE^Yn7uJW| z;HSe2*6A4v8hIdNPD|kumBo@)%=I zHm7H`C3!sw%?Vg;Sa<42>n#-4iSu#5ZBu8QS?d2eDHo(uwmGzT1&v?tp~~O@>!Ec>dE_gP`mEg}Zn#{1{~O-oEJ-nllR%k%<~NKZ98U z#*@vinB;*RYva3?pP)@;uXcbp5VEnY>A>P}R1l*aW~_QCv*7Yk#ILp~5q$f>ZVaYn zgmHuT{q)N`YEnvNFt|=2D10bSXkdW@1j1AaU3&^QZv{g2Lgsq zxLT3~#1Yd?*!oc_*d=0CCTr_pkgE#6Jifla@z{U#u(a`#x$maKrw9Zf267FAP4#SP z_UYL?U)m6uez8184;e+TS=si}FO)ZD;4S-0oL>m*LvQ6Jd^n6N(sld65RYv5>EK_H zQ>t>858DfZgv_F;-o;CvLo__~nG6>Oma!{OmBYtVdEh8gpB%3d5C_l7IwM4a%c>T1 zF`A2fPM_yjFN7iSJ29NWh3ydo3*=)kI~u^~w!-*Nz6R~dL>PFW?t0sqFb&FYp~-mC zbTSMKQPYkPg1m z(Hu1SGRPuU^Rrm}YpPM~YKMW6n}y${%B311DrqhU*EO|rupQdiFghfGzWsJkAy2~Oo5_;vvt9LWR=aMQQk;P!D7=YUUutqvn zw=KbU5P$CcdE^u{R6o=uU9SaU+G=?j4AdZaF+MemfwH0c=Css7t-;;kwb+CDbKT%n zNHM1C@P~|uVi@A%t+pG`V7Lf$Wc<+kYh8;JVAY#Xp1@#iTb^0w$pmdUtz%DQ5;b7* zseIJ)0*oJnrK-|tzE|euk=GSC4lJL6{NcbG4g$07_WkcnVg&X_zKqw^O^0KYM*4y_XZ*8U<$?2Yn@?olthb(Os?GZ#S@*TKWA&jG76(cb{bBRxEuv3gYb z%{D03#S2t4Kq}Y)D6>ZUkBu(FxGLVyXQQ?9C3pp;zq zI}pz-%Jn%{%D8^P6f0R2$XOdeMixNVv)A@>|J2&5aJW(7aKE`V_)6jV4MQ+n2Mqvg zJ@4t)9G2?hR-_^%wf)-@=Bv+xrxv7fSv`4lcs9lQf-?ChrX~4^)moQ`?V~Vs8KYa7 zasg2%Z`;L5Y79SW{x4GnVlLm`yjcl?`XCCAa~%ZJ07&5f4kBp0-SPQi1-w7v5d^8f zNA(&!nxIoA@sR`{7GfnuVpci`-Wq_BzS6yZ7;4ciy3yd(^wLJ_#wUcDbyxT?VMN&U z63ho*S?p)9`imkIY^|?a9*ENclRs%!081Hpy;#KeH{Th9keg+e=^!PxYHbRVV0})N z8u%gKWq&F4;k&+5QL(*rsU1k`Vg-8{@JdV7KIFw3lgL#c4oF?yZene)eAcmEeP%ImQJ literal 0 HcmV?d00001 diff --git a/test_files/landscape_300x200.png b/test_files/landscape_300x200.png new file mode 100644 index 0000000000000000000000000000000000000000..31f85e7e14ff85aa27d35989e384b913d5385022 GIT binary patch literal 2000 zcmeHI`#aMM7+30$l1M4nbKFxWVy+|Vkoze2XU9EXM~uo`hs7!(xy9V(8XjTAnCmu- zLzx^cmo3af4r3UaTXxoeanAFcAKuI7eSdl1&+~lVH^mWRDJmi@A|N0jYHekHM?m1n z6@ES_e3b8}3AX$ogtj)n;T&DON<-Rx;>rzdaHnrPsVIBkLiRfjmN}VrPFPpEHs8+r zc71?{h}ED3xcCu{_Vv#u4{9|sbKE|`0474#jZJ+)SJcw)#tOnx@sjP&KudQfLOt}& zPLLwsqCP8Fx+m59dgh_uF3!E_v%@7BSM>ypAlpYC@uJLypNaqS^R35^ zE{GrX`&WYVuda8KTrRk{!|SZJS*8A4Yn6eVkyCK7XdyzasZBafdTy25u{nhzpf;sO z7!e_ER?&mgIi1|*4k&cGK?$W4G*Z(r)3ulOYmaQV8WUMx)fEtPz)7MfO}(C4790b{ z*QrmF^^BW&Uk-CwZhG=T#gW?mJ|nYZI&R#t)uCOB4>C`};u=-*WdQrfr4jM29CPc@ zrxmRkB)NRaTj$0}?%sI9O4Td0(XtUGqzqAJT-OC^*@7P~b&r5Z^aD;cT1-uWz2`aM zxaJrDSr~1u9H3^1MT$W>*L%aIY@BRuTXVl1Hb%9D?aY_t43rqTb~J3|;2l&Qnyv?a zub#3`R5C(^f{Qi%}h}U;9u$Qn@0XNw#p)mLGxVkziz4d~RAUc)7_6W3Lk zzQQLuG0QTIi=WaD{jr$7bZ9;;N$C6*uYp0?I(=qJf-6A*Tdf|G^I|+(4mmL8Qf!EKcWOuW2S+1c|ghqoZ z15RBQpFX0v_g`H>5aMP(hHV^W8)LiWKZ=dPX0B&WzMNEoX6y`nzr=8enGeozj;~pM z*uTKlIy@-(qoThOy1{Ea_VpMO4)voin_5HACXU-_yG-Sj0@fA~bL>r@q<;V}`h)oZ literal 0 HcmV?d00001 diff --git a/test_files/large_text.txt b/test_files/large_text.txt new file mode 100644 index 0000000..64021bc --- /dev/null +++ b/test_files/large_text.txt @@ -0,0 +1,2 @@ +This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. This is a large text file for testing performance. +End of large file with final line. \ No newline at end of file diff --git a/test_files/multipage_test.pdf b/test_files/multipage_test.pdf new file mode 100644 index 0000000..bda5e74 --- /dev/null +++ b/test_files/multipage_test.pdf @@ -0,0 +1,106 @@ +%PDF-1.3 +% ReportLab Generated PDF document http://www.reportlab.com +1 0 obj +<< +/F1 2 0 R +>> +endobj +2 0 obj +<< +/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font +>> +endobj +3 0 obj +<< +/Contents 9 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 8 0 R /Resources << +/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] +>> /Rotate 0 /Trans << + +>> + /Type /Page +>> +endobj +4 0 obj +<< +/Contents 10 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 8 0 R /Resources << +/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] +>> /Rotate 0 /Trans << + +>> + /Type /Page +>> +endobj +5 0 obj +<< +/Contents 11 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 8 0 R /Resources << +/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] +>> /Rotate 0 /Trans << + +>> + /Type /Page +>> +endobj +6 0 obj +<< +/PageMode /UseNone /Pages 8 0 R /Type /Catalog +>> +endobj +7 0 obj +<< +/Author (anonymous) /CreationDate (D:20250710214218+00'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20250710214218+00'00') /Producer (ReportLab PDF Library - www.reportlab.com) + /Subject (unspecified) /Title (Multi-page Test Document) /Trapped /False +>> +endobj +8 0 obj +<< +/Count 3 /Kids [ 3 0 R 4 0 R 5 0 R ] /Type /Pages +>> +endobj +9 0 obj +<< +/Filter [ /ASCII85Decode /FlateDecode ] /Length 169 +>> +stream +GapQh0E=F,0U\H3T\pNYT^QKk?tc>IP,;W#U1^23ihPEM_?CT3!K3H3=WkWlTgfk;(arJdECd6a7R1F/.SqNX9u4:qV^,3M!5s*':G0mdbq%1@#g#sMdDDX6g_?JG,U9Wt+U+lN+X=6o+W"Z_5u+E]aJ<9.,U4#5!.\endstream +endobj +10 0 obj +<< +/Filter [ /ASCII85Decode /FlateDecode ] /Length 155 +>> +stream +GapQh0E=F,0U\H3T\pNYT^QKk?tc>IP,;W#U1^23ihPEM_?CW4KISi90MntRifICKNI\t>FH]r4E^IYV%'!"K4m8,IJq"%nWf'(B@ns.4,~>endstream +endobj +11 0 obj +<< +/Filter [ /ASCII85Decode /FlateDecode ] /Length 139 +>> +stream +GapQh0E=F,0U\H3T\pNYT^QKk?tc>IP,;W#U1^23ihPEM_?CW4KISi90MntRifICKNI\t>m7^]]Ykcn&6DdR,YK:K$]??Q;i%<4N6J"1KuCVendstream +endobj +xref +0 12 +0000000000 65535 f +0000000073 00000 n +0000000104 00000 n +0000000211 00000 n +0000000414 00000 n +0000000618 00000 n +0000000822 00000 n +0000000890 00000 n +0000001202 00000 n +0000001273 00000 n +0000001532 00000 n +0000001778 00000 n +trailer +<< +/ID +[<3b1faa812dbde8df21a25a9c074387ca><3b1faa812dbde8df21a25a9c074387ca>] +% ReportLab generated PDF document -- digest (http://www.reportlab.com) + +/Info 7 0 R +/Root 6 0 R +/Size 12 +>> +startxref +2008 +%%EOF diff --git a/test_files/portrait_100x200.png b/test_files/portrait_100x200.png new file mode 100644 index 0000000000000000000000000000000000000000..7e3a61c36b1bbac967773039dd0e795f1d7bfc89 GIT binary patch literal 1423 zcmeAS@N?(olHy`uVBq!ia0vp^DL{OJg9%7ZjJrIIfq^y5)5S5QV$R#!^*&chWsZOR zemBtA<4Ji`eMpL`M-b29hNGvl4*D+eoGsAKF*{gd@eGfqxXJeo7YbVD^XN7r%(sjY6mm6=h=27~-VX?u^~Ro=bl*lGO(8|r+IaBt*iZ|~`!nSW!=QOW2Y_bA!J zrRQB{_v~IPcKXl}zlRbRz+4tbjT;BF?-q_V-Jj*i{*B3pve$D>n zZ`=;2xr%(Ro}NB6OkDMN`t<$EN)tDJJbN`&Y0=eJPRjG=ehuwgs92x>|Kq#j<@TFn z<>MPZ?_WIUc+u|H?;b5HJHBwynYabT#g{YE;}`ZlY`pVUI_}c(rD@-`n*6&yIX36R zT4xWJCl2xb9XwyYuBo*uy0-e%mn}xNw#A=5^FLplWbS)YefgWoe&V|=EW&Q(J-(Uz zwmLZ8vU*Kzc4OhS)s7DZMI&DttLrac^4_yAf92XwA7Ttsx1ZmC@U^(_1M?di#~VGqVljKY#gla^;%R_;;1BpIr0x+q!+;tBkoD z;`~qF7nc$(uAZ%9^*lW6=(#QKPS-L`X3cKTT~^+F*nCRu6+HWr!x4+&$;e&A0+;t0=WR!ZW=+OE6 yLBqsU?_xB&T`9u{J~_H(Q~DPS)UxqE!_~CUE7$*9xENT_GI+ZBxvX + + + + + Sample HTML Document + + +

Test HTML File

+

This is a sample HTML document for testing format detection.

+

It contains multiple paragraphs and should be detected as HTML.

+ + \ No newline at end of file diff --git a/test_files/sample.json b/test_files/sample.json new file mode 100644 index 0000000..8011f29 --- /dev/null +++ b/test_files/sample.json @@ -0,0 +1,12 @@ +{ + "name": "Test Document", + "type": "sample", + "metadata": { + "created": "2024-01-01", + "author": "Test User" + }, + "content": [ + "This is a JSON file", + "Used for testing text format detection" + ] +} \ No newline at end of file diff --git a/test_files/sample.txt b/test_files/sample.txt new file mode 100644 index 0000000..37130f7 --- /dev/null +++ b/test_files/sample.txt @@ -0,0 +1,8 @@ +This is a sample text file for testing metadata extraction. +It contains multiple lines and various words. +The quick brown fox jumps over the lazy dog. +This file is used to test character count, word count, and line count extraction. + +Some special characters: áéíóú, çñ, and emojis 🎉✨ + +This should help test Unicode detection as well. \ No newline at end of file diff --git a/test_files/sample.xml b/test_files/sample.xml new file mode 100644 index 0000000..6d5773c --- /dev/null +++ b/test_files/sample.xml @@ -0,0 +1,12 @@ + + + Sample XML Document + + This is a sample XML file for testing. + It should be detected as XML format. + + + Test User + 2024-01-01 + + \ No newline at end of file diff --git a/test_files/single_page_v14.pdf b/test_files/single_page_v14.pdf new file mode 100644 index 0000000..8db1349 --- /dev/null +++ b/test_files/single_page_v14.pdf @@ -0,0 +1,68 @@ +%PDF-1.3 +% ReportLab Generated PDF document http://www.reportlab.com +1 0 obj +<< +/F1 2 0 R +>> +endobj +2 0 obj +<< +/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font +>> +endobj +3 0 obj +<< +/Contents 7 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << +/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] +>> /Rotate 0 /Trans << + +>> + /Type /Page +>> +endobj +4 0 obj +<< +/PageMode /UseNone /Pages 6 0 R /Type /Catalog +>> +endobj +5 0 obj +<< +/Author (Test Author) /CreationDate (D:20250710214218+00'00') /Creator (Python reportlab) /Keywords () /ModDate (D:20250710214218+00'00') /Producer (ReportLab PDF Library - www.reportlab.com) + /Subject (Test Subject) /Title (Single Page Test Document) /Trapped /False +>> +endobj +6 0 obj +<< +/Count 1 /Kids [ 3 0 R ] /Type /Pages +>> +endobj +7 0 obj +<< +/Filter [ /ASCII85Decode /FlateDecode ] /Length 206 +>> +stream +Gas2B]aDVA&;9pC`KY*LD^B-O0d'R(h@#jac&^q+OP>SPmh&K#W6@$9_Y5bf'gkod%QJ.'$Nb1<.o;ODct=>@o&ifTJs=-$X.UIKf=%kE`l6H4ncH?T8&r:oU$p@/Dkq]C\nM9+)$gJ8i5uRL/B/S_i27!\Ph,TnDqT$"E>!rEO.F7L@9L)S[P2.<5'R?1"%om1'9X0s@Rg.~>endstream +endobj +xref +0 8 +0000000000 65535 f +0000000073 00000 n +0000000104 00000 n +0000000211 00000 n +0000000404 00000 n +0000000472 00000 n +0000000763 00000 n +0000000822 00000 n +trailer +<< +/ID +[] +% ReportLab generated PDF document -- digest (http://www.reportlab.com) + +/Info 5 0 R +/Root 4 0 R +/Size 8 +>> +startxref +1118 +%%EOF diff --git a/test_files/small_50x50.png b/test_files/small_50x50.png new file mode 100644 index 0000000000000000000000000000000000000000..68d8b589586b5ee35c70e2c4fe130009dc0bc086 GIT binary patch literal 135 zcmeAS@N?(olHy`uVBq!ia0vp^Mj*_=1SBWM%0B~Aex5FlAr*7po-<@Tpdi4oG3fuh zdjf0`c}=y9 BFhBqR literal 0 HcmV?d00001 diff --git a/test_files/square_150x150.png b/test_files/square_150x150.png new file mode 100644 index 0000000000000000000000000000000000000000..b5286233da3e4b1d059b2b8374885c5778837b46 GIT binary patch literal 1525 zcmc(f=~EI20L7Uab8}ZbGmpfiY%(*~G_CB|60g>@#MXl*bcsa_G>_EHgAPsF^0rb# zNz^07ue{qF&j@ihX%4(f)DcCIE)0V53( zI?d^F)spmvh_&q!()Pl>nXRM>Q`4JZwdAF-}1Q^%rEMI#kvR(H?0W3&sS2l!8cXSpX&zOJpk zdG+dPKfe#HeDXM!!wI;YsX(J0+M@5+f@-o9lUQWB&*8Xo`wmp-xyZ<{RGOnJmfTI5 z!iY*s&C9B66REs2;e~iUoGzT=0CaiqSub;q~BA!G(^uvpR(foT*z`Sm+-gcMu9u1%-w2^v>E^xZjI` zpZR?L$Fp;C#?!7YbWBVWNx~c-H*9F&ZZO1|l`vAK<;pIpOy-6Aws7T&!4RhibV>y; z3@f{T{~2yo)hHNk5>8D)&Znkodh!q;&^!WxFgEs{P!h{on_I$539P;)e@$PR$l!a? zpPzh(YRDEPEiJcNbEC;9D3ppP?2m;uhl7)){)pt{!OxUlvGtN*aFdIO+|s!dOs8w} z@*0fh(gx-rsLUqbY;S%S!DFUDqaiyvaW!8|)Adn9NDjxRtjv39 zk@X}MO;K~^Fw}EIQ%l;bfq^lxbrRXKghaBBidr%RX4Q?v0qrLry-R2`{-Deo_E1>` z8cD`%GFF9W0-?DP|6JEHWP;C6Y{&58*H<^*+p|Ku7#j(JoPa?3bqe@Ryzq#r1JTB| z%G!a5?l1`!7FYU;bKBcL`5sRlIy&K3Usvbjmgt+Eja<{QZZEU9Z07buSy8^datfs( zFAoXzJ}*19U)&5uG8k5TbY|u~B60YI!OZY#%wEnP_&!S*G{p~MZhqwP+gZqVmrrUb zx2t-pxH4FrtCq2ksAz2VqldB+adD)PiX^3Iy_tM&E~!(I@Xkv@Sn*5Oe>WLuoZTyRY>5?H|Ur-`M$W q8$j$4mXURE{r^Lnf0J{pF2I&Z1M(sNdC?lq0YQEtkk;cd*Zu-0pZeGU literal 0 HcmV?d00001 diff --git a/test_files/test_format.html b/test_files/test_format.html new file mode 100644 index 0000000..847d551 --- /dev/null +++ b/test_files/test_format.html @@ -0,0 +1,16 @@ + + + + + HTML Test Document + + +

HTML Test Page

+

This document should be detected as HTML format.

+

It contains HTML markup and structure.

+
    +
  • List item one
  • +
  • List item two
  • +
+ + \ No newline at end of file diff --git a/test_files/test_format.json b/test_files/test_format.json new file mode 100644 index 0000000..f3ca405 --- /dev/null +++ b/test_files/test_format.json @@ -0,0 +1,15 @@ +{ + "document": { + "title": "Test JSON Document", + "type": "metadata_test", + "properties": { + "word_count": 25, + "format": "json", + "encoding": "utf-8" + }, + "content": [ + "This JSON should be detected as JSON format", + "It contains structured data in JSON format" + ] + } +} \ No newline at end of file diff --git a/test_files/test_format.xml b/test_files/test_format.xml new file mode 100644 index 0000000..f34e922 --- /dev/null +++ b/test_files/test_format.xml @@ -0,0 +1,12 @@ + + + + XML Test Document + xml + 15 + + +
This XML document should be detected as XML format.
+
It contains structured markup for testing.
+
+
\ No newline at end of file diff --git a/test_files/test_image.jpg b/test_files/test_image.jpg new file mode 100644 index 0000000000000000000000000000000000000000..852dd180287db23c00a29d18d90ef3a6b1721990 GIT binary patch literal 1748 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<HEAm;@P_1sVSzVUP#9la&z+7@&ZWiJ66!jh%y&iyNq5 zs{jKNBQrA-3o|P#3ky(nEl{3;MUYiU(a@1iI53f2sZhkIapFP_Wv7h?MT0JWP%%y_ zYU1P)6PJ*bQdLve(9|+9H8Z!cv~qTFb#wRd^a>6M4GWKmj7m;PO-s+n%qlJ^Ei136 ztZHs)ZENr7?3y%r%G7DoXUv?nXz`Mz%a*TLxoXqqEnBy3-?4Mop~FXx9y@;G&P778mFHFAhJOT9Fqw0Hlzap~QibJuoNZcW)@Jh$c0 zik&iv<+@!&USU;t&7gnbKZsqDzdjGPjWN+ zzDoSt#i%$NgF8#lIIwvJBp4_NPQ3BC$y&fQ=j5Y^WsyqrZ2cZdni(3ul78y$RAyv9 z{nO*(o`k>euFhSTwLI!-+>awkx-uT2OZ4hIt{O#r^1MB-w)*RGx$IXK#h5VrYyF)cPSVvIS#E9J z5~TA?axuT;jf0UJ_U}>o)LK_NXX^Lkro30KeT;Dxe(T+FB&dgZr?d9s+s|BWszSeg zlv{OHTDn!0Q~kTs)o$75!nuF1C2O2}dvw~?wPKgOFTQ;enHTkT zy;|#$4VCpuOMHwtRRo_^H%~stpw6^D?zQ->7CYZ7t3Gzx91Rp&cX?y*(*r-b%7 literal 0 HcmV?d00001