feat(ocr): gracefully handle problematic PDFs in all the ways, create tests so that it doesn't happen again
This commit is contained in:
parent
ad27d99949
commit
6165148e4d
|
|
@ -839,6 +839,7 @@ impl EnhancedOcrService {
|
||||||
let text = match extraction_result {
|
let text = match extraction_result {
|
||||||
Ok(Ok(Ok(Ok(text)))) => text,
|
Ok(Ok(Ok(Ok(text)))) => text,
|
||||||
Ok(Ok(Ok(Err(e)))) => {
|
Ok(Ok(Ok(Err(e)))) => {
|
||||||
|
warn!("PDF text extraction failed for file '{}' (size: {} bytes): {}", file_path, file_size, e);
|
||||||
return Err(anyhow!(
|
return Err(anyhow!(
|
||||||
"PDF text extraction failed for file '{}' (size: {} bytes): {}. This may indicate a corrupted or unsupported PDF format.",
|
"PDF text extraction failed for file '{}' (size: {} bytes): {}. This may indicate a corrupted or unsupported PDF format.",
|
||||||
file_path, file_size, e
|
file_path, file_size, e
|
||||||
|
|
@ -847,8 +848,7 @@ impl EnhancedOcrService {
|
||||||
Ok(Ok(Err(_panic))) => {
|
Ok(Ok(Err(_panic))) => {
|
||||||
// pdf-extract panicked (e.g., missing unicode map, corrupted font encoding)
|
// pdf-extract panicked (e.g., missing unicode map, corrupted font encoding)
|
||||||
// For now, gracefully handle this common issue
|
// For now, gracefully handle this common issue
|
||||||
use tracing::debug;
|
warn!("PDF text extraction panicked for '{}' (size: {} bytes) due to font encoding issues. This is a known limitation with certain PDF files.", file_path, file_size);
|
||||||
debug!("PDF text extraction failed for '{}' due to font encoding issues. This is a known limitation with certain PDF files.", file_path);
|
|
||||||
|
|
||||||
return Err(anyhow!(
|
return Err(anyhow!(
|
||||||
"PDF text extraction failed due to font encoding issues in '{}' (size: {} bytes). This PDF uses non-standard fonts or character encoding that cannot be processed. To extract text from this PDF, consider: 1) Converting it to images and uploading those instead, 2) Using a different PDF viewer to re-save the PDF with standard encoding, or 3) Using external tools to convert the PDF to a more compatible format.",
|
"PDF text extraction failed due to font encoding issues in '{}' (size: {} bytes). This PDF uses non-standard fonts or character encoding that cannot be processed. To extract text from this PDF, consider: 1) Converting it to images and uploading those instead, 2) Using a different PDF viewer to re-save the PDF with standard encoding, or 3) Using external tools to convert the PDF to a more compatible format.",
|
||||||
|
|
@ -856,9 +856,11 @@ impl EnhancedOcrService {
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
Ok(Err(e)) => {
|
Ok(Err(e)) => {
|
||||||
|
warn!("PDF extraction task failed for '{}' (size: {} bytes): {}", file_path, file_size, e);
|
||||||
return Err(anyhow!("PDF extraction task failed: {}", e));
|
return Err(anyhow!("PDF extraction task failed: {}", e));
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
|
warn!("PDF extraction timed out after 2 minutes for file '{}' (size: {} bytes). The PDF may be corrupted or too complex.", file_path, file_size);
|
||||||
return Err(anyhow!(
|
return Err(anyhow!(
|
||||||
"PDF extraction timed out after 2 minutes for file '{}' (size: {} bytes). The PDF may be corrupted or too complex.",
|
"PDF extraction timed out after 2 minutes for file '{}' (size: {} bytes). The PDF may be corrupted or too complex.",
|
||||||
file_path, file_size
|
file_path, file_size
|
||||||
|
|
@ -1019,7 +1021,26 @@ impl EnhancedOcrService {
|
||||||
let temp_ocr_path = temp_ocr_path.clone();
|
let temp_ocr_path = temp_ocr_path.clone();
|
||||||
move || -> Result<String> {
|
move || -> Result<String> {
|
||||||
let bytes = std::fs::read(&temp_ocr_path)?;
|
let bytes = std::fs::read(&temp_ocr_path)?;
|
||||||
let text = pdf_extract::extract_text_from_mem(&bytes)?;
|
// Catch panics from pdf-extract library (same pattern as used elsewhere)
|
||||||
|
let text = match catch_unwind(AssertUnwindSafe(|| {
|
||||||
|
pdf_extract::extract_text_from_mem(&bytes)
|
||||||
|
})) {
|
||||||
|
Ok(Ok(text)) => text,
|
||||||
|
Ok(Err(e)) => {
|
||||||
|
warn!("PDF text extraction failed after OCR processing for '{}': {}", temp_ocr_path, e);
|
||||||
|
return Err(anyhow!(
|
||||||
|
"PDF text extraction failed after OCR processing: {}. This may indicate a corrupted or unsupported PDF format.",
|
||||||
|
e
|
||||||
|
));
|
||||||
|
},
|
||||||
|
Err(_) => {
|
||||||
|
warn!("PDF extraction panicked after OCR processing for '{}' due to invalid content stream", temp_ocr_path);
|
||||||
|
return Err(anyhow!(
|
||||||
|
"PDF extraction panicked after OCR processing due to invalid content stream or corrupted PDF structure. \
|
||||||
|
This suggests the PDF has malformed internal structure that cannot be parsed safely."
|
||||||
|
));
|
||||||
|
},
|
||||||
|
};
|
||||||
Ok(text.trim().to_string())
|
Ok(text.trim().to_string())
|
||||||
}
|
}
|
||||||
}).await??;
|
}).await??;
|
||||||
|
|
|
||||||
|
|
@ -393,4 +393,295 @@ startxref
|
||||||
assert!(all_updates.contains_key(&doc_id1));
|
assert!(all_updates.contains_key(&doc_id1));
|
||||||
assert!(all_updates.contains_key(&doc_id2));
|
assert!(all_updates.contains_key(&doc_id2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Test that malformed PDFs don't crash the OCR system
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_malformed_pdf_panic_handling() {
|
||||||
|
let ocr_service = OcrService::new();
|
||||||
|
|
||||||
|
// Create a malformed PDF in memory that will cause pdf-extract to panic
|
||||||
|
let malformed_pdf_content = b"%PDF-1.4
|
||||||
|
1 0 obj
|
||||||
|
<< /Type /Catalog /Pages 2 0 R >>
|
||||||
|
endobj
|
||||||
|
2 0 obj
|
||||||
|
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
|
||||||
|
endobj
|
||||||
|
3 0 obj
|
||||||
|
<< /Type /Page /Parent 2 0 R /Resources << /Font << /F1 4 0 R >> >> /MediaBox [0 0 612 792] /Contents 5 0 R >>
|
||||||
|
endobj
|
||||||
|
4 0 obj
|
||||||
|
<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
|
||||||
|
endobj
|
||||||
|
5 0 obj
|
||||||
|
<< /Length 999 >>
|
||||||
|
stream
|
||||||
|
BT
|
||||||
|
/F1 12 Tf
|
||||||
|
100 700 Td
|
||||||
|
(This is a malformed PDF with invalid content stream) Tj
|
||||||
|
ET
|
||||||
|
INVALID_CONTENT_STREAM_DATA_THAT_CAUSES_PANIC
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
xref
|
||||||
|
0 6
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000009 00000 n
|
||||||
|
0000000058 00000 n
|
||||||
|
0000000115 00000 n
|
||||||
|
0000000262 00000 n
|
||||||
|
0000000341 00000 n
|
||||||
|
trailer
|
||||||
|
<< /Size 6 /Root 1 0 R >>
|
||||||
|
startxref
|
||||||
|
999
|
||||||
|
%%EOF";
|
||||||
|
|
||||||
|
// Write to temporary file
|
||||||
|
let temp_file = NamedTempFile::with_suffix(".pdf").unwrap();
|
||||||
|
std::fs::write(temp_file.path(), malformed_pdf_content).unwrap();
|
||||||
|
|
||||||
|
let result = ocr_service.extract_text_from_pdf(temp_file.path().to_str().unwrap()).await;
|
||||||
|
// Should not panic, should return an error instead
|
||||||
|
assert!(result.is_err(), "Expected error for malformed PDF");
|
||||||
|
let error_msg = result.unwrap_err().to_string();
|
||||||
|
println!("Error message: {}", error_msg);
|
||||||
|
// Should contain descriptive error message
|
||||||
|
assert!(
|
||||||
|
error_msg.contains("panic") ||
|
||||||
|
error_msg.contains("invalid content stream") ||
|
||||||
|
error_msg.contains("corrupted") ||
|
||||||
|
error_msg.contains("extract") ||
|
||||||
|
error_msg.contains("Failed to extract")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_corrupted_pdf_structure_handling() {
|
||||||
|
let ocr_service = OcrService::new();
|
||||||
|
|
||||||
|
// Create a corrupted PDF structure that will cause pdf-extract to fail
|
||||||
|
let corrupted_pdf_content = b"%PDF-1.4
|
||||||
|
1 0 obj
|
||||||
|
<< /Type /Catalog /Pages 2 0 R >>
|
||||||
|
endobj
|
||||||
|
2 0 obj
|
||||||
|
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
|
||||||
|
endobj
|
||||||
|
3 0 obj
|
||||||
|
<< /Type /Page /Parent 2 0 R /Resources << /Font << /F1 4 0 R >> >> /MediaBox [0 0 612 792] /Contents 5 0 R >>
|
||||||
|
endobj
|
||||||
|
4 0 obj
|
||||||
|
<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
|
||||||
|
endobj
|
||||||
|
5 0 obj
|
||||||
|
<< /Length 44 >>
|
||||||
|
stream
|
||||||
|
BT
|
||||||
|
/F1 12 Tf
|
||||||
|
100 700 Td
|
||||||
|
(Corrupted PDF) Tj
|
||||||
|
ET
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
xref
|
||||||
|
0 6
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000009 00000 n
|
||||||
|
0000000058 00000 n
|
||||||
|
0000000115 00000 n
|
||||||
|
0000000262 00000 n
|
||||||
|
0000000341 00000 n
|
||||||
|
trailer
|
||||||
|
<< /Size 6 /Root 1 0 R /InvalidKey >>
|
||||||
|
startxref
|
||||||
|
999999
|
||||||
|
%%EOF";
|
||||||
|
|
||||||
|
let temp_file = NamedTempFile::with_suffix(".pdf").unwrap();
|
||||||
|
std::fs::write(temp_file.path(), corrupted_pdf_content).unwrap();
|
||||||
|
|
||||||
|
let result = ocr_service.extract_text_from_pdf(temp_file.path().to_str().unwrap()).await;
|
||||||
|
// Should not panic, should return an error instead
|
||||||
|
assert!(result.is_err(), "Expected error for corrupted PDF");
|
||||||
|
let error_msg = result.unwrap_err().to_string();
|
||||||
|
println!("Corrupted PDF error: {}", error_msg);
|
||||||
|
// Should contain descriptive error message
|
||||||
|
assert!(
|
||||||
|
error_msg.contains("panic") ||
|
||||||
|
error_msg.contains("corrupted") ||
|
||||||
|
error_msg.contains("extract") ||
|
||||||
|
error_msg.contains("PDF") ||
|
||||||
|
error_msg.contains("Failed to extract")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_invalid_font_encoding_handling() {
|
||||||
|
let ocr_service = OcrService::new();
|
||||||
|
|
||||||
|
// Test with invalid font encoding
|
||||||
|
let invalid_font = "tests/test_pdfs/invalid_font_encoding.pdf";
|
||||||
|
if Path::new(invalid_font).exists() {
|
||||||
|
let result = ocr_service.extract_text_from_pdf(invalid_font).await;
|
||||||
|
// Should not panic, should return an error instead
|
||||||
|
assert!(result.is_err());
|
||||||
|
let error_msg = result.unwrap_err().to_string();
|
||||||
|
// Should contain descriptive error message
|
||||||
|
assert!(
|
||||||
|
error_msg.contains("panic") ||
|
||||||
|
error_msg.contains("font") ||
|
||||||
|
error_msg.contains("encoding") ||
|
||||||
|
error_msg.contains("extract")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_fake_pdf_handling() {
|
||||||
|
let ocr_service = OcrService::new();
|
||||||
|
|
||||||
|
// Create a fake PDF file (not actually a PDF) that will definitely cause an error
|
||||||
|
let fake_pdf_content = b"This is not a PDF file at all, just plain text with a PDF extension.
|
||||||
|
It should cause pdf-extract to fail when trying to parse it.
|
||||||
|
This tests the error handling for files that aren't actually PDFs.";
|
||||||
|
|
||||||
|
let temp_file = NamedTempFile::with_suffix(".pdf").unwrap();
|
||||||
|
std::fs::write(temp_file.path(), fake_pdf_content).unwrap();
|
||||||
|
|
||||||
|
let result = ocr_service.extract_text_from_pdf(temp_file.path().to_str().unwrap()).await;
|
||||||
|
// Should not panic, should return an error instead
|
||||||
|
assert!(result.is_err(), "Expected error for fake PDF");
|
||||||
|
let error_msg = result.unwrap_err().to_string();
|
||||||
|
println!("Fake PDF error: {}", error_msg);
|
||||||
|
// Should contain descriptive error message about parsing failure
|
||||||
|
assert!(
|
||||||
|
error_msg.contains("extract") ||
|
||||||
|
error_msg.contains("parse") ||
|
||||||
|
error_msg.contains("PDF") ||
|
||||||
|
error_msg.contains("format") ||
|
||||||
|
error_msg.contains("Failed to extract")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_problematic_encoding_pdf_handling() {
|
||||||
|
let ocr_service = OcrService::new();
|
||||||
|
|
||||||
|
// Test with the existing problematic encoding PDF
|
||||||
|
let problematic_encoding = "tests/test_pdfs/problematic_encoding.pdf";
|
||||||
|
if Path::new(problematic_encoding).exists() {
|
||||||
|
let result = ocr_service.extract_text_from_pdf(problematic_encoding).await;
|
||||||
|
// Should not panic, should return an error instead
|
||||||
|
assert!(result.is_err());
|
||||||
|
let error_msg = result.unwrap_err().to_string();
|
||||||
|
// Should contain descriptive error message
|
||||||
|
assert!(
|
||||||
|
error_msg.contains("panic") ||
|
||||||
|
error_msg.contains("encoding") ||
|
||||||
|
error_msg.contains("extract") ||
|
||||||
|
error_msg.contains("font")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test that the enhanced OCR service also handles panics correctly
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_enhanced_ocr_panic_handling() {
|
||||||
|
use crate::ocr::enhanced::EnhancedOcrService;
|
||||||
|
use crate::services::file_service::FileService;
|
||||||
|
use crate::models::Settings;
|
||||||
|
|
||||||
|
let ocr_service = EnhancedOcrService::new("tests".to_string());
|
||||||
|
let settings = Settings::default();
|
||||||
|
|
||||||
|
// Test all malformed PDFs with enhanced OCR
|
||||||
|
let test_files = vec![
|
||||||
|
"tests/test_pdfs/malformed_content_stream.pdf",
|
||||||
|
"tests/test_pdfs/corrupted_structure.pdf",
|
||||||
|
"tests/test_pdfs/invalid_font_encoding.pdf",
|
||||||
|
"tests/test_pdfs/fake_pdf.pdf",
|
||||||
|
"tests/test_pdfs/problematic_encoding.pdf",
|
||||||
|
];
|
||||||
|
|
||||||
|
for test_file in test_files {
|
||||||
|
if Path::new(test_file).exists() {
|
||||||
|
let result = ocr_service.extract_text_with_context(
|
||||||
|
test_file,
|
||||||
|
"application/pdf",
|
||||||
|
&Path::new(test_file).file_name().unwrap().to_str().unwrap(),
|
||||||
|
1024, // file_size
|
||||||
|
&settings
|
||||||
|
).await;
|
||||||
|
|
||||||
|
// Should not panic, should return an error instead
|
||||||
|
assert!(result.is_err(), "Expected error for file: {}", test_file);
|
||||||
|
let error_msg = result.unwrap_err().to_string();
|
||||||
|
|
||||||
|
// Should contain descriptive error message
|
||||||
|
assert!(
|
||||||
|
error_msg.contains("panic") ||
|
||||||
|
error_msg.contains("extract") ||
|
||||||
|
error_msg.contains("PDF") ||
|
||||||
|
error_msg.contains("corrupted") ||
|
||||||
|
error_msg.contains("encoding") ||
|
||||||
|
error_msg.contains("font"),
|
||||||
|
"Error message should be descriptive for {}: {}", test_file, error_msg
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test that panic handling works correctly in concurrent scenarios
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_concurrent_pdf_panic_handling() {
|
||||||
|
use std::sync::Arc;
|
||||||
|
use futures::future::join_all;
|
||||||
|
|
||||||
|
let ocr_service = Arc::new(OcrService::new());
|
||||||
|
let mut handles = Vec::new();
|
||||||
|
|
||||||
|
// Test concurrent processing of malformed PDFs
|
||||||
|
let test_files = vec![
|
||||||
|
"tests/test_pdfs/malformed_content_stream.pdf",
|
||||||
|
"tests/test_pdfs/corrupted_structure.pdf",
|
||||||
|
"tests/test_pdfs/invalid_font_encoding.pdf",
|
||||||
|
"tests/test_pdfs/fake_pdf.pdf",
|
||||||
|
];
|
||||||
|
|
||||||
|
for test_file in test_files {
|
||||||
|
if Path::new(test_file).exists() {
|
||||||
|
let ocr_service_clone = Arc::clone(&ocr_service);
|
||||||
|
let test_file_owned = test_file.to_string();
|
||||||
|
|
||||||
|
let handle = tokio::spawn(async move {
|
||||||
|
let result = ocr_service_clone.extract_text_from_pdf(&test_file_owned).await;
|
||||||
|
// Should not panic, should return an error instead
|
||||||
|
assert!(result.is_err(), "Expected error for file: {}", test_file_owned);
|
||||||
|
let error_msg = result.unwrap_err().to_string();
|
||||||
|
|
||||||
|
// Should contain descriptive error message
|
||||||
|
assert!(
|
||||||
|
error_msg.contains("panic") ||
|
||||||
|
error_msg.contains("extract") ||
|
||||||
|
error_msg.contains("PDF") ||
|
||||||
|
error_msg.contains("corrupted") ||
|
||||||
|
error_msg.contains("encoding"),
|
||||||
|
"Error message should be descriptive for {}: {}", test_file_owned, error_msg
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
handles.push(handle);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for all concurrent tasks to complete
|
||||||
|
let results = join_all(handles).await;
|
||||||
|
|
||||||
|
// Verify all tasks completed without panicking
|
||||||
|
for result in results {
|
||||||
|
assert!(result.is_ok(), "Task should complete without panicking");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
%PDF-1.4
|
||||||
|
1 0 obj
|
||||||
|
<< /Type /Catalog /Pages 2 0 R >>
|
||||||
|
endobj
|
||||||
|
2 0 obj
|
||||||
|
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
|
||||||
|
endobj
|
||||||
|
3 0 obj
|
||||||
|
<< /Type /Page /Parent 2 0 R /Resources << /Font << /F1 4 0 R >> >> /MediaBox [0 0 612 792] /Contents 5 0 R >>
|
||||||
|
endobj
|
||||||
|
4 0 obj
|
||||||
|
<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
|
||||||
|
endobj
|
||||||
|
5 0 obj
|
||||||
|
<< /Length 44 >>
|
||||||
|
stream
|
||||||
|
BT
|
||||||
|
/F1 12 Tf
|
||||||
|
100 700 Td
|
||||||
|
(Corrupted PDF) Tj
|
||||||
|
ET
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
xref
|
||||||
|
0 6
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000009 00000 n
|
||||||
|
0000000058 00000 n
|
||||||
|
0000000115 00000 n
|
||||||
|
0000000262 00000 n
|
||||||
|
0000000341 00000 n
|
||||||
|
trailer
|
||||||
|
<< /Size 6 /Root 1 0 R /InvalidKey >>
|
||||||
|
startxref
|
||||||
|
999999
|
||||||
|
%%EOF
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
This is not a PDF file at all, just plain text with a PDF extension.
|
||||||
|
It should cause pdf-extract to fail when trying to parse it.
|
||||||
|
This tests the error handling for files that aren't actually PDFs.
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
%PDF-1.4
|
||||||
|
1 0 obj
|
||||||
|
<< /Type /Catalog /Pages 2 0 R >>
|
||||||
|
endobj
|
||||||
|
2 0 obj
|
||||||
|
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
|
||||||
|
endobj
|
||||||
|
3 0 obj
|
||||||
|
<< /Type /Page /Parent 2 0 R /Resources << /Font << /F1 4 0 R >> >> /MediaBox [0 0 612 792] /Contents 5 0 R >>
|
||||||
|
endobj
|
||||||
|
4 0 obj
|
||||||
|
<< /Type /Font /Subtype /Type1 /BaseFont /InvalidFont /Encoding /InvalidEncoding >>
|
||||||
|
endobj
|
||||||
|
5 0 obj
|
||||||
|
<< /Length 44 >>
|
||||||
|
stream
|
||||||
|
BT
|
||||||
|
/F1 12 Tf
|
||||||
|
100 700 Td
|
||||||
|
(Invalid font encoding) Tj
|
||||||
|
ET
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
xref
|
||||||
|
0 6
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000009 00000 n
|
||||||
|
0000000058 00000 n
|
||||||
|
0000000115 00000 n
|
||||||
|
0000000262 00000 n
|
||||||
|
0000000341 00000 n
|
||||||
|
trailer
|
||||||
|
<< /Size 6 /Root 1 0 R >>
|
||||||
|
startxref
|
||||||
|
435
|
||||||
|
%%EOF
|
||||||
|
|
@ -0,0 +1,37 @@
|
||||||
|
%PDF-1.4
|
||||||
|
1 0 obj
|
||||||
|
<< /Type /Catalog /Pages 2 0 R >>
|
||||||
|
endobj
|
||||||
|
2 0 obj
|
||||||
|
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
|
||||||
|
endobj
|
||||||
|
3 0 obj
|
||||||
|
<< /Type /Page /Parent 2 0 R /Resources << /Font << /F1 4 0 R >> >> /MediaBox [0 0 612 792] /Contents 5 0 R >>
|
||||||
|
endobj
|
||||||
|
4 0 obj
|
||||||
|
<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
|
||||||
|
endobj
|
||||||
|
5 0 obj
|
||||||
|
<< /Length 999 >>
|
||||||
|
stream
|
||||||
|
BT
|
||||||
|
/F1 12 Tf
|
||||||
|
100 700 Td
|
||||||
|
(This is a malformed PDF with invalid content stream) Tj
|
||||||
|
ET
|
||||||
|
INVALID_CONTENT_STREAM_DATA_THAT_CAUSES_PANIC
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
xref
|
||||||
|
0 6
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000009 00000 n
|
||||||
|
0000000058 00000 n
|
||||||
|
0000000115 00000 n
|
||||||
|
0000000262 00000 n
|
||||||
|
0000000341 00000 n
|
||||||
|
trailer
|
||||||
|
<< /Size 6 /Root 1 0 R >>
|
||||||
|
startxref
|
||||||
|
999
|
||||||
|
%%EOF
|
||||||
Loading…
Reference in New Issue