fix(server): catch pdf-extract spammy logs
This commit is contained in:
parent
182ba7cf18
commit
40afb5ade5
|
|
@ -1,6 +1,7 @@
|
|||
use anyhow::Result;
|
||||
use chrono::Utc;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::panic::{catch_unwind, AssertUnwindSafe};
|
||||
use tokio::fs;
|
||||
use uuid::Uuid;
|
||||
use tracing::{info, warn, error};
|
||||
|
|
@ -321,14 +322,16 @@ impl FileService {
|
|||
async fn generate_pdf_thumbnail(&self, file_data: &[u8]) -> Result<Vec<u8>> {
|
||||
use image::Rgb;
|
||||
|
||||
// Try to extract first page as image using pdf-extract
|
||||
match pdf_extract::extract_text_from_mem(file_data) {
|
||||
Ok(text) => {
|
||||
// Try to extract the PDF's text using pdf-extract, with panic protection
|
||||
match catch_unwind(AssertUnwindSafe(|| {
|
||||
pdf_extract::extract_text_from_mem(file_data)
|
||||
})) {
|
||||
Ok(Ok(text)) => {
|
||||
// If we can extract text, create a text-based thumbnail
|
||||
self.generate_text_based_thumbnail(&text, "PDF", Rgb([220, 38, 27])).await
|
||||
}
|
||||
Err(_) => {
|
||||
// Fall back to placeholder if PDF extraction fails
|
||||
Ok(Err(_)) | Err(_) => {
|
||||
// Fall back to placeholder if PDF extraction fails or panics
|
||||
self.generate_placeholder_thumbnail("PDF").await
|
||||
}
|
||||
}
|
||||
|
|
|
|||
10
src/ocr.rs
10
src/ocr.rs
|
|
@ -1,5 +1,6 @@
|
|||
use anyhow::{anyhow, Result};
|
||||
use std::path::Path;
|
||||
use std::panic::{catch_unwind, AssertUnwindSafe};
|
||||
use crate::ocr_error::OcrError;
|
||||
use crate::ocr_health::OcrHealthChecker;
|
||||
|
||||
|
|
@ -54,8 +55,13 @@ impl OcrService {
|
|||
#[cfg(feature = "ocr")]
|
||||
{
|
||||
let bytes = std::fs::read(file_path)?;
|
||||
let text = pdf_extract::extract_text_from_mem(&bytes)
|
||||
.map_err(|e| anyhow!("Failed to extract text from PDF: {}", e))?;
|
||||
let text = match catch_unwind(AssertUnwindSafe(|| {
|
||||
pdf_extract::extract_text_from_mem(&bytes)
|
||||
})) {
|
||||
Ok(Ok(text)) => text,
|
||||
Ok(Err(e)) => return Err(anyhow!("Failed to extract text from PDF: {}", e)),
|
||||
Err(_) => return Err(anyhow!("PDF extraction panicked due to invalid content stream in file: {}", file_path)),
|
||||
};
|
||||
|
||||
Ok(text.trim().to_string())
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue