diff --git a/src/bin/test_runner.rs b/src/bin/test_runner.rs index c1b561d..9241607 100644 --- a/src/bin/test_runner.rs +++ b/src/bin/test_runner.rs @@ -170,7 +170,7 @@ fn run_frontend_tests() -> Result> { fn check_server_running() -> Result<(), Box<dyn std::error::Error>> { let output = Command::new("curl") - .args(&["-s", "-f", "http://localhost:8080/api/health"]) + .args(&["-s", "-f", "http://localhost:8000/api/health"]) .output()?; if output.status.success() { @@ -180,7 +180,7 @@ fn check_server_running() -> Result<(), Box<dyn std::error::Error>> { } } - Err("Server not running or not healthy at http://localhost:8080".into()) + Err("Server not running or not healthy at http://localhost:8000".into()) } fn print_summary(results: &[TestResult]) { diff --git a/src/enhanced_ocr.rs b/src/enhanced_ocr.rs index e215c1e..c30ec7f 100644 --- a/src/enhanced_ocr.rs +++ b/src/enhanced_ocr.rs @@ -1,4 +1,4 @@ -use anyhow::Result; +use anyhow::{anyhow, Result}; use tracing::{debug, info, warn}; #[cfg(feature = "ocr")] @@ -503,7 +503,26 @@ impl EnhancedOcrService { info!("Extracting text from PDF: {}", file_path); let bytes = std::fs::read(file_path)?; - let text = pdf_extract::extract_text_from_mem(&bytes)?; + + // Validate PDF header; the error previews at most the first 20 bytes of the file + if bytes.len() < 5 || !bytes.starts_with(b"%PDF-") { + return Err(anyhow!( + "Invalid PDF file: Missing or corrupted PDF header. File size: {} bytes, Header: {:?}", + bytes.len(), + bytes.iter().take(20).map(|&b| b as char).collect::<String>() + )); + } + + let text = match pdf_extract::extract_text_from_mem(&bytes) { + Ok(text) => text, + Err(e) => { + // Provide more detailed error information + return Err(anyhow!( + "PDF text extraction failed for file '{}' (size: {} bytes): {}. 
This may indicate a corrupted or unsupported PDF format.", + file_path, bytes.len(), e + )); + } + }; let processing_time = start_time.elapsed().as_millis() as u64; let word_count = text.split_whitespace().count(); diff --git a/src/main.rs b/src/main.rs index 620f669..dcb3ff3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -133,6 +133,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { // Seed admin user seed::seed_admin_user(&db).await?; + // Seed system user for watcher + seed::seed_system_user(&db).await?; + let state = AppState { db, config: config.clone() }; let app = Router::new() diff --git a/src/seed.rs b/src/seed.rs index 7147fe0..5885c06 100644 --- a/src/seed.rs +++ b/src/seed.rs @@ -46,5 +46,43 @@ pub async fn seed_admin_user(db: &Database) -> Result<()> { } } + Ok(()) +} + +pub async fn seed_system_user(db: &Database) -> Result<()> { + let system_username = "system"; + let system_email = "system@readur.internal"; + let system_password = "system-internal-password"; + + // Check if system user already exists + match db.get_user_by_username(system_username).await { + Ok(Some(_)) => { + info!("System user already exists"); + return Ok(()); + } + Ok(None) => { + // User doesn't exist, create it + } + Err(e) => { + info!("Error checking for system user: {}", e); + } + } + + let create_user = CreateUser { + username: system_username.to_string(), + email: system_email.to_string(), + password: system_password.to_string(), + }; + + match db.create_user(create_user).await { + Ok(user) => { + info!("✅ SYSTEM USER CREATED SUCCESSFULLY!"); + info!("🆔 System User ID: {}", user.id); + } + Err(e) => { + info!("Failed to create system user: {}", e); + } + } + Ok(()) } \ No newline at end of file diff --git a/src/watcher.rs b/src/watcher.rs index e656399..147ac0a 100644 --- a/src/watcher.rs +++ b/src/watcher.rs @@ -295,11 +295,25 @@ async fn process_file( } } + // Validate PDF files before processing; invalid ones are logged and skipped + if mime_type == "application/pdf" { + if file_data.len() < 5 || 
!file_data.starts_with(b"%PDF-") { + warn!( + "Skipping invalid PDF file: {} (size: {} bytes, header: {:?})", + filename, + file_data.len(), + file_data.iter().take(20).map(|&b| b as char).collect::<String>() + ); + return Ok(()); + } + } + let saved_file_path = file_service.save_file(&filename, &file_data).await?; - // TODO: Make this configurable or fetch from database - // Using admin user ID for watch folder documents - let system_user_id = uuid::Uuid::parse_str("c2e66705-e54b-4eff-91f5-760cb0a69b62")?; + // Fetch system user ID from database + let system_user = db.get_user_by_username("system").await? + .ok_or_else(|| anyhow::anyhow!("System user not found. Please ensure the system user is created."))?; + let system_user_id = system_user.id; let document = file_service.create_document( &filename,