use anyhow::Result; use clap::{Arg, Command}; use std::path::Path; use uuid::Uuid; use readur::{ batch_ingest::BatchIngester, config::Config, db::Database, file_service::FileService, ocr_queue::OcrQueueService, }; #[tokio::main] async fn main() -> Result<()> { tracing_subscriber::fmt::init(); let matches = Command::new("batch_ingest") .about("Batch ingest files for OCR processing") .arg( Arg::new("directory") .help("Directory to ingest files from") .required(true) .index(1), ) .arg( Arg::new("user-id") .help("User ID to assign documents to") .long("user-id") .short('u') .value_name("UUID") .required(true), ) .arg( Arg::new("monitor") .help("Monitor progress after starting ingestion") .long("monitor") .short('m') .action(clap::ArgAction::SetTrue), ) .get_matches(); let directory = matches.get_one::("directory").unwrap(); let user_id_str = matches.get_one::("user-id").unwrap(); let monitor = matches.get_flag("monitor"); let user_id = Uuid::parse_str(user_id_str)?; let dir_path = Path::new(directory); if !dir_path.exists() { eprintln!("Error: Directory {} does not exist", directory); std::process::exit(1); } let config = Config::from_env()?; let db = Database::new(&config.database_url).await?; let file_service = FileService::new(config.upload_path.clone()); let queue_service = OcrQueueService::new(db.clone(), db.get_pool().clone(), 1); let ingester = BatchIngester::new(db, queue_service, file_service, config); println!("Starting batch ingestion from: {}", directory); println!("User ID: {}", user_id); // Start ingestion if let Err(e) = ingester.ingest_directory(dir_path, user_id).await { eprintln!("Ingestion failed: {}", e); std::process::exit(1); } println!("Batch ingestion completed successfully!"); if monitor { println!("Monitoring OCR queue progress..."); if let Err(e) = ingester.monitor_progress().await { eprintln!("Monitoring failed: {}", e); std::process::exit(1); } } Ok(()) }