82 lines
2.4 KiB
Rust
82 lines
2.4 KiB
Rust
use anyhow::Result;
|
|
use clap::{Arg, Command};
|
|
use std::path::Path;
|
|
use uuid::Uuid;
|
|
|
|
use readur::{
|
|
batch_ingest::BatchIngester,
|
|
config::Config,
|
|
db::Database,
|
|
file_service::FileService,
|
|
ocr_queue::OcrQueueService,
|
|
};
|
|
|
|
#[tokio::main]
|
|
async fn main() -> Result<()> {
|
|
tracing_subscriber::fmt::init();
|
|
|
|
let matches = Command::new("batch_ingest")
|
|
.about("Batch ingest files for OCR processing")
|
|
.arg(
|
|
Arg::new("directory")
|
|
.help("Directory to ingest files from")
|
|
.required(true)
|
|
.index(1),
|
|
)
|
|
.arg(
|
|
Arg::new("user-id")
|
|
.help("User ID to assign documents to")
|
|
.long("user-id")
|
|
.short('u')
|
|
.value_name("UUID")
|
|
.required(true),
|
|
)
|
|
.arg(
|
|
Arg::new("monitor")
|
|
.help("Monitor progress after starting ingestion")
|
|
.long("monitor")
|
|
.short('m')
|
|
.action(clap::ArgAction::SetTrue),
|
|
)
|
|
.get_matches();
|
|
|
|
let directory = matches.get_one::<String>("directory").unwrap();
|
|
let user_id_str = matches.get_one::<String>("user-id").unwrap();
|
|
let monitor = matches.get_flag("monitor");
|
|
|
|
let user_id = Uuid::parse_str(user_id_str)?;
|
|
let dir_path = Path::new(directory);
|
|
|
|
if !dir_path.exists() {
|
|
eprintln!("Error: Directory {} does not exist", directory);
|
|
std::process::exit(1);
|
|
}
|
|
|
|
let config = Config::from_env()?;
|
|
let db = Database::new(&config.database_url).await?;
|
|
let file_service = FileService::new(config.upload_path.clone());
|
|
let queue_service = OcrQueueService::new(db.clone(), db.get_pool().clone(), 1);
|
|
|
|
let ingester = BatchIngester::new(db, queue_service, file_service, config);
|
|
|
|
println!("Starting batch ingestion from: {}", directory);
|
|
println!("User ID: {}", user_id);
|
|
|
|
// Start ingestion
|
|
if let Err(e) = ingester.ingest_directory(dir_path, user_id).await {
|
|
eprintln!("Ingestion failed: {}", e);
|
|
std::process::exit(1);
|
|
}
|
|
|
|
println!("Batch ingestion completed successfully!");
|
|
|
|
if monitor {
|
|
println!("Monitoring OCR queue progress...");
|
|
if let Err(e) = ingester.monitor_progress().await {
|
|
eprintln!("Monitoring failed: {}", e);
|
|
std::process::exit(1);
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
} |