use crate::ocr_error::OcrError; use crate::ocr_health::OcrHealthChecker; use anyhow::{anyhow, Result}; use image::DynamicImage; use std::path::Path; use std::time::{Duration, Instant}; use tokio::time::timeout; #[cfg(feature = "ocr")] use tesseract::{Tesseract, PageSegMode}; pub struct EnhancedOcrService { health_checker: OcrHealthChecker, max_image_width: u32, max_image_height: u32, ocr_timeout_seconds: u64, min_confidence_threshold: f32, } impl EnhancedOcrService { pub fn new() -> Self { Self { health_checker: OcrHealthChecker::new(), max_image_width: 10000, max_image_height: 10000, ocr_timeout_seconds: 120, min_confidence_threshold: 60.0, } } pub fn with_limits(mut self, max_width: u32, max_height: u32) -> Self { self.max_image_width = max_width; self.max_image_height = max_height; self } pub fn with_timeout(mut self, seconds: u64) -> Self { self.ocr_timeout_seconds = seconds; self } pub async fn extract_text_with_validation(&self, file_path: &str, lang: &str) -> Result { // Perform pre-flight checks self.preflight_checks(lang)?; // Load and validate image let image = self.load_and_validate_image(file_path)?; // Check memory requirements let (width, height) = (image.width(), image.height()); self.health_checker.validate_memory_for_image(width, height) .map_err(|e| anyhow!(e))?; // Perform OCR with timeout let text = self.perform_ocr_with_timeout(file_path, lang).await?; Ok(text) } fn preflight_checks(&self, lang: &str) -> Result<()> { // Check Tesseract installation self.health_checker.check_tesseract_installation() .map_err(|e| anyhow!(e))?; // Check CPU requirements self.health_checker.validate_cpu_requirements() .map_err(|e| anyhow!(e))?; // Check language data self.health_checker.check_language_data(lang) .map_err(|e| anyhow!(e))?; Ok(()) } fn load_and_validate_image(&self, file_path: &str) -> Result { // Check file permissions if !Path::new(file_path).exists() { return Err(anyhow!("File not found: {}", file_path)); } let metadata = std::fs::metadata(file_path) .map_err(|_| OcrError::PermissionDenied { path: file_path.to_string() })?; if !metadata.is_file() { return Err(anyhow!("Path is not a file: {}", file_path)); } // Try to load image let image = image::open(file_path) .map_err(|e| OcrError::InvalidImageFormat { details: e.to_string() })?; // Validate dimensions if image.width() > self.max_image_width || image.height() > self.max_image_height { return Err(OcrError::ImageTooLarge { width: image.width(), height: image.height(), max_width: self.max_image_width, max_height: self.max_image_height, }.into()); } Ok(image) } async fn perform_ocr_with_timeout(&self, file_path: &str, lang: &str) -> Result { let file_path = file_path.to_string(); let lang = lang.to_string(); let timeout_duration = Duration::from_secs(self.ocr_timeout_seconds); let min_confidence = self.min_confidence_threshold; let ocr_future = tokio::task::spawn_blocking(move || { Self::perform_ocr_internal(&file_path, &lang, min_confidence) }); match timeout(timeout_duration, ocr_future).await { Ok(Ok(result)) => result, Ok(Err(e)) => Err(anyhow!("OCR task failed: {}", e)), Err(_) => Err(OcrError::OcrTimeout { seconds: self.ocr_timeout_seconds }.into()), } } #[cfg(feature = "ocr")] fn perform_ocr_internal(file_path: &str, lang: &str, min_confidence: f32) -> Result { let start_time = Instant::now(); // Initialize Tesseract with error handling let mut tesseract = Tesseract::new(None, Some(lang)) .map_err(|e| OcrError::InitializationFailed { details: e.to_string() })?; // Set optimal parameters for various hardware tesseract.set_page_seg_mode(PageSegMode::PsmAuto); let mut tesseract = tesseract .set_variable("tessedit_do_invert", "0")? .set_variable("edges_max_children_per_outline", "40")?; // For low-end hardware, use faster but less accurate settings if let Ok(available_mem) = std::env::var("OCR_LOW_MEMORY_MODE") { if available_mem == "true" { tesseract = tesseract .set_variable("textord_heavy_nr", "0")? .set_variable("cube_debug_level", "0")?; } } tesseract = tesseract.set_image(file_path) .map_err(|e| OcrError::InvalidImageFormat { details: e.to_string() })?; // Get text with confidence check let text = tesseract.get_text() .map_err(|e| OcrError::InitializationFailed { details: e.to_string() })?; // Get mean confidence let confidence = tesseract.mean_text_conf(); if confidence < min_confidence as i32 { return Err(OcrError::LowConfidence { score: confidence as f32, threshold: min_confidence }.into()); } let elapsed = start_time.elapsed(); tracing::info!("OCR completed in {:?} with confidence: {}%", elapsed, confidence); Ok(text.trim().to_string()) } #[cfg(not(feature = "ocr"))] fn perform_ocr_internal(_file_path: &str, _lang: &str, _min_confidence: f32) -> Result { Err(anyhow!("OCR feature is disabled. Recompile with --features ocr")) } pub async fn extract_with_fallback(&self, file_path: &str, lang: &str) -> Result { // Try primary extraction match self.extract_text_with_validation(file_path, lang).await { Ok(text) => Ok(text), Err(e) => { // Check if error is recoverable if let Some(ocr_error) = e.downcast_ref::() { if ocr_error.is_recoverable() { // Try with reduced quality settings self.extract_with_reduced_quality(file_path, lang).await } else { Err(e) } } else { Err(e) } } } } async fn extract_with_reduced_quality(&self, file_path: &str, lang: &str) -> Result { // Downsample image for lower memory usage let image = self.load_and_validate_image(file_path)?; let resized = self.resize_for_ocr(image); // Save temporary resized image let temp_path = format!("{}_resized.png", file_path); resized.save(&temp_path) .map_err(|e| anyhow!("Failed to save resized image: {}", e))?; // Try OCR on resized image let result = self.perform_ocr_with_timeout(&temp_path, lang).await; // Clean up let _ = std::fs::remove_file(&temp_path); result } fn resize_for_ocr(&self, image: DynamicImage) -> DynamicImage { let (width, height) = (image.width(), image.height()); // Target dimensions for low memory mode let max_dimension = 2000; if width > max_dimension || height > max_dimension { let scale = max_dimension as f32 / width.max(height) as f32; let new_width = (width as f32 * scale) as u32; let new_height = (height as f32 * scale) as u32; image.resize(new_width, new_height, image::imageops::FilterType::Lanczos3) } else { image } } pub async fn get_diagnostics(&self) -> String { let diagnostics = self.health_checker.get_full_diagnostics(); format!("{}", diagnostics) } }