use crate::ocr_error::{CpuFeatures, OcrDiagnostics, OcrError}; use std::process::Command; use std::env; use std::path::Path; use sysinfo::System; pub struct OcrHealthChecker; impl OcrHealthChecker { pub fn new() -> Self { Self } pub fn check_tesseract_installation(&self) -> Result { let output = Command::new("tesseract") .arg("--version") .output() .map_err(|_| OcrError::TesseractNotInstalled)?; if !output.status.success() { return Err(OcrError::TesseractNotInstalled); } let version_info = String::from_utf8_lossy(&output.stdout); let version = version_info .lines() .next() .map(|s| s.to_string()) .unwrap_or_else(|| "Unknown".to_string()); Ok(version) } pub fn check_language_data(&self, lang: &str) -> Result<(), OcrError> { let tessdata_path = self.get_tessdata_path()?; let lang_file = format!("{}.traineddata", lang); let lang_path = Path::new(&tessdata_path).join(&lang_file); if !lang_path.exists() { return Err(OcrError::LanguageDataNotFound { lang: lang.to_string(), }); } Ok(()) } pub fn get_tessdata_path(&self) -> Result { if let Ok(path) = env::var("TESSDATA_PREFIX") { if Path::new(&path).exists() { return Ok(path); } else { return Err(OcrError::TessdataPathInvalid { path }); } } let common_paths = vec![ "/usr/share/tesseract-ocr/4.00/tessdata", "/usr/share/tesseract-ocr/5.00/tessdata", "/usr/local/share/tessdata", "/opt/homebrew/share/tessdata", "C:\\Program Files\\Tesseract-OCR\\tessdata", ]; for path in common_paths { if Path::new(path).exists() { return Ok(path.to_string()); } } Err(OcrError::TessdataPathInvalid { path: "No tessdata directory found".to_string(), }) } pub fn get_available_languages(&self) -> Vec { let tessdata_path = match self.get_tessdata_path() { Ok(path) => path, Err(_) => return vec![], }; let mut languages = vec![]; if let Ok(entries) = std::fs::read_dir(&tessdata_path) { for entry in entries.flatten() { if let Some(name) = entry.file_name().to_str() { if name.ends_with(".traineddata") { let lang = name.trim_end_matches(".traineddata"); languages.push(lang.to_string()); } } } } languages.sort(); languages } pub fn check_cpu_features(&self) -> CpuFeatures { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { use raw_cpuid::CpuId; let cpuid = CpuId::new(); let features = cpuid.get_feature_info().map(|f| CpuFeatures { sse2: f.has_sse2(), sse3: f.has_sse3(), sse4_1: f.has_sse41(), sse4_2: f.has_sse42(), avx: f.has_avx(), avx2: cpuid.get_extended_feature_info() .map(|ef| ef.has_avx2()) .unwrap_or(false), }).unwrap_or_else(|| CpuFeatures { sse2: false, sse3: false, sse4_1: false, sse4_2: false, avx: false, avx2: false, }); features } #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] { CpuFeatures { sse2: false, sse3: false, sse4_1: false, sse4_2: false, avx: false, avx2: false, } } } pub fn check_memory_available(&self) -> u64 { let mut sys = System::new_all(); sys.refresh_memory(); sys.available_memory() / (1024 * 1024) // Convert to MB } pub fn check_temp_space(&self) -> u64 { use std::fs; let temp_dir = env::temp_dir(); // Try to get actual available space using statvfs on Unix-like systems #[cfg(target_family = "unix")] { use std::mem; #[repr(C)] struct statvfs { f_bsize: u64, // file system block size f_frsize: u64, // fragment size f_blocks: u64, // size of fs in f_frsize units f_bfree: u64, // # free blocks f_bavail: u64, // # free blocks for unprivileged users f_files: u64, // # inodes f_ffree: u64, // # free inodes f_favail: u64, // # free inodes for unprivileged users f_fsid: u64, // file system ID f_flag: u64, // mount flags f_namemax: u64, // maximum filename length } extern "C" { fn statvfs(path: *const i8, buf: *mut statvfs) -> i32; } unsafe { let mut buf: statvfs = mem::zeroed(); let path_cstr = format!("{}\0", temp_dir.display()); if statvfs(path_cstr.as_ptr() as *const i8, &mut buf) == 0 { let available_bytes = buf.f_bavail * buf.f_frsize; return available_bytes / (1024 * 1024); // Convert to MB } } } // Windows implementation #[cfg(target_family = "windows")] { // For Windows, we'd need to use GetDiskFreeSpaceEx from winapi // For now, try to estimate based on a test file write } // Fallback: Try to estimate available space by checking if we can create a test file let test_file = temp_dir.join(".ocr_space_test"); let test_size = 100 * 1024 * 1024; // 100MB test match fs::write(&test_file, vec![0u8; test_size]) { Ok(_) => { let _ = fs::remove_file(&test_file); // If we can write 100MB, assume at least 1GB is available 1000 } Err(_) => { // If we can't write 100MB, report low space 50 } } } pub fn validate_cpu_requirements(&self) -> Result<(), OcrError> { let features = self.check_cpu_features(); // Tesseract 4.x+ requires at least SSE2 if !features.sse2 { return Err(OcrError::MissingCpuInstruction { instruction: "SSE2".to_string(), }); } Ok(()) } pub fn estimate_memory_requirement(&self, image_width: u32, image_height: u32) -> u64 { // Rough estimation: 4 bytes per pixel (RGBA) * 3 (for processing buffers) // Plus 100MB base overhead for Tesseract let pixels = (image_width as u64) * (image_height as u64); let image_memory = (pixels * 4 * 3) / (1024 * 1024); // Convert to MB image_memory + 100 } pub fn validate_memory_for_image(&self, width: u32, height: u32) -> Result<(), OcrError> { let required = self.estimate_memory_requirement(width, height); let available = self.check_memory_available(); if required > available { return Err(OcrError::InsufficientMemory { required, available }); } Ok(()) } pub fn get_full_diagnostics(&self) -> OcrDiagnostics { OcrDiagnostics { tesseract_version: self.check_tesseract_installation().ok(), available_languages: self.get_available_languages(), tessdata_path: self.get_tessdata_path().ok(), cpu_features: self.check_cpu_features(), memory_available_mb: self.check_memory_available(), temp_space_available_mb: self.check_temp_space(), } } pub fn perform_full_health_check(&self) -> Result> { let mut errors = Vec::new(); // Check Tesseract installation if let Err(e) = self.check_tesseract_installation() { errors.push(e); } // Check CPU requirements if let Err(e) = self.validate_cpu_requirements() { errors.push(e); } // Check tessdata path if let Err(e) = self.get_tessdata_path() { errors.push(e); } // Check for at least English language data if let Err(e) = self.check_language_data("eng") { errors.push(e); } let diagnostics = self.get_full_diagnostics(); if errors.is_empty() { Ok(diagnostics) } else { Err(errors) } } }