129 lines
4.7 KiB
Rust
129 lines
4.7 KiB
Rust
use std::fmt;
|
|
use thiserror::Error;
|
|
|
|
#[derive(Error, Debug)]
|
|
pub enum OcrError {
|
|
#[error("Tesseract is not installed on the system")]
|
|
TesseractNotInstalled,
|
|
|
|
#[error("Tesseract language data not found for '{lang}'. Please install tesseract-ocr-{lang}")]
|
|
LanguageDataNotFound { lang: String },
|
|
|
|
#[error("TESSDATA_PREFIX environment variable not set or invalid: {path}")]
|
|
TessdataPathInvalid { path: String },
|
|
|
|
#[error("Insufficient memory for OCR operation. Required: {required}MB, Available: {available}MB")]
|
|
InsufficientMemory { required: u64, available: u64 },
|
|
|
|
#[error("CPU instruction set missing: {instruction}. Tesseract requires {instruction} support")]
|
|
MissingCpuInstruction { instruction: String },
|
|
|
|
#[error("Image too large for OCR. Max dimensions: {max_width}x{max_height}, Actual: {width}x{height}")]
|
|
ImageTooLarge {
|
|
width: u32,
|
|
height: u32,
|
|
max_width: u32,
|
|
max_height: u32,
|
|
},
|
|
|
|
#[error("Invalid image format or corrupted image: {details}")]
|
|
InvalidImageFormat { details: String },
|
|
|
|
#[error("OCR timeout after {seconds} seconds. Consider reducing image size or quality")]
|
|
OcrTimeout { seconds: u64 },
|
|
|
|
#[error("Permission denied accessing file: {path}")]
|
|
PermissionDenied { path: String },
|
|
|
|
#[error("Tesseract initialization failed: {details}")]
|
|
InitializationFailed { details: String },
|
|
|
|
#[error("OCR quality too low. Confidence score: {score}% (minimum: {threshold}%)")]
|
|
LowConfidence { score: f32, threshold: f32 },
|
|
|
|
#[error("Hardware acceleration not available: {details}")]
|
|
HardwareAccelerationUnavailable { details: String },
|
|
|
|
#[error(transparent)]
|
|
Io(#[from] std::io::Error),
|
|
|
|
#[error(transparent)]
|
|
Other(#[from] anyhow::Error),
|
|
}
|
|
|
|
impl OcrError {
|
|
pub fn is_recoverable(&self) -> bool {
|
|
matches!(
|
|
self,
|
|
OcrError::InsufficientMemory { .. }
|
|
| OcrError::OcrTimeout { .. }
|
|
| OcrError::LowConfidence { .. }
|
|
)
|
|
}
|
|
|
|
pub fn is_configuration_error(&self) -> bool {
|
|
matches!(
|
|
self,
|
|
OcrError::TesseractNotInstalled
|
|
| OcrError::LanguageDataNotFound { .. }
|
|
| OcrError::TessdataPathInvalid { .. }
|
|
| OcrError::MissingCpuInstruction { .. }
|
|
)
|
|
}
|
|
|
|
pub fn error_code(&self) -> &'static str {
|
|
match self {
|
|
OcrError::TesseractNotInstalled => "OCR_NOT_INSTALLED",
|
|
OcrError::LanguageDataNotFound { .. } => "OCR_LANG_MISSING",
|
|
OcrError::TessdataPathInvalid { .. } => "OCR_DATA_PATH_INVALID",
|
|
OcrError::InsufficientMemory { .. } => "OCR_OUT_OF_MEMORY",
|
|
OcrError::MissingCpuInstruction { .. } => "OCR_CPU_UNSUPPORTED",
|
|
OcrError::ImageTooLarge { .. } => "OCR_IMAGE_TOO_LARGE",
|
|
OcrError::InvalidImageFormat { .. } => "OCR_INVALID_FORMAT",
|
|
OcrError::OcrTimeout { .. } => "OCR_TIMEOUT",
|
|
OcrError::PermissionDenied { .. } => "OCR_PERMISSION_DENIED",
|
|
OcrError::InitializationFailed { .. } => "OCR_INIT_FAILED",
|
|
OcrError::LowConfidence { .. } => "OCR_LOW_CONFIDENCE",
|
|
OcrError::HardwareAccelerationUnavailable { .. } => "OCR_NO_HW_ACCEL",
|
|
OcrError::Io(_) => "OCR_IO_ERROR",
|
|
OcrError::Other(_) => "OCR_UNKNOWN_ERROR",
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct OcrDiagnostics {
|
|
pub tesseract_version: Option<String>,
|
|
pub available_languages: Vec<String>,
|
|
pub tessdata_path: Option<String>,
|
|
pub cpu_features: CpuFeatures,
|
|
pub memory_available_mb: u64,
|
|
pub temp_space_available_mb: u64,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct CpuFeatures {
|
|
pub sse2: bool,
|
|
pub sse3: bool,
|
|
pub sse4_1: bool,
|
|
pub sse4_2: bool,
|
|
pub avx: bool,
|
|
pub avx2: bool,
|
|
}
|
|
|
|
impl fmt::Display for OcrDiagnostics {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
writeln!(f, "OCR Diagnostics:")?;
|
|
writeln!(f, " Tesseract Version: {}", self.tesseract_version.as_deref().unwrap_or("Not installed"))?;
|
|
writeln!(f, " Tessdata Path: {}", self.tessdata_path.as_deref().unwrap_or("Not set"))?;
|
|
writeln!(f, " Available Languages: {}", self.available_languages.join(", "))?;
|
|
writeln!(f, " Memory Available: {} MB", self.memory_available_mb)?;
|
|
writeln!(f, " Temp Space: {} MB", self.temp_space_available_mb)?;
|
|
writeln!(f, " CPU Features:")?;
|
|
writeln!(f, " SSE2: {}", self.cpu_features.sse2)?;
|
|
writeln!(f, " SSE4.1: {}", self.cpu_features.sse4_1)?;
|
|
writeln!(f, " AVX: {}", self.cpu_features.avx)?;
|
|
writeln!(f, " AVX2: {}", self.cpu_features.avx2)?;
|
|
Ok(())
|
|
}
|
|
} |