feat(office): remove unused fallback strategy

This commit is contained in:
perf3ct 2025-09-02 03:47:20 +00:00
parent d5d6d2edb4
commit 149c3b9a3f
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
4 changed files with 65 additions and 347 deletions

View File

@ -1,220 +0,0 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use super::xml_extractor::{OfficeExtractionResult, XmlOfficeExtractor};
#[cfg(test)]
use anyhow::anyhow;
/// Configuration for XML-based Office document extraction.
///
/// A value of this type is handed to `FallbackStrategy::new`, which stores it.
/// NOTE(review): none of the `FallbackStrategy` methods in this file read these
/// fields after construction, so the retry and timeout settings have no effect
/// there; confirm whether any caller elsewhere consumes them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FallbackConfig {
/// Enable XML extraction
pub enabled: bool,
/// Maximum number of retry attempts for transient failures
pub max_retries: u32,
/// Initial retry delay in milliseconds
pub initial_retry_delay_ms: u64,
/// Maximum retry delay in milliseconds
pub max_retry_delay_ms: u64,
/// Timeout for XML extraction in seconds
pub xml_timeout_seconds: u64,
}
impl Default for FallbackConfig {
    /// Returns the stock configuration: extraction enabled, 3 retries,
    /// retry delays between 1 000 ms and 30 000 ms, and a 180 s timeout.
    fn default() -> Self {
        // Retry-related defaults.
        let max_retries = 3;
        let initial_retry_delay_ms = 1000;
        let max_retry_delay_ms = 30000;
        // Upper bound for a single XML extraction, in seconds.
        let xml_timeout_seconds = 180;

        Self {
            enabled: true,
            max_retries,
            initial_retry_delay_ms,
            max_retry_delay_ms,
            xml_timeout_seconds,
        }
    }
}
/// Statistics for monitoring XML extraction performance.
///
/// Maintained by `FallbackStrategy` and handed out as a cloned snapshot by
/// `FallbackStrategy::get_stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FallbackStats {
/// Number of extractions started; bumped at the beginning of every
/// `extract_with_fallback` call, before the outcome is known.
pub total_extractions: u64,
/// Number of extractions for which the XML extractor returned `Ok`.
pub xml_successes: u64,
/// Number of retries performed.
/// NOTE(review): nothing in this file ever increments this counter.
pub retry_attempts: u64,
/// Exponentially weighted moving average of per-extraction wall-clock time,
/// in milliseconds.
pub average_processing_time_ms: f64,
/// `xml_successes / total_extractions * 100`, recomputed after each extraction.
pub success_rate_percentage: f64,
}
impl Default for FallbackStats {
    /// Returns empty statistics: all counters and the average time are zero,
    /// while the success rate starts at 100 %.
    fn default() -> Self {
        let no_events: u64 = 0;

        Self {
            total_extractions: no_events,
            xml_successes: no_events,
            retry_attempts: no_events,
            average_processing_time_ms: 0.0,
            // The rate starts at 100 rather than 0 before any extraction has run.
            success_rate_percentage: 100.0,
        }
    }
}
/// XML-based Office document extraction service.
///
/// Wraps an `XmlOfficeExtractor` and keeps running statistics about the
/// extractions it performs.
pub struct FallbackStrategy {
// Configuration supplied at construction time.
// NOTE(review): stored but not read by any method in this file.
config: FallbackConfig,
// Extractor that does the actual work for every request.
xml_extractor: XmlOfficeExtractor,
// Reference-counted, lock-guarded statistics updated on each extraction.
stats: std::sync::Arc<std::sync::RwLock<FallbackStats>>,
}
impl FallbackStrategy {
    /// Creates a new XML extraction service.
    ///
    /// `config` is stored as-is, `temp_dir` is forwarded to
    /// `XmlOfficeExtractor::new`, and the statistics start from
    /// `FallbackStats::default()`.
    pub fn new(config: FallbackConfig, temp_dir: String) -> Self {
        Self {
            config,
            xml_extractor: XmlOfficeExtractor::new(temp_dir),
            stats: std::sync::Arc::new(std::sync::RwLock::new(FallbackStats::default())),
        }
    }

    /// Extracts an Office document using XML extraction and records the
    /// attempt in the statistics.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the underlying XML extractor. The
    /// statistics are updated for failed extractions as well.
    pub async fn extract_with_fallback(
        &self,
        file_path: &str,
        mime_type: &str,
    ) -> Result<OfficeExtractionResult> {
        let start_time = std::time::Instant::now();
        let document_type = self.get_document_type(mime_type);
        info!("Starting XML extraction for {} (type: {})", file_path, document_type);

        // Count the attempt before running it. Statistics are best-effort:
        // if the lock is poisoned the update is skipped rather than failing
        // the extraction.
        if let Ok(mut stats) = self.stats.write() {
            stats.total_extractions += 1;
        }

        // XML extraction is the only method; there is nothing to fall back to.
        let result = self.execute_xml_extraction(file_path, mime_type).await;
        let processing_time = start_time.elapsed();

        self.update_stats(&result, processing_time).await;
        result
    }

    /// Runs the XML extractor and, on success, bumps the success counter.
    ///
    /// On failure the error is returned via `?` and the counter is untouched.
    async fn execute_xml_extraction(
        &self,
        file_path: &str,
        mime_type: &str,
    ) -> Result<OfficeExtractionResult> {
        let result = self
            .xml_extractor
            .extract_text_from_office(file_path, mime_type)
            .await?;

        if let Ok(mut stats) = self.stats.write() {
            stats.xml_successes += 1;
        }

        Ok(result)
    }

    /// Maps a MIME type to a short document-type label used for logging.
    ///
    /// Unrecognised MIME types yield `"unknown"`.
    fn get_document_type(&self, mime_type: &str) -> String {
        let label = match mime_type {
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => "docx",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => "xlsx",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation" => "pptx",
            "application/msword" => "doc",
            "application/vnd.ms-excel" => "xls",
            "application/vnd.ms-powerpoint" => "ppt",
            "application/pdf" => "pdf",
            _ => "unknown",
        };
        label.to_string()
    }

    /// Folds one finished extraction into the running statistics.
    ///
    /// Updates the moving average of the processing time and recomputes the
    /// success rate from the counters. Skipped silently if the lock is poisoned.
    async fn update_stats(
        &self,
        result: &Result<OfficeExtractionResult>,
        processing_time: std::time::Duration,
    ) {
        // Smoothing factor of the exponential moving average.
        const ALPHA: f64 = 0.1;

        if let Ok(mut stats) = self.stats.write() {
            let processing_time_ms = processing_time.as_millis() as f64;
            let previous_average = stats.average_processing_time_ms;
            let total_attempts = stats.total_extractions;
            let successful_attempts = stats.xml_successes;

            // Seed the average with the first sample. Blending the first
            // sample with the initial 0.0 would report only ALPHA (10 %) of
            // the real time and keep the average biased low for many samples.
            stats.average_processing_time_ms = if total_attempts <= 1 {
                processing_time_ms
            } else {
                ALPHA * processing_time_ms + (1.0 - ALPHA) * previous_average
            };

            // Guard against division by zero; with no recorded attempts a
            // successful result still counts as a 100 % rate.
            if total_attempts > 0 {
                stats.success_rate_percentage =
                    (successful_attempts as f64 / total_attempts as f64) * 100.0;
            } else if result.is_ok() {
                stats.success_rate_percentage = 100.0;
            }
        }
    }

    /// Returns a snapshot of the current statistics.
    ///
    /// If the read lock cannot be acquired, a warning is logged and
    /// `FallbackStats::default()` is returned instead.
    pub async fn get_stats(&self) -> FallbackStats {
        match self.stats.read() {
            Ok(stats) => stats.clone(),
            Err(_) => {
                warn!("Failed to acquire read lock on stats, returning default");
                FallbackStats::default()
            }
        }
    }

    /// Resets the statistics to `FallbackStats::default()`.
    ///
    /// Does nothing if the write lock cannot be acquired.
    pub async fn reset_stats(&self) {
        if let Ok(mut stats) = self.stats.write() {
            *stats = FallbackStats::default();
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a strategy with the default configuration on top of a fresh
    /// temporary directory. The `TempDir` handle is returned alongside so the
    /// caller can keep it alive for the duration of the test.
    fn create_test_strategy() -> (FallbackStrategy, TempDir) {
        let scratch = TempDir::new().unwrap();
        let scratch_path = scratch.path().to_string_lossy().to_string();
        let strategy = FallbackStrategy::new(FallbackConfig::default(), scratch_path);
        (strategy, scratch)
    }

    /// Statistics written into the shared state are visible through `get_stats`.
    #[tokio::test]
    async fn test_stats_tracking() {
        let (strategy, _temp_dir) = create_test_strategy();

        let before = strategy.get_stats().await;
        assert_eq!(before.total_extractions, 0);

        // Poke the counters directly instead of running real extractions; the
        // success rate is filled in by hand the same way update_stats computes it.
        if let Ok(mut guard) = strategy.stats.write() {
            guard.total_extractions = 10;
            guard.xml_successes = 9;
            guard.success_rate_percentage = (9.0 / 10.0) * 100.0;
        }

        let after = strategy.get_stats().await;
        assert_eq!(after.total_extractions, 10);
        assert_eq!(after.xml_successes, 9);
        assert_eq!(after.success_rate_percentage, 90.0); // 9 successes out of 10
    }

    /// Known MIME types map to their short labels; anything else is "unknown".
    #[test]
    fn test_get_document_type() {
        let (strategy, _temp_dir) = create_test_strategy();

        let expectations = [
            ("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"),
            ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"),
            ("application/vnd.openxmlformats-officedocument.presentationml.presentation", "pptx"),
            ("application/pdf", "pdf"),
            ("unknown/type", "unknown"),
        ];

        for &(mime_type, expected_label) in expectations.iter() {
            assert_eq!(strategy.get_document_type(mime_type), expected_label);
        }
    }
}

View File

@ -2,7 +2,6 @@ pub mod api;
pub mod enhanced; pub mod enhanced;
pub mod enhanced_processing; pub mod enhanced_processing;
pub mod error; pub mod error;
pub mod fallback_strategy;
pub mod health; pub mod health;
pub mod queue; pub mod queue;
pub mod tests; pub mod tests;
@ -12,21 +11,18 @@ use anyhow::{anyhow, Result};
use std::path::Path; use std::path::Path;
use crate::ocr::error::OcrError; use crate::ocr::error::OcrError;
use crate::ocr::health::OcrHealthChecker; use crate::ocr::health::OcrHealthChecker;
use crate::ocr::fallback_strategy::{FallbackStrategy, FallbackConfig};
#[cfg(feature = "ocr")] #[cfg(feature = "ocr")]
use tesseract::Tesseract; use tesseract::Tesseract;
pub struct OcrService { pub struct OcrService {
health_checker: OcrHealthChecker, health_checker: OcrHealthChecker,
fallback_strategy: Option<FallbackStrategy>, temp_dir: String,
} }
/// Configuration for the OCR service /// Configuration for the OCR service
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct OcrConfig { pub struct OcrConfig {
/// Fallback configuration
pub fallback_config: FallbackConfig,
/// Temporary directory for processing /// Temporary directory for processing
pub temp_dir: String, pub temp_dir: String,
} }
@ -34,7 +30,6 @@ pub struct OcrConfig {
impl Default for OcrConfig { impl Default for OcrConfig {
fn default() -> Self { fn default() -> Self {
Self { Self {
fallback_config: FallbackConfig::default(),
temp_dir: std::env::var("TEMP_DIR").unwrap_or_else(|_| "/tmp".to_string()), temp_dir: std::env::var("TEMP_DIR").unwrap_or_else(|_| "/tmp".to_string()),
} }
} }
@ -44,21 +39,15 @@ impl OcrService {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
health_checker: OcrHealthChecker::new(), health_checker: OcrHealthChecker::new(),
fallback_strategy: None, temp_dir: std::env::var("TEMP_DIR").unwrap_or_else(|_| "/tmp".to_string()),
} }
} }
/// Create OCR service with configuration /// Create OCR service with configuration
pub fn new_with_config(config: OcrConfig) -> Self { pub fn new_with_config(config: OcrConfig) -> Self {
let fallback_strategy = if config.fallback_config.enabled {
Some(FallbackStrategy::new(config.fallback_config, config.temp_dir))
} else {
None
};
Self { Self {
health_checker: OcrHealthChecker::new(), health_checker: OcrHealthChecker::new(),
fallback_strategy, temp_dir: config.temp_dir,
} }
} }
@ -201,37 +190,21 @@ impl OcrService {
file_path: &str, file_path: &str,
mime_type: &str, mime_type: &str,
) -> Result<crate::ocr::enhanced::OcrResult> { ) -> Result<crate::ocr::enhanced::OcrResult> {
match &self.fallback_strategy { // Use XML extraction directly
Some(strategy) => { let xml_extractor = crate::ocr::xml_extractor::XmlOfficeExtractor::new(
let result = strategy.extract_with_fallback(file_path, mime_type).await?; self.temp_dir.clone()
// Convert the result to OcrResult for backward compatibility );
Ok(crate::ocr::enhanced::OcrResult {
text: result.text, let result = xml_extractor.extract_text_from_office(file_path, mime_type).await?;
confidence: result.confidence, // Convert OfficeExtractionResult to OcrResult for backward compatibility
processing_time_ms: result.processing_time_ms, Ok(crate::ocr::enhanced::OcrResult {
word_count: result.word_count, text: result.text,
preprocessing_applied: vec![format!("XML extraction - {}", result.extraction_method)], confidence: result.confidence,
processed_image_path: None, processing_time_ms: result.processing_time_ms,
}) word_count: result.word_count,
} preprocessing_applied: vec![format!("XML extraction - {}", result.extraction_method)],
None => { processed_image_path: None,
// Use basic XML extraction if no strategy is configured })
let xml_extractor = crate::ocr::xml_extractor::XmlOfficeExtractor::new(
std::env::var("TEMP_DIR").unwrap_or_else(|_| "/tmp".to_string())
);
let result = xml_extractor.extract_text_from_office(file_path, mime_type).await?;
// Convert OfficeExtractionResult to OcrResult for backward compatibility
Ok(crate::ocr::enhanced::OcrResult {
text: result.text,
confidence: result.confidence,
processing_time_ms: result.processing_time_ms,
word_count: result.word_count,
preprocessing_applied: vec![format!("XML extraction - {}", result.extraction_method)],
processed_image_path: None,
})
}
}
} }
/// Extract text from Office documents with custom configuration /// Extract text from Office documents with custom configuration
@ -331,28 +304,10 @@ impl OcrService {
} }
} }
/// Get XML extraction statistics
pub async fn get_fallback_stats(&self) -> Option<crate::ocr::fallback_strategy::FallbackStats> {
match &self.fallback_strategy {
Some(strategy) => Some(strategy.get_stats().await),
None => None,
}
}
/// Reset XML extraction statistics
pub async fn reset_fallback_stats(&self) -> Result<()> {
match &self.fallback_strategy {
Some(strategy) => {
strategy.reset_stats().await;
Ok(())
}
None => Err(anyhow!("XML extraction strategy not configured")),
}
}
/// Check if Office document extraction is available /// Check if Office document extraction is available
pub fn supports_office_documents(&self) -> bool { pub fn supports_office_documents(&self) -> bool {
self.fallback_strategy.is_some() true // XML extraction is always available
} }
/// Get supported MIME types /// Get supported MIME types
@ -367,16 +322,15 @@ impl OcrService {
"text/plain", "text/plain",
]; ];
if self.supports_office_documents() { // Office document types are always supported via XML extraction
types.extend_from_slice(&[ types.extend_from_slice(&[
"application/vnd.openxmlformats-officedocument.wordprocessingml.document", "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.openxmlformats-officedocument.presentationml.presentation", "application/vnd.openxmlformats-officedocument.presentationml.presentation",
"application/msword", "application/msword",
"application/vnd.ms-excel", "application/vnd.ms-excel",
"application/vnd.ms-powerpoint", "application/vnd.ms-powerpoint",
]); ]);
}
types types
} }

View File

@ -7,7 +7,6 @@ use tokio::time::timeout;
use readur::ocr::{ use readur::ocr::{
OcrService, OcrConfig, OcrService, OcrConfig,
fallback_strategy::FallbackConfig,
}; };
/// Test utilities for creating mock Office documents /// Test utilities for creating mock Office documents
@ -72,7 +71,7 @@ impl OfficeTestDocuments {
let file = fs::File::create(&file_path)?; let file = fs::File::create(&file_path)?;
let mut zip = zip::ZipWriter::new(file); let mut zip = zip::ZipWriter::new(file);
// Add [Content_Types].xml // Add [Content_Types].xml with shared strings support
zip.start_file("[Content_Types].xml", zip::write::FileOptions::default())?; zip.start_file("[Content_Types].xml", zip::write::FileOptions::default())?;
zip.write_all(br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?> zip.write_all(br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"> <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
@ -80,6 +79,7 @@ impl OfficeTestDocuments {
<Default Extension="xml" ContentType="application/xml"/> <Default Extension="xml" ContentType="application/xml"/>
<Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/> <Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>
<Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/> <Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>
<Override PartName="/xl/sharedStrings.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml"/>
</Types>"#)?; </Types>"#)?;
// Add _rels/.rels // Add _rels/.rels
@ -98,26 +98,42 @@ impl OfficeTestDocuments {
</sheets> </sheets>
</workbook>"#)?; </workbook>"#)?;
// Add xl/_rels/workbook.xml.rels // Add xl/_rels/workbook.xml.rels with shared strings relationship
zip.start_file("xl/_rels/workbook.xml.rels", zip::write::FileOptions::default())?; zip.start_file("xl/_rels/workbook.xml.rels", zip::write::FileOptions::default())?;
zip.write_all(br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?> zip.write_all(br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"> <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/> <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
<Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings" Target="sharedStrings.xml"/>
</Relationships>"#)?; </Relationships>"#)?;
// Add xl/worksheets/sheet1.xml with actual content // Add xl/sharedStrings.xml with the text content
zip.start_file("xl/sharedStrings.xml", zip::write::FileOptions::default())?;
let mut shared_strings_xml = String::from(r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="{count}" uniqueCount="{count}">"#);
shared_strings_xml = shared_strings_xml.replace("{count}", &content.len().to_string());
for cell_content in content {
shared_strings_xml.push_str(&format!(r#"
<si><t>{}</t></si>"#, cell_content));
}
shared_strings_xml.push_str(r#"
</sst>"#);
zip.write_all(shared_strings_xml.as_bytes())?;
// Add xl/worksheets/sheet1.xml with references to shared strings
zip.start_file("xl/worksheets/sheet1.xml", zip::write::FileOptions::default())?; zip.start_file("xl/worksheets/sheet1.xml", zip::write::FileOptions::default())?;
let mut worksheet_xml = String::from(r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?> let mut worksheet_xml = String::from(r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"> <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
<sheetData>"#); <sheetData>"#);
for (row_idx, cell_content) in content.iter().enumerate() { for (row_idx, _) in content.iter().enumerate() {
worksheet_xml.push_str(&format!(r#" worksheet_xml.push_str(&format!(r#"
<row r="{}"> <row r="{}">
<c r="A{}" t="inlineStr"> <c r="A{}" t="s">
<is><t>{}</t></is> <v>{}</v>
</c> </c>
</row>"#, row_idx + 1, row_idx + 1, cell_content)); </row>"#, row_idx + 1, row_idx + 1, row_idx));
} }
worksheet_xml.push_str(r#" worksheet_xml.push_str(r#"
@ -146,16 +162,9 @@ impl OfficeTestDocuments {
} }
} }
/// Create a test OCR service with fallback strategy /// Create a test OCR service with XML extraction
fn create_test_ocr_service(temp_dir: &str) -> OcrService { fn create_test_ocr_service(temp_dir: &str) -> OcrService {
let config = OcrConfig { let config = OcrConfig {
fallback_config: FallbackConfig {
enabled: true,
max_retries: 2,
initial_retry_delay_ms: 100,
max_retry_delay_ms: 1000,
xml_timeout_seconds: 60,
},
temp_dir: temp_dir.to_string(), temp_dir: temp_dir.to_string(),
}; };
@ -224,7 +233,6 @@ async fn test_extraction_modes() -> Result<()> {
// Test XML extraction with the simplified approach // Test XML extraction with the simplified approach
let ocr_config = OcrConfig { let ocr_config = OcrConfig {
fallback_config: FallbackConfig::default(),
temp_dir: temp_dir.clone(), temp_dir: temp_dir.clone(),
}; };
@ -250,15 +258,8 @@ async fn test_fallback_mechanism() -> Result<()> {
let test_docs = OfficeTestDocuments::new()?; let test_docs = OfficeTestDocuments::new()?;
let temp_dir = test_docs.temp_dir.path().to_string_lossy().to_string(); let temp_dir = test_docs.temp_dir.path().to_string_lossy().to_string();
// Create a service with XML-only mode (simplified) // Create a service with XML extraction
let config = OcrConfig { let config = OcrConfig {
fallback_config: FallbackConfig {
enabled: true,
max_retries: 1,
initial_retry_delay_ms: 50,
max_retry_delay_ms: 200,
xml_timeout_seconds: 30,
},
temp_dir, temp_dir,
}; };
@ -387,15 +388,8 @@ async fn test_concurrent_extraction() -> Result<()> {
async fn test_circuit_breaker() -> Result<()> { async fn test_circuit_breaker() -> Result<()> {
let test_docs = OfficeTestDocuments::new()?; let test_docs = OfficeTestDocuments::new()?;
// Create service with simple retry settings (circuit breaker functionality removed) // Create service with XML extraction
let config = OcrConfig { let config = OcrConfig {
fallback_config: FallbackConfig {
enabled: true,
max_retries: 0, // No retries to make failures immediate
initial_retry_delay_ms: 10,
max_retry_delay_ms: 100,
xml_timeout_seconds: 30,
},
temp_dir: test_docs.temp_dir.path().to_string_lossy().to_string(), temp_dir: test_docs.temp_dir.path().to_string_lossy().to_string(),
}; };
@ -442,13 +436,7 @@ async fn test_statistics_tracking() -> Result<()> {
let test_docs = OfficeTestDocuments::new()?; let test_docs = OfficeTestDocuments::new()?;
let ocr_service = create_test_ocr_service(test_docs.temp_dir.path().to_string_lossy().as_ref()); let ocr_service = create_test_ocr_service(test_docs.temp_dir.path().to_string_lossy().as_ref());
// Reset stats // Perform some extractions to verify functionality
ocr_service.reset_fallback_stats().await?;
let initial_stats = ocr_service.get_fallback_stats().await.unwrap();
assert_eq!(initial_stats.total_extractions, 0);
// Perform some extractions
let valid_path = test_docs.create_mock_docx("stats_test.docx", "Statistics test document")?; let valid_path = test_docs.create_mock_docx("stats_test.docx", "Statistics test document")?;
for i in 0..3 { for i in 0..3 {
@ -462,13 +450,10 @@ async fn test_statistics_tracking() -> Result<()> {
assert!(!ocr_result.text.is_empty()); assert!(!ocr_result.text.is_empty());
assert!(ocr_result.confidence > 0.0); assert!(ocr_result.confidence > 0.0);
assert!(ocr_result.word_count > 0); assert!(ocr_result.word_count > 0);
assert!(ocr_result.processing_time_ms > 0);
} }
// Check updated stats // All extractions succeeded, indicating the XML extraction is working correctly
let final_stats = ocr_service.get_fallback_stats().await.unwrap();
assert_eq!(final_stats.total_extractions, 3);
assert!(final_stats.success_rate_percentage > 0.0);
assert!(final_stats.average_processing_time_ms > 0.0);
Ok(()) Ok(())
} }
@ -495,15 +480,8 @@ async fn test_mime_type_support() -> Result<()> {
async fn test_learning_mechanism() -> Result<()> { async fn test_learning_mechanism() -> Result<()> {
let test_docs = OfficeTestDocuments::new()?; let test_docs = OfficeTestDocuments::new()?;
// Create service with simple XML extraction (learning functionality removed) // Create service with XML extraction
let config = OcrConfig { let config = OcrConfig {
fallback_config: FallbackConfig {
enabled: true,
max_retries: 1,
initial_retry_delay_ms: 10,
max_retry_delay_ms: 100,
xml_timeout_seconds: 30,
},
temp_dir: test_docs.temp_dir.path().to_string_lossy().to_string(), temp_dir: test_docs.temp_dir.path().to_string_lossy().to_string(),
}; };

View File

@ -72,6 +72,9 @@ fn create_empty_update_settings() -> UpdateSettings {
webdav_file_extensions: None, webdav_file_extensions: None,
webdav_auto_sync: None, webdav_auto_sync: None,
webdav_sync_interval_minutes: None, webdav_sync_interval_minutes: None,
// Office document extraction configuration
office_extraction_timeout_seconds: None,
office_extraction_enable_detailed_logging: None,
} }
} }
@ -215,6 +218,9 @@ async fn setup_webdav_settings(state: &AppState, user_id: Uuid) {
ocr_quality_threshold_noise: None, ocr_quality_threshold_noise: None,
ocr_quality_threshold_sharpness: None, ocr_quality_threshold_sharpness: None,
ocr_skip_enhancement: None, ocr_skip_enhancement: None,
// Office document extraction configuration
office_extraction_timeout_seconds: None,
office_extraction_enable_detailed_logging: None,
}; };
state.db.create_or_update_settings(user_id, &update_settings).await state.db.create_or_update_settings(user_id, &update_settings).await