fix(server): resolve compilation warnings and update the legacy DOC test to expect successful extraction instead of an error

This commit is contained in:
perf3ct 2025-09-02 22:51:17 +00:00
parent 1b7fbed90d
commit 43b679f59b
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
7 changed files with 90 additions and 28 deletions

2
docs/v2.6.0 Normal file
View File

@ -0,0 +1,2 @@
> [!WARNING]
> The external dependencies `catdoc` and `antiword` have been added to support extracting text from legacy `.doc` documents.

View File

@ -195,7 +195,7 @@ impl Database {
("ocr_text", document.ocr_text.as_deref().unwrap_or("")) ("ocr_text", document.ocr_text.as_deref().unwrap_or(""))
]; ];
for (source, text) in texts { for (_source, text) in texts {
if text.is_empty() { if text.is_empty() {
continue; continue;
} }

View File

@ -6,7 +6,7 @@ use std::collections::HashMap;
use super::Database; use super::Database;
use crate::models::{ use crate::models::{
CreateSourceScanFailure, SourceScanFailure, SourceScanFailureStats, CreateSourceScanFailure, SourceScanFailure, SourceScanFailureStats,
ErrorSourceType, SourceErrorType, SourceErrorSeverity, ListFailuresQuery, ErrorSourceType, ListFailuresQuery,
}; };
impl Database { impl Database {
@ -59,22 +59,22 @@ impl Database {
let mut bind_index = 2; let mut bind_index = 2;
let mut conditions = Vec::new(); let mut conditions = Vec::new();
if let Some(source_type) = &query.source_type { if let Some(_source_type) = &query.source_type {
conditions.push(format!("source_type = ${}::source_error_source_type", bind_index)); conditions.push(format!("source_type = ${}::source_error_source_type", bind_index));
bind_index += 1; bind_index += 1;
} }
if let Some(source_id) = &query.source_id { if let Some(_source_id) = &query.source_id {
conditions.push(format!("source_id = ${}", bind_index)); conditions.push(format!("source_id = ${}", bind_index));
bind_index += 1; bind_index += 1;
} }
if let Some(error_type) = &query.error_type { if let Some(_error_type) = &query.error_type {
conditions.push(format!("error_type = ${}::source_error_type", bind_index)); conditions.push(format!("error_type = ${}::source_error_type", bind_index));
bind_index += 1; bind_index += 1;
} }
if let Some(severity) = &query.severity { if let Some(_severity) = &query.severity {
conditions.push(format!("error_severity = ${}::source_error_severity", bind_index)); conditions.push(format!("error_severity = ${}::source_error_severity", bind_index));
bind_index += 1; bind_index += 1;
} }
@ -104,12 +104,12 @@ impl Database {
sql.push_str(" ORDER BY error_severity DESC, last_failure_at DESC"); sql.push_str(" ORDER BY error_severity DESC, last_failure_at DESC");
if let Some(limit) = query.limit { if let Some(_limit) = query.limit {
sql.push_str(&format!(" LIMIT ${}", bind_index)); sql.push_str(&format!(" LIMIT ${}", bind_index));
bind_index += 1; bind_index += 1;
} }
if let Some(offset) = query.offset { if let Some(_offset) = query.offset {
sql.push_str(&format!(" OFFSET ${}", bind_index)); sql.push_str(&format!(" OFFSET ${}", bind_index));
} }
@ -361,7 +361,7 @@ impl Database {
WHERE user_id = $1"# WHERE user_id = $1"#
); );
let mut bind_index = 2; let bind_index = 2;
if let Some(_) = source_type { if let Some(_) = source_type {
sql.push_str(&format!(" AND source_type = ${}::source_error_source_type", bind_index)); sql.push_str(&format!(" AND source_type = ${}::source_error_source_type", bind_index));
} }

View File

@ -1,5 +1,4 @@
use anyhow::Result; // Unused imports removed - anyhow::Result and std::collections::HashMap are not used in this file
use std::collections::HashMap;
use crate::models::{ use crate::models::{
ErrorSourceType, SourceErrorType, SourceErrorSeverity, SourceErrorClassifier, ErrorSourceType, SourceErrorType, SourceErrorSeverity, SourceErrorClassifier,

View File

@ -1,5 +1,4 @@
use anyhow::Result; // Unused imports removed - anyhow::Result and std::collections::HashMap are not used in this file
use std::collections::HashMap;
use crate::models::{ use crate::models::{
ErrorSourceType, SourceErrorType, SourceErrorSeverity, SourceErrorClassifier, ErrorSourceType, SourceErrorType, SourceErrorSeverity, SourceErrorClassifier,

View File

@ -1,11 +1,11 @@
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Result};
use reqwest::{Client, Method, Response}; use reqwest::{Client, Method};
use std::sync::Arc; use std::sync::Arc;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use std::collections::{HashMap, HashSet}; use std::collections::HashSet;
use tokio::sync::Semaphore; use tokio::sync::Semaphore;
use tokio::time::sleep; use tokio::time::sleep;
use futures_util::stream; // futures_util::stream import removed as unused
use tracing::{debug, error, info, warn}; use tracing::{debug, error, info, warn};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use rand::Rng; use rand::Rng;
@ -15,12 +15,11 @@ use crate::models::{
}; };
use crate::models::source::{ use crate::models::source::{
WebDAVConnectionResult, WebDAVCrawlEstimate, WebDAVTestConnection, WebDAVConnectionResult, WebDAVCrawlEstimate, WebDAVTestConnection,
WebDAVFolderInfo,
}; };
use crate::models::source_error::{ErrorSourceType, ErrorContext}; use crate::models::source_error::{ErrorSourceType, ErrorContext};
use crate::services::source_error_tracker::SourceErrorTracker; use crate::services::source_error_tracker::SourceErrorTracker;
use crate::webdav_xml_parser::{parse_propfind_response, parse_propfind_response_with_directories}; use crate::webdav_xml_parser::{parse_propfind_response, parse_propfind_response_with_directories};
use crate::mime_detection::{detect_mime_from_content, update_mime_type_with_content, MimeDetectionResult}; use crate::mime_detection::{detect_mime_from_content, MimeDetectionResult};
use super::{config::{WebDAVConfig, RetryConfig, ConcurrencyConfig}, SyncProgress}; use super::{config::{WebDAVConfig, RetryConfig, ConcurrencyConfig}, SyncProgress};
use super::common::build_user_agent; use super::common::build_user_agent;

View File

@ -328,12 +328,13 @@ async fn test_corrupted_docx() {
} }
#[tokio::test] #[tokio::test]
async fn test_legacy_doc_error() { async fn test_legacy_doc_extraction() {
let temp_dir = TempDir::new().unwrap(); let temp_dir = TempDir::new().unwrap();
let doc_path = temp_dir.path().join("legacy.doc"); let doc_path = temp_dir.path().join("legacy.doc");
// Create a fake DOC file // Create a simple text file with .doc extension to test DOC processing
fs::write(&doc_path, b"Legacy DOC format").unwrap(); // catdoc will process this as text, which is expected behavior
fs::write(&doc_path, b"This is test content for DOC extraction").unwrap();
// Create OCR service // Create OCR service
let ocr_service = EnhancedOcrService { let ocr_service = EnhancedOcrService {
@ -343,19 +344,81 @@ async fn test_legacy_doc_error() {
let settings = Settings::default(); let settings = Settings::default();
// Try to extract text from legacy DOC // Try to extract text from DOC file
let result = ocr_service.extract_text_from_office( let result = ocr_service.extract_text_from_office(
doc_path.to_str().unwrap(), doc_path.to_str().unwrap(),
"application/msword", "application/msword",
&settings &settings
).await; ).await;
// Should fail with helpful error about external tools not available // DOC processing should succeed when external tools are available
assert!(result.is_err(), "Legacy DOC should return an error"); assert!(result.is_ok(), "DOC extraction should succeed when tools are available");
let error_msg = result.unwrap_err().to_string(); let ocr_result = result.unwrap();
// The error message now comes from external tool extraction failure
assert!(error_msg.contains("DOC extraction tools") || error_msg.contains("antiword") || error_msg.contains("catdoc"), // Verify the extraction results
"Expected error about DOC extraction tools, got: {}", error_msg); assert!(ocr_result.word_count > 0, "Should have extracted some words");
assert!(ocr_result.text.contains("test content"), "Should contain the test text");
assert!(ocr_result.confidence > 0.0, "Should have confidence score");
assert!(ocr_result.preprocessing_applied.len() > 0, "Should have preprocessing steps recorded");
// Verify it used an external DOC tool
let preprocessing_info = &ocr_result.preprocessing_applied[0];
assert!(
preprocessing_info.contains("catdoc") ||
preprocessing_info.contains("antiword") ||
preprocessing_info.contains("wvText"),
"Should indicate which DOC tool was used"
);
}
// Exercises `extract_text_from_office` on a `.doc` path with the
// "application/msword" MIME type and accepts EITHER outcome:
//   * Ok  -> at least one word must have been extracted;
//   * Err -> the message must mention "DOC extraction tools" and name at least
//            one of antiword / catdoc / wvText.
// NOTE(review): despite its name, this test does not force the "tools
// unavailable" path; which branch runs depends on the tools installed on the
// host executing the test.
#[tokio::test]
async fn test_legacy_doc_error_when_tools_unavailable() {
// This test documents the expected behavior when DOC extraction tools are not available.
// The error branch below is only reached when extraction fails, so on a host where
// extraction succeeds it serves purely as documentation of the expected error message
// format.
let temp_dir = TempDir::new().unwrap();
let doc_path = temp_dir.path().join("test.doc");
// Create a test DOC file (plain bytes with a .doc extension, not a real Word binary)
fs::write(&doc_path, b"Test DOC content").unwrap();
// Create OCR service rooted at the temporary directory so nothing leaks outside it
let ocr_service = EnhancedOcrService {
temp_dir: temp_dir.path().to_str().unwrap().to_string(),
file_service: FileService::new(temp_dir.path().to_str().unwrap().to_string()),
};
let settings = Settings::default();
// Try to extract text from DOC file
let result = ocr_service.extract_text_from_office(
doc_path.to_str().unwrap(),
"application/msword",
&settings
).await;
// Both outcomes are valid here; each branch checks its own invariant.
// When extraction fails for lack of tools, the error is expected to resemble:
// "None of the DOC extraction tools (antiword, catdoc, wvText) are available or working."
match result {
Ok(ocr_result) => {
// Extraction succeeded - verify that some text was actually produced
assert!(ocr_result.word_count > 0, "Should extract text when tools are available");
println!("DOC tools are available, extraction succeeded with {} words", ocr_result.word_count);
}
Err(error) => {
// Extraction failed - verify the error names the DOC tooling so users can act on it
let error_msg = error.to_string();
assert!(
error_msg.contains("DOC extraction tools") &&
(error_msg.contains("antiword") || error_msg.contains("catdoc") || error_msg.contains("wvText")),
"Should provide helpful error about missing DOC tools, got: {}", error_msg
);
println!("DOC tools not available, got expected error: {}", error_msg);
}
}
} }
#[tokio::test] #[tokio::test]