fix(server): resolve compilation warnings and update the legacy DOC test, which expected extraction to fail, so that it expects success
This commit is contained in:
parent
1b7fbed90d
commit
43b679f59b
|
|
@ -0,0 +1,2 @@
|
||||||
|
> [!WARNING]
|
||||||
|
> The external dependencies `catdoc` and `antiword` have been added to support consumption of `.doc` documents.
|
||||||
|
|
@ -195,7 +195,7 @@ impl Database {
|
||||||
("ocr_text", document.ocr_text.as_deref().unwrap_or(""))
|
("ocr_text", document.ocr_text.as_deref().unwrap_or(""))
|
||||||
];
|
];
|
||||||
|
|
||||||
for (source, text) in texts {
|
for (_source, text) in texts {
|
||||||
if text.is_empty() {
|
if text.is_empty() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ use std::collections::HashMap;
|
||||||
use super::Database;
|
use super::Database;
|
||||||
use crate::models::{
|
use crate::models::{
|
||||||
CreateSourceScanFailure, SourceScanFailure, SourceScanFailureStats,
|
CreateSourceScanFailure, SourceScanFailure, SourceScanFailureStats,
|
||||||
ErrorSourceType, SourceErrorType, SourceErrorSeverity, ListFailuresQuery,
|
ErrorSourceType, ListFailuresQuery,
|
||||||
};
|
};
|
||||||
|
|
||||||
impl Database {
|
impl Database {
|
||||||
|
|
@ -59,22 +59,22 @@ impl Database {
|
||||||
let mut bind_index = 2;
|
let mut bind_index = 2;
|
||||||
let mut conditions = Vec::new();
|
let mut conditions = Vec::new();
|
||||||
|
|
||||||
if let Some(source_type) = &query.source_type {
|
if let Some(_source_type) = &query.source_type {
|
||||||
conditions.push(format!("source_type = ${}::source_error_source_type", bind_index));
|
conditions.push(format!("source_type = ${}::source_error_source_type", bind_index));
|
||||||
bind_index += 1;
|
bind_index += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(source_id) = &query.source_id {
|
if let Some(_source_id) = &query.source_id {
|
||||||
conditions.push(format!("source_id = ${}", bind_index));
|
conditions.push(format!("source_id = ${}", bind_index));
|
||||||
bind_index += 1;
|
bind_index += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(error_type) = &query.error_type {
|
if let Some(_error_type) = &query.error_type {
|
||||||
conditions.push(format!("error_type = ${}::source_error_type", bind_index));
|
conditions.push(format!("error_type = ${}::source_error_type", bind_index));
|
||||||
bind_index += 1;
|
bind_index += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(severity) = &query.severity {
|
if let Some(_severity) = &query.severity {
|
||||||
conditions.push(format!("error_severity = ${}::source_error_severity", bind_index));
|
conditions.push(format!("error_severity = ${}::source_error_severity", bind_index));
|
||||||
bind_index += 1;
|
bind_index += 1;
|
||||||
}
|
}
|
||||||
|
|
@ -104,12 +104,12 @@ impl Database {
|
||||||
|
|
||||||
sql.push_str(" ORDER BY error_severity DESC, last_failure_at DESC");
|
sql.push_str(" ORDER BY error_severity DESC, last_failure_at DESC");
|
||||||
|
|
||||||
if let Some(limit) = query.limit {
|
if let Some(_limit) = query.limit {
|
||||||
sql.push_str(&format!(" LIMIT ${}", bind_index));
|
sql.push_str(&format!(" LIMIT ${}", bind_index));
|
||||||
bind_index += 1;
|
bind_index += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(offset) = query.offset {
|
if let Some(_offset) = query.offset {
|
||||||
sql.push_str(&format!(" OFFSET ${}", bind_index));
|
sql.push_str(&format!(" OFFSET ${}", bind_index));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -361,7 +361,7 @@ impl Database {
|
||||||
WHERE user_id = $1"#
|
WHERE user_id = $1"#
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut bind_index = 2;
|
let bind_index = 2;
|
||||||
if let Some(_) = source_type {
|
if let Some(_) = source_type {
|
||||||
sql.push_str(&format!(" AND source_type = ${}::source_error_source_type", bind_index));
|
sql.push_str(&format!(" AND source_type = ${}::source_error_source_type", bind_index));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,4 @@
|
||||||
use anyhow::Result;
|
// Unused imports removed - anyhow::Result and std::collections::HashMap are not used in this file
|
||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
use crate::models::{
|
use crate::models::{
|
||||||
ErrorSourceType, SourceErrorType, SourceErrorSeverity, SourceErrorClassifier,
|
ErrorSourceType, SourceErrorType, SourceErrorSeverity, SourceErrorClassifier,
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,4 @@
|
||||||
use anyhow::Result;
|
// Unused imports removed - anyhow::Result and std::collections::HashMap are not used in this file
|
||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
use crate::models::{
|
use crate::models::{
|
||||||
ErrorSourceType, SourceErrorType, SourceErrorSeverity, SourceErrorClassifier,
|
ErrorSourceType, SourceErrorType, SourceErrorSeverity, SourceErrorClassifier,
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,11 @@
|
||||||
use anyhow::{anyhow, Result};
|
use anyhow::{anyhow, Result};
|
||||||
use reqwest::{Client, Method, Response};
|
use reqwest::{Client, Method};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::HashSet;
|
||||||
use tokio::sync::Semaphore;
|
use tokio::sync::Semaphore;
|
||||||
use tokio::time::sleep;
|
use tokio::time::sleep;
|
||||||
use futures_util::stream;
|
// futures_util::stream import removed as unused
|
||||||
use tracing::{debug, error, info, warn};
|
use tracing::{debug, error, info, warn};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use rand::Rng;
|
use rand::Rng;
|
||||||
|
|
@ -15,12 +15,11 @@ use crate::models::{
|
||||||
};
|
};
|
||||||
use crate::models::source::{
|
use crate::models::source::{
|
||||||
WebDAVConnectionResult, WebDAVCrawlEstimate, WebDAVTestConnection,
|
WebDAVConnectionResult, WebDAVCrawlEstimate, WebDAVTestConnection,
|
||||||
WebDAVFolderInfo,
|
|
||||||
};
|
};
|
||||||
use crate::models::source_error::{ErrorSourceType, ErrorContext};
|
use crate::models::source_error::{ErrorSourceType, ErrorContext};
|
||||||
use crate::services::source_error_tracker::SourceErrorTracker;
|
use crate::services::source_error_tracker::SourceErrorTracker;
|
||||||
use crate::webdav_xml_parser::{parse_propfind_response, parse_propfind_response_with_directories};
|
use crate::webdav_xml_parser::{parse_propfind_response, parse_propfind_response_with_directories};
|
||||||
use crate::mime_detection::{detect_mime_from_content, update_mime_type_with_content, MimeDetectionResult};
|
use crate::mime_detection::{detect_mime_from_content, MimeDetectionResult};
|
||||||
|
|
||||||
use super::{config::{WebDAVConfig, RetryConfig, ConcurrencyConfig}, SyncProgress};
|
use super::{config::{WebDAVConfig, RetryConfig, ConcurrencyConfig}, SyncProgress};
|
||||||
use super::common::build_user_agent;
|
use super::common::build_user_agent;
|
||||||
|
|
|
||||||
|
|
@ -328,12 +328,13 @@ async fn test_corrupted_docx() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_legacy_doc_error() {
|
async fn test_legacy_doc_extraction() {
|
||||||
let temp_dir = TempDir::new().unwrap();
|
let temp_dir = TempDir::new().unwrap();
|
||||||
let doc_path = temp_dir.path().join("legacy.doc");
|
let doc_path = temp_dir.path().join("legacy.doc");
|
||||||
|
|
||||||
// Create a fake DOC file
|
// Create a simple text file with .doc extension to test DOC processing
|
||||||
fs::write(&doc_path, b"Legacy DOC format").unwrap();
|
// catdoc will process this as text, which is expected behavior
|
||||||
|
fs::write(&doc_path, b"This is test content for DOC extraction").unwrap();
|
||||||
|
|
||||||
// Create OCR service
|
// Create OCR service
|
||||||
let ocr_service = EnhancedOcrService {
|
let ocr_service = EnhancedOcrService {
|
||||||
|
|
@ -343,19 +344,81 @@ async fn test_legacy_doc_error() {
|
||||||
|
|
||||||
let settings = Settings::default();
|
let settings = Settings::default();
|
||||||
|
|
||||||
// Try to extract text from legacy DOC
|
// Try to extract text from DOC file
|
||||||
let result = ocr_service.extract_text_from_office(
|
let result = ocr_service.extract_text_from_office(
|
||||||
doc_path.to_str().unwrap(),
|
doc_path.to_str().unwrap(),
|
||||||
"application/msword",
|
"application/msword",
|
||||||
&settings
|
&settings
|
||||||
).await;
|
).await;
|
||||||
|
|
||||||
// Should fail with helpful error about external tools not available
|
// DOC processing should succeed when external tools are available
|
||||||
assert!(result.is_err(), "Legacy DOC should return an error");
|
assert!(result.is_ok(), "DOC extraction should succeed when tools are available");
|
||||||
let error_msg = result.unwrap_err().to_string();
|
let ocr_result = result.unwrap();
|
||||||
// The error message now comes from external tool extraction failure
|
|
||||||
assert!(error_msg.contains("DOC extraction tools") || error_msg.contains("antiword") || error_msg.contains("catdoc"),
|
// Verify the extraction results
|
||||||
"Expected error about DOC extraction tools, got: {}", error_msg);
|
assert!(ocr_result.word_count > 0, "Should have extracted some words");
|
||||||
|
assert!(ocr_result.text.contains("test content"), "Should contain the test text");
|
||||||
|
assert!(ocr_result.confidence > 0.0, "Should have confidence score");
|
||||||
|
assert!(ocr_result.preprocessing_applied.len() > 0, "Should have preprocessing steps recorded");
|
||||||
|
|
||||||
|
// Verify it used an external DOC tool
|
||||||
|
let preprocessing_info = &ocr_result.preprocessing_applied[0];
|
||||||
|
assert!(
|
||||||
|
preprocessing_info.contains("catdoc") ||
|
||||||
|
preprocessing_info.contains("antiword") ||
|
||||||
|
preprocessing_info.contains("wvText"),
|
||||||
|
"Should indicate which DOC tool was used"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_legacy_doc_error_when_tools_unavailable() {
|
||||||
|
// This test documents the expected behavior when DOC extraction tools are not available.
|
||||||
|
// Since antiword and catdoc are available in the current test environment, this test
|
||||||
|
// would need to be run in an environment without these tools to actually fail.
|
||||||
|
// For now, this serves as documentation of the expected error message format.
|
||||||
|
|
||||||
|
let temp_dir = TempDir::new().unwrap();
|
||||||
|
let doc_path = temp_dir.path().join("test.doc");
|
||||||
|
|
||||||
|
// Create a test DOC file
|
||||||
|
fs::write(&doc_path, b"Test DOC content").unwrap();
|
||||||
|
|
||||||
|
// Create OCR service
|
||||||
|
let ocr_service = EnhancedOcrService {
|
||||||
|
temp_dir: temp_dir.path().to_str().unwrap().to_string(),
|
||||||
|
file_service: FileService::new(temp_dir.path().to_str().unwrap().to_string()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let settings = Settings::default();
|
||||||
|
|
||||||
|
// Try to extract text from DOC file
|
||||||
|
let result = ocr_service.extract_text_from_office(
|
||||||
|
doc_path.to_str().unwrap(),
|
||||||
|
"application/msword",
|
||||||
|
&settings
|
||||||
|
).await;
|
||||||
|
|
||||||
|
// Since tools are available in this environment, this should succeed
|
||||||
|
// In an environment without DOC tools, it would fail with a helpful error message like:
|
||||||
|
// "None of the DOC extraction tools (antiword, catdoc, wvText) are available or working."
|
||||||
|
match result {
|
||||||
|
Ok(ocr_result) => {
|
||||||
|
// Tools are available - verify successful extraction
|
||||||
|
assert!(ocr_result.word_count > 0, "Should extract text when tools are available");
|
||||||
|
println!("DOC tools are available, extraction succeeded with {} words", ocr_result.word_count);
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
// Tools are not available - verify proper error message
|
||||||
|
let error_msg = error.to_string();
|
||||||
|
assert!(
|
||||||
|
error_msg.contains("DOC extraction tools") &&
|
||||||
|
(error_msg.contains("antiword") || error_msg.contains("catdoc") || error_msg.contains("wvText")),
|
||||||
|
"Should provide helpful error about missing DOC tools, got: {}", error_msg
|
||||||
|
);
|
||||||
|
println!("DOC tools not available, got expected error: {}", error_msg);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue