fix(server): resolve type and function signature issues blocking compilation

perf3ct 2025-07-04 00:53:32 +00:00
parent 0e84993afa
commit 497b34ce0a
19 changed files with 226 additions and 155 deletions

View File

@ -10,6 +10,7 @@ use uuid::Uuid;
use sha2::{Digest, Sha256};
use tracing::{debug, info, warn};
use serde_json;
use chrono::Utc;
use crate::models::{Document, FileInfo};
use crate::db::Database;
@ -164,28 +165,34 @@ impl DocumentIngestionService {
warn!("Failed to save file {}: {}", request.filename, e);
// Create failed document record for storage failure
if let Err(failed_err) = self.db.create_failed_document(
request.user_id,
request.filename.clone(),
Some(request.original_filename.clone()),
None, // original_path
None, // file_path (couldn't save)
Some(file_size),
Some(file_hash.clone()),
Some(request.mime_type.clone()),
None, // content
Vec::new(), // tags
None, // ocr_text
None, // ocr_confidence
None, // ocr_word_count
None, // ocr_processing_time_ms
"storage_error".to_string(),
"storage".to_string(),
None, // existing_document_id
request.source_type.unwrap_or_else(|| "upload".to_string()),
Some(e.to_string()),
None, // retry_count
).await {
let failed_document = crate::models::FailedDocument {
id: Uuid::new_v4(),
user_id: request.user_id,
filename: request.filename.clone(),
original_filename: Some(request.original_filename.clone()),
original_path: None,
file_path: None, // couldn't save
file_size: Some(file_size),
file_hash: Some(file_hash.clone()),
mime_type: Some(request.mime_type.clone()),
content: None,
tags: Vec::new(),
ocr_text: None,
ocr_confidence: None,
ocr_word_count: None,
ocr_processing_time_ms: None,
failure_reason: "storage_error".to_string(),
failure_stage: "storage".to_string(),
existing_document_id: None,
ingestion_source: request.source_type.unwrap_or_else(|| "upload".to_string()),
error_message: Some(e.to_string()),
retry_count: Some(0),
last_retry_at: None,
created_at: Utc::now(),
updated_at: Utc::now(),
};
if let Err(failed_err) = self.db.create_failed_document(failed_document).await {
warn!("Failed to create failed document record for storage error: {}", failed_err);
}
@ -238,28 +245,34 @@ impl DocumentIngestionService {
request.filename, &file_hash[..8], e);
// Create failed document record for database creation failure
if let Err(failed_err) = self.db.create_failed_document(
request.user_id,
request.filename.clone(),
Some(request.original_filename.clone()),
None, // original_path
Some(file_path.clone()), // file was saved successfully
Some(file_size),
Some(file_hash.clone()),
Some(request.mime_type.clone()),
None, // content
Vec::new(), // tags
None, // ocr_text
None, // ocr_confidence
None, // ocr_word_count
None, // ocr_processing_time_ms
"database_error".to_string(),
"ingestion".to_string(),
None, // existing_document_id
request.source_type.unwrap_or_else(|| "upload".to_string()),
Some(e.to_string()),
None, // retry_count
).await {
let failed_document = crate::models::FailedDocument {
id: Uuid::new_v4(),
user_id: request.user_id,
filename: request.filename.clone(),
original_filename: Some(request.original_filename.clone()),
original_path: None,
file_path: Some(file_path.clone()), // file was saved successfully
file_size: Some(file_size),
file_hash: Some(file_hash.clone()),
mime_type: Some(request.mime_type.clone()),
content: None,
tags: Vec::new(),
ocr_text: None,
ocr_confidence: None,
ocr_word_count: None,
ocr_processing_time_ms: None,
failure_reason: "database_error".to_string(),
failure_stage: "ingestion".to_string(),
existing_document_id: None,
ingestion_source: request.source_type.unwrap_or_else(|| "upload".to_string()),
error_message: Some(e.to_string()),
retry_count: Some(0),
last_retry_at: None,
created_at: Utc::now(),
updated_at: Utc::now(),
};
if let Err(failed_err) = self.db.create_failed_document(failed_document).await {
warn!("Failed to create failed document record for database error: {}", failed_err);
}

View File

@ -837,28 +837,34 @@ impl OcrQueueService {
let file_hash: Option<String> = row.get("file_hash");
// Create failed document record directly
if let Err(e) = self.db.create_failed_document(
let failed_document = crate::models::FailedDocument {
id: Uuid::new_v4(),
user_id,
filename,
Some(original_filename),
None, // original_path
Some(file_path),
Some(file_size),
original_filename: Some(original_filename),
original_path: None,
file_path: Some(file_path),
file_size: Some(file_size),
file_hash,
Some(mime_type),
None, // content
Vec::new(), // tags
None, // ocr_text
None, // ocr_confidence
None, // ocr_word_count
None, // ocr_processing_time_ms
failure_reason.to_string(),
"ocr".to_string(),
None, // existing_document_id
"ocr_queue".to_string(),
Some(error_message.to_string()),
Some(retry_count),
).await {
mime_type: Some(mime_type),
content: None,
tags: Vec::new(),
ocr_text: None,
ocr_confidence: None,
ocr_word_count: None,
ocr_processing_time_ms: None,
failure_reason: failure_reason.to_string(),
failure_stage: "ocr".to_string(),
existing_document_id: None,
ingestion_source: "ocr_queue".to_string(),
error_message: Some(error_message.to_string()),
retry_count: Some(retry_count),
last_retry_at: None,
created_at: Utc::now(),
updated_at: Utc::now(),
};
if let Err(e) = self.db.create_failed_document(failed_document).await {
error!("Failed to create failed document record: {}", e);
}
}
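
The struct literals at these three call sites imply a model roughly like the sketch below. Field types are inferred from the values bound in this commit and should be read as assumptions, not the crate's verbatim definition:

```rust
use chrono::{DateTime, Utc};
use uuid::Uuid;

// Field types inferred from the literals in this commit (assumptions).
pub struct FailedDocument {
    pub id: Uuid,
    pub user_id: Uuid,
    pub filename: String,
    pub original_filename: Option<String>,
    pub original_path: Option<String>,
    pub file_path: Option<String>,
    pub file_size: Option<i64>,
    pub file_hash: Option<String>,
    pub mime_type: Option<String>,
    pub content: Option<String>,
    pub tags: Vec<String>,
    pub ocr_text: Option<String>,
    pub ocr_confidence: Option<f32>,
    pub ocr_word_count: Option<i32>,
    pub ocr_processing_time_ms: Option<i64>,
    pub failure_reason: String,
    pub failure_stage: String,
    pub existing_document_id: Option<Uuid>,
    pub ingestion_source: String,
    pub error_message: Option<String>,
    pub retry_count: Option<i32>,
    pub last_retry_at: Option<DateTime<Utc>>,
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
}
```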

View File

@ -75,21 +75,28 @@ pub async fn upload_document(
info!("Uploading document: {} ({} bytes)", filename, data.len());
// Create ingestion service
let file_service = FileService::new(state.config.clone());
let file_service = FileService::new(state.config.upload_path.clone());
let ingestion_service = DocumentIngestionService::new(
state.db.clone(),
file_service,
state.config.clone(),
);
match ingestion_service.ingest_document(
data,
&filename,
&content_type,
auth_user.user.id,
"web_upload".to_string(),
).await {
Ok(IngestionResult::Success(document)) => {
let request = crate::ingestion::document_ingestion::DocumentIngestionRequest {
file_data: data,
filename: filename.clone(),
original_filename: filename,
mime_type: content_type,
user_id: auth_user.user.id,
source_type: Some("web_upload".to_string()),
source_id: None,
deduplication_policy: crate::ingestion::document_ingestion::DeduplicationPolicy::Skip,
original_created_at: None,
original_modified_at: None,
source_metadata: None,
};
match ingestion_service.ingest_document(request).await {
Ok(IngestionResult::Created(document)) => {
info!("Document uploaded successfully: {}", document.id);
Ok(Json(DocumentUploadResponse {
document_id: document.id,
@ -100,7 +107,7 @@ pub async fn upload_document(
message: "Document uploaded successfully".to_string(),
}))
}
Ok(IngestionResult::Duplicate(existing_doc)) => {
Ok(IngestionResult::ExistingDocument(existing_doc)) => {
warn!("Duplicate document upload attempted: {}", existing_doc.id);
Ok(Json(DocumentUploadResponse {
document_id: existing_doc.id,
@ -111,9 +118,13 @@ pub async fn upload_document(
message: "Document already exists".to_string(),
}))
}
Ok(IngestionResult::Failed(failed_doc)) => {
error!("Document ingestion failed: {}", failed_doc.error_message.as_deref().unwrap_or("Unknown error"));
Err(StatusCode::UNPROCESSABLE_ENTITY)
Ok(IngestionResult::Skipped { existing_document_id, reason }) => {
info!("Document upload skipped - {}: {}", reason, existing_document_id);
Err(StatusCode::CONFLICT)
}
Ok(IngestionResult::TrackedAsDuplicate { existing_document_id }) => {
info!("Document tracked as duplicate: {}", existing_document_id);
Err(StatusCode::CONFLICT)
}
Err(e) => {
error!("Failed to ingest document: {}", e);
@ -303,7 +314,7 @@ pub async fn delete_document(
}
// Delete associated files
let file_service = FileService::new(state.config.clone());
let file_service = FileService::new(state.config.upload_path.clone());
if let Err(e) = file_service.delete_document_files(&document).await {
warn!("Failed to delete files for document {}: {}", document_id, e);
// Continue anyway - database deletion succeeded
@ -346,9 +357,9 @@ pub async fn download_document(
})?
.ok_or(StatusCode::NOT_FOUND)?;
let file_service = FileService::new(state.config.clone());
let file_service = FileService::new(state.config.upload_path.clone());
let file_data = file_service
.read_document_file(&document)
.read_file(&document.file_path)
.await
.map_err(|e| {
error!("Failed to read document file {}: {}", document_id, e);
@ -403,9 +414,9 @@ pub async fn view_document(
})?
.ok_or(StatusCode::NOT_FOUND)?;
let file_service = FileService::new(state.config.clone());
let file_service = FileService::new(state.config.upload_path.clone());
let file_data = file_service
.read_document_file(&document)
.read_file(&document.file_path)
.await
.map_err(|e| {
error!("Failed to read document file {}: {}", document_id, e);

View File

@ -7,6 +7,7 @@ use axum::{
use std::sync::Arc;
use tracing::{debug, error, info, warn};
use std::collections::HashMap;
use sqlx::Row;
use crate::{
auth::AuthUser,

View File

@ -106,7 +106,7 @@ pub async fn retry_ocr(
}
// Add to OCR queue
match state.ocr_queue.enqueue_document(document.id, auth_user.user.id, 1).await {
match state.queue_service.enqueue_document(document.id, auth_user.user.id, 1).await {
Ok(_) => {
info!("Document {} queued for OCR retry", document_id);
Ok(Json(serde_json::json!({
@ -187,7 +187,7 @@ pub async fn cancel_ocr(
.ok_or(StatusCode::NOT_FOUND)?;
// Try to remove from queue
match state.ocr_queue.remove_from_queue(document_id).await {
match state.queue_service.remove_from_queue(document_id).await {
Ok(removed) => {
if removed {
info!("Document {} removed from OCR queue", document_id);

View File

@ -1,14 +1,14 @@
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use utoipa::{ToSchema, IntoParams};
#[derive(Deserialize, ToSchema)]
#[derive(Deserialize, ToSchema, IntoParams)]
pub struct PaginationQuery {
pub limit: Option<i64>,
pub offset: Option<i64>,
pub ocr_status: Option<String>,
}
#[derive(Deserialize, ToSchema)]
#[derive(Deserialize, ToSchema, IntoParams)]
pub struct FailedDocumentsQuery {
pub limit: Option<i64>,
pub offset: Option<i64>,
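
Deriving `IntoParams` is what lets utoipa expand these structs into documented query parameters. A hypothetical handler showing the pattern — the path, response type, and handler name are illustrative, not from this codebase:

```rust
use axum::{extract::Query, Json};

#[utoipa::path(
    get,
    path = "/api/documents",
    // Expands into the OpenAPI spec only because PaginationQuery derives IntoParams
    params(PaginationQuery),
    responses((status = 200, description = "Paginated document list"))
)]
pub async fn list_documents(Query(pagination): Query<PaginationQuery>) -> Json<serde_json::Value> {
    // Handler body elided
    unimplemented!()
}
```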

View File

@ -98,11 +98,15 @@ async fn enhanced_search_documents(
// Generate suggestions before moving search_request
let suggestions = generate_search_suggestions(&search_request.query);
let (documents, total, query_time) = state
let start_time = std::time::Instant::now();
let documents = state
.db
.enhanced_search_documents_with_role(auth_user.user.id, auth_user.user.role, search_request)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let query_time = start_time.elapsed().as_millis() as u64;
let total = documents.len() as u64;
let response = SearchResponse {
documents,
@ -173,14 +177,8 @@ async fn get_search_facets(
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let response = SearchFacetsResponse {
mime_types: mime_type_facets
.into_iter()
.map(|(value, count)| FacetItem { value, count })
.collect(),
tags: tag_facets
.into_iter()
.map(|(value, count)| FacetItem { value, count })
.collect(),
mime_types: mime_type_facets,
tags: tag_facets,
};
Ok(Json(response))
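
The deleted `.map(|(value, count)| FacetItem { value, count })` calls are the tell: the database layer now returns `FacetItem` values directly instead of `(String, i64)` tuples. A sketch of the type under that assumption:

```rust
use serde::Serialize;

// Assumed shape, matching the removed tuple-to-struct mapping.
#[derive(Serialize)]
pub struct FacetItem {
    pub value: String,
    pub count: i64,
}
```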

View File

@ -43,7 +43,7 @@ pub async fn list_sources(
// Get document counts for all sources in one query
let counts = state
.db
.count_documents_for_sources(&source_ids)
.count_documents_for_sources(auth_user.user.id, &source_ids)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
@ -145,14 +145,14 @@ pub async fn get_source(
// Get recent documents for this source
let recent_documents = state
.db
.get_recent_documents_for_source(source_id, 10)
.get_recent_documents_for_source(auth_user.user.id, source_id, 10)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
// Get document counts
let (total_documents, total_documents_ocr) = state
.db
.count_documents_for_source(source_id)
.count_documents_for_source(auth_user.user.id, source_id)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
@ -237,7 +237,7 @@ pub async fn update_source(
// Get document counts
let (total_documents, total_documents_ocr) = state
.db
.count_documents_for_source(source_id)
.count_documents_for_source(auth_user.user.id, source_id)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
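
All three count queries now take `auth_user.user.id`, scoping the statistics to the requesting user. A minimal sketch of what the scoped query might look like, assuming Postgres; the table and column names are assumptions for illustration:

```rust
use sqlx::Row;
use uuid::Uuid;

impl Database {
    // Illustrative only: table and column names are assumptions.
    pub async fn count_documents_for_source(
        &self,
        user_id: Uuid,
        source_id: Uuid,
    ) -> Result<(i64, i64), sqlx::Error> {
        let row = sqlx::query(
            r#"SELECT COUNT(*) AS total,
                      COUNT(*) FILTER (WHERE ocr_status = 'completed') AS total_ocr
               FROM documents
               WHERE user_id = $1 AND source_id = $2"#,
        )
        .bind(user_id)
        .bind(source_id)
        .fetch_one(&self.pool)
        .await?;
        Ok((row.get("total"), row.get("total_ocr")))
    }
}
```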

View File

@ -100,7 +100,7 @@ async fn estimate_webdav_crawl_internal(
// Create WebDAV service and estimate crawl
match crate::services::webdav::WebDAVService::new(webdav_config) {
Ok(webdav_service) => {
match webdav_service.estimate_crawl(&config.watch_folders).await {
match webdav_service.estimate_crawl().await {
Ok(estimate) => Ok(Json(serde_json::to_value(estimate).unwrap())),
Err(e) => Ok(Json(serde_json::json!({
"error": format!("Crawl estimation failed: {}", e),

View File

@ -271,7 +271,7 @@ pub async fn trigger_deep_scan(
let start_time = chrono::Utc::now();
// Use guaranteed completeness deep scan method
match webdav_service.deep_scan_with_guaranteed_completeness(user_id, &state_clone).await {
match webdav_service.discover_all_files().await {
Ok(all_discovered_files) => {
info!("Deep scan with guaranteed completeness discovered {} files", all_discovered_files.len());

View File

@ -57,16 +57,17 @@ pub async fn test_connection(
let config: crate::models::WebDAVSourceConfig = serde_json::from_value(source.config)
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
match crate::services::webdav::test_webdav_connection(
&config.server_url,
&config.username,
&config.password,
)
.await
{
Ok(success) => Ok(Json(serde_json::json!({
"success": success,
"message": if success { "Connection successful" } else { "Connection failed" }
let test_config = crate::models::WebDAVTestConnection {
server_url: config.server_url,
username: config.username,
password: config.password,
server_type: config.server_type,
};
match crate::services::webdav::test_webdav_connection(&test_config).await {
Ok(result) => Ok(Json(serde_json::json!({
"success": result.success,
"message": result.message
}))),
Err(e) => Ok(Json(serde_json::json!({
"success": false,
@ -152,16 +153,17 @@ pub async fn test_connection_with_config(
let config: crate::models::WebDAVSourceConfig = serde_json::from_value(request.config)
.map_err(|_| StatusCode::BAD_REQUEST)?;
match crate::services::webdav::test_webdav_connection(
&config.server_url,
&config.username,
&config.password,
)
.await
{
Ok(success) => Ok(Json(serde_json::json!({
"success": success,
"message": if success { "WebDAV connection successful" } else { "WebDAV connection failed" }
let test_config = crate::models::WebDAVTestConnection {
server_url: config.server_url,
username: config.username,
password: config.password,
server_type: config.server_type,
};
match crate::services::webdav::test_webdav_connection(&test_config).await {
Ok(result) => Ok(Json(serde_json::json!({
"success": result.success,
"message": result.message
}))),
Err(e) => Ok(Json(serde_json::json!({
"success": false,

View File

@ -106,7 +106,7 @@ async fn test_webdav_connection(
// Create WebDAV service and test connection
match WebDAVService::new(webdav_config) {
Ok(webdav_service) => {
match webdav_service.test_connection(test_config).await {
match WebDAVService::test_connection_with_config(test_config).await {
Ok(result) => {
info!("WebDAV connection test completed: {}", result.message);
Ok(Json(result))
@ -182,7 +182,7 @@ async fn estimate_webdav_crawl(
// Create WebDAV service and estimate crawl
match WebDAVService::new(webdav_config) {
Ok(webdav_service) => {
match webdav_service.estimate_crawl(&folders).await {
match webdav_service.estimate_crawl().await {
Ok(estimate) => {
info!("Crawl estimation completed: {} total files, {} supported files",
estimate.total_files, estimate.total_supported_files);
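
Both call sites drop the folders argument, which implies `estimate_crawl` now reads the watch folders from the service's own config. A sketch under that assumption; `CrawlEstimate` is an inferred name for the existing result type carrying `total_files` and `total_supported_files`:

```rust
impl WebDAVService {
    // Sketch: folders now come from the stored WebDAVConfig.
    pub async fn estimate_crawl(&self) -> anyhow::Result<CrawlEstimate> {
        let folders = self.config.watch_folders.clone();
        // Sample each folder and extrapolate file counts; elided.
        todo!()
    }
}
```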

View File

@ -115,7 +115,7 @@ async fn perform_sync_internal(
}
// Discover files in the folder
match webdav_service.discover_files_in_folder(folder_path).await {
match webdav_service.discover_files_in_directory(folder_path, true).await {
Ok(files) => {
info!("Found {} files in folder {}", files.len(), folder_path);

View File

@ -678,7 +678,7 @@ impl SourceScheduler {
let source_clone = source.clone();
let state_clone = state.clone();
tokio::spawn(async move {
match webdav_service.deep_scan_with_guaranteed_completeness(source_clone.user_id, &state_clone).await {
match webdav_service.discover_all_files().await {
Ok(files) => {
info!("🎉 Automatic deep scan completed for {}: {} files found", source_clone.name, files.len());
@ -970,7 +970,7 @@ impl SourceScheduler {
server_type: config.server_type,
};
webdav_service.test_connection(test_config).await
crate::services::webdav::WebDAVService::test_connection_with_config(test_config).await
.map_err(|e| format!("Connection test failed: {}", e))?;
Ok(())

View File

@ -126,8 +126,8 @@ impl SourceSyncService {
let service = webdav_service.clone();
let state_clone = self.state.clone();
async move {
info!("🚀 Using optimized WebDAV discovery for: {}", folder_path);
let result = service.discover_files_in_folder_optimized(&folder_path, source.user_id, &state_clone).await;
info!("🚀 Using WebDAV discovery for: {}", folder_path);
let result = service.discover_files_in_directory(&folder_path, true).await;
match &result {
Ok(files) => {
if files.is_empty() {
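
The `*_optimized` variant that took a user id and app state is gone; both the scheduler and the sync service now call `discover_files_in_directory(path, true)`. Judging by a later hunk in the WebDAV service, it presumably delegates to the shared discovery module; a sketch under that assumption:

```rust
impl WebDAVService {
    // Assumed wrapper; `recursive: true` restores the old deep-traversal behavior.
    pub async fn discover_files_in_directory(
        &self,
        directory_path: &str,
        recursive: bool,
    ) -> anyhow::Result<Vec<FileInfo>> {
        self.discovery.discover_files(directory_path, recursive).await
    }
}
```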

View File

@ -7,6 +7,7 @@ use tracing::{debug, error, info, warn};
use crate::models::{WebDAVConnectionResult, WebDAVTestConnection};
use super::config::{WebDAVConfig, RetryConfig};
#[derive(Clone)]
pub struct WebDAVConnection {
client: Client,
config: WebDAVConfig,
@ -186,8 +187,7 @@ impl WebDAVConnection {
</D:propfind>"#;
let response = self.client
.request(Method::from_bytes(b"PROPFIND")?)
.url(&url)
.request(Method::from_bytes(b"PROPFIND")?, &url)
.basic_auth(&self.config.username, Some(&self.config.password))
.header("Depth", "1")
.header("Content-Type", "application/xml")

View File

@ -154,25 +154,27 @@ impl WebDAVDiscovery {
.await?;
let body = response.text().await?;
let (files, directories) = parse_propfind_response_with_directories(&body)?;
let all_items = parse_propfind_response_with_directories(&body)?;
// Filter files by supported extensions
let filtered_files: Vec<FileInfo> = files
.into_iter()
.filter(|file| self.config.is_supported_extension(&file.name))
.collect();
// Convert directory paths to full paths
let full_dir_paths: Vec<String> = directories
.into_iter()
.map(|dir| {
if directory_path == "/" {
format!("/{}", dir.trim_start_matches('/'))
// Separate files and directories
let mut filtered_files = Vec::new();
let mut subdirectory_paths = Vec::new();
for item in all_items {
if item.is_directory {
// Convert directory path to full path
let full_path = if directory_path == "/" {
format!("/{}", item.path.trim_start_matches('/'))
} else {
format!("{}/{}", directory_path.trim_end_matches('/'), dir.trim_start_matches('/'))
}
})
.collect();
format!("{}/{}", directory_path.trim_end_matches('/'), item.path.trim_start_matches('/'))
};
subdirectory_paths.push(full_path);
} else if self.config.is_supported_extension(&item.name) {
filtered_files.push(item);
}
}
let full_dir_paths = subdirectory_paths;
debug!("Directory '{}': {} files, {} subdirectories",
directory_path, filtered_files.len(), full_dir_paths.len());
@ -294,9 +296,16 @@ impl WebDAVDiscovery {
.await?;
let body = response.text().await?;
let (_, directories) = parse_propfind_response_with_directories(&body)?;
let all_items = parse_propfind_response_with_directories(&body)?;
Ok(directories)
// Filter out only directories and extract their paths
let directory_paths: Vec<String> = all_items
.into_iter()
.filter(|item| item.is_directory)
.map(|item| item.path)
.collect();
Ok(directory_paths)
}
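
Both hunks replace the `(files, directories)` tuple with one flat list whose entries carry an `is_directory` flag. The usage above implies the parsed items look roughly like this (shape inferred, not verbatim; other fields elided):

```rust
// Inferred shape of the parsed PROPFIND entries (assumptions).
pub struct FileInfo {
    pub name: String,
    pub path: String,
    pub is_directory: bool,
}

fn parse_propfind_response_with_directories(body: &str) -> anyhow::Result<Vec<FileInfo>> {
    // XML multistatus parsing elided
    todo!()
}
```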
/// Calculates the ratio of supported files in a sample

View File

@ -150,8 +150,39 @@ impl WebDAVService {
self.discovery.discover_files(directory_path, recursive).await
}
/// Downloads a file from WebDAV server
pub async fn download_file(&self, file_info: &FileInfo) -> Result<Vec<u8>> {
/// Downloads a file from WebDAV server by path
pub async fn download_file(&self, file_path: &str) -> Result<Vec<u8>> {
let _permit = self.download_semaphore.acquire().await?;
debug!("⬇️ Downloading file: {}", file_path);
let url = self.connection.get_url_for_path(file_path);
let response = self.connection
.authenticated_request(
reqwest::Method::GET,
&url,
None,
None,
)
.await?;
if !response.status().is_success() {
return Err(anyhow!(
"Failed to download file '{}': HTTP {}",
file_path,
response.status()
));
}
let content = response.bytes().await?;
debug!("✅ Downloaded {} bytes for file: {}", content.len(), file_path);
Ok(content.to_vec())
}
/// Downloads a file from WebDAV server using FileInfo
pub async fn download_file_info(&self, file_info: &FileInfo) -> Result<Vec<u8>> {
let _permit = self.download_semaphore.acquire().await?;
debug!("⬇️ Downloading file: {}", file_info.path);
@ -190,7 +221,7 @@ impl WebDAVService {
let service_clone = self.clone();
async move {
let result = service_clone.download_file(&file_clone).await;
let result = service_clone.download_file_info(&file_clone).await;
(file_clone, result)
}
});
@ -285,7 +316,7 @@ impl WebDAVService {
.map(|s| s.to_string());
Ok(ServerCapabilities {
dav_compliance: dav_header,
dav_compliance: dav_header.clone(),
allowed_methods: allow_header,
server_software: server_header,
supports_etag: dav_header.contains("1") || dav_header.contains("2"),

View File

@ -24,7 +24,7 @@ pub struct ValidationIssue {
pub detected_at: chrono::DateTime<chrono::Utc>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize, Eq, Hash, PartialEq)]
pub enum ValidationIssueType {
/// Directory exists on server but not in our tracking
Untracked,
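
Adding `Eq`, `Hash`, and `PartialEq` lets the issue type live in hashed collections. A hypothetical use — the `issue_type` field name is an assumption, since only `detected_at` is visible in this hunk:

```rust
use std::collections::HashMap;

// Group validation issues by type -- possible now that the enum is hashable.
let mut by_type: HashMap<ValidationIssueType, Vec<ValidationIssue>> = HashMap::new();
for issue in issues {
    by_type.entry(issue.issue_type.clone()).or_default().push(issue);
}
```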