fix(server): resolve type and function signature mismatches causing compilation errors

perf3ct 2025-07-04 00:53:32 +00:00
parent 0e84993afa
commit 497b34ce0a
19 changed files with 226 additions and 155 deletions

View File

@@ -10,6 +10,7 @@ use uuid::Uuid;
 use sha2::{Digest, Sha256};
 use tracing::{debug, info, warn};
 use serde_json;
+use chrono::Utc;
 use crate::models::{Document, FileInfo};
 use crate::db::Database;
@@ -164,28 +165,34 @@ impl DocumentIngestionService {
             warn!("Failed to save file {}: {}", request.filename, e);
             // Create failed document record for storage failure
-            if let Err(failed_err) = self.db.create_failed_document(
-                request.user_id,
-                request.filename.clone(),
-                Some(request.original_filename.clone()),
-                None, // original_path
-                None, // file_path (couldn't save)
-                Some(file_size),
-                Some(file_hash.clone()),
-                Some(request.mime_type.clone()),
-                None, // content
-                Vec::new(), // tags
-                None, // ocr_text
-                None, // ocr_confidence
-                None, // ocr_word_count
-                None, // ocr_processing_time_ms
-                "storage_error".to_string(),
-                "storage".to_string(),
-                None, // existing_document_id
-                request.source_type.unwrap_or_else(|| "upload".to_string()),
-                Some(e.to_string()),
-                None, // retry_count
-            ).await {
+            let failed_document = crate::models::FailedDocument {
+                id: Uuid::new_v4(),
+                user_id: request.user_id,
+                filename: request.filename.clone(),
+                original_filename: Some(request.original_filename.clone()),
+                original_path: None,
+                file_path: None, // couldn't save
+                file_size: Some(file_size),
+                file_hash: Some(file_hash.clone()),
+                mime_type: Some(request.mime_type.clone()),
+                content: None,
+                tags: Vec::new(),
+                ocr_text: None,
+                ocr_confidence: None,
+                ocr_word_count: None,
+                ocr_processing_time_ms: None,
+                failure_reason: "storage_error".to_string(),
+                failure_stage: "storage".to_string(),
+                existing_document_id: None,
+                ingestion_source: request.source_type.unwrap_or_else(|| "upload".to_string()),
+                error_message: Some(e.to_string()),
+                retry_count: Some(0),
+                last_retry_at: None,
+                created_at: Utc::now(),
+                updated_at: Utc::now(),
+            };
+            if let Err(failed_err) = self.db.create_failed_document(failed_document).await {
                 warn!("Failed to create failed document record for storage error: {}", failed_err);
             }
@@ -238,28 +245,34 @@ impl DocumentIngestionService {
                 request.filename, &file_hash[..8], e);
             // Create failed document record for database creation failure
-            if let Err(failed_err) = self.db.create_failed_document(
-                request.user_id,
-                request.filename.clone(),
-                Some(request.original_filename.clone()),
-                None, // original_path
-                Some(file_path.clone()), // file was saved successfully
-                Some(file_size),
-                Some(file_hash.clone()),
-                Some(request.mime_type.clone()),
-                None, // content
-                Vec::new(), // tags
-                None, // ocr_text
-                None, // ocr_confidence
-                None, // ocr_word_count
-                None, // ocr_processing_time_ms
-                "database_error".to_string(),
-                "ingestion".to_string(),
-                None, // existing_document_id
-                request.source_type.unwrap_or_else(|| "upload".to_string()),
-                Some(e.to_string()),
-                None, // retry_count
-            ).await {
+            let failed_document = crate::models::FailedDocument {
+                id: Uuid::new_v4(),
+                user_id: request.user_id,
+                filename: request.filename.clone(),
+                original_filename: Some(request.original_filename.clone()),
+                original_path: None,
+                file_path: Some(file_path.clone()), // file was saved successfully
+                file_size: Some(file_size),
+                file_hash: Some(file_hash.clone()),
+                mime_type: Some(request.mime_type.clone()),
+                content: None,
+                tags: Vec::new(),
+                ocr_text: None,
+                ocr_confidence: None,
+                ocr_word_count: None,
+                ocr_processing_time_ms: None,
+                failure_reason: "database_error".to_string(),
+                failure_stage: "ingestion".to_string(),
+                existing_document_id: None,
+                ingestion_source: request.source_type.unwrap_or_else(|| "upload".to_string()),
+                error_message: Some(e.to_string()),
+                retry_count: Some(0),
+                last_retry_at: None,
+                created_at: Utc::now(),
+                updated_at: Utc::now(),
+            };
+            if let Err(failed_err) = self.db.create_failed_document(failed_document).await {
                 warn!("Failed to create failed document record for database error: {}", failed_err);
             }
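
Note: after this refactor, call sites build a crate::models::FailedDocument value and hand it to create_failed_document, instead of passing twenty-odd positional arguments whose order the compiler cannot check. A minimal sketch of a call site under the new API; record_storage_failure is a hypothetical helper, and the field set is taken from the hunks above:

    use chrono::Utc;
    use uuid::Uuid;

    // Hypothetical helper illustrating the struct-based API from this commit.
    async fn record_storage_failure(
        db: &crate::db::Database,
        user_id: Uuid,
        filename: String,
        err: &anyhow::Error,
    ) -> anyhow::Result<()> {
        let failed_document = crate::models::FailedDocument {
            id: Uuid::new_v4(),
            user_id,
            filename,
            original_filename: None,
            original_path: None,
            file_path: None,
            file_size: None,
            file_hash: None,
            mime_type: None,
            content: None,
            tags: Vec::new(),
            ocr_text: None,
            ocr_confidence: None,
            ocr_word_count: None,
            ocr_processing_time_ms: None,
            failure_reason: "storage_error".to_string(),
            failure_stage: "storage".to_string(),
            existing_document_id: None,
            ingestion_source: "upload".to_string(),
            error_message: Some(err.to_string()),
            retry_count: Some(0),
            last_retry_at: None,
            created_at: Utc::now(),
            updated_at: Utc::now(),
        };
        db.create_failed_document(failed_document).await?;
        Ok(())
    }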

View File

@@ -837,28 +837,34 @@ impl OcrQueueService {
         let file_hash: Option<String> = row.get("file_hash");
         // Create failed document record directly
-        if let Err(e) = self.db.create_failed_document(
-            user_id,
-            filename,
-            Some(original_filename),
-            None, // original_path
-            Some(file_path),
-            Some(file_size),
-            file_hash,
-            Some(mime_type),
-            None, // content
-            Vec::new(), // tags
-            None, // ocr_text
-            None, // ocr_confidence
-            None, // ocr_word_count
-            None, // ocr_processing_time_ms
-            failure_reason.to_string(),
-            "ocr".to_string(),
-            None, // existing_document_id
-            "ocr_queue".to_string(),
-            Some(error_message.to_string()),
-            Some(retry_count),
-        ).await {
+        let failed_document = crate::models::FailedDocument {
+            id: Uuid::new_v4(),
+            user_id,
+            filename,
+            original_filename: Some(original_filename),
+            original_path: None,
+            file_path: Some(file_path),
+            file_size: Some(file_size),
+            file_hash,
+            mime_type: Some(mime_type),
+            content: None,
+            tags: Vec::new(),
+            ocr_text: None,
+            ocr_confidence: None,
+            ocr_word_count: None,
+            ocr_processing_time_ms: None,
+            failure_reason: failure_reason.to_string(),
+            failure_stage: "ocr".to_string(),
+            existing_document_id: None,
+            ingestion_source: "ocr_queue".to_string(),
+            error_message: Some(error_message.to_string()),
+            retry_count: Some(retry_count),
+            last_retry_at: None,
+            created_at: Utc::now(),
+            updated_at: Utc::now(),
+        };
+        if let Err(e) = self.db.create_failed_document(failed_document).await {
             error!("Failed to create failed document record: {}", e);
         }
     }

View File

@@ -75,21 +75,28 @@ pub async fn upload_document(
     info!("Uploading document: {} ({} bytes)", filename, data.len());
     // Create ingestion service
-    let file_service = FileService::new(state.config.clone());
+    let file_service = FileService::new(state.config.upload_path.clone());
     let ingestion_service = DocumentIngestionService::new(
         state.db.clone(),
         file_service,
-        state.config.clone(),
     );
-    match ingestion_service.ingest_document(
-        data,
-        &filename,
-        &content_type,
-        auth_user.user.id,
-        "web_upload".to_string(),
-    ).await {
-        Ok(IngestionResult::Success(document)) => {
+    let request = crate::ingestion::document_ingestion::DocumentIngestionRequest {
+        file_data: data,
+        filename: filename.clone(),
+        original_filename: filename,
+        mime_type: content_type,
+        user_id: auth_user.user.id,
+        source_type: Some("web_upload".to_string()),
+        source_id: None,
+        deduplication_policy: crate::ingestion::document_ingestion::DeduplicationPolicy::Skip,
+        original_created_at: None,
+        original_modified_at: None,
+        source_metadata: None,
+    };
+    match ingestion_service.ingest_document(request).await {
+        Ok(IngestionResult::Created(document)) => {
             info!("Document uploaded successfully: {}", document.id);
             Ok(Json(DocumentUploadResponse {
                 document_id: document.id,
@@ -100,7 +107,7 @@ pub async fn upload_document(
                 message: "Document uploaded successfully".to_string(),
             }))
         }
-        Ok(IngestionResult::Duplicate(existing_doc)) => {
+        Ok(IngestionResult::ExistingDocument(existing_doc)) => {
             warn!("Duplicate document upload attempted: {}", existing_doc.id);
             Ok(Json(DocumentUploadResponse {
                 document_id: existing_doc.id,
@@ -111,9 +118,13 @@ pub async fn upload_document(
                 message: "Document already exists".to_string(),
             }))
         }
-        Ok(IngestionResult::Failed(failed_doc)) => {
-            error!("Document ingestion failed: {}", failed_doc.error_message.as_deref().unwrap_or("Unknown error"));
-            Err(StatusCode::UNPROCESSABLE_ENTITY)
+        Ok(IngestionResult::Skipped { existing_document_id, reason }) => {
+            info!("Document upload skipped - {}: {}", reason, existing_document_id);
+            Err(StatusCode::CONFLICT)
+        }
+        Ok(IngestionResult::TrackedAsDuplicate { existing_document_id }) => {
+            info!("Document tracked as duplicate: {}", existing_document_id);
+            Err(StatusCode::CONFLICT)
        }
        Err(e) => {
            error!("Failed to ingest document: {}", e);
@@ -303,7 +314,7 @@ pub async fn delete_document(
     }
     // Delete associated files
-    let file_service = FileService::new(state.config.clone());
+    let file_service = FileService::new(state.config.upload_path.clone());
     if let Err(e) = file_service.delete_document_files(&document).await {
         warn!("Failed to delete files for document {}: {}", document_id, e);
         // Continue anyway - database deletion succeeded
@@ -346,9 +357,9 @@ pub async fn download_document(
         })?
         .ok_or(StatusCode::NOT_FOUND)?;
-    let file_service = FileService::new(state.config.clone());
+    let file_service = FileService::new(state.config.upload_path.clone());
     let file_data = file_service
-        .read_document_file(&document)
+        .read_file(&document.file_path)
         .await
         .map_err(|e| {
             error!("Failed to read document file {}: {}", document_id, e);
@@ -403,9 +414,9 @@ pub async fn view_document(
         })?
         .ok_or(StatusCode::NOT_FOUND)?;
-    let file_service = FileService::new(state.config.clone());
+    let file_service = FileService::new(state.config.upload_path.clone());
     let file_data = file_service
-        .read_document_file(&document)
+        .read_file(&document.file_path)
         .await
         .map_err(|e| {
             error!("Failed to read document file {}: {}", document_id, e);

View File

@@ -7,6 +7,7 @@ use axum::{
 use std::sync::Arc;
 use tracing::{debug, error, info, warn};
 use std::collections::HashMap;
+use sqlx::Row;
 use crate::{
     auth::AuthUser,

View File

@@ -106,7 +106,7 @@ pub async fn retry_ocr(
     }
     // Add to OCR queue
-    match state.ocr_queue.enqueue_document(document.id, auth_user.user.id, 1).await {
+    match state.queue_service.enqueue_document(document.id, auth_user.user.id, 1).await {
         Ok(_) => {
             info!("Document {} queued for OCR retry", document_id);
             Ok(Json(serde_json::json!({
@@ -187,7 +187,7 @@ pub async fn cancel_ocr(
         .ok_or(StatusCode::NOT_FOUND)?;
     // Try to remove from queue
-    match state.ocr_queue.remove_from_queue(document_id).await {
+    match state.queue_service.remove_from_queue(document_id).await {
         Ok(removed) => {
             if removed {
                 info!("Document {} removed from OCR queue", document_id);

View File

@@ -1,14 +1,14 @@
 use serde::{Deserialize, Serialize};
-use utoipa::ToSchema;
+use utoipa::{ToSchema, IntoParams};
-#[derive(Deserialize, ToSchema)]
+#[derive(Deserialize, ToSchema, IntoParams)]
 pub struct PaginationQuery {
     pub limit: Option<i64>,
     pub offset: Option<i64>,
     pub ocr_status: Option<String>,
 }
-#[derive(Deserialize, ToSchema)]
+#[derive(Deserialize, ToSchema, IntoParams)]
 pub struct FailedDocumentsQuery {
     pub limit: Option<i64>,
     pub offset: Option<i64>,
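
Note: deriving IntoParams is what lets utoipa document these structs as query parameters when a handler references them in a params(...) clause. A minimal sketch of the intended use; the path and handler below are illustrative, not taken from this commit:

    use axum::extract::Query;

    #[utoipa::path(
        get,
        path = "/api/documents",
        params(PaginationQuery), // requires `IntoParams` on the struct
        responses((status = 200, description = "List documents"))
    )]
    async fn list_documents(Query(query): Query<PaginationQuery>) {
        // ...
    }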

View File

@@ -98,12 +98,16 @@ async fn enhanced_search_documents(
     // Generate suggestions before moving search_request
     let suggestions = generate_search_suggestions(&search_request.query);
-    let (documents, total, query_time) = state
+    let start_time = std::time::Instant::now();
+    let documents = state
         .db
         .enhanced_search_documents_with_role(auth_user.user.id, auth_user.user.role, search_request)
         .await
         .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
+    let query_time = start_time.elapsed().as_millis() as u64;
+    let total = documents.len() as u64;
     let response = SearchResponse {
         documents,
         total,
@@ -173,14 +177,8 @@ async fn get_search_facets(
         .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
     let response = SearchFacetsResponse {
-        mime_types: mime_type_facets
-            .into_iter()
-            .map(|(value, count)| FacetItem { value, count })
-            .collect(),
-        tags: tag_facets
-            .into_iter()
-            .map(|(value, count)| FacetItem { value, count })
-            .collect(),
+        mime_types: mime_type_facets,
+        tags: tag_facets,
     };
     Ok(Json(response))
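
Note: the handler no longer maps (value, count) tuples into FacetItem, so this only compiles if the database layer now returns Vec<FacetItem> directly. For reference, the shape of FacetItem implied by the removed mapping; the count type is an assumption, as the diff does not show the declaration:

    pub struct FacetItem {
        pub value: String,
        pub count: i64, // assumed integer type
    }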

View File

@@ -43,7 +43,7 @@ pub async fn list_sources(
     // Get document counts for all sources in one query
     let counts = state
         .db
-        .count_documents_for_sources(&source_ids)
+        .count_documents_for_sources(auth_user.user.id, &source_ids)
         .await
         .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
@@ -145,14 +145,14 @@ pub async fn get_source(
     // Get recent documents for this source
     let recent_documents = state
         .db
-        .get_recent_documents_for_source(source_id, 10)
+        .get_recent_documents_for_source(auth_user.user.id, source_id, 10)
         .await
         .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
     // Get document counts
     let (total_documents, total_documents_ocr) = state
         .db
-        .count_documents_for_source(source_id)
+        .count_documents_for_source(auth_user.user.id, source_id)
         .await
         .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
@@ -237,7 +237,7 @@ pub async fn update_source(
     // Get document counts
     let (total_documents, total_documents_ocr) = state
         .db
-        .count_documents_for_source(source_id)
+        .count_documents_for_source(auth_user.user.id, source_id)
         .await
         .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

View File

@@ -100,7 +100,7 @@ async fn estimate_webdav_crawl_internal(
     // Create WebDAV service and estimate crawl
     match crate::services::webdav::WebDAVService::new(webdav_config) {
         Ok(webdav_service) => {
-            match webdav_service.estimate_crawl(&config.watch_folders).await {
+            match webdav_service.estimate_crawl().await {
                 Ok(estimate) => Ok(Json(serde_json::to_value(estimate).unwrap())),
                 Err(e) => Ok(Json(serde_json::json!({
                     "error": format!("Crawl estimation failed: {}", e),

View File

@@ -271,7 +271,7 @@ pub async fn trigger_deep_scan(
     let start_time = chrono::Utc::now();
     // Use guaranteed completeness deep scan method
-    match webdav_service.deep_scan_with_guaranteed_completeness(user_id, &state_clone).await {
+    match webdav_service.discover_all_files().await {
         Ok(all_discovered_files) => {
             info!("Deep scan with guaranteed completeness discovered {} files", all_discovered_files.len());

View File

@@ -57,16 +57,17 @@ pub async fn test_connection(
     let config: crate::models::WebDAVSourceConfig = serde_json::from_value(source.config)
         .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
-    match crate::services::webdav::test_webdav_connection(
-        &config.server_url,
-        &config.username,
-        &config.password,
-    )
-    .await
-    {
-        Ok(success) => Ok(Json(serde_json::json!({
-            "success": success,
-            "message": if success { "Connection successful" } else { "Connection failed" }
+    let test_config = crate::models::WebDAVTestConnection {
+        server_url: config.server_url,
+        username: config.username,
+        password: config.password,
+        server_type: config.server_type,
+    };
+    match crate::services::webdav::test_webdav_connection(&test_config).await {
+        Ok(result) => Ok(Json(serde_json::json!({
+            "success": result.success,
+            "message": result.message
         }))),
         Err(e) => Ok(Json(serde_json::json!({
             "success": false,
@@ -152,16 +153,17 @@ pub async fn test_connection_with_config(
     let config: crate::models::WebDAVSourceConfig = serde_json::from_value(request.config)
         .map_err(|_| StatusCode::BAD_REQUEST)?;
-    match crate::services::webdav::test_webdav_connection(
-        &config.server_url,
-        &config.username,
-        &config.password,
-    )
-    .await
-    {
-        Ok(success) => Ok(Json(serde_json::json!({
-            "success": success,
-            "message": if success { "WebDAV connection successful" } else { "WebDAV connection failed" }
+    let test_config = crate::models::WebDAVTestConnection {
+        server_url: config.server_url,
+        username: config.username,
+        password: config.password,
+        server_type: config.server_type,
+    };
+    match crate::services::webdav::test_webdav_connection(&test_config).await {
+        Ok(result) => Ok(Json(serde_json::json!({
+            "success": result.success,
+            "message": result.message
         }))),
         Err(e) => Ok(Json(serde_json::json!({
             "success": false,

View File

@@ -106,7 +106,7 @@ async fn test_webdav_connection(
     // Create WebDAV service and test connection
     match WebDAVService::new(webdav_config) {
         Ok(webdav_service) => {
-            match webdav_service.test_connection(test_config).await {
+            match WebDAVService::test_connection_with_config(test_config).await {
                 Ok(result) => {
                     info!("WebDAV connection test completed: {}", result.message);
                     Ok(Json(result))
@@ -182,7 +182,7 @@ async fn estimate_webdav_crawl(
     // Create WebDAV service and estimate crawl
     match WebDAVService::new(webdav_config) {
         Ok(webdav_service) => {
-            match webdav_service.estimate_crawl(&folders).await {
+            match webdav_service.estimate_crawl().await {
                 Ok(estimate) => {
                     info!("Crawl estimation completed: {} total files, {} supported files",
                         estimate.total_files, estimate.total_supported_files);

View File

@@ -115,7 +115,7 @@ async fn perform_sync_internal(
     }
     // Discover files in the folder
-    match webdav_service.discover_files_in_folder(folder_path).await {
+    match webdav_service.discover_files_in_directory(folder_path, true).await {
         Ok(files) => {
             info!("Found {} files in folder {}", files.len(), folder_path);

View File

@@ -678,7 +678,7 @@ impl SourceScheduler {
     let source_clone = source.clone();
     let state_clone = state.clone();
     tokio::spawn(async move {
-        match webdav_service.deep_scan_with_guaranteed_completeness(source_clone.user_id, &state_clone).await {
+        match webdav_service.discover_all_files().await {
             Ok(files) => {
                 info!("🎉 Automatic deep scan completed for {}: {} files found", source_clone.name, files.len());
@@ -970,7 +970,7 @@ impl SourceScheduler {
         server_type: config.server_type,
     };
-    webdav_service.test_connection(test_config).await
+    crate::services::webdav::WebDAVService::test_connection_with_config(test_config).await
         .map_err(|e| format!("Connection test failed: {}", e))?;
     Ok(())

View File

@@ -126,8 +126,8 @@ impl SourceSyncService {
     let service = webdav_service.clone();
     let state_clone = self.state.clone();
     async move {
-        info!("🚀 Using optimized WebDAV discovery for: {}", folder_path);
-        let result = service.discover_files_in_folder_optimized(&folder_path, source.user_id, &state_clone).await;
+        info!("🚀 Using WebDAV discovery for: {}", folder_path);
+        let result = service.discover_files_in_directory(&folder_path, true).await;
         match &result {
             Ok(files) => {
                 if files.is_empty() {

View File

@@ -7,6 +7,7 @@ use tracing::{debug, error, info, warn};
 use crate::models::{WebDAVConnectionResult, WebDAVTestConnection};
 use super::config::{WebDAVConfig, RetryConfig};
+#[derive(Clone)]
 pub struct WebDAVConnection {
     client: Client,
     config: WebDAVConfig,
@@ -186,8 +187,7 @@ impl WebDAVConnection {
         </D:propfind>"#;
         let response = self.client
-            .request(Method::from_bytes(b"PROPFIND")?)
-            .url(&url)
+            .request(Method::from_bytes(b"PROPFIND")?, &url)
             .basic_auth(&self.config.username, Some(&self.config.password))
             .header("Depth", "1")
             .header("Content-Type", "application/xml")

View File

@@ -154,25 +154,27 @@ impl WebDAVDiscovery {
             .await?;
         let body = response.text().await?;
-        let (files, directories) = parse_propfind_response_with_directories(&body)?;
+        let all_items = parse_propfind_response_with_directories(&body)?;
-        // Filter files by supported extensions
-        let filtered_files: Vec<FileInfo> = files
-            .into_iter()
-            .filter(|file| self.config.is_supported_extension(&file.name))
-            .collect();
-        // Convert directory paths to full paths
-        let full_dir_paths: Vec<String> = directories
-            .into_iter()
-            .map(|dir| {
-                if directory_path == "/" {
-                    format!("/{}", dir.trim_start_matches('/'))
-                } else {
-                    format!("{}/{}", directory_path.trim_end_matches('/'), dir.trim_start_matches('/'))
-                }
-            })
-            .collect();
+        // Separate files and directories
+        let mut filtered_files = Vec::new();
+        let mut subdirectory_paths = Vec::new();
+        for item in all_items {
+            if item.is_directory {
+                // Convert directory path to full path
+                let full_path = if directory_path == "/" {
+                    format!("/{}", item.path.trim_start_matches('/'))
+                } else {
+                    format!("{}/{}", directory_path.trim_end_matches('/'), item.path.trim_start_matches('/'))
+                };
+                subdirectory_paths.push(full_path);
+            } else if self.config.is_supported_extension(&item.name) {
+                filtered_files.push(item);
+            }
+        }
+        let full_dir_paths = subdirectory_paths;
         debug!("Directory '{}': {} files, {} subdirectories",
             directory_path, filtered_files.len(), full_dir_paths.len());
@@ -294,9 +296,16 @@ impl WebDAVDiscovery {
             .await?;
         let body = response.text().await?;
-        let (_, directories) = parse_propfind_response_with_directories(&body)?;
-        Ok(directories)
+        let all_items = parse_propfind_response_with_directories(&body)?;
+        // Filter out only directories and extract their paths
+        let directory_paths: Vec<String> = all_items
+            .into_iter()
+            .filter(|item| item.is_directory)
+            .map(|item| item.path)
+            .collect();
+        Ok(directory_paths)
     }
     /// Calculates the ratio of supported files in a sample

View File

@@ -150,8 +150,39 @@ impl WebDAVService {
         self.discovery.discover_files(directory_path, recursive).await
     }
-    /// Downloads a file from WebDAV server
-    pub async fn download_file(&self, file_info: &FileInfo) -> Result<Vec<u8>> {
+    /// Downloads a file from WebDAV server by path
+    pub async fn download_file(&self, file_path: &str) -> Result<Vec<u8>> {
+        let _permit = self.download_semaphore.acquire().await?;
+        debug!("⬇️ Downloading file: {}", file_path);
+        let url = self.connection.get_url_for_path(file_path);
+        let response = self.connection
+            .authenticated_request(
+                reqwest::Method::GET,
+                &url,
+                None,
+                None,
+            )
+            .await?;
+        if !response.status().is_success() {
+            return Err(anyhow!(
+                "Failed to download file '{}': HTTP {}",
+                file_path,
+                response.status()
+            ));
+        }
+        let content = response.bytes().await?;
+        debug!("✅ Downloaded {} bytes for file: {}", content.len(), file_path);
+        Ok(content.to_vec())
+    }
+    /// Downloads a file from WebDAV server using FileInfo
+    pub async fn download_file_info(&self, file_info: &FileInfo) -> Result<Vec<u8>> {
         let _permit = self.download_semaphore.acquire().await?;
         debug!("⬇️ Downloading file: {}", file_info.path);
@@ -190,7 +221,7 @@ impl WebDAVService {
         let service_clone = self.clone();
         async move {
-            let result = service_clone.download_file(&file_clone).await;
+            let result = service_clone.download_file_info(&file_clone).await;
             (file_clone, result)
         }
     });
@@ -285,7 +316,7 @@ impl WebDAVService {
         .map(|s| s.to_string());
     Ok(ServerCapabilities {
-        dav_compliance: dav_header,
+        dav_compliance: dav_header.clone(),
         allowed_methods: allow_header,
         server_software: server_header,
         supports_etag: dav_header.contains("1") || dav_header.contains("2"),
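
Note: the download API is now split; download_file takes a raw path, while download_file_info keeps the FileInfo-based behavior used by the batch path above. A usage sketch under that assumption; the folder and file paths are illustrative:

    async fn sync_folder(webdav_service: &WebDAVService) -> anyhow::Result<()> {
        // Ad-hoc download by path:
        let bytes = webdav_service.download_file("/Documents/report.pdf").await?;
        debug!("fetched {} bytes", bytes.len());

        // Discovery-driven loop using the FileInfo-based variant:
        for file in webdav_service.discover_files_in_directory("/Documents", true).await? {
            let content = webdav_service.download_file_info(&file).await?;
            // ... hand `content` off to ingestion ...
        }
        Ok(())
    }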

View File

@@ -24,7 +24,7 @@ pub struct ValidationIssue {
     pub detected_at: chrono::DateTime<chrono::Utc>,
 }
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, Eq, Hash, PartialEq)]
 pub enum ValidationIssueType {
     /// Directory exists on server but not in our tracking
     Untracked,
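
Note: adding Eq, Hash, and PartialEq makes ValidationIssueType usable as a key in hash-based collections, which is the usual reason for these derives. A sketch of grouping issues by type under that assumption; the issue_type field name is hypothetical:

    use std::collections::HashMap;

    fn count_by_type(issues: &[ValidationIssue]) -> HashMap<ValidationIssueType, usize> {
        let mut counts = HashMap::new();
        for issue in issues {
            // `issue_type` is an assumed field name on ValidationIssue.
            *counts.entry(issue.issue_type.clone()).or_insert(0) += 1;
        }
        counts
    }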