From 497b34ce0a403e1247755c8ed520bcb903dffee3 Mon Sep 17 00:00:00 2001
From: perf3ct
Date: Fri, 4 Jul 2025 00:53:32 +0000
Subject: [PATCH] fix(server): resolve type and function signature mismatches that broke compilation

---
 src/ingestion/document_ingestion.rs | 101 ++++++++++++++++------------
 src/ocr/queue.rs                    |  44 ++++++------
 src/routes/documents/crud.rs        |  49 ++++++++------
 src/routes/documents/failed.rs      |   1 +
 src/routes/documents/ocr.rs         |   4 +-
 src/routes/documents/types.rs       |   6 +-
 src/routes/search.rs                |  16 ++---
 src/routes/sources/crud.rs          |   8 +--
 src/routes/sources/estimation.rs    |   2 +-
 src/routes/sources/sync.rs          |   2 +-
 src/routes/sources/validation.rs    |  42 ++++++------
 src/routes/webdav.rs                |   4 +-
 src/routes/webdav/webdav_sync.rs    |   2 +-
 src/scheduling/source_scheduler.rs  |   4 +-
 src/scheduling/source_sync.rs       |   4 +-
 src/services/webdav/connection.rs   |   4 +-
 src/services/webdav/discovery.rs    |  47 +++++++------
 src/services/webdav/service.rs      |  39 +++++++++--
 src/services/webdav/validation.rs   |   2 +-
 19 files changed, 226 insertions(+), 155 deletions(-)

diff --git a/src/ingestion/document_ingestion.rs b/src/ingestion/document_ingestion.rs
index 7c7d96c..1b30e92 100644
--- a/src/ingestion/document_ingestion.rs
+++ b/src/ingestion/document_ingestion.rs
@@ -10,6 +10,7 @@ use uuid::Uuid;
 use sha2::{Digest, Sha256};
 use tracing::{debug, info, warn};
 use serde_json;
+use chrono::Utc;
 
 use crate::models::{Document, FileInfo};
 use crate::db::Database;
@@ -164,28 +165,34 @@ impl DocumentIngestionService {
             warn!("Failed to save file {}: {}", request.filename, e);
 
             // Create failed document record for storage failure
-            if let Err(failed_err) = self.db.create_failed_document(
-                request.user_id,
-                request.filename.clone(),
-                Some(request.original_filename.clone()),
-                None, // original_path
-                None, // file_path (couldn't save)
-                Some(file_size),
-                Some(file_hash.clone()),
-                Some(request.mime_type.clone()),
-                None, // content
-                Vec::new(), // tags
-                None, // ocr_text
-                None, // ocr_confidence
-                None, // ocr_word_count
-                None, // ocr_processing_time_ms
-                "storage_error".to_string(),
-                "storage".to_string(),
-                None, // existing_document_id
-                request.source_type.unwrap_or_else(|| "upload".to_string()),
-                Some(e.to_string()),
-                None, // retry_count
-            ).await {
+            let failed_document = crate::models::FailedDocument {
+                id: Uuid::new_v4(),
+                user_id: request.user_id,
+                filename: request.filename.clone(),
+                original_filename: Some(request.original_filename.clone()),
+                original_path: None,
+                file_path: None, // couldn't save
+                file_size: Some(file_size),
+                file_hash: Some(file_hash.clone()),
+                mime_type: Some(request.mime_type.clone()),
+                content: None,
+                tags: Vec::new(),
+                ocr_text: None,
+                ocr_confidence: None,
+                ocr_word_count: None,
+                ocr_processing_time_ms: None,
+                failure_reason: "storage_error".to_string(),
+                failure_stage: "storage".to_string(),
+                existing_document_id: None,
+                ingestion_source: request.source_type.unwrap_or_else(|| "upload".to_string()),
+                error_message: Some(e.to_string()),
+                retry_count: Some(0),
+                last_retry_at: None,
+                created_at: Utc::now(),
+                updated_at: Utc::now(),
+            };
+
+            if let Err(failed_err) = self.db.create_failed_document(failed_document).await {
                 warn!("Failed to create failed document record for storage error: {}", failed_err);
             }
 
@@ -238,28 +245,34 @@ impl DocumentIngestionService {
                 request.filename, &file_hash[..8], e);
 
             // Create failed document record for database creation failure
-            if let Err(failed_err) = self.db.create_failed_document(
-                request.user_id,
-                request.filename.clone(),
-                Some(request.original_filename.clone()),
-                None, // original_path
-                Some(file_path.clone()), // file was saved successfully
-                Some(file_size),
-                Some(file_hash.clone()),
-                Some(request.mime_type.clone()),
-                None, // content
-                Vec::new(), // tags
-                None, // ocr_text
-                None, // ocr_confidence
-                None, // ocr_word_count
-                None, // ocr_processing_time_ms
-                "database_error".to_string(),
-                "ingestion".to_string(),
-                None, // existing_document_id
-                request.source_type.unwrap_or_else(|| "upload".to_string()),
-                Some(e.to_string()),
-                None, // retry_count
-            ).await {
+            let failed_document = crate::models::FailedDocument {
+                id: Uuid::new_v4(),
+                user_id: request.user_id,
+                filename: request.filename.clone(),
+                original_filename: Some(request.original_filename.clone()),
+                original_path: None,
+                file_path: Some(file_path.clone()), // file was saved successfully
+                file_size: Some(file_size),
+                file_hash: Some(file_hash.clone()),
+                mime_type: Some(request.mime_type.clone()),
+                content: None,
+                tags: Vec::new(),
+                ocr_text: None,
+                ocr_confidence: None,
+                ocr_word_count: None,
+                ocr_processing_time_ms: None,
+                failure_reason: "database_error".to_string(),
+                failure_stage: "ingestion".to_string(),
+                existing_document_id: None,
+                ingestion_source: request.source_type.unwrap_or_else(|| "upload".to_string()),
+                error_message: Some(e.to_string()),
+                retry_count: Some(0),
+                last_retry_at: None,
+                created_at: Utc::now(),
+                updated_at: Utc::now(),
+            };
+
+            if let Err(failed_err) = self.db.create_failed_document(failed_document).await {
                 warn!("Failed to create failed document record for database error: {}", failed_err);
             }

diff --git a/src/ocr/queue.rs b/src/ocr/queue.rs
index 2d32e86..00b7101 100644
--- a/src/ocr/queue.rs
+++ b/src/ocr/queue.rs
@@ -837,28 +837,34 @@ impl OcrQueueService {
         let file_hash: Option<String> = row.get("file_hash");
 
         // Create failed document record directly
-        if let Err(e) = self.db.create_failed_document(
+        let failed_document = crate::models::FailedDocument {
+            id: Uuid::new_v4(),
             user_id,
             filename,
-            Some(original_filename),
-            None, // original_path
-            Some(file_path),
-            Some(file_size),
+            original_filename: Some(original_filename),
+            original_path: None,
+            file_path: Some(file_path),
+            file_size: Some(file_size),
             file_hash,
-            Some(mime_type),
-            None, // content
-            Vec::new(), // tags
-            None, // ocr_text
-            None, // ocr_confidence
-            None, // ocr_word_count
-            None, // ocr_processing_time_ms
-            failure_reason.to_string(),
-            "ocr".to_string(),
-            None, // existing_document_id
-            "ocr_queue".to_string(),
-            Some(error_message.to_string()),
-            Some(retry_count),
-        ).await {
+            mime_type: Some(mime_type),
+            content: None,
+            tags: Vec::new(),
+            ocr_text: None,
+            ocr_confidence: None,
+            ocr_word_count: None,
+            ocr_processing_time_ms: None,
+            failure_reason: failure_reason.to_string(),
+            failure_stage: "ocr".to_string(),
+            existing_document_id: None,
+            ingestion_source: "ocr_queue".to_string(),
+            error_message: Some(error_message.to_string()),
+            retry_count: Some(retry_count),
+            last_retry_at: None,
+            created_at: Utc::now(),
+            updated_at: Utc::now(),
+        };
+
+        if let Err(e) = self.db.create_failed_document(failed_document).await {
             error!("Failed to create failed document record: {}", e);
         }
     }

diff --git a/src/routes/documents/crud.rs b/src/routes/documents/crud.rs
index 051a9ac..1bbcb57 100644
--- a/src/routes/documents/crud.rs
+++ b/src/routes/documents/crud.rs
@@ -75,21 +75,28 @@ pub async fn upload_document(
     info!("Uploading document: {} ({} bytes)", filename, data.len());
 
     // Create ingestion service
-    let file_service = FileService::new(state.config.clone());
+    let file_service = FileService::new(state.config.upload_path.clone());
     let ingestion_service = DocumentIngestionService::new(
         state.db.clone(),
         file_service,
-        state.config.clone(),
     );
 
-    match ingestion_service.ingest_document(
-        data,
-        &filename,
-        &content_type,
-        auth_user.user.id,
-        "web_upload".to_string(),
-    ).await {
-        Ok(IngestionResult::Success(document)) => {
+    let request = crate::ingestion::document_ingestion::DocumentIngestionRequest {
+        file_data: data,
+        filename: filename.clone(),
+        original_filename: filename,
+        mime_type: content_type,
+        user_id: auth_user.user.id,
+        source_type: Some("web_upload".to_string()),
+        source_id: None,
+        deduplication_policy: crate::ingestion::document_ingestion::DeduplicationPolicy::Skip,
+        original_created_at: None,
+        original_modified_at: None,
+        source_metadata: None,
+    };
+
+    match ingestion_service.ingest_document(request).await {
+        Ok(IngestionResult::Created(document)) => {
             info!("Document uploaded successfully: {}", document.id);
             Ok(Json(DocumentUploadResponse {
                 document_id: document.id,
@@ -100,7 +107,7 @@ pub async fn upload_document(
                 message: "Document uploaded successfully".to_string(),
             }))
         }
-        Ok(IngestionResult::Duplicate(existing_doc)) => {
+        Ok(IngestionResult::ExistingDocument(existing_doc)) => {
             warn!("Duplicate document upload attempted: {}", existing_doc.id);
             Ok(Json(DocumentUploadResponse {
                 document_id: existing_doc.id,
@@ -111,9 +118,13 @@ pub async fn upload_document(
                 message: "Document already exists".to_string(),
             }))
         }
-        Ok(IngestionResult::Failed(failed_doc)) => {
-            error!("Document ingestion failed: {}", failed_doc.error_message.as_deref().unwrap_or("Unknown error"));
-            Err(StatusCode::UNPROCESSABLE_ENTITY)
+        Ok(IngestionResult::Skipped { existing_document_id, reason }) => {
+            info!("Document upload skipped - {}: {}", reason, existing_document_id);
+            Err(StatusCode::CONFLICT)
+        }
+        Ok(IngestionResult::TrackedAsDuplicate { existing_document_id }) => {
+            info!("Document tracked as duplicate: {}", existing_document_id);
+            Err(StatusCode::CONFLICT)
         }
         Err(e) => {
             error!("Failed to ingest document: {}", e);
@@ -303,7 +314,7 @@ pub async fn delete_document(
     }
 
     // Delete associated files
-    let file_service = FileService::new(state.config.clone());
+    let file_service = FileService::new(state.config.upload_path.clone());
     if let Err(e) = file_service.delete_document_files(&document).await {
         warn!("Failed to delete files for document {}: {}", document_id, e);
         // Continue anyway - database deletion succeeded
@@ -346,9 +357,9 @@ pub async fn download_document(
         })?
         .ok_or(StatusCode::NOT_FOUND)?;
 
-    let file_service = FileService::new(state.config.clone());
+    let file_service = FileService::new(state.config.upload_path.clone());
     let file_data = file_service
-        .read_document_file(&document)
+        .read_file(&document.file_path)
         .await
         .map_err(|e| {
             error!("Failed to read document file {}: {}", document_id, e);
@@ -403,9 +414,9 @@ pub async fn view_document(
         })?
         .ok_or(StatusCode::NOT_FOUND)?;
 
-    let file_service = FileService::new(state.config.clone());
+    let file_service = FileService::new(state.config.upload_path.clone());
     let file_data = file_service
-        .read_document_file(&document)
+        .read_file(&document.file_path)
         .await
         .map_err(|e| {
             error!("Failed to read document file {}: {}", document_id, e);

diff --git a/src/routes/documents/failed.rs b/src/routes/documents/failed.rs
index aaa698b..bd64b45 100644
--- a/src/routes/documents/failed.rs
+++ b/src/routes/documents/failed.rs
@@ -7,6 +7,7 @@ use axum::{
 use std::sync::Arc;
 use tracing::{debug, error, info, warn};
 use std::collections::HashMap;
+use sqlx::Row;
 
 use crate::{
     auth::AuthUser,

diff --git a/src/routes/documents/ocr.rs b/src/routes/documents/ocr.rs
index a250a56..fb4b697 100644
--- a/src/routes/documents/ocr.rs
+++ b/src/routes/documents/ocr.rs
@@ -106,7 +106,7 @@ pub async fn retry_ocr(
     }
 
     // Add to OCR queue
-    match state.ocr_queue.enqueue_document(document.id, auth_user.user.id, 1).await {
+    match state.queue_service.enqueue_document(document.id, auth_user.user.id, 1).await {
         Ok(_) => {
             info!("Document {} queued for OCR retry", document_id);
             Ok(Json(serde_json::json!({
@@ -187,7 +187,7 @@ pub async fn cancel_ocr(
         .ok_or(StatusCode::NOT_FOUND)?;
 
     // Try to remove from queue
-    match state.ocr_queue.remove_from_queue(document_id).await {
+    match state.queue_service.remove_from_queue(document_id).await {
         Ok(removed) => {
             if removed {
                 info!("Document {} removed from OCR queue", document_id);

diff --git a/src/routes/documents/types.rs b/src/routes/documents/types.rs
index 73377c6..17639b6 100644
--- a/src/routes/documents/types.rs
+++ b/src/routes/documents/types.rs
@@ -1,14 +1,14 @@
 use serde::{Deserialize, Serialize};
-use utoipa::ToSchema;
+use utoipa::{ToSchema, IntoParams};
 
-#[derive(Deserialize, ToSchema)]
+#[derive(Deserialize, ToSchema, IntoParams)]
 pub struct PaginationQuery {
     pub limit: Option<i64>,
     pub offset: Option<i64>,
     pub ocr_status: Option<String>,
 }
 
-#[derive(Deserialize, ToSchema)]
+#[derive(Deserialize, ToSchema, IntoParams)]
 pub struct FailedDocumentsQuery {
     pub limit: Option<i64>,
     pub offset: Option<i64>,

diff --git a/src/routes/search.rs b/src/routes/search.rs
index 5d43534..9b85e40 100644
--- a/src/routes/search.rs
+++ b/src/routes/search.rs
@@ -98,11 +98,15 @@ async fn enhanced_search_documents(
     // Generate suggestions before moving search_request
     let suggestions = generate_search_suggestions(&search_request.query);
 
-    let (documents, total, query_time) = state
+    let start_time = std::time::Instant::now();
+    let documents = state
         .db
         .enhanced_search_documents_with_role(auth_user.user.id, auth_user.user.role, search_request)
         .await
         .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
+
+    let query_time = start_time.elapsed().as_millis() as u64;
+    let total = documents.len() as u64;
 
     let response = SearchResponse {
         documents,
@@ -173,14 +177,8 @@ async fn get_search_facets(
         .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
 
     let response = SearchFacetsResponse {
-        mime_types: mime_type_facets
-            .into_iter()
-            .map(|(value, count)| FacetItem { value, count })
-            .collect(),
-        tags: tag_facets
-            .into_iter()
-            .map(|(value, count)| FacetItem { value, count })
-            .collect(),
+        mime_types: mime_type_facets,
+        tags: tag_facets,
     };
 
     Ok(Json(response))

diff --git a/src/routes/sources/crud.rs b/src/routes/sources/crud.rs
index cb5421f..ae070f7 100644
--- a/src/routes/sources/crud.rs
+++ b/src/routes/sources/crud.rs
@@ -43,7 +43,7 @@ pub async fn list_sources(
     // Get document counts for all sources in one query
let counts = state .db - .count_documents_for_sources(&source_ids) + .count_documents_for_sources(auth_user.user.id, &source_ids) .await .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; @@ -145,14 +145,14 @@ pub async fn get_source( // Get recent documents for this source let recent_documents = state .db - .get_recent_documents_for_source(source_id, 10) + .get_recent_documents_for_source(auth_user.user.id, source_id, 10) .await .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; // Get document counts let (total_documents, total_documents_ocr) = state .db - .count_documents_for_source(source_id) + .count_documents_for_source(auth_user.user.id, source_id) .await .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; @@ -237,7 +237,7 @@ pub async fn update_source( // Get document counts let (total_documents, total_documents_ocr) = state .db - .count_documents_for_source(source_id) + .count_documents_for_source(auth_user.user.id, source_id) .await .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; diff --git a/src/routes/sources/estimation.rs b/src/routes/sources/estimation.rs index ecb630d..1c5279b 100644 --- a/src/routes/sources/estimation.rs +++ b/src/routes/sources/estimation.rs @@ -100,7 +100,7 @@ async fn estimate_webdav_crawl_internal( // Create WebDAV service and estimate crawl match crate::services::webdav::WebDAVService::new(webdav_config) { Ok(webdav_service) => { - match webdav_service.estimate_crawl(&config.watch_folders).await { + match webdav_service.estimate_crawl().await { Ok(estimate) => Ok(Json(serde_json::to_value(estimate).unwrap())), Err(e) => Ok(Json(serde_json::json!({ "error": format!("Crawl estimation failed: {}", e), diff --git a/src/routes/sources/sync.rs b/src/routes/sources/sync.rs index a99d367..c7ff818 100644 --- a/src/routes/sources/sync.rs +++ b/src/routes/sources/sync.rs @@ -271,7 +271,7 @@ pub async fn trigger_deep_scan( let start_time = chrono::Utc::now(); // Use guaranteed completeness deep scan method - match webdav_service.deep_scan_with_guaranteed_completeness(user_id, &state_clone).await { + match webdav_service.discover_all_files().await { Ok(all_discovered_files) => { info!("Deep scan with guaranteed completeness discovered {} files", all_discovered_files.len()); diff --git a/src/routes/sources/validation.rs b/src/routes/sources/validation.rs index c047c2b..3b19719 100644 --- a/src/routes/sources/validation.rs +++ b/src/routes/sources/validation.rs @@ -57,16 +57,17 @@ pub async fn test_connection( let config: crate::models::WebDAVSourceConfig = serde_json::from_value(source.config) .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; - match crate::services::webdav::test_webdav_connection( - &config.server_url, - &config.username, - &config.password, - ) - .await - { - Ok(success) => Ok(Json(serde_json::json!({ - "success": success, - "message": if success { "Connection successful" } else { "Connection failed" } + let test_config = crate::models::WebDAVTestConnection { + server_url: config.server_url, + username: config.username, + password: config.password, + server_type: config.server_type, + }; + + match crate::services::webdav::test_webdav_connection(&test_config).await { + Ok(result) => Ok(Json(serde_json::json!({ + "success": result.success, + "message": result.message }))), Err(e) => Ok(Json(serde_json::json!({ "success": false, @@ -152,16 +153,17 @@ pub async fn test_connection_with_config( let config: crate::models::WebDAVSourceConfig = serde_json::from_value(request.config) .map_err(|_| StatusCode::BAD_REQUEST)?; - match 
crate::services::webdav::test_webdav_connection( - &config.server_url, - &config.username, - &config.password, - ) - .await - { - Ok(success) => Ok(Json(serde_json::json!({ - "success": success, - "message": if success { "WebDAV connection successful" } else { "WebDAV connection failed" } + let test_config = crate::models::WebDAVTestConnection { + server_url: config.server_url, + username: config.username, + password: config.password, + server_type: config.server_type, + }; + + match crate::services::webdav::test_webdav_connection(&test_config).await { + Ok(result) => Ok(Json(serde_json::json!({ + "success": result.success, + "message": result.message }))), Err(e) => Ok(Json(serde_json::json!({ "success": false, diff --git a/src/routes/webdav.rs b/src/routes/webdav.rs index eebcaa7..524647d 100644 --- a/src/routes/webdav.rs +++ b/src/routes/webdav.rs @@ -106,7 +106,7 @@ async fn test_webdav_connection( // Create WebDAV service and test connection match WebDAVService::new(webdav_config) { Ok(webdav_service) => { - match webdav_service.test_connection(test_config).await { + match WebDAVService::test_connection_with_config(test_config).await { Ok(result) => { info!("WebDAV connection test completed: {}", result.message); Ok(Json(result)) @@ -182,7 +182,7 @@ async fn estimate_webdav_crawl( // Create WebDAV service and estimate crawl match WebDAVService::new(webdav_config) { Ok(webdav_service) => { - match webdav_service.estimate_crawl(&folders).await { + match webdav_service.estimate_crawl().await { Ok(estimate) => { info!("Crawl estimation completed: {} total files, {} supported files", estimate.total_files, estimate.total_supported_files); diff --git a/src/routes/webdav/webdav_sync.rs b/src/routes/webdav/webdav_sync.rs index 9dc2656..4c2e94c 100644 --- a/src/routes/webdav/webdav_sync.rs +++ b/src/routes/webdav/webdav_sync.rs @@ -115,7 +115,7 @@ async fn perform_sync_internal( } // Discover files in the folder - match webdav_service.discover_files_in_folder(folder_path).await { + match webdav_service.discover_files_in_directory(folder_path, true).await { Ok(files) => { info!("Found {} files in folder {}", files.len(), folder_path); diff --git a/src/scheduling/source_scheduler.rs b/src/scheduling/source_scheduler.rs index ca74ec2..96f2733 100644 --- a/src/scheduling/source_scheduler.rs +++ b/src/scheduling/source_scheduler.rs @@ -678,7 +678,7 @@ impl SourceScheduler { let source_clone = source.clone(); let state_clone = state.clone(); tokio::spawn(async move { - match webdav_service.deep_scan_with_guaranteed_completeness(source_clone.user_id, &state_clone).await { + match webdav_service.discover_all_files().await { Ok(files) => { info!("🎉 Automatic deep scan completed for {}: {} files found", source_clone.name, files.len()); @@ -970,7 +970,7 @@ impl SourceScheduler { server_type: config.server_type, }; - webdav_service.test_connection(test_config).await + crate::services::webdav::WebDAVService::test_connection_with_config(test_config).await .map_err(|e| format!("Connection test failed: {}", e))?; Ok(()) diff --git a/src/scheduling/source_sync.rs b/src/scheduling/source_sync.rs index de56f95..3336fc2 100644 --- a/src/scheduling/source_sync.rs +++ b/src/scheduling/source_sync.rs @@ -126,8 +126,8 @@ impl SourceSyncService { let service = webdav_service.clone(); let state_clone = self.state.clone(); async move { - info!("🚀 Using optimized WebDAV discovery for: {}", folder_path); - let result = service.discover_files_in_folder_optimized(&folder_path, source.user_id, &state_clone).await; + info!("🚀 
Using WebDAV discovery for: {}", folder_path);
+            let result = service.discover_files_in_directory(&folder_path, true).await;
             match &result {
                 Ok(files) => {
                     if files.is_empty() {

diff --git a/src/services/webdav/connection.rs b/src/services/webdav/connection.rs
index b8f59c8..bda8f85 100644
--- a/src/services/webdav/connection.rs
+++ b/src/services/webdav/connection.rs
@@ -7,6 +7,7 @@ use tracing::{debug, error, info, warn};
 use crate::models::{WebDAVConnectionResult, WebDAVTestConnection};
 use super::config::{WebDAVConfig, RetryConfig};
 
+#[derive(Clone)]
 pub struct WebDAVConnection {
     client: Client,
     config: WebDAVConfig,
@@ -186,8 +187,7 @@ impl WebDAVConnection {
         "#;
 
         let response = self.client
-            .request(Method::from_bytes(b"PROPFIND")?)
-            .url(&url)
+            .request(Method::from_bytes(b"PROPFIND")?, &url)
             .basic_auth(&self.config.username, Some(&self.config.password))
             .header("Depth", "1")
             .header("Content-Type", "application/xml")

diff --git a/src/services/webdav/discovery.rs b/src/services/webdav/discovery.rs
index b687e03..b3558a0 100644
--- a/src/services/webdav/discovery.rs
+++ b/src/services/webdav/discovery.rs
@@ -154,25 +154,27 @@ impl WebDAVDiscovery {
             .await?;
 
         let body = response.text().await?;
-        let (files, directories) = parse_propfind_response_with_directories(&body)?;
+        let all_items = parse_propfind_response_with_directories(&body)?;
 
-        // Filter files by supported extensions
-        let filtered_files: Vec<FileInfo> = files
-            .into_iter()
-            .filter(|file| self.config.is_supported_extension(&file.name))
-            .collect();
-
-        // Convert directory paths to full paths
-        let full_dir_paths: Vec<String> = directories
-            .into_iter()
-            .map(|dir| {
-                if directory_path == "/" {
-                    format!("/{}", dir.trim_start_matches('/'))
+        // Separate files and directories
+        let mut filtered_files = Vec::new();
+        let mut subdirectory_paths = Vec::new();
+
+        for item in all_items {
+            if item.is_directory {
+                // Convert directory path to full path
+                let full_path = if directory_path == "/" {
+                    format!("/{}", item.path.trim_start_matches('/'))
                 } else {
-                    format!("{}/{}", directory_path.trim_end_matches('/'), dir.trim_start_matches('/'))
-                }
-            })
-            .collect();
+                    format!("{}/{}", directory_path.trim_end_matches('/'), item.path.trim_start_matches('/'))
+                };
+                subdirectory_paths.push(full_path);
+            } else if self.config.is_supported_extension(&item.name) {
+                filtered_files.push(item);
+            }
+        }
+
+        let full_dir_paths = subdirectory_paths;
 
         debug!("Directory '{}': {} files, {} subdirectories", directory_path, filtered_files.len(), full_dir_paths.len());
 
@@ -294,9 +296,16 @@ impl WebDAVDiscovery {
             .await?;
 
         let body = response.text().await?;
-        let (_, directories) = parse_propfind_response_with_directories(&body)?;
+        let all_items = parse_propfind_response_with_directories(&body)?;
 
-        Ok(directories)
+        // Filter out only directories and extract their paths
+        let directory_paths: Vec<String> = all_items
+            .into_iter()
+            .filter(|item| item.is_directory)
+            .map(|item| item.path)
+            .collect();
+
+        Ok(directory_paths)
     }
 
     /// Calculates the ratio of supported files in a sample

diff --git a/src/services/webdav/service.rs b/src/services/webdav/service.rs
index 3205798..3a48758 100644
--- a/src/services/webdav/service.rs
+++ b/src/services/webdav/service.rs
@@ -150,8 +150,39 @@ impl WebDAVService {
         self.discovery.discover_files(directory_path, recursive).await
     }
 
-    /// Downloads a file from WebDAV server
-    pub async fn download_file(&self, file_info: &FileInfo) -> Result<Vec<u8>> {
+    /// Downloads a file from WebDAV server by path
+    pub async fn download_file(&self, file_path: &str) -> Result<Vec<u8>> {
+        let _permit = self.download_semaphore.acquire().await?;
+
+        debug!("⬇️ Downloading file: {}", file_path);
+
+        let url = self.connection.get_url_for_path(file_path);
+
+        let response = self.connection
+            .authenticated_request(
+                reqwest::Method::GET,
+                &url,
+                None,
+                None,
+            )
+            .await?;
+
+        if !response.status().is_success() {
+            return Err(anyhow!(
+                "Failed to download file '{}': HTTP {}",
+                file_path,
+                response.status()
+            ));
+        }
+
+        let content = response.bytes().await?;
+        debug!("✅ Downloaded {} bytes for file: {}", content.len(), file_path);
+
+        Ok(content.to_vec())
+    }
+
+    /// Downloads a file from WebDAV server using FileInfo
+    pub async fn download_file_info(&self, file_info: &FileInfo) -> Result<Vec<u8>> {
         let _permit = self.download_semaphore.acquire().await?;
 
         debug!("⬇️ Downloading file: {}", file_info.path);
 
@@ -190,7 +221,7 @@ impl WebDAVService {
             let service_clone = self.clone();
 
             async move {
-                let result = service_clone.download_file(&file_clone).await;
+                let result = service_clone.download_file_info(&file_clone).await;
                 (file_clone, result)
             }
         });
@@ -285,7 +316,7 @@ impl WebDAVService {
             .map(|s| s.to_string());
 
         Ok(ServerCapabilities {
-            dav_compliance: dav_header,
+            dav_compliance: dav_header.clone(),
             allowed_methods: allow_header,
             server_software: server_header,
             supports_etag: dav_header.contains("1") || dav_header.contains("2"),

diff --git a/src/services/webdav/validation.rs b/src/services/webdav/validation.rs
index f2652af..dc52999 100644
--- a/src/services/webdav/validation.rs
+++ b/src/services/webdav/validation.rs
@@ -24,7 +24,7 @@ pub struct ValidationIssue {
     pub detected_at: chrono::DateTime<chrono::Utc>,
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, Eq, Hash, PartialEq)]
 pub enum ValidationIssueType {
     /// Directory exists on server but not in our tracking
     Untracked,
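
Note on the central refactor: `Database::create_failed_document` previously took roughly twenty positional arguments, which is what let call sites drift out of sync with the signature and break compilation; the patch replaces them with a single `crate::models::FailedDocument` struct so every value is named and checked by the compiler. A minimal sketch of the pattern, using a hypothetical `Record` type and `save` function rather than this codebase's actual APIs:

    use uuid::Uuid;
    use chrono::{DateTime, Utc};

    // Hypothetical record type mirroring the FailedDocument change:
    // every field is spelled out at the construction site.
    struct Record {
        id: Uuid,
        filename: String,
        failure_reason: String,
        error_message: Option<String>,
        retry_count: Option<i32>,
        created_at: DateTime<Utc>,
    }

    // One struct parameter replaces a long positional list, so adding,
    // removing, or reordering fields cannot silently mis-bind arguments.
    fn save(record: Record) {
        println!("saving {} ({}): {}", record.filename, record.id, record.failure_reason);
    }

    fn main() {
        save(Record {
            id: Uuid::new_v4(),
            filename: "report.pdf".to_string(),
            failure_reason: "storage_error".to_string(),
            error_message: Some("disk full".to_string()),
            retry_count: Some(0),
            created_at: Utc::now(),
        });
    }

The same motivation drives the `WebDAVTestConnection` change in src/routes/sources/validation.rs: connection parameters travel as one named struct instead of three loose string arguments.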