use anyhow::{anyhow, Result}; use reqwest::{Client, Method, Response}; use std::sync::Arc; use std::time::{Duration, Instant}; use std::collections::{HashMap, HashSet}; use tokio::sync::Semaphore; use tokio::time::sleep; use futures_util::stream; use tracing::{debug, error, info, warn}; use serde::{Deserialize, Serialize}; use crate::models::{ FileIngestionInfo, WebDAVConnectionResult, WebDAVCrawlEstimate, WebDAVTestConnection, WebDAVFolderInfo, }; use crate::webdav_xml_parser::{parse_propfind_response, parse_propfind_response_with_directories}; use crate::mime_detection::{detect_mime_from_content, update_mime_type_with_content, MimeDetectionResult}; use super::{config::{WebDAVConfig, RetryConfig, ConcurrencyConfig}, SyncProgress}; /// Results from WebDAV discovery including both files and directories #[derive(Debug, Clone)] pub struct WebDAVDiscoveryResult { pub files: Vec, pub directories: Vec, } /// Result of downloading a file with MIME type detection #[derive(Debug, Clone)] pub struct WebDAVDownloadResult { pub content: Vec, pub file_info: FileIngestionInfo, pub mime_detection: MimeDetectionResult, pub mime_type_updated: bool, } /// Server capabilities information #[derive(Debug, Clone)] pub struct ServerCapabilities { pub dav_compliance: String, pub allowed_methods: String, pub server_software: Option, pub supports_etag: bool, pub supports_depth_infinity: bool, /// Infinity depth support verified through testing pub infinity_depth_tested: bool, /// Whether infinity depth actually works in practice pub infinity_depth_works: bool, /// Timestamp when capabilities were last checked pub last_checked: std::time::Instant, } /// Health status information #[derive(Debug, Clone)] pub struct HealthStatus { pub healthy: bool, pub message: String, pub response_time_ms: u64, pub details: Option, } /// Validation report structures #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidationReport { pub overall_health_score: i32, // 0-100 pub issues: Vec, pub recommendations: Vec, pub summary: ValidationSummary, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidationIssue { pub issue_type: ValidationIssueType, pub severity: ValidationSeverity, pub directory_path: String, pub description: String, pub details: Option, pub detected_at: chrono::DateTime, } #[derive(Debug, Clone, Serialize, Deserialize, Eq, Hash, PartialEq)] pub enum ValidationIssueType { /// Directory exists on server but not in our tracking Untracked, /// Directory in our tracking but missing on server Missing, /// ETag mismatch between server and our cache ETagMismatch, /// Directory hasn't been scanned in a very long time Stale, /// Server errors when accessing directory Inaccessible, /// ETag support seems unreliable for this directory ETagUnreliable, } #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ValidationSeverity { Info, // No action needed, just FYI Warning, // Should investigate but not urgent Error, // Needs immediate attention Critical, // System integrity at risk } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidationRecommendation { pub action: ValidationAction, pub reason: String, pub affected_directories: Vec, pub priority: ValidationSeverity, } #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ValidationAction { /// Run a deep scan of specific directories DeepScanRequired, /// Clear and rebuild directory tracking RebuildTracking, /// ETag support is unreliable, switch to periodic scans DisableETagOptimization, /// Clean up orphaned database entries CleanupDatabase, /// Server configuration issue needs attention CheckServerConfiguration, /// No action needed, system is healthy NoActionRequired, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidationSummary { pub total_directories_checked: usize, pub healthy_directories: usize, pub directories_with_issues: usize, pub critical_issues: usize, pub warning_issues: usize, pub info_issues: usize, pub validation_duration_ms: u64, } /// Main WebDAV service that handles all WebDAV operations in a single, unified interface pub struct WebDAVService { client: Client, config: WebDAVConfig, retry_config: RetryConfig, concurrency_config: ConcurrencyConfig, scan_semaphore: Arc, download_semaphore: Arc, /// Stores the working protocol (updated after successful protocol detection) working_protocol: Arc>>, } impl WebDAVService { /// Creates a new WebDAV service with default configurations pub fn new(config: WebDAVConfig) -> Result { Self::new_with_configs(config, RetryConfig::default(), ConcurrencyConfig::default()) } /// Creates a new WebDAV service with custom retry configuration pub fn new_with_retry(config: WebDAVConfig, retry_config: RetryConfig) -> Result { Self::new_with_configs(config, retry_config, ConcurrencyConfig::default()) } /// Creates a new WebDAV service with all custom configurations pub fn new_with_configs( config: WebDAVConfig, retry_config: RetryConfig, concurrency_config: ConcurrencyConfig ) -> Result { // Validate configuration config.validate()?; // Create HTTP client with timeout let client = Client::builder() .timeout(config.timeout()) .build()?; // Create semaphores for concurrency control let scan_semaphore = Arc::new(Semaphore::new(concurrency_config.max_concurrent_scans)); let download_semaphore = Arc::new(Semaphore::new(concurrency_config.max_concurrent_downloads)); Ok(Self { client, config, retry_config, concurrency_config, scan_semaphore, download_semaphore, working_protocol: Arc::new(std::sync::RwLock::new(None)), }) } // ============================================================================ // Protocol Detection Methods // ============================================================================ /// Detects the working protocol by trying HTTPS first, then HTTP /// This method handles smart protocol detection for URLs without explicit protocols async fn detect_working_protocol(&self) -> Result { info!("๐Ÿ” Starting smart protocol detection for: {}", self.config.server_url); // If URL already has a protocol, use it directly if self.config.server_url.starts_with("http://") || self.config.server_url.starts_with("https://") { let protocol = if self.config.server_url.starts_with("https://") { "https" } else { "http" }; info!("โœ… Protocol already specified: {}", protocol); return Ok(protocol.to_string()); } // Try HTTPS first (more secure default) let https_url = format!("https://{}", self.config.server_url.trim()); info!("๐Ÿ” Trying HTTPS first: {}", https_url); match self.test_protocol_connection(&https_url).await { Ok(()) => { info!("โœ… HTTPS connection successful"); // Store the working protocol for future use if let Ok(mut working_protocol) = self.working_protocol.write() { *working_protocol = Some("https".to_string()); } return Ok("https".to_string()); } Err(https_error) => { warn!("โŒ HTTPS connection failed: {}", https_error); // Check if this is a connection-related error (not auth error) if self.is_connection_error(&https_error) { info!("๐Ÿ”„ HTTPS failed with connection error, trying HTTP fallback"); // Try HTTP fallback let http_url = format!("http://{}", self.config.server_url.trim()); info!("๐Ÿ”“ Trying HTTP fallback: {}", http_url); match self.test_protocol_connection(&http_url).await { Ok(()) => { warn!("โš ๏ธ HTTP connection successful - consider configuring HTTPS for security"); // Store the working protocol for future use if let Ok(mut working_protocol) = self.working_protocol.write() { *working_protocol = Some("http".to_string()); } return Ok("http".to_string()); } Err(http_error) => { error!("โŒ Both HTTPS and HTTP failed"); error!(" HTTPS error: {}", https_error); error!(" HTTP error: {}", http_error); return Err(anyhow!( "Protocol detection failed. Both HTTPS and HTTP connections failed. \ HTTPS error: {}. HTTP error: {}. \ Please verify the server URL and ensure WebDAV is properly configured.", https_error, http_error )); } } } else { // Auth or other non-connection error with HTTPS - don't try HTTP error!("โŒ HTTPS failed with non-connection error (likely auth or server config): {}", https_error); return Err(anyhow!( "HTTPS connection failed with authentication or server configuration error: {}. \ Please check your credentials and server settings.", https_error )); } } } } /// Tests connection with a specific protocol URL async fn test_protocol_connection(&self, full_url: &str) -> Result<()> { debug!("๐Ÿงช Testing protocol connection to: {}", full_url); // Create a temporary config with the full URL for testing let temp_config = WebDAVConfig { server_url: full_url.to_string(), username: self.config.username.clone(), password: self.config.password.clone(), watch_folders: self.config.watch_folders.clone(), file_extensions: self.config.file_extensions.clone(), timeout_seconds: self.config.timeout_seconds, server_type: self.config.server_type.clone(), }; // Test basic OPTIONS request let webdav_url = temp_config.webdav_url(); debug!("๐Ÿ“ Testing WebDAV URL: {}", webdav_url); let response = self.client .request(Method::OPTIONS, &webdav_url) .basic_auth(&self.config.username, Some(&self.config.password)) .send() .await .map_err(|e| anyhow!("Connection failed: {}", e))?; if !response.status().is_success() { return Err(anyhow!( "Protocol test failed with status: {} - {}", response.status(), response.text().await.unwrap_or_default() )); } debug!("โœ… Protocol connection test successful"); Ok(()) } /// Determines if an error is connection-related (vs auth or other errors) pub fn is_connection_error(&self, error: &anyhow::Error) -> bool { let error_str = error.to_string().to_lowercase(); // Connection-related errors that suggest trying different protocol error_str.contains("connection refused") || error_str.contains("timeout") || error_str.contains("dns") || error_str.contains("network") || error_str.contains("unreachable") || error_str.contains("tls") || error_str.contains("ssl") || error_str.contains("certificate") || error_str.contains("handshake") } /// Gets the currently working protocol (if detected) pub fn get_working_protocol(&self) -> Option { self.working_protocol.read().ok().and_then(|p| p.clone()) } /// Gets the effective server URL with the working protocol pub fn get_effective_server_url(&self) -> String { // If we have a detected working protocol, use it if let Some(protocol) = self.get_working_protocol() { if !self.config.server_url.starts_with("http://") && !self.config.server_url.starts_with("https://") { return format!("{}://{}", protocol, self.config.server_url.trim()); } } // Otherwise use the configured URL (normalized) WebDAVConfig::normalize_server_url(&self.config.server_url) } // ============================================================================ // Connection and Testing Methods // ============================================================================ /// Tests the WebDAV connection pub async fn test_connection(&self) -> Result { info!("๐Ÿ” Testing WebDAV connection to: {}", self.config.server_url); // Validate configuration first if let Err(e) = self.config.validate() { return Ok(WebDAVConnectionResult { success: false, message: format!("Configuration error: {}", e), server_version: None, server_type: None, }); } // Perform protocol detection if needed let working_protocol = match self.detect_working_protocol().await { Ok(protocol) => { info!("โœ… Protocol detection successful: {}", protocol); protocol } Err(e) => { error!("โŒ Protocol detection failed: {}", e); return Ok(WebDAVConnectionResult { success: false, message: format!("Protocol detection failed: {}", e), server_version: None, server_type: None, }); } }; // Test basic connectivity with OPTIONS request using detected protocol match self.test_options_request().await { Ok((server_version, server_type)) => { let effective_url = self.get_effective_server_url(); info!("โœ… WebDAV connection successful using {} ({})", working_protocol.to_uppercase(), effective_url); Ok(WebDAVConnectionResult { success: true, message: format!("Connection successful using {}", working_protocol.to_uppercase()), server_version, server_type, }) } Err(e) => { error!("โŒ WebDAV connection failed: {}", e); Ok(WebDAVConnectionResult { success: false, message: format!("Connection failed: {}", e), server_version: None, server_type: None, }) } } } /// Tests WebDAV connection with provided configuration (static method) pub async fn test_connection_with_config(test_config: &WebDAVTestConnection) -> Result { let config = WebDAVConfig { server_url: test_config.server_url.clone(), username: test_config.username.clone(), password: test_config.password.clone(), watch_folders: vec!["/".to_string()], file_extensions: vec![], timeout_seconds: 30, server_type: test_config.server_type.clone(), }; let service = Self::new(config)?; service.test_connection().await } /// Performs OPTIONS request to test basic connectivity async fn test_options_request(&self) -> Result<(Option, Option)> { // Create a temporary config with the effective server URL for WebDAV operations let effective_server_url = self.get_effective_server_url(); let temp_config = WebDAVConfig { server_url: effective_server_url, username: self.config.username.clone(), password: self.config.password.clone(), watch_folders: self.config.watch_folders.clone(), file_extensions: self.config.file_extensions.clone(), timeout_seconds: self.config.timeout_seconds, server_type: self.config.server_type.clone(), }; let webdav_url = temp_config.webdav_url(); let response = self.client .request(Method::OPTIONS, &webdav_url) .basic_auth(&self.config.username, Some(&self.config.password)) .send() .await?; if !response.status().is_success() { return Err(anyhow!( "OPTIONS request failed with status: {} - {}", response.status(), response.text().await.unwrap_or_default() )); } // Extract server information from headers let server_version = response .headers() .get("server") .and_then(|v| v.to_str().ok()) .map(|s| s.to_string()); let server_type = self.detect_server_type(&response, &server_version).await; Ok((server_version, server_type)) } /// Detects the WebDAV server type based on response headers and capabilities async fn detect_server_type( &self, response: &reqwest::Response, server_version: &Option, ) -> Option { // Check server header first if let Some(ref server) = server_version { let server_lower = server.to_lowercase(); if server_lower.contains("nextcloud") { return Some("nextcloud".to_string()); } if server_lower.contains("owncloud") { return Some("owncloud".to_string()); } if server_lower.contains("apache") || server_lower.contains("nginx") { // Could be generic WebDAV } } // Check DAV capabilities if let Some(dav_header) = response.headers().get("dav") { if let Ok(dav_str) = dav_header.to_str() { debug!("DAV capabilities: {}", dav_str); // Different servers expose different DAV levels if dav_str.contains("3") { return Some("webdav_level_3".to_string()); } } } // Test for Nextcloud/ownCloud specific endpoints if self.test_nextcloud_capabilities().await.is_ok() { return Some("nextcloud".to_string()); } Some("generic".to_string()) } /// Tests for Nextcloud-specific capabilities async fn test_nextcloud_capabilities(&self) -> Result<()> { let effective_server_url = self.get_effective_server_url(); let capabilities_url = format!("{}/ocs/v1.php/cloud/capabilities", effective_server_url.trim_end_matches('/')); let response = self.client .get(&capabilities_url) .basic_auth(&self.config.username, Some(&self.config.password)) .header("OCS-APIRequest", "true") .send() .await?; if response.status().is_success() { debug!("Nextcloud capabilities endpoint accessible"); Ok(()) } else { Err(anyhow!("Nextcloud capabilities not accessible")) } } /// Tests PROPFIND request on root directory pub async fn test_propfind(&self, path: &str) -> Result<()> { let url = self.get_url_for_path(path); debug!("๐Ÿงช Testing PROPFIND for path '{}' at URL '{}'", path, url); // First, check server capabilities if this is the first PROPFIND if path == "/" || path.is_empty() { match self.validate_webdav_capabilities(&url).await { Ok(capabilities) => { info!("โœ… WebDAV capabilities validated: DAV={}, Methods={}", capabilities.dav_compliance, capabilities.allowed_methods); } Err(e) => { warn!("โš ๏ธ WebDAV capability validation failed (continuing anyway): {}", e); } } } let propfind_body = r#" "#; let response = self.authenticated_request( Method::from_bytes(b"PROPFIND")?, &url, Some(propfind_body.to_string()), Some(vec![ ("Depth", "1"), ("Content-Type", "application/xml"), ]), ).await?; if response.status().as_u16() == 207 { debug!("โœ… PROPFIND successful for path: {}", path); Ok(()) } else { Err(anyhow!( "PROPFIND failed for path '{}' with status: {} - {}", path, response.status(), response.text().await.unwrap_or_default() )) } } /// Validates WebDAV server capabilities to help diagnose configuration issues async fn validate_webdav_capabilities(&self, url: &str) -> Result { debug!("๐Ÿ” Validating WebDAV capabilities for URL: {}", url); let options_response = self.authenticated_request( reqwest::Method::OPTIONS, url, None, None, ).await?; let dav_header = options_response .headers() .get("dav") .and_then(|v| v.to_str().ok()) .unwrap_or("") .to_string(); let allow_header = options_response .headers() .get("allow") .and_then(|v| v.to_str().ok()) .unwrap_or("") .to_string(); let server_header = options_response .headers() .get("server") .and_then(|v| v.to_str().ok()) .map(|s| s.to_string()); // Check if PROPFIND is in the allowed methods if !allow_header.to_uppercase().contains("PROPFIND") { warn!("โš ๏ธ PROPFIND method not listed in server's Allow header: {}", allow_header); warn!("๐Ÿ’ก This suggests WebDAV may not be properly enabled on this endpoint"); } // Check DAV compliance level if dav_header.is_empty() { warn!("โš ๏ธ No DAV header found - this endpoint may not support WebDAV"); } else { debug!("๐Ÿ“‹ Server DAV compliance: {}", dav_header); } if let Some(ref server) = server_header { debug!("๐Ÿ–ฅ๏ธ Server software: {}", server); } Ok(ServerCapabilities { dav_compliance: dav_header.clone(), allowed_methods: allow_header, server_software: server_header, supports_etag: dav_header.contains("1") || dav_header.contains("2"), supports_depth_infinity: dav_header.contains("1"), infinity_depth_tested: false, infinity_depth_works: false, last_checked: std::time::Instant::now(), }) } // ============================================================================ // HTTP Request Methods with Simple Retry Logic // ============================================================================ /// Performs authenticated request with simple retry logic (simplified from complex error recovery) pub async fn authenticated_request( &self, method: Method, url: &str, body: Option, headers: Option>, ) -> Result { let mut attempt = 0; let mut delay = self.retry_config.initial_delay_ms; // Enhanced debug logging for HTTP requests debug!("๐ŸŒ HTTP Request Details:"); debug!(" Method: {}", method); debug!(" URL: {}", url); debug!(" Username: {}", self.config.username); if let Some(ref headers_list) = headers { debug!(" Headers: {:?}", headers_list); } if let Some(ref body_content) = body { debug!(" Body length: {} bytes", body_content.len()); debug!(" Body preview: {}", if body_content.len() > 200 { format!("{}...", &body_content[..200]) } else { body_content.clone() }); } loop { let mut request = self.client .request(method.clone(), url) .basic_auth(&self.config.username, Some(&self.config.password)); if let Some(ref body_content) = body { request = request.body(body_content.clone()); } if let Some(ref headers_list) = headers { for (key, value) in headers_list { request = request.header(*key, *value); } } debug!("๐Ÿ“ค Sending HTTP {} request to: {}", method, url); match request.send().await { Ok(response) => { let status = response.status(); debug!("๐Ÿ“ฅ HTTP Response: {} {}", status.as_u16(), status.canonical_reason().unwrap_or("")); // Log response headers for debugging for (key, value) in response.headers() { if key.as_str().to_lowercase().contains("allow") || key.as_str().to_lowercase().contains("dav") || key.as_str().to_lowercase().contains("server") { debug!(" Response header: {}: {:?}", key, value); } } if status.is_success() || status.as_u16() == 207 { debug!("โœ… HTTP request successful: {} {}", status.as_u16(), status.canonical_reason().unwrap_or("")); return Ok(response); } // Handle rate limiting if status.as_u16() == 429 { warn!("Rate limited, backing off for {}ms", self.retry_config.rate_limit_backoff_ms); sleep(Duration::from_millis(self.retry_config.rate_limit_backoff_ms)).await; continue; } // Handle client errors (don't retry) if status.is_client_error() && status.as_u16() != 429 { let error_body = response.text().await.unwrap_or_default(); // Provide specific guidance for 405 Method Not Allowed errors if status.as_u16() == 405 { error!("๐Ÿšซ HTTP 405 Method Not Allowed for {} {}", method, url); error!("๐Ÿ” Request Details:"); error!(" Method: {}", method); error!(" URL: {}", url); error!(" Server type: {:?}", self.config.server_type); error!(" Username: {}", self.config.username); error!(" Server base URL: {}", self.config.server_url); error!(" WebDAV base URL: {}", self.config.webdav_url()); if let Some(ref headers_list) = headers { error!(" Request headers: {:?}", headers_list); } error!("๐Ÿ“ This usually indicates:"); error!(" 1. WebDAV is not enabled on the server"); error!(" 2. The URL endpoint doesn't support {} method", method); error!(" 3. Incorrect WebDAV endpoint URL"); error!(" 4. Authentication issues or insufficient permissions"); error!("๐Ÿ’ก Troubleshooting steps:"); error!(" - Verify WebDAV is enabled in your server settings"); error!(" - Check if the WebDAV endpoint URL is correct"); error!(" - Try testing with a WebDAV client like Cyberduck"); error!(" - Verify your user has WebDAV access permissions"); return Err(anyhow!( "WebDAV {} method not allowed (405) at URL: {}. This typically means WebDAV is not properly enabled on the server or the URL is incorrect. \ Server type: {:?}, Base URL: {}, WebDAV URL: {}. Error details: {}", method, url, self.config.server_type, self.config.server_url, self.config.webdav_url(), error_body )); } return Err(anyhow!("Client error: {} - {}", status, error_body)); } // Handle server errors (retry) if status.is_server_error() && attempt < self.retry_config.max_retries { warn!("Server error {}, retrying in {}ms (attempt {}/{})", status, delay, attempt + 1, self.retry_config.max_retries); sleep(Duration::from_millis(delay)).await; delay = std::cmp::min( (delay as f64 * self.retry_config.backoff_multiplier) as u64, self.retry_config.max_delay_ms ); attempt += 1; continue; } return Err(anyhow!("Request failed: {} - {}", status, response.text().await.unwrap_or_default())); } Err(e) => { if attempt < self.retry_config.max_retries { warn!("Request error: {}, retrying in {}ms (attempt {}/{})", e, delay, attempt + 1, self.retry_config.max_retries); sleep(Duration::from_millis(delay)).await; delay = std::cmp::min( (delay as f64 * self.retry_config.backoff_multiplier) as u64, self.retry_config.max_delay_ms ); attempt += 1; continue; } return Err(anyhow!("Request failed after {} attempts: {}", self.retry_config.max_retries, e)); } } } } // ============================================================================ // URL Management Helper Methods (Previously separate module) // ============================================================================ /// Gets the WebDAV URL for a specific path pub fn get_url_for_path(&self, path: &str) -> String { // Create a temporary config with the effective server URL let effective_server_url = self.get_effective_server_url(); let temp_config = WebDAVConfig { server_url: effective_server_url, username: self.config.username.clone(), password: self.config.password.clone(), watch_folders: self.config.watch_folders.clone(), file_extensions: self.config.file_extensions.clone(), timeout_seconds: self.config.timeout_seconds, server_type: self.config.server_type.clone(), }; let base_url = temp_config.webdav_url(); let clean_path = path.trim_start_matches('/'); let final_url = if clean_path.is_empty() { base_url.clone() } else { // Ensure no double slashes by normalizing the base URL let normalized_base = base_url.trim_end_matches('/'); format!("{}/{}", normalized_base, clean_path) }; debug!("๐Ÿ”— URL Construction:"); debug!(" Input path: '{}'", path); debug!(" Clean path: '{}'", clean_path); debug!(" Base WebDAV URL: '{}'", base_url); debug!(" Final URL: '{}'", final_url); debug!(" Server type: {:?}", self.config.server_type); debug!(" Server base URL: '{}'", self.config.server_url); final_url } /// Convert full WebDAV href (from XML response) to relative path /// /// Input: "/remote.php/dav/files/username/Photos/image.jpg" /// Output: "/Photos/image.jpg" pub fn href_to_relative_path(&self, href: &str) -> String { match self.config.server_type.as_deref() { Some("nextcloud") => { let prefix = format!("/remote.php/dav/files/{}", self.config.username); if href.starts_with(&prefix) { let relative = &href[prefix.len()..]; if relative.is_empty() { "/" } else { relative }.to_string() } else { href.to_string() } } Some("owncloud") => { if href.starts_with("/remote.php/webdav") { let relative = &href[18..]; // Remove "/remote.php/webdav" if relative.is_empty() { "/" } else { relative }.to_string() } else { href.to_string() } } Some("generic") => { if href.starts_with("/webdav") { let relative = &href[7..]; // Remove "/webdav" if relative.is_empty() { "/" } else { relative }.to_string() } else { href.to_string() } } _ => href.to_string() } } /// Convert file paths to the proper URL format for the server pub fn path_to_url(&self, relative_path: &str) -> String { let clean_path = relative_path.trim_start_matches('/'); // Create a temporary config with the effective server URL let effective_server_url = self.get_effective_server_url(); let temp_config = WebDAVConfig { server_url: effective_server_url, username: self.config.username.clone(), password: self.config.password.clone(), watch_folders: self.config.watch_folders.clone(), file_extensions: self.config.file_extensions.clone(), timeout_seconds: self.config.timeout_seconds, server_type: self.config.server_type.clone(), }; let base_url = temp_config.webdav_url(); if clean_path.is_empty() { base_url } else { format!("{}/{}", base_url.trim_end_matches('/'), clean_path) } } /// Converts a full WebDAV path to a relative path by removing server-specific prefixes pub fn convert_to_relative_path(&self, full_webdav_path: &str) -> String { // For Nextcloud/ownCloud, remove the server-specific prefixes if let Some(server_type) = &self.config.server_type { if server_type == "nextcloud" { let username = &self.config.username; let prefix = format!("/remote.php/dav/files/{}", username); if full_webdav_path.starts_with(&prefix) { let relative = &full_webdav_path[prefix.len()..]; return if relative.is_empty() { "/" } else { relative }.to_string(); } } else if server_type == "owncloud" { // ownCloud uses /remote.php/webdav prefix if full_webdav_path.starts_with("/remote.php/webdav") { let relative = &full_webdav_path[18..]; // Remove "/remote.php/webdav" return if relative.is_empty() { "/" } else { relative }.to_string(); } } else if server_type == "generic" { // For generic servers, remove the /webdav prefix if present if full_webdav_path.starts_with("/webdav") { let relative = &full_webdav_path[7..]; // Remove "/webdav" return if relative.is_empty() { "/" } else { relative }.to_string(); } } } // For other servers, return as-is full_webdav_path.to_string() } // ============================================================================ // File Discovery Methods (Previously separate discovery module) // ============================================================================ /// Discovers files in a directory with support for pagination and filtering pub async fn discover_files(&self, directory_path: &str, recursive: bool) -> Result> { info!("๐Ÿ” Discovering files in directory: {}", directory_path); if recursive { self.discover_files_recursive(directory_path).await } else { self.discover_files_single_directory(directory_path).await } } /// Discovers both files and directories with their ETags for directory tracking pub async fn discover_files_and_directories(&self, directory_path: &str, recursive: bool) -> Result { info!("๐Ÿ” Discovering files and directories in: {}", directory_path); if recursive { self.discover_files_and_directories_recursive(directory_path).await } else { self.discover_files_and_directories_single(directory_path).await } } /// Discovers both files and directories with basic progress tracking (simplified) pub async fn discover_files_and_directories_with_progress( &self, directory_path: &str, recursive: bool, _progress: Option<&SyncProgress> // Simplified: just placeholder for API compatibility ) -> Result { info!("๐Ÿ” Discovering files and directories in: {} (progress tracking simplified)", directory_path); if recursive { self.discover_files_and_directories_recursive(directory_path).await } else { self.discover_files_and_directories_single(directory_path).await } } /// Discovers files in a single directory (non-recursive) async fn discover_files_single_directory(&self, directory_path: &str) -> Result> { let url = self.get_url_for_path(directory_path); let propfind_body = r#" "#; let response = self.authenticated_request( Method::from_bytes(b"PROPFIND")?, &url, Some(propfind_body.to_string()), Some(vec![ ("Depth", "1"), ("Content-Type", "application/xml"), ]), ).await?; let body = response.text().await?; let files = parse_propfind_response(&body)?; // Filter out the directory itself and only return files let filtered_files: Vec = files .into_iter() .filter(|file| !file.is_directory && file.relative_path != directory_path) .collect(); debug!("Found {} files in directory: {}", filtered_files.len(), directory_path); Ok(filtered_files) } /// Discovers files recursively in all subdirectories async fn discover_files_recursive(&self, directory_path: &str) -> Result> { let mut all_files = Vec::new(); let mut directories_to_scan = vec![directory_path.to_string()]; let mut scanned_directories = std::collections::HashSet::new(); let semaphore = Arc::new(Semaphore::new(self.concurrency_config.max_concurrent_scans)); debug!("Starting recursive file scan from: {}", directory_path); while !directories_to_scan.is_empty() { // Take a batch of directories to process let batch_size = std::cmp::min(directories_to_scan.len(), self.concurrency_config.max_concurrent_scans); let current_batch: Vec = directories_to_scan.drain(..batch_size).collect(); debug!("Processing batch of {} directories, {} remaining in queue", current_batch.len(), directories_to_scan.len()); // Process directories concurrently let tasks = current_batch.into_iter().filter_map(|dir| { // Skip if already scanned if scanned_directories.contains(&dir) { debug!("Skipping already scanned directory: {}", dir); return None; } scanned_directories.insert(dir.clone()); let permit = semaphore.clone(); let service = self.clone(); Some(async move { let _permit = permit.acquire().await.unwrap(); let result = service.discover_files_and_directories_single(&dir).await; (dir, result) }) }); let results = futures_util::future::join_all(tasks).await; for (scanned_dir, result) in results { match result { Ok(discovery_result) => { debug!("Directory '{}' scan complete: {} files, {} subdirectories", scanned_dir, discovery_result.files.len(), discovery_result.directories.len()); all_files.extend(discovery_result.files); // Add subdirectories to the queue for the next iteration for dir in discovery_result.directories { if dir.is_directory && !scanned_directories.contains(&dir.relative_path) { directories_to_scan.push(dir.relative_path.clone()); debug!("Added subdirectory to scan queue: {}", dir.relative_path); } } } Err(e) => { warn!("Failed to scan directory '{}': {}", scanned_dir, e); } } } debug!("Batch complete. Total files found: {}. Queue size: {}", all_files.len(), directories_to_scan.len()); } info!("Recursive scan completed. Found {} files total", all_files.len()); Ok(all_files) } /// Discovers both files and directories in a single directory async fn discover_files_and_directories_single(&self, directory_path: &str) -> Result { // Try the primary URL first, then fallback URLs if we get a 405 error match self.discover_files_and_directories_single_with_url(directory_path, &self.get_url_for_path(directory_path)).await { Ok(result) => Ok(result), Err(e) => { // Check if this is a 405 Method Not Allowed error if e.to_string().contains("405") || e.to_string().contains("Method Not Allowed") { warn!("๐Ÿ”„ Primary WebDAV URL failed with 405 error, trying fallback URLs..."); self.try_fallback_discovery(directory_path).await } else { Err(e) } } } } /// Tries fallback URLs when the primary WebDAV URL fails with 405 async fn try_fallback_discovery(&self, directory_path: &str) -> Result { let fallback_urls = self.config.webdav_fallback_urls(); for (i, fallback_base_url) in fallback_urls.iter().enumerate() { let fallback_url = if directory_path == "/" || directory_path.is_empty() { fallback_base_url.clone() } else { format!("{}/{}", fallback_base_url.trim_end_matches('/'), directory_path.trim_start_matches('/')) }; info!("๐Ÿ”„ Trying fallback URL #{}: {}", i + 1, fallback_url); match self.discover_files_and_directories_single_with_url(directory_path, &fallback_url).await { Ok(result) => { info!("โœ… Fallback URL #{} succeeded: {}", i + 1, fallback_url); warn!("๐Ÿ’ก Consider updating your server type configuration to use this URL pattern"); return Ok(result); } Err(e) => { warn!("โŒ Fallback URL #{} failed: {} - {}", i + 1, fallback_url, e); } } } Err(anyhow!( "All WebDAV URLs failed for directory '{}'. Primary URL and {} fallback URLs were tried. \ This suggests WebDAV is not properly configured on the server or the server type is incorrect.", directory_path, fallback_urls.len() )) } /// Performs the actual discovery with a specific URL async fn discover_files_and_directories_single_with_url(&self, directory_path: &str, url: &str) -> Result { // Enhanced debug logging for WebDAV URL construction debug!("๐Ÿ” WebDAV directory scan - Path: '{}', URL: '{}', Server type: {:?}", directory_path, url, self.config.server_type); debug!("๐Ÿ”ง WebDAV config - Server URL: '{}', Username: '{}', WebDAV base URL: '{}'", self.config.server_url, self.config.username, self.config.webdav_url()); let propfind_body = r#" "#; debug!("๐Ÿ“ค Sending PROPFIND request to URL: {}", url); debug!("๐Ÿ“‹ PROPFIND body length: {} bytes", propfind_body.len()); let response = self.authenticated_request( Method::from_bytes(b"PROPFIND")?, url, Some(propfind_body.to_string()), Some(vec![ ("Depth", "1"), ("Content-Type", "application/xml"), ]), ).await.map_err(|e| { error!("โŒ PROPFIND request failed for directory '{}' at URL '{}': {}", directory_path, url, e); e })?; let body = response.text().await?; let all_items = parse_propfind_response_with_directories(&body)?; // Process the items to convert href to relative paths let processed_items = self.process_file_infos(all_items); // Separate files and directories, excluding the parent directory itself let mut files = Vec::new(); let mut directories = Vec::new(); for item in processed_items { // Skip the directory itself (handle both with and without trailing slash) let normalized_item_path = item.relative_path.trim_end_matches('/'); let normalized_directory_path = directory_path.trim_end_matches('/'); if normalized_item_path == normalized_directory_path { continue; // Skip the directory itself } if item.is_directory { directories.push(item); } else { files.push(item); } } debug!("Found {} files and {} directories in: {}", files.len(), directories.len(), directory_path); Ok(WebDAVDiscoveryResult { files, directories }) } /// Discovers files and directories recursively async fn discover_files_and_directories_recursive(&self, directory_path: &str) -> Result { let mut all_files = Vec::new(); let mut all_directories = Vec::new(); let mut directories_to_scan = vec![directory_path.to_string()]; let mut scanned_directories = std::collections::HashSet::new(); let semaphore = Arc::new(Semaphore::new(self.concurrency_config.max_concurrent_scans)); debug!("Starting recursive scan from: {}", directory_path); while !directories_to_scan.is_empty() { // Take a batch of directories to process (limit batch size for better progress tracking) let batch_size = std::cmp::min(directories_to_scan.len(), self.concurrency_config.max_concurrent_scans); let current_batch: Vec = directories_to_scan.drain(..batch_size).collect(); debug!("Processing batch of {} directories, {} remaining in queue", current_batch.len(), directories_to_scan.len()); // Process directories concurrently let tasks = current_batch.into_iter().filter_map(|dir| { // Skip if already scanned (prevent infinite loops) if scanned_directories.contains(&dir) { debug!("Skipping already scanned directory: {}", dir); return None; } scanned_directories.insert(dir.clone()); let permit = semaphore.clone(); let service = self.clone(); Some(async move { let _permit = permit.acquire().await.unwrap(); let result = service.discover_files_and_directories_single(&dir).await; (dir, result) }) }); let results = futures_util::future::join_all(tasks).await; for (scanned_dir, result) in results { match result { Ok(discovery_result) => { debug!("Directory '{}' scan complete: {} files, {} subdirectories", scanned_dir, discovery_result.files.len(), discovery_result.directories.len()); all_files.extend(discovery_result.files); // Add directories to our results and to the scan queue for dir in discovery_result.directories { // Only add to scan queue if not already scanned if !scanned_directories.contains(&dir.relative_path) { directories_to_scan.push(dir.relative_path.clone()); debug!("Added subdirectory to scan queue: {} (scanned set size: {})", dir.relative_path, scanned_directories.len()); } else { debug!("Skipping already scanned directory: {} (already in scanned set)", dir.relative_path); } all_directories.push(dir); } } Err(e) => { warn!("Failed to scan directory '{}': {}", scanned_dir, e); } } } debug!("Batch complete. Total progress: {} files, {} directories found. Queue size: {}", all_files.len(), all_directories.len(), directories_to_scan.len()); } info!("Recursive scan completed. Found {} files and {} directories", all_files.len(), all_directories.len()); Ok(WebDAVDiscoveryResult { files: all_files, directories: all_directories }) } /// Estimates crawl time and resource requirements pub async fn estimate_crawl(&self) -> Result { info!("๐Ÿ“Š Estimating WebDAV crawl requirements"); let start_time = Instant::now(); let mut total_directories = 0; let mut total_files = 0; let mut sample_scan_time = Duration::from_millis(0); // Sample the first few watch folders to estimate for (index, watch_folder) in self.config.watch_folders.iter().enumerate() { if index >= 3 { break; } // Only sample first 3 folders let scan_start = Instant::now(); match self.discover_files_and_directories(watch_folder, false).await { Ok(result) => { total_directories += result.directories.len(); total_files += result.files.len(); sample_scan_time += scan_start.elapsed(); } Err(e) => { warn!("Failed to scan folder '{}' for estimation: {}", watch_folder, e); } } } // Simple estimation based on sample let avg_scan_time_per_folder = if total_directories > 0 { sample_scan_time.as_millis() as f64 / total_directories as f64 } else { 100.0 // Default 100ms per folder }; let estimated_total_scan_time = Duration::from_millis( (avg_scan_time_per_folder * total_directories as f64 * self.config.watch_folders.len() as f64) as u64 ); Ok(WebDAVCrawlEstimate { folders: vec![], // Simplified: not building detailed folder info for basic estimation total_files: (total_files * self.config.watch_folders.len()) as i64, total_supported_files: (total_files * self.config.watch_folders.len()) as i64, // Assume all files are supported total_estimated_time_hours: estimated_total_scan_time.as_secs_f32() / 3600.0, total_size_mb: (total_files * 2) as f64, // Rough estimate in MB }) } /// Deduplicates files across multiple folders pub fn deduplicate_files(&self, files: Vec) -> Vec { let mut seen = HashSet::new(); files.into_iter().filter(|file| { seen.insert(file.relative_path.clone()) }).collect() } /// Filters files by date for incremental syncs pub fn filter_files_by_date(&self, files: Vec, since: chrono::DateTime) -> Vec { files.into_iter().filter(|file| { file.last_modified.map_or(false, |modified| modified > since) }).collect() } // ============================================================================ // File Operations // ============================================================================ /// Discovers all files in watch folders pub async fn discover_all_files(&self) -> Result> { info!("๐Ÿ” Discovering all files in watch folders"); let mut all_files = Vec::new(); for watch_folder in &self.config.watch_folders { info!("๐Ÿ“ Scanning watch folder: {}", watch_folder); match self.discover_files(watch_folder, true).await { Ok(files) => { info!("โœ… Found {} files in {}", files.len(), watch_folder); all_files.extend(files); } Err(e) => { error!("โŒ Failed to scan watch folder '{}': {}", watch_folder, e); return Err(anyhow!("Failed to scan watch folder '{}': {}", watch_folder, e)); } } } // Deduplicate files across folders let deduplicated_files = self.deduplicate_files(all_files); info!("๐ŸŽฏ Total unique files discovered: {}", deduplicated_files.len()); Ok(deduplicated_files) } /// Discovers files changed since a specific date (for incremental syncs) pub async fn discover_changed_files(&self, since: chrono::DateTime) -> Result> { info!("๐Ÿ” Discovering files changed since: {}", since); let all_files = self.discover_all_files().await?; let changed_files = self.filter_files_by_date(all_files, since); info!("๐Ÿ“ˆ Found {} files changed since {}", changed_files.len(), since); Ok(changed_files) } /// Discovers files in a specific directory pub async fn discover_files_in_directory(&self, directory_path: &str, recursive: bool) -> Result> { info!("๐Ÿ” Discovering files in directory: {} (recursive: {})", directory_path, recursive); self.discover_files(directory_path, recursive).await } /// Downloads a file from WebDAV server by path pub async fn download_file(&self, file_path: &str) -> Result> { let _permit = self.download_semaphore.acquire().await?; debug!("โฌ‡๏ธ Downloading file: {}", file_path); // Convert full WebDAV paths to relative paths to prevent double path construction let relative_path = self.convert_to_relative_path(file_path); let url = self.get_url_for_path(&relative_path); let response = self.authenticated_request( reqwest::Method::GET, &url, None, None, ).await?; if !response.status().is_success() { return Err(anyhow!( "Failed to download file '{}': HTTP {}", file_path, response.status() )); } let content = response.bytes().await?; debug!("โœ… Downloaded {} bytes for file: {}", content.len(), file_path); Ok(content.to_vec()) } /// Downloads a file from WebDAV server using FileIngestionInfo pub async fn download_file_info(&self, file_info: &FileIngestionInfo) -> Result> { let _permit = self.download_semaphore.acquire().await?; debug!("โฌ‡๏ธ Downloading file: {}", file_info.relative_path); // Use the relative path directly since it's already processed let relative_path = &file_info.relative_path; let url = self.get_url_for_path(&relative_path); let response = self.authenticated_request( reqwest::Method::GET, &url, None, None, ).await?; if !response.status().is_success() { return Err(anyhow!( "Failed to download file '{}': HTTP {}", file_info.relative_path, response.status() )); } let content = response.bytes().await?; debug!("โœ… Downloaded {} bytes for file: {}", content.len(), file_info.relative_path); Ok(content.to_vec()) } /// Downloads multiple files concurrently pub async fn download_files(&self, files: &[FileIngestionInfo]) -> Result>)>> { info!("โฌ‡๏ธ Downloading {} files concurrently", files.len()); let tasks = files.iter().map(|file| { let file_clone = file.clone(); let service_clone = self.clone(); async move { let result = service_clone.download_file_info(&file_clone).await; (file_clone, result) } }); let results = futures_util::future::join_all(tasks).await; let success_count = results.iter().filter(|(_, result)| result.is_ok()).count(); let failure_count = results.len() - success_count; info!("๐Ÿ“Š Download completed: {} successful, {} failed", success_count, failure_count); Ok(results) } /// Downloads a file with enhanced MIME type detection based on content /// /// This method downloads the file and performs content-based MIME type detection /// using magic bytes, providing more accurate type identification than the initial /// discovery phase which only has access to filenames and server-provided types. /// /// # Arguments /// * `file_info` - The file information from WebDAV discovery /// /// # Returns /// A `WebDAVDownloadResult` containing the file content, updated file info, and MIME detection details pub async fn download_file_with_mime_detection(&self, file_info: &FileIngestionInfo) -> Result { let _permit = self.download_semaphore.acquire().await?; debug!("โฌ‡๏ธ๐Ÿ” Downloading file with MIME detection: {}", file_info.relative_path); // Use the relative path directly since it's already processed let relative_path = &file_info.relative_path; let url = self.get_url_for_path(&relative_path); let response = self.authenticated_request( reqwest::Method::GET, &url, None, None, ).await?; if !response.status().is_success() { return Err(anyhow!( "Failed to download file '{}': HTTP {}", file_info.relative_path, response.status() )); } // Get server-provided content type from response headers let server_content_type = response .headers() .get("content-type") .and_then(|header| header.to_str().ok()) .map(|s| s.split(';').next().unwrap_or(s).trim().to_string()); // Remove charset info and convert to owned let content = response.bytes().await?; debug!("โœ… Downloaded {} bytes for file: {}", content.len(), file_info.relative_path); // Perform content-based MIME type detection let mime_detection_result = detect_mime_from_content( &content, &file_info.name, server_content_type.as_deref() ); // Check if MIME type should be updated let mime_type_updated = mime_detection_result.mime_type != file_info.mime_type; if mime_type_updated { debug!("๐Ÿ”„ MIME type updated for {}: '{}' -> '{}' (method: {:?}, confidence: {:?})", file_info.name, file_info.mime_type, mime_detection_result.mime_type, mime_detection_result.detection_method, mime_detection_result.confidence); } else { debug!("โœ… MIME type confirmed for {}: '{}' (method: {:?}, confidence: {:?})", file_info.name, mime_detection_result.mime_type, mime_detection_result.detection_method, mime_detection_result.confidence); } // Create updated file info if MIME type changed let updated_file_info = if mime_type_updated { let mut updated = file_info.clone(); updated.mime_type = mime_detection_result.mime_type.clone(); updated } else { file_info.clone() }; Ok(WebDAVDownloadResult { content: content.to_vec(), file_info: updated_file_info, mime_detection: mime_detection_result, mime_type_updated, }) } /// Downloads multiple files with MIME type detection concurrently /// /// Similar to `download_files` but includes content-based MIME type detection /// for each downloaded file. /// /// # Arguments /// * `files` - The files to download /// /// # Returns /// A vector of tuples containing the original file info and download result pub async fn download_files_with_mime_detection(&self, files: &[FileIngestionInfo]) -> Result)>> { info!("โฌ‡๏ธ๐Ÿ” Downloading {} files with MIME detection concurrently", files.len()); let tasks = files.iter().map(|file| { let file_clone = file.clone(); let service_clone = self.clone(); async move { let result = service_clone.download_file_with_mime_detection(&file_clone).await; (file_clone, result) } }); let results = futures_util::future::join_all(tasks).await; let success_count = results.iter().filter(|(_, result)| result.is_ok()).count(); let failure_count = results.len() - success_count; let mime_updated_count = results.iter() .filter_map(|(_, result)| result.as_ref().ok()) .filter(|download_result| download_result.mime_type_updated) .count(); info!("๐Ÿ“Š Download with MIME detection completed: {} successful, {} failed, {} MIME types updated", success_count, failure_count, mime_updated_count); Ok(results) } /// Gets file metadata without downloading content pub async fn get_file_metadata(&self, file_path: &str) -> Result { debug!("๐Ÿ“‹ Getting metadata for file: {}", file_path); // Convert full WebDAV paths to relative paths to prevent double path construction let relative_path = self.convert_to_relative_path(file_path); let url = self.get_url_for_path(&relative_path); let propfind_body = r#" "#; let response = self.authenticated_request( reqwest::Method::from_bytes(b"PROPFIND")?, &url, Some(propfind_body.to_string()), Some(vec![ ("Depth", "0"), ("Content-Type", "application/xml"), ]), ).await?; let body = response.text().await?; let files = parse_propfind_response(&body)?; files.into_iter() .find(|f| f.relative_path == file_path) .ok_or_else(|| anyhow!("File metadata not found: {}", file_path)) } /// Checks if a file exists on the WebDAV server pub async fn file_exists(&self, file_path: &str) -> Result { match self.get_file_metadata(file_path).await { Ok(_) => Ok(true), Err(_) => Ok(false), } } // ============================================================================ // Server Capabilities and Health Checks // ============================================================================ /// Gets the server capabilities and features pub async fn get_server_capabilities(&self) -> Result { debug!("๐Ÿ” Checking server capabilities"); // Create a temporary config with the effective server URL let effective_server_url = self.get_effective_server_url(); let temp_config = WebDAVConfig { server_url: effective_server_url, username: self.config.username.clone(), password: self.config.password.clone(), watch_folders: self.config.watch_folders.clone(), file_extensions: self.config.file_extensions.clone(), timeout_seconds: self.config.timeout_seconds, server_type: self.config.server_type.clone(), }; let options_response = self.authenticated_request( reqwest::Method::OPTIONS, &temp_config.webdav_url(), None, None, ).await?; let dav_header = options_response .headers() .get("dav") .and_then(|v| v.to_str().ok()) .unwrap_or("") .to_string(); let allow_header = options_response .headers() .get("allow") .and_then(|v| v.to_str().ok()) .unwrap_or("") .to_string(); let server_header = options_response .headers() .get("server") .and_then(|v| v.to_str().ok()) .map(|s| s.to_string()); Ok(ServerCapabilities { dav_compliance: dav_header.clone(), allowed_methods: allow_header, server_software: server_header, supports_etag: dav_header.contains("1") || dav_header.contains("2"), supports_depth_infinity: dav_header.contains("1"), infinity_depth_tested: false, // Will be tested separately if needed infinity_depth_works: false, // Will be updated after testing last_checked: std::time::Instant::now(), }) } /// Performs a health check on the WebDAV service pub async fn health_check(&self) -> Result { info!("๐Ÿฅ Performing WebDAV service health check"); let start_time = std::time::Instant::now(); // Test basic connectivity let connection_result = self.test_connection().await?; if !connection_result.success { return Ok(HealthStatus { healthy: false, message: format!("Connection failed: {}", connection_result.message), response_time_ms: start_time.elapsed().as_millis() as u64, details: None, }); } // Test each watch folder for folder in &self.config.watch_folders { if let Err(e) = self.test_propfind(folder).await { return Ok(HealthStatus { healthy: false, message: format!("Watch folder '{}' is inaccessible: {}", folder, e), response_time_ms: start_time.elapsed().as_millis() as u64, details: Some(serde_json::json!({ "failed_folder": folder, "error": e.to_string() })), }); } } let response_time = start_time.elapsed().as_millis() as u64; Ok(HealthStatus { healthy: true, message: "All systems operational".to_string(), response_time_ms: response_time, details: Some(serde_json::json!({ "tested_folders": self.config.watch_folders, "server_type": connection_result.server_type, "server_version": connection_result.server_version })), }) } // ============================================================================ // Validation Methods (Previously separate validation module) // ============================================================================ /// Performs comprehensive validation of WebDAV setup and directory tracking pub async fn validate_system(&self) -> Result { let start_time = std::time::Instant::now(); info!("๐Ÿ” Starting WebDAV system validation"); let mut issues = Vec::new(); let mut recommendations = Vec::new(); let mut directories_checked = 0; let mut healthy_directories = 0; // Test basic connectivity first match self.test_connection().await { Ok(result) if !result.success => { issues.push(ValidationIssue { issue_type: ValidationIssueType::Inaccessible, severity: ValidationSeverity::Critical, directory_path: "/".to_string(), description: "WebDAV server connection failed".to_string(), details: Some(serde_json::json!({ "error": result.message })), detected_at: chrono::Utc::now(), }); } Err(e) => { issues.push(ValidationIssue { issue_type: ValidationIssueType::Inaccessible, severity: ValidationSeverity::Critical, directory_path: "/".to_string(), description: "WebDAV server connection error".to_string(), details: Some(serde_json::json!({ "error": e.to_string() })), detected_at: chrono::Utc::now(), }); } _ => {} } // Test each watch folder for folder in &self.config.watch_folders { directories_checked += 1; match self.test_propfind(folder).await { Ok(_) => { healthy_directories += 1; debug!("โœ… Watch folder accessible: {}", folder); } Err(e) => { issues.push(ValidationIssue { issue_type: ValidationIssueType::Inaccessible, severity: ValidationSeverity::Error, directory_path: folder.clone(), description: format!("Watch folder '{}' is not accessible", folder), details: Some(serde_json::json!({ "error": e.to_string() })), detected_at: chrono::Utc::now(), }); } } } // Generate recommendations based on issues if issues.iter().any(|i| matches!(i.severity, ValidationSeverity::Critical)) { recommendations.push(ValidationRecommendation { action: ValidationAction::CheckServerConfiguration, reason: "Critical connectivity issues detected".to_string(), affected_directories: issues.iter() .filter(|i| matches!(i.severity, ValidationSeverity::Critical)) .map(|i| i.directory_path.clone()) .collect(), priority: ValidationSeverity::Critical, }); } if issues.iter().any(|i| matches!(i.issue_type, ValidationIssueType::Inaccessible)) { recommendations.push(ValidationRecommendation { action: ValidationAction::DeepScanRequired, reason: "Some directories are inaccessible and may need re-scanning".to_string(), affected_directories: issues.iter() .filter(|i| matches!(i.issue_type, ValidationIssueType::Inaccessible)) .map(|i| i.directory_path.clone()) .collect(), priority: ValidationSeverity::Warning, }); } if issues.is_empty() { recommendations.push(ValidationRecommendation { action: ValidationAction::NoActionRequired, reason: "System is healthy and functioning normally".to_string(), affected_directories: vec![], priority: ValidationSeverity::Info, }); } // Calculate health score let health_score = if directories_checked == 0 { 0 } else { (healthy_directories * 100 / directories_checked) as i32 }; let critical_issues = issues.iter().filter(|i| matches!(i.severity, ValidationSeverity::Critical)).count(); let warning_issues = issues.iter().filter(|i| matches!(i.severity, ValidationSeverity::Warning)).count(); let info_issues = issues.iter().filter(|i| matches!(i.severity, ValidationSeverity::Info)).count(); let summary = ValidationSummary { total_directories_checked: directories_checked, healthy_directories, directories_with_issues: directories_checked - healthy_directories, critical_issues, warning_issues, info_issues, validation_duration_ms: start_time.elapsed().as_millis() as u64, }; info!("โœ… WebDAV validation completed in {}ms. Health score: {}/100", summary.validation_duration_ms, health_score); Ok(ValidationReport { overall_health_score: health_score, issues, recommendations, summary, }) } // ============================================================================ // Utility Methods // ============================================================================ /// Tests if the server supports recursive ETag scanning pub async fn test_recursive_etag_support(&self) -> Result { debug!("๐Ÿ” Testing recursive ETag support"); // Get server capabilities to check ETag support let capabilities = self.get_server_capabilities().await?; // Check if server supports ETags at all if !capabilities.supports_etag { debug!("โŒ Server does not support ETags"); return Ok(false); } // Check server type for known recursive ETag support if let Some(server_software) = &capabilities.server_software { let server_lower = server_software.to_lowercase(); // Nextcloud and ownCloud support recursive ETags if server_lower.contains("nextcloud") || server_lower.contains("owncloud") { debug!("โœ… Server supports recursive ETags (Nextcloud/ownCloud)"); return Ok(true); } // Apache mod_dav typically supports recursive ETags if server_lower.contains("apache") && capabilities.dav_compliance.contains("1") { debug!("โœ… Server likely supports recursive ETags (Apache WebDAV)"); return Ok(true); } } // For unknown servers, assume basic ETag support but not recursive debug!("โš ๏ธ Unknown server type, assuming no recursive ETag support"); Ok(false) } /// Checks if a path is a direct child of a parent directory pub fn is_direct_child(&self, file_path: &str, parent_path: &str) -> bool { // Normalize paths by removing trailing slashes let normalized_parent = parent_path.trim_end_matches('/'); let normalized_file = file_path.trim_end_matches('/'); // Handle root case if normalized_parent.is_empty() || normalized_parent == "/" { return !normalized_file.is_empty() && normalized_file.matches('/').count() == 1; } // Check if file path starts with parent path if !normalized_file.starts_with(normalized_parent) { return false; } // Get the remainder after the parent path let remainder = &normalized_file[normalized_parent.len()..]; // Should start with '/' and contain no additional '/' characters remainder.starts_with('/') && remainder[1..].find('/').is_none() } /// Gets configuration information pub fn get_config(&self) -> &WebDAVConfig { &self.config } /// Gets retry configuration pub fn get_retry_config(&self) -> &RetryConfig { &self.retry_config } /// Gets concurrency configuration pub fn get_concurrency_config(&self) -> &ConcurrencyConfig { &self.concurrency_config } // ============================================================================ // URL Management Methods (for backward compatibility with WebDAVUrlManager) // ============================================================================ /// Processes a single FileIngestionInfo to convert full paths to relative paths pub fn process_file_info(&self, mut file_info: FileIngestionInfo) -> FileIngestionInfo { // Convert full_path to relative_path file_info.relative_path = self.href_to_relative_path(&file_info.full_path); // For backward compatibility, set the deprecated path field to relative_path #[allow(deprecated)] { file_info.path = file_info.relative_path.clone(); } file_info } /// Processes multiple FileIngestionInfo objects to convert full paths to relative paths pub fn process_file_infos(&self, file_infos: Vec) -> Vec { file_infos.into_iter().map(|file_info| self.process_file_info(file_info)).collect() } /// Converts a relative path to a full URL (alias for path_to_url for compatibility) pub fn relative_path_to_url(&self, relative_path: &str) -> String { self.path_to_url(relative_path) } } // Implement Clone to allow sharing the service impl Clone for WebDAVService { fn clone(&self) -> Self { Self { client: self.client.clone(), config: self.config.clone(), retry_config: self.retry_config.clone(), concurrency_config: self.concurrency_config.clone(), scan_semaphore: Arc::clone(&self.scan_semaphore), download_semaphore: Arc::clone(&self.download_semaphore), working_protocol: Arc::clone(&self.working_protocol), } } } /// Tests WebDAV connection with provided configuration (standalone function for backward compatibility) pub async fn test_webdav_connection(test_config: &WebDAVTestConnection) -> Result { WebDAVService::test_connection_with_config(test_config).await }