From 2297eb8261de2ca0088918d1976329df2a274719 Mon Sep 17 00:00:00 2001 From: perf3ct Date: Thu, 3 Jul 2025 04:24:26 +0000 Subject: [PATCH] feat(webdav): also set up deep scanning button and fix unit tests --- frontend/src/pages/SourcesPage.tsx | 46 ++ src/db/webdav.rs | 12 + src/models.rs | 2 +- src/routes/sources.rs | 237 ++++++++++ src/routes/webdav/webdav_sync.rs | 65 +++ src/services/webdav_service.rs | 457 +++++++++++++++----- tests/unit_webdav_url_construction_tests.rs | 254 +++++++++++ 7 files changed, 974 insertions(+), 99 deletions(-) create mode 100644 tests/unit_webdav_url_construction_tests.rs diff --git a/frontend/src/pages/SourcesPage.tsx b/frontend/src/pages/SourcesPage.tsx index 1690fce..21b907c 100644 --- a/frontend/src/pages/SourcesPage.tsx +++ b/frontend/src/pages/SourcesPage.tsx @@ -68,6 +68,7 @@ import { TextSnippet as DocumentIcon, Visibility as OcrIcon, Block as BlockIcon, + FindInPage as DeepScanIcon, } from '@mui/icons-material'; import { useNavigate } from 'react-router-dom'; import api, { queueService } from '../services/api'; @@ -151,6 +152,7 @@ const SourcesPage: React.FC = () => { const [testingConnection, setTestingConnection] = useState(false); const [syncingSource, setSyncingSource] = useState(null); const [stoppingSync, setStoppingSync] = useState(null); + const [deepScanning, setDeepScanning] = useState(null); const [autoRefreshing, setAutoRefreshing] = useState(false); useEffect(() => { @@ -488,6 +490,31 @@ const SourcesPage: React.FC = () => { } }; + const handleDeepScan = async (sourceId: string) => { + setDeepScanning(sourceId); + try { + const response = await api.post(`/sources/${sourceId}/deep-scan`); + if (response.data.success) { + showSnackbar(response.data.message || 'Deep scan started successfully', 'success'); + setTimeout(loadSources, 1000); + } else { + showSnackbar(response.data.message || 'Failed to start deep scan', 'error'); + } + } catch (error: any) { + console.error('Failed to trigger deep scan:', error); + 
if (error.response?.status === 409) { + showSnackbar('Source is already syncing', 'warning'); + } else if (error.response?.status === 404) { + showSnackbar('Source not found', 'error'); + } else { + const message = error.response?.data?.message || 'Failed to start deep scan'; + showSnackbar(message, 'error'); + } + } finally { + setDeepScanning(null); + } + }; + // Utility functions for folder management const addFolder = () => { if (newFolder && !formData.watch_folders.includes(newFolder)) { @@ -837,6 +864,25 @@ const SourcesPage: React.FC = () => { )} + + + handleDeepScan(source.id)} + disabled={deepScanning === source.id || source.status === 'syncing' || !source.enabled} + sx={{ + bgcolor: alpha(theme.palette.secondary.main, 0.1), + '&:hover': { bgcolor: alpha(theme.palette.secondary.main, 0.2) }, + color: theme.palette.secondary.main, + }} + > + {deepScanning === source.id ? ( + + ) : ( + + )} + + + handleEditSource(source)} diff --git a/src/db/webdav.rs b/src/db/webdav.rs index e5c0f44..069edc9 100644 --- a/src/db/webdav.rs +++ b/src/db/webdav.rs @@ -339,4 +339,16 @@ impl Database { Ok(directories) } + + /// Clear all WebDAV directory tracking for a user (used for deep scan) + pub async fn clear_webdav_directories(&self, user_id: Uuid) -> Result { + let result = sqlx::query( + r#"DELETE FROM webdav_directories WHERE user_id = $1"# + ) + .bind(user_id) + .execute(&self.pool) + .await?; + + Ok(result.rows_affected() as i64) + } } \ No newline at end of file diff --git a/src/models.rs b/src/models.rs index 28cf9de..5ab5c15 100644 --- a/src/models.rs +++ b/src/models.rs @@ -1120,7 +1120,7 @@ impl From for SourceResponse { } } -#[derive(Debug, Serialize, Deserialize, ToSchema)] +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] pub struct WebDAVSourceConfig { pub server_url: String, pub username: String, diff --git a/src/routes/sources.rs b/src/routes/sources.rs index b42fd19..9ae895e 100644 --- a/src/routes/sources.rs +++ b/src/routes/sources.rs @@ -8,6 
+8,7 @@ use axum::{ use std::sync::Arc; use uuid::Uuid; use tracing::{error, info}; +use anyhow::Result; use crate::{ auth::AuthUser, @@ -21,6 +22,7 @@ pub fn router() -> Router> { .route("/{id}", get(get_source).put(update_source).delete(delete_source)) .route("/{id}/sync", post(trigger_sync)) .route("/{id}/sync/stop", post(stop_sync)) + .route("/{id}/deep-scan", post(trigger_deep_scan)) .route("/{id}/test", post(test_connection)) .route("/{id}/estimate", post(estimate_crawl)) .route("/estimate", post(estimate_crawl_with_config)) @@ -389,6 +391,241 @@ async fn trigger_sync( Ok(StatusCode::OK) } +#[utoipa::path( + post, + path = "/api/sources/{id}/deep-scan", + tag = "sources", + security( + ("bearer_auth" = []) + ), + params( + ("id" = Uuid, Path, description = "Source ID") + ), + responses( + (status = 200, description = "Deep scan started successfully"), + (status = 401, description = "Unauthorized"), + (status = 404, description = "Source not found"), + (status = 409, description = "Source is already syncing"), + (status = 500, description = "Internal server error") + ) +)] +async fn trigger_deep_scan( + auth_user: AuthUser, + Path(source_id): Path, + State(state): State>, +) -> Result, StatusCode> { + info!("Starting deep scan for source {} by user {}", source_id, auth_user.user.username); + + let source = state + .db + .get_source(auth_user.user.id, source_id) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)? + .ok_or(StatusCode::NOT_FOUND)?; + + // Check if source is already syncing + if matches!(source.status, crate::models::SourceStatus::Syncing) { + return Ok(Json(serde_json::json!({ + "success": false, + "error": "source_already_syncing", + "message": "Source is already syncing. Please wait for the current sync to complete before starting a deep scan." 
+ }))); + } + + match source.source_type { + crate::models::SourceType::WebDAV => { + // Handle WebDAV deep scan + let config: crate::models::WebDAVSourceConfig = serde_json::from_value(source.config) + .map_err(|e| { + error!("Failed to parse WebDAV config for source {}: {}", source_id, e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + // Create WebDAV service + let webdav_config = crate::services::webdav_service::WebDAVConfig { + server_url: config.server_url.clone(), + username: config.username.clone(), + password: config.password.clone(), + watch_folders: config.watch_folders.clone(), + file_extensions: config.file_extensions.clone(), + timeout_seconds: 600, // 10 minutes for deep scan + server_type: config.server_type.clone(), + }; + + let webdav_service = crate::services::webdav_service::WebDAVService::new(webdav_config.clone()) + .map_err(|e| { + error!("Failed to create WebDAV service for deep scan: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + // Update source status to syncing + state + .db + .update_source_status( + source_id, + crate::models::SourceStatus::Syncing, + Some("Deep scan in progress".to_string()), + ) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + // Start deep scan in background + let state_clone = state.clone(); + let user_id = auth_user.user.id; + let source_name = source.name.clone(); + let source_id_clone = source_id; + let config_clone = config.clone(); + + tokio::spawn(async move { + let start_time = chrono::Utc::now(); + + // Clear existing directory tracking to force full rescan + if let Err(e) = state_clone.db.clear_webdav_directories(user_id).await { + error!("Failed to clear WebDAV directories for deep scan: {}", e); + } + + // Use traditional discovery for deep scan to avoid borrowing issues + let mut all_discovered_files = Vec::new(); + for folder in &config_clone.watch_folders { + match webdav_service.discover_files_in_folder(folder).await { + Ok(mut folder_files) => { + info!("Deep scan discovered 
{} files in folder {}", folder_files.len(), folder); + all_discovered_files.append(&mut folder_files); + } + Err(e) => { + error!("Deep scan failed to discover files in folder {}: {}", folder, e); + // Continue with other folders + } + } + } + + if !all_discovered_files.is_empty() { + info!("Deep scan discovery completed for source {}: {} files found", source_id_clone, all_discovered_files.len()); + + // Filter files by extensions and process them + let files_to_process: Vec<_> = all_discovered_files.into_iter() + .filter(|file_info| { + if file_info.is_directory { + return false; + } + let file_extension = std::path::Path::new(&file_info.name) + .extension() + .and_then(|ext| ext.to_str()) + .unwrap_or("") + .to_lowercase(); + config_clone.file_extensions.contains(&file_extension) + }) + .collect(); + + info!("Deep scan will process {} files for source {}", files_to_process.len(), source_id_clone); + + // Process files using the existing sync mechanism + match crate::routes::webdav::webdav_sync::process_files_for_deep_scan( + state_clone.clone(), + user_id, + &webdav_service, + &files_to_process, + true, // enable background OCR + Some(source_id_clone) + ).await { + Ok(files_processed) => { + let duration = chrono::Utc::now() - start_time; + info!("Deep scan completed for source {}: {} files processed in {:?}", + source_id_clone, files_processed, duration); + + // Update source status to idle + if let Err(e) = state_clone.db.update_source_status( + source_id_clone, + crate::models::SourceStatus::Idle, + Some(format!("Deep scan completed: {} files processed", files_processed)), + ).await { + error!("Failed to update source status after deep scan: {}", e); + } + + // Send success notification + let notification = crate::models::CreateNotification { + notification_type: "success".to_string(), + title: "Deep Scan Completed".to_string(), + message: format!( + "Deep scan of {} completed successfully. 
{} files processed in {:.1} minutes.", + source_name, + files_processed, + duration.num_seconds() as f64 / 60.0 + ), + action_url: Some("/documents".to_string()), + metadata: Some(serde_json::json!({ + "source_id": source_id_clone, + "scan_type": "deep_scan", + "files_processed": files_processed, + "duration_seconds": duration.num_seconds() + })), + }; + + if let Err(e) = state_clone.db.create_notification(user_id, &notification).await { + error!("Failed to create deep scan success notification: {}", e); + } + } + Err(e) => { + error!("Deep scan file processing failed for source {}: {}", source_id_clone, e); + + // Update source status to error + if let Err(e2) = state_clone.db.update_source_status( + source_id_clone, + crate::models::SourceStatus::Error, + Some(format!("Deep scan failed: {}", e)), + ).await { + error!("Failed to update source status after deep scan error: {}", e2); + } + + // Send error notification + let notification = crate::models::CreateNotification { + notification_type: "error".to_string(), + title: "Deep Scan Failed".to_string(), + message: format!("Deep scan of {} failed: {}", source_name, e), + action_url: Some("/sources".to_string()), + metadata: Some(serde_json::json!({ + "source_id": source_id_clone, + "scan_type": "deep_scan", + "error": e.to_string() + })), + }; + + if let Err(e) = state_clone.db.create_notification(user_id, &notification).await { + error!("Failed to create deep scan error notification: {}", e); + } + } + } + + } else { + info!("Deep scan found no files for source {}", source_id_clone); + + // Update source status to idle even if no files found + if let Err(e) = state_clone.db.update_source_status( + source_id_clone, + crate::models::SourceStatus::Idle, + Some("Deep scan completed: no files found".to_string()), + ).await { + error!("Failed to update source status after empty deep scan: {}", e); + } + } + }); + + Ok(Json(serde_json::json!({ + "success": true, + "message": format!("Deep scan started for source '{}'. 
This will perform a complete rescan of all configured folders.", source.name) + }))) + } + _ => { + error!("Deep scan not supported for source type: {:?}", source.source_type); + Ok(Json(serde_json::json!({ + "success": false, + "error": "unsupported_source_type", + "message": "Deep scan is currently only supported for WebDAV sources" + }))) + } + } +} + #[utoipa::path( post, path = "/api/sources/{id}/sync/stop", diff --git a/src/routes/webdav/webdav_sync.rs b/src/routes/webdav/webdav_sync.rs index a3ce631..b78d840 100644 --- a/src/routes/webdav/webdav_sync.rs +++ b/src/routes/webdav/webdav_sync.rs @@ -379,3 +379,68 @@ async fn process_single_file( Ok(true) // Successfully processed } +/// Process files for deep scan - similar to regular sync but forces processing +pub async fn process_files_for_deep_scan( + state: Arc, + user_id: uuid::Uuid, + webdav_service: &WebDAVService, + files_to_process: &[crate::models::FileInfo], + enable_background_ocr: bool, + webdav_source_id: Option, +) -> Result { + info!("Processing {} files for deep scan", files_to_process.len()); + + let concurrent_limit = 5; // Max 5 concurrent downloads + let semaphore = Arc::new(Semaphore::new(concurrent_limit)); + let mut files_processed = 0; + let mut sync_errors = Vec::new(); + + // Create futures for processing each file concurrently + let mut file_futures = FuturesUnordered::new(); + + for file_info in files_to_process.iter() { + let state_clone = state.clone(); + let webdav_service_clone = webdav_service.clone(); + let file_info_clone = file_info.clone(); + let semaphore_clone = semaphore.clone(); + + // Create a future for processing this file + let future = async move { + process_single_file( + state_clone, + user_id, + &webdav_service_clone, + &file_info_clone, + enable_background_ocr, + semaphore_clone, + webdav_source_id, + ).await + }; + + file_futures.push(future); + } + + // Process files concurrently and collect results + while let Some(result) = file_futures.next().await { + 
match result { + Ok(processed) => { + if processed { + files_processed += 1; + info!("Deep scan: Successfully processed file ({} completed)", files_processed); + } + } + Err(error) => { + error!("Deep scan file processing error: {}", error); + sync_errors.push(error); + } + } + } + + if !sync_errors.is_empty() { + warn!("Deep scan completed with {} errors: {:?}", sync_errors.len(), sync_errors); + } + + info!("Deep scan file processing completed: {} files processed successfully", files_processed); + Ok(files_processed) +} + diff --git a/src/services/webdav_service.rs b/src/services/webdav_service.rs index aa4a886..96bc33e 100644 --- a/src/services/webdav_service.rs +++ b/src/services/webdav_service.rs @@ -4,6 +4,8 @@ use reqwest::{Client, Method, Url}; use std::collections::HashSet; use std::time::Duration; use tokio::time::sleep; +use tokio::sync::Semaphore; +use futures_util::stream::{self, StreamExt}; use tracing::{debug, error, info, warn}; use crate::models::{ @@ -30,6 +32,14 @@ pub struct RetryConfig { pub max_delay_ms: u64, pub backoff_multiplier: f64, pub timeout_seconds: u64, + pub rate_limit_backoff_ms: u64, // Additional backoff for 429 responses +} + +#[derive(Debug, Clone)] +pub struct ConcurrencyConfig { + pub max_concurrent_scans: usize, + pub max_concurrent_downloads: usize, + pub adaptive_rate_limiting: bool, } impl Default for RetryConfig { @@ -40,6 +50,17 @@ impl Default for RetryConfig { max_delay_ms: 30000, // 30 seconds backoff_multiplier: 2.0, timeout_seconds: 300, // 5 minutes total timeout for crawl operations + rate_limit_backoff_ms: 5000, // 5 seconds extra for rate limits + } + } +} + +impl Default for ConcurrencyConfig { + fn default() -> Self { + Self { + max_concurrent_scans: 10, + max_concurrent_downloads: 5, + adaptive_rate_limiting: true, } } } @@ -52,14 +73,19 @@ pub struct WebDAVService { config: WebDAVConfig, base_webdav_url: String, retry_config: RetryConfig, + concurrency_config: ConcurrencyConfig, } impl WebDAVService { pub 
fn new(config: WebDAVConfig) -> Result { - Self::new_with_retry(config, RetryConfig::default()) + Self::new_with_configs(config, RetryConfig::default(), ConcurrencyConfig::default()) } pub fn new_with_retry(config: WebDAVConfig, retry_config: RetryConfig) -> Result { + Self::new_with_configs(config, retry_config, ConcurrencyConfig::default()) + } + + pub fn new_with_configs(config: WebDAVConfig, retry_config: RetryConfig, concurrency_config: ConcurrencyConfig) -> Result { let client = Client::builder() .timeout(Duration::from_secs(config.timeout_seconds)) .build()?; @@ -103,7 +129,7 @@ impl WebDAVService { config.server_url.trim_end_matches('/'), config.username ); - info!("🔗 Constructed Nextcloud/ownCloud WebDAV URL: {}", url); + debug!("🔗 Constructed Nextcloud/ownCloud WebDAV URL: {}", url); url }, _ => { @@ -111,7 +137,7 @@ impl WebDAVService { "{}/webdav", config.server_url.trim_end_matches('/') ); - info!("🔗 Constructed generic WebDAV URL: {}", url); + debug!("🔗 Constructed generic WebDAV URL: {}", url); url }, }; @@ -121,6 +147,7 @@ impl WebDAVService { config, base_webdav_url, retry_config, + concurrency_config, }) } @@ -154,10 +181,19 @@ impl WebDAVService { return Err(err); } - warn!("{} failed (attempt {}), retrying in {}ms: {}", - operation_name, attempt, delay, err); + // Apply adaptive backoff for rate limiting + let actual_delay = if Self::is_rate_limit_error(&err) && self.concurrency_config.adaptive_rate_limiting { + let rate_limit_delay = delay + self.retry_config.rate_limit_backoff_ms; + warn!("{} rate limited (attempt {}), retrying in {}ms with extra backoff: {}", + operation_name, attempt, rate_limit_delay, err); + rate_limit_delay + } else { + warn!("{} failed (attempt {}), retrying in {}ms: {}", + operation_name, attempt, delay, err); + delay + }; - sleep(Duration::from_millis(delay)).await; + sleep(Duration::from_millis(actual_delay)).await; // Calculate next delay with exponential backoff delay = ((delay as f64 * 
self.retry_config.backoff_multiplier) as u64) @@ -175,7 +211,13 @@ impl WebDAVService { || reqwest_error.is_connect() || reqwest_error.is_request() || reqwest_error.status() - .map(|s| s.is_server_error() || s == 429) // 429 = Too Many Requests + .map(|s| { + s.is_server_error() // 5xx errors (including server restart scenarios) + || s == 429 // Too Many Requests + || s == 502 // Bad Gateway (server restarting) + || s == 503 // Service Unavailable (server restarting/overloaded) + || s == 504 // Gateway Timeout (server slow to respond) + }) .unwrap_or(true); } @@ -185,6 +227,44 @@ impl WebDAVService { || error_str.contains("connection") || error_str.contains("network") || error_str.contains("temporary") + || error_str.contains("rate limit") + || error_str.contains("too many requests") + || error_str.contains("connection reset") + || error_str.contains("connection aborted") + || error_str.contains("server unavailable") + || error_str.contains("bad gateway") + || error_str.contains("service unavailable") + } + + fn is_rate_limit_error(error: &anyhow::Error) -> bool { + if let Some(reqwest_error) = error.downcast_ref::() { + return reqwest_error.status() + .map(|s| s == 429) + .unwrap_or(false); + } + + let error_str = error.to_string().to_lowercase(); + error_str.contains("rate limit") || error_str.contains("too many requests") + } + + fn is_server_restart_error(&self, error: &anyhow::Error) -> bool { + if let Some(reqwest_error) = error.downcast_ref::() { + if let Some(status) = reqwest_error.status() { + return status == 502 // Bad Gateway + || status == 503 // Service Unavailable + || status == 504; // Gateway Timeout + } + + // Network-level connection issues often indicate server restart + return reqwest_error.is_connect() || reqwest_error.is_timeout(); + } + + let error_str = error.to_string().to_lowercase(); + error_str.contains("connection reset") + || error_str.contains("connection aborted") + || error_str.contains("bad gateway") + || 
error_str.contains("service unavailable") + || error_str.contains("server unreachable") } pub async fn test_connection(&self, test_config: WebDAVTestConnection) -> Result { @@ -243,7 +323,7 @@ impl WebDAVService { ), }; - info!("🔗 Constructed test URL: {}", test_url); + debug!("🔗 Constructed test URL: {}", test_url); let resp = self.client .request(Method::from_bytes(b"PROPFIND").unwrap(), &test_url) @@ -333,7 +413,7 @@ impl WebDAVService { .collect(); for folder_path in folders { - info!("Analyzing folder: {}", folder_path); + debug!("Analyzing folder: {}", folder_path); match self.analyze_folder(folder_path, &supported_extensions).await { Ok(folder_info) => { @@ -418,16 +498,16 @@ impl WebDAVService { /// Optimized discovery that checks directory ETag first to avoid unnecessary deep scans pub async fn discover_files_in_folder_optimized(&self, folder_path: &str, user_id: uuid::Uuid, state: &crate::AppState) -> Result> { - info!("🔍 Starting optimized discovery for folder: {}", folder_path); + debug!("🔍 Starting optimized discovery for folder: {}", folder_path); // Check if we should use smart scanning let use_smart_scan = match self.config.server_type.as_deref() { Some("nextcloud") | Some("owncloud") => { - info!("🚀 Using smart scanning for Nextcloud/ownCloud server"); + debug!("🚀 Using smart scanning for Nextcloud/ownCloud server"); true } _ => { - info!("📁 Using traditional scanning for generic WebDAV server"); + debug!("📁 Using traditional scanning for generic WebDAV server"); false } }; @@ -461,7 +541,7 @@ impl WebDAVService { match state.db.get_webdav_directory(user_id, folder_path).await { Ok(Some(stored_dir)) => { if stored_dir.directory_etag == current_dir_etag { - info!("✅ Directory {} unchanged (ETag: {}), checking subdirectories individually", folder_path, current_dir_etag); + debug!("✅ Directory {} unchanged (ETag: {}), checking subdirectories individually", folder_path, current_dir_etag); // Update last_scanned_at to show we checked let update = 
crate::models::UpdateWebDAVDirectory { @@ -479,12 +559,12 @@ impl WebDAVService { let changed_files = self.check_subdirectories_for_changes(folder_path, user_id, state).await?; return Ok(changed_files); } else { - info!("🔄 Directory {} changed (old ETag: {}, new ETag: {}), performing deep scan", + debug!("🔄 Directory {} changed (old ETag: {}, new ETag: {}), performing deep scan", folder_path, stored_dir.directory_etag, current_dir_etag); } } Ok(None) => { - info!("🆕 New directory {}, performing initial scan", folder_path); + debug!("🆕 New directory {}, performing initial scan", folder_path); } Err(e) => { warn!("Database error checking directory {}: {}, proceeding with scan", folder_path, e); @@ -509,7 +589,7 @@ impl WebDAVService { if let Err(e) = state.db.create_or_update_webdav_directory(&directory_record).await { error!("Failed to update directory tracking for {}: {}", folder_path, e); } else { - info!("📊 Updated directory tracking: {} files, {} bytes, ETag: {}", + debug!("📊 Updated directory tracking: {} files, {} bytes, ETag: {}", file_count, total_size_bytes, current_dir_etag); } @@ -549,7 +629,7 @@ impl WebDAVService { } } - info!("🗂️ Found {} unique directories at all levels", all_directories.len()); + debug!("🗂️ Found {} unique directories at all levels", all_directories.len()); // Step 2: Create a mapping of directory -> ETag from the files list let mut directory_etags: HashMap = HashMap::new(); @@ -608,7 +688,7 @@ impl WebDAVService { } } - info!("✅ Completed tracking {} directories at all depth levels", all_directories.len()); + debug!("✅ Completed tracking {} directories at all depth levels", all_directories.len()); } /// Check if a path is a direct child of a directory (not nested deeper) @@ -643,12 +723,12 @@ impl WebDAVService { /// Perform targeted re-scanning of only specific paths that have changed pub async fn discover_files_targeted_rescan(&self, paths_to_scan: &[String], user_id: uuid::Uuid, state: &crate::AppState) -> Result> { - info!("🎯 
Starting targeted re-scan for {} specific paths", paths_to_scan.len()); + debug!("🎯 Starting targeted re-scan for {} specific paths", paths_to_scan.len()); let mut all_files = Vec::new(); for path in paths_to_scan { - info!("🔍 Targeted scan of: {}", path); + debug!("🔍 Targeted scan of: {}", path); // Check if this specific path has changed match self.check_directory_etag(path).await { @@ -657,7 +737,7 @@ impl WebDAVService { let needs_scan = match state.db.get_webdav_directory(user_id, path).await { Ok(Some(stored_dir)) => { if stored_dir.directory_etag != current_etag { - info!("🔄 Path {} changed (old: {}, new: {})", path, stored_dir.directory_etag, current_etag); + debug!("🔄 Path {} changed (old: {}, new: {})", path, stored_dir.directory_etag, current_etag); true } else { debug!("✅ Path {} unchanged (ETag: {})", path, current_etag); @@ -665,7 +745,7 @@ impl WebDAVService { } } Ok(None) => { - info!("🆕 New path {} detected", path); + debug!("🆕 New path {} detected", path); true } Err(e) => { @@ -678,7 +758,7 @@ impl WebDAVService { // Use shallow scan for this specific directory only match self.discover_files_in_folder_shallow(path).await { Ok(mut path_files) => { - info!("📂 Found {} files in changed path {}", path_files.len(), path); + debug!("📂 Found {} files in changed path {}", path_files.len(), path); all_files.append(&mut path_files); // Update tracking for this specific path @@ -696,7 +776,7 @@ impl WebDAVService { } } - info!("🎯 Targeted re-scan completed: {} total files found", all_files.len()); + debug!("🎯 Targeted re-scan completed: {} total files found", all_files.len()); Ok(all_files) } @@ -760,7 +840,7 @@ impl WebDAVService { match state.db.create_or_update_webdav_directory(&directory_record).await { Ok(_) => { - info!("📊 Updated single directory tracking: {} ({} files, {} bytes, ETag: {})", + debug!("📊 Updated single directory tracking: {} ({} files, {} bytes, ETag: {})", directory_path, file_count, total_size_bytes, dir_etag); } Err(e) => { @@ 
-780,7 +860,7 @@ impl WebDAVService { .map(|dir| dir.directory_path.clone()) .collect(); - info!("🕒 Found {} directories not scanned in last {} hours", stale_dirs.len(), max_age_hours); + debug!("🕒 Found {} directories not scanned in last {} hours", stale_dirs.len(), max_age_hours); Ok(stale_dirs) } Err(e) => { @@ -792,12 +872,12 @@ impl WebDAVService { /// Smart sync mode that combines multiple optimization strategies pub async fn discover_files_smart_sync(&self, watch_folders: &[String], user_id: uuid::Uuid, state: &crate::AppState) -> Result> { - info!("🧠 Starting smart sync for {} watch folders", watch_folders.len()); + debug!("🧠 Starting smart sync for {} watch folders", watch_folders.len()); let mut all_files = Vec::new(); for folder_path in watch_folders { - info!("🔍 Smart sync processing folder: {}", folder_path); + debug!("🔍 Smart sync processing folder: {}", folder_path); // Step 1: Try optimized discovery first (checks directory ETag) let optimized_result = self.discover_files_in_folder_optimized(folder_path, user_id, state).await; @@ -805,20 +885,20 @@ impl WebDAVService { match optimized_result { Ok(files) => { if !files.is_empty() { - info!("✅ Optimized discovery found {} files in {}", files.len(), folder_path); + debug!("✅ Optimized discovery found {} files in {}", files.len(), folder_path); all_files.extend(files); } else { - info!("🔍 Directory {} unchanged, checking for stale subdirectories", folder_path); + debug!("🔍 Directory {} unchanged, checking for stale subdirectories", folder_path); // Step 2: Check for stale subdirectories that need targeted scanning let stale_dirs = self.get_stale_subdirectories(folder_path, user_id, state, 24).await?; if !stale_dirs.is_empty() { - info!("🎯 Found {} stale subdirectories, performing targeted scan", stale_dirs.len()); + debug!("🎯 Found {} stale subdirectories, performing targeted scan", stale_dirs.len()); let targeted_files = self.discover_files_targeted_rescan(&stale_dirs, user_id, state).await?; 
all_files.extend(targeted_files); } else { - info!("✅ All subdirectories of {} are fresh, no scan needed", folder_path); + debug!("✅ All subdirectories of {} are fresh, no scan needed", folder_path); } } } @@ -827,7 +907,7 @@ impl WebDAVService { // Fallback to traditional full scan match self.discover_files_in_folder(folder_path).await { Ok(files) => { - info!("📂 Fallback scan found {} files in {}", files.len(), folder_path); + debug!("📂 Fallback scan found {} files in {}", files.len(), folder_path); all_files.extend(files); } Err(fallback_error) => { @@ -839,7 +919,7 @@ impl WebDAVService { } } - info!("🧠 Smart sync completed: {} total files discovered", all_files.len()); + debug!("🧠 Smart sync completed: {} total files discovered", all_files.len()); Ok(all_files) } @@ -871,7 +951,7 @@ impl WebDAVService { /// Perform incremental sync - only scan directories that have actually changed pub async fn discover_files_incremental(&self, watch_folders: &[String], user_id: uuid::Uuid, state: &crate::AppState) -> Result> { - info!("⚡ Starting incremental sync for {} watch folders", watch_folders.len()); + debug!("⚡ Starting incremental sync for {} watch folders", watch_folders.len()); let mut changed_files = Vec::new(); let mut unchanged_count = 0; @@ -884,7 +964,7 @@ impl WebDAVService { let needs_scan = match state.db.get_webdav_directory(user_id, folder_path).await { Ok(Some(stored_dir)) => { if stored_dir.directory_etag != current_etag { - info!("🔄 Directory {} changed (ETag: {} → {})", folder_path, stored_dir.directory_etag, current_etag); + debug!("🔄 Directory {} changed (ETag: {} → {})", folder_path, stored_dir.directory_etag, current_etag); changed_count += 1; true } else { @@ -894,7 +974,7 @@ impl WebDAVService { } } Ok(None) => { - info!("🆕 New directory {} detected", folder_path); + debug!("🆕 New directory {} detected", folder_path); changed_count += 1; true } @@ -909,7 +989,7 @@ impl WebDAVService { // Directory changed - perform targeted scan match 
self.discover_files_in_folder_optimized(folder_path, user_id, state).await { Ok(mut files) => { - info!("📂 Incremental scan found {} files in changed directory {}", files.len(), folder_path); + debug!("📂 Incremental scan found {} files in changed directory {}", files.len(), folder_path); changed_files.append(&mut files); } Err(e) => { @@ -936,7 +1016,7 @@ impl WebDAVService { } } - info!("⚡ Incremental sync completed: {} unchanged, {} changed, {} total files found", + debug!("⚡ Incremental sync completed: {} unchanged, {} changed, {} total files found", unchanged_count, changed_count, changed_files.len()); Ok(changed_files) @@ -952,12 +1032,12 @@ impl WebDAVService { if supports_recursive_etags { // With recursive ETags, if parent hasn't changed, nothing inside has changed - info!("🚀 Server supports recursive ETags - parent {} unchanged means all contents unchanged", parent_path); + debug!("🚀 Server supports recursive ETags - parent {} unchanged means all contents unchanged", parent_path); return Ok(Vec::new()); } // For servers without recursive ETags, fall back to checking each subdirectory - info!("📁 Server doesn't support recursive ETags, checking subdirectories individually"); + debug!("📁 Server doesn't support recursive ETags, checking subdirectories individually"); // Get all known subdirectories from database let known_directories = match state.db.list_webdav_directories(user_id).await { @@ -974,11 +1054,11 @@ impl WebDAVService { .collect(); if subdirectories.is_empty() { - info!("📁 No known subdirectories for {}, performing initial scan to discover structure", parent_path); + debug!("📁 No known subdirectories for {}, performing initial scan to discover structure", parent_path); return self.discover_files_in_folder_impl(parent_path).await; } - info!("🔍 Checking {} known subdirectories for changes", subdirectories.len()); + debug!("🔍 Checking {} known subdirectories for changes", subdirectories.len()); let mut changed_files = Vec::new(); let 
subdirectory_count = subdirectories.len(); @@ -991,13 +1071,13 @@ impl WebDAVService { match self.check_directory_etag(subdir_path).await { Ok(current_etag) => { if current_etag != subdir.directory_etag { - info!("🔄 Subdirectory {} changed (old: {}, new: {}), scanning recursively", + debug!("🔄 Subdirectory {} changed (old: {}, new: {}), scanning recursively", subdir_path, subdir.directory_etag, current_etag); // This subdirectory changed - get all its files recursively match self.discover_files_in_folder_impl(subdir_path).await { Ok(mut subdir_files) => { - info!("📂 Found {} files in changed subdirectory {}", subdir_files.len(), subdir_path); + debug!("📂 Found {} files in changed subdirectory {}", subdir_files.len(), subdir_path); changed_files.append(&mut subdir_files); // Update tracking for this subdirectory and its children @@ -1030,7 +1110,7 @@ impl WebDAVService { } } - info!("🎯 Found {} changed files across {} subdirectories", changed_files.len(), subdirectory_count); + debug!("🎯 Found {} changed files across {} subdirectories", changed_files.len(), subdirectory_count); Ok(changed_files) } @@ -1160,7 +1240,7 @@ impl WebDAVService { /// (i.e., parent directory ETags change when child content changes) /// This test is read-only and checks existing directory structures pub async fn test_recursive_etag_support(&self) -> Result { - info!("🔬 Testing recursive ETag support using existing directory structure"); + debug!("🔬 Testing recursive ETag support using existing directory structure"); // Find a directory with subdirectories from our watch folders for watch_folder in &self.config.watch_folders { @@ -1178,7 +1258,7 @@ impl WebDAVService { // Use the first subdirectory for testing let test_subdir = &subdirs[0]; - info!("Testing with directory: {} and subdirectory: {}", watch_folder, test_subdir.path); + debug!("Testing with directory: {} and subdirectory: {}", watch_folder, test_subdir.path); // Step 1: Get parent directory ETag let parent_etag = 
self.check_directory_etag(watch_folder).await?; @@ -1193,19 +1273,19 @@ impl WebDAVService { // For now, we'll just check if the server provides ETags at all if !parent_etag.is_empty() && !subdir_etag.is_empty() { - info!("✅ Server provides ETags for directories"); - info!(" Parent ETag: {}", parent_etag); - info!(" Subdir ETag: {}", subdir_etag); + debug!("✅ Server provides ETags for directories"); + debug!(" Parent ETag: {}", parent_etag); + debug!(" Subdir ETag: {}", subdir_etag); // Without write access, we can't definitively test recursive propagation // But we can make an educated guess based on the server type let likely_supports_recursive = match self.config.server_type.as_deref() { Some("nextcloud") | Some("owncloud") => { - info!(" Nextcloud/ownCloud servers typically support recursive ETags"); + debug!(" Nextcloud/ownCloud servers typically support recursive ETags"); true } _ => { - info!(" Unknown server type - recursive ETag support uncertain"); + debug!(" Unknown server type - recursive ETag support uncertain"); false } }; @@ -1220,10 +1300,29 @@ impl WebDAVService { } } - info!("❓ Could not determine recursive ETag support - no suitable directories found"); + debug!("❓ Could not determine recursive ETag support - no suitable directories found"); Ok(false) } + /// Convert full WebDAV path to relative path for use with base_webdav_url + pub fn convert_to_relative_path(&self, full_webdav_path: &str) -> String { + // For Nextcloud/ownCloud paths like "/remote.php/dav/files/username/folder/subfolder/" + // We need to extract just the "folder/subfolder/" part + let webdav_prefix = match self.config.server_type.as_deref() { + Some("nextcloud") | Some("owncloud") => { + format!("/remote.php/dav/files/{}/", self.config.username) + }, + _ => "/webdav/".to_string() + }; + + if let Some(relative_part) = full_webdav_path.strip_prefix(&webdav_prefix) { + format!("/{}", relative_part) + } else { + // If path doesn't match expected format, return as-is + 
full_webdav_path.to_string() + } + } + /// Smart directory scan that uses depth-1 traversal for efficient synchronization /// Only scans directories whose ETags have changed, avoiding unnecessary deep scans pub fn smart_directory_scan<'a>( @@ -1234,14 +1333,18 @@ impl WebDAVService { state: &'a crate::AppState ) -> std::pin::Pin>> + Send + 'a>> { Box::pin(async move { - info!("🧠 Smart scan starting for path: {}", path); + debug!("🧠 Smart scan starting for path: {}", path); + + // Convert full WebDAV path to relative path for existing functions + let relative_path = self.convert_to_relative_path(path); + debug!("🔄 Converted {} to relative path: {}", path, relative_path); // Step 1: Check current directory ETag - let current_etag = match self.check_directory_etag(path).await { + let current_etag = match self.check_directory_etag(&relative_path).await { Ok(etag) => etag, Err(e) => { warn!("Failed to get directory ETag for {}, falling back to full scan: {}", path, e); - return self.discover_files_in_folder_impl(path).await; + return self.discover_files_in_folder_impl(&relative_path).await; } }; @@ -1253,17 +1356,17 @@ impl WebDAVService { }; if supports_recursive { - info!("✅ Directory {} unchanged (recursive ETag: {}), skipping scan", path, current_etag); + debug!("✅ Directory {} unchanged (recursive ETag: {}), skipping scan", path, current_etag); return Ok(Vec::new()); } else { - info!("📁 Directory {} ETag unchanged but server doesn't support recursive ETags, checking subdirectories", path); + debug!("📁 Directory {} ETag unchanged but server doesn't support recursive ETags, checking subdirectories", path); } } else { - info!("🔄 Directory {} changed (old: {:?}, new: {})", path, known_etag, current_etag); + debug!("🔄 Directory {} changed (old: {:?}, new: {})", path, known_etag, current_etag); } // Step 3: Directory changed or we need to check subdirectories - do depth-1 scan - let entries = match self.discover_files_in_folder_shallow(path).await { + let entries = match 
self.discover_files_in_folder_shallow(&relative_path).await { Ok(files) => files, Err(e) => { error!("Failed shallow scan of {}: {}", path, e); @@ -1301,57 +1404,215 @@ impl WebDAVService { warn!("Failed to update directory tracking for {}: {}", path, e); } - // Step 4: For each subdirectory, check if it needs scanning - for subdir in subdirs_to_scan { - // Get stored ETag for this subdirectory - let stored_etag = match state.db.get_webdav_directory(user_id, &subdir.path).await { - Ok(Some(dir)) => Some(dir.directory_etag), - Ok(None) => { - info!("🆕 New subdirectory discovered: {}", subdir.path); - None - } - Err(e) => { - warn!("Database error checking subdirectory {}: {}", subdir.path, e); - None - } - }; + // Step 4: Process subdirectories concurrently with controlled parallelism + if !subdirs_to_scan.is_empty() { + let semaphore = std::sync::Arc::new(Semaphore::new(self.concurrency_config.max_concurrent_scans)); + let subdirs_stream = stream::iter(subdirs_to_scan) + .map(|subdir| { + let semaphore = semaphore.clone(); + let service = self.clone(); + async move { + let _permit = semaphore.acquire().await.map_err(|e| anyhow!("Semaphore error: {}", e))?; + + // Get stored ETag for this subdirectory + let stored_etag = match state.db.get_webdav_directory(user_id, &subdir.path).await { + Ok(Some(dir)) => Some(dir.directory_etag), + Ok(None) => { + debug!("🆕 New subdirectory discovered: {}", subdir.path); + None + } + Err(e) => { + warn!("Database error checking subdirectory {}: {}", subdir.path, e); + None + } + }; + + // If ETag changed or new directory, scan it recursively + if stored_etag.as_deref() != Some(&subdir.etag) { + debug!("🔄 Subdirectory {} needs scanning (old: {:?}, new: {})", + subdir.path, stored_etag, subdir.etag); + + match service.smart_directory_scan(&subdir.path, stored_etag.as_deref(), user_id, state).await { + Ok(subdir_files) => { + debug!("📂 Found {} entries in subdirectory {}", subdir_files.len(), subdir.path); + Result::, 
anyhow::Error>::Ok(subdir_files) + } + Err(e) => { + error!("Failed to scan subdirectory {}: {}", subdir.path, e); + Result::, anyhow::Error>::Ok(Vec::new()) // Continue with other subdirectories + } + } + } else { + debug!("✅ Subdirectory {} unchanged (ETag: {})", subdir.path, subdir.etag); + // Update last_scanned_at + let update = crate::models::UpdateWebDAVDirectory { + directory_etag: subdir.etag.clone(), + last_scanned_at: chrono::Utc::now(), + file_count: 0, // Will be preserved by database + total_size_bytes: 0, + }; + + if let Err(e) = state.db.update_webdav_directory(user_id, &subdir.path, &update).await { + warn!("Failed to update scan time for {}: {}", subdir.path, e); + } + Result::, anyhow::Error>::Ok(Vec::new()) + } + } + }) + .buffer_unordered(self.concurrency_config.max_concurrent_scans); - // If ETag changed or new directory, scan it recursively - if stored_etag.as_deref() != Some(&subdir.etag) { - info!("🔄 Subdirectory {} needs scanning (old: {:?}, new: {})", - subdir.path, stored_etag, subdir.etag); - - match self.smart_directory_scan(&subdir.path, stored_etag.as_deref(), user_id, state).await { + // Collect all results concurrently + let mut subdirs_stream = std::pin::pin!(subdirs_stream); + while let Some(result) = subdirs_stream.next().await { + match result { Ok(mut subdir_files) => { - info!("📂 Found {} entries in subdirectory {}", subdir_files.len(), subdir.path); all_files.append(&mut subdir_files); } Err(e) => { - error!("Failed to scan subdirectory {}: {}", subdir.path, e); - // Continue with other subdirectories + warn!("Concurrent subdirectory scan error: {}", e); + // Continue processing other subdirectories } } - } else { - debug!("✅ Subdirectory {} unchanged (ETag: {})", subdir.path, subdir.etag); - // Update last_scanned_at - let update = crate::models::UpdateWebDAVDirectory { - directory_etag: subdir.etag.clone(), - last_scanned_at: chrono::Utc::now(), - file_count: 0, // Will be preserved by database - total_size_bytes: 0, - }; 
- - if let Err(e) = state.db.update_webdav_directory(user_id, &subdir.path, &update).await { - warn!("Failed to update scan time for {}: {}", subdir.path, e); - } } } - info!("🧠 Smart scan completed for {}: {} total entries found", path, all_files.len()); + debug!("🧠 Smart scan completed for {}: {} total entries found", path, all_files.len()); Ok(all_files) }) } + /// Resume a deep scan from a checkpoint after server restart/interruption + pub async fn resume_deep_scan(&self, checkpoint_path: &str, user_id: uuid::Uuid, state: &crate::AppState) -> Result> { + info!("🔄 Resuming deep scan from checkpoint: {}", checkpoint_path); + + // Check if the checkpoint directory is still accessible + match self.check_directory_etag(checkpoint_path).await { + Ok(current_etag) => { + info!("✅ Checkpoint directory accessible, resuming scan"); + + // Check if directory changed since checkpoint + match state.db.get_webdav_directory(user_id, checkpoint_path).await { + Ok(Some(stored_dir)) => { + if stored_dir.directory_etag != current_etag { + info!("🔄 Directory changed since checkpoint, performing full rescan"); + } else { + info!("✅ Directory unchanged since checkpoint, can skip"); + return Ok(Vec::new()); + } + } + Ok(None) => { + info!("🆕 New checkpoint directory, performing full scan"); + } + Err(e) => { + warn!("Database error checking checkpoint {}: {}, performing full scan", checkpoint_path, e); + } + } + + // Resume with smart scanning from this point + self.discover_files_in_folder_optimized(checkpoint_path, user_id, state).await + } + Err(e) => { + warn!("Checkpoint directory {} inaccessible after restart: {}", checkpoint_path, e); + // Server might have restarted, wait a bit and retry + tokio::time::sleep(Duration::from_secs(5)).await; + + match self.check_directory_etag(checkpoint_path).await { + Ok(_) => { + info!("🔄 Server recovered, resuming scan"); + self.discover_files_in_folder_optimized(checkpoint_path, user_id, state).await + } + Err(e2) => { + error!("Failed to 
resume deep scan after server restart: {}", e2); + Err(anyhow!("Cannot resume deep scan: server unreachable after restart")) + } + } + } + } + } + + /// Discover files in multiple folders concurrently with rate limiting + pub async fn discover_files_concurrent(&self, folders: &[String], user_id: uuid::Uuid, state: &crate::AppState) -> Result> { + if folders.is_empty() { + return Ok(Vec::new()); + } + + info!("🚀 Starting concurrent discovery for {} folders", folders.len()); + + let semaphore = std::sync::Arc::new(Semaphore::new(self.concurrency_config.max_concurrent_scans)); + let folders_stream = stream::iter(folders.iter()) + .map(|folder_path| { + let semaphore = semaphore.clone(); + let service = self.clone(); + let folder_path = folder_path.clone(); + async move { + let _permit = semaphore.acquire().await.map_err(|e| anyhow!("Semaphore error: {}", e))?; + + info!("📂 Scanning folder: {}", folder_path); + let start_time = std::time::Instant::now(); + + // Save checkpoint for resumption after interruption + let checkpoint_record = crate::models::CreateWebDAVDirectory { + user_id, + directory_path: folder_path.clone(), + directory_etag: "scanning".to_string(), // Temporary marker + file_count: 0, + total_size_bytes: 0, + }; + + if let Err(e) = state.db.create_or_update_webdav_directory(&checkpoint_record).await { + warn!("Failed to save scan checkpoint for {}: {}", folder_path, e); + } + + let result = service.discover_files_in_folder_optimized(&folder_path, user_id, state).await; + + match &result { + Ok(files) => { + let duration = start_time.elapsed(); + info!("✅ Completed folder {} in {:?}: {} files found", + folder_path, duration, files.len()); + } + Err(e) => { + // Check if this was a server restart/connection issue + if service.is_server_restart_error(e) { + warn!("🔄 Server restart detected during scan of {}, will resume later", folder_path); + // Keep checkpoint for resumption + return Err(anyhow!("Server restart detected: {}", e)); + } else { + error!("❌ 
Failed to scan folder {}: {}", folder_path, e); + } + } + } + + result.map(|files| (folder_path, files)) + } + }) + .buffer_unordered(self.concurrency_config.max_concurrent_scans); + + let mut all_files = Vec::new(); + let mut success_count = 0; + let mut error_count = 0; + + let mut folders_stream = std::pin::pin!(folders_stream); + while let Some(result) = folders_stream.next().await { + match result { + Ok((folder_path, mut files)) => { + debug!("📁 Folder {} contributed {} files", folder_path, files.len()); + all_files.append(&mut files); + success_count += 1; + } + Err(e) => { + warn!("Folder scan error: {}", e); + error_count += 1; + } + } + } + + info!("🎯 Concurrent discovery completed: {} folders successful, {} failed, {} total files", + success_count, error_count, all_files.len()); + + Ok(all_files) + } + pub async fn download_file(&self, file_path: &str) -> Result> { self.retry_with_backoff("download_file", || { self.download_file_impl(file_path) diff --git a/tests/unit_webdav_url_construction_tests.rs b/tests/unit_webdav_url_construction_tests.rs new file mode 100644 index 0000000..173462c --- /dev/null +++ b/tests/unit_webdav_url_construction_tests.rs @@ -0,0 +1,254 @@ +use readur::services::webdav_service::{WebDAVService, WebDAVConfig}; + +// Helper function to create test WebDAV service for Nextcloud +fn create_nextcloud_webdav_service() -> WebDAVService { + let config = WebDAVConfig { + server_url: "https://nas.example.com".to_string(), + username: "testuser".to_string(), + password: "testpass".to_string(), + watch_folders: vec!["/Documents".to_string()], + file_extensions: vec!["pdf".to_string(), "txt".to_string()], + timeout_seconds: 30, + server_type: Some("nextcloud".to_string()), + }; + + WebDAVService::new(config).unwrap() +} + +// Helper function to create test WebDAV service for generic servers +fn create_generic_webdav_service() -> WebDAVService { + let config = WebDAVConfig { + server_url: "https://webdav.example.com".to_string(), + 
username: "testuser".to_string(), + password: "testpass".to_string(), + watch_folders: vec!["/Documents".to_string()], + file_extensions: vec!["pdf".to_string(), "txt".to_string()], + timeout_seconds: 30, + server_type: Some("generic".to_string()), + }; + + WebDAVService::new(config).unwrap() +} + +#[tokio::test] +async fn test_nextcloud_path_conversion_basic() { + let service = create_nextcloud_webdav_service(); + + // Test basic path conversion + let full_webdav_path = "/remote.php/dav/files/testuser/Documents/"; + let relative_path = service.convert_to_relative_path(full_webdav_path); + + assert_eq!(relative_path, "/Documents/"); +} + +#[tokio::test] +async fn test_nextcloud_path_conversion_nested() { + let service = create_nextcloud_webdav_service(); + + // Test nested path conversion + let full_webdav_path = "/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/Projects/"; + let relative_path = service.convert_to_relative_path(full_webdav_path); + + assert_eq!(relative_path, "/FullerDocuments/NicoleDocuments/Projects/"); +} + +#[tokio::test] +async fn test_nextcloud_path_conversion_with_spaces() { + let service = create_nextcloud_webdav_service(); + + // Test path with URL-encoded spaces (the actual bug scenario) + let full_webdav_path = "/remote.php/dav/files/testuser/Documents/Melanie%20Martinez%20June%207%202023/"; + let relative_path = service.convert_to_relative_path(full_webdav_path); + + assert_eq!(relative_path, "/Documents/Melanie%20Martinez%20June%207%202023/"); +} + +#[tokio::test] +async fn test_nextcloud_path_conversion_with_special_chars() { + let service = create_nextcloud_webdav_service(); + + // Test path with various special characters + let full_webdav_path = "/remote.php/dav/files/testuser/Documents/Maranatha%20Work/"; + let relative_path = service.convert_to_relative_path(full_webdav_path); + + assert_eq!(relative_path, "/Documents/Maranatha%20Work/"); +} + +#[tokio::test] +async fn test_generic_webdav_path_conversion() { + let 
service = create_generic_webdav_service(); + + // Test generic WebDAV path conversion + let full_webdav_path = "/webdav/Documents/Projects/"; + let relative_path = service.convert_to_relative_path(full_webdav_path); + + assert_eq!(relative_path, "/Documents/Projects/"); +} + +#[tokio::test] +async fn test_path_conversion_with_mismatched_prefix() { + let service = create_nextcloud_webdav_service(); + + // Test path that doesn't match expected prefix (should return as-is) + let unexpected_path = "/some/other/path/Documents/"; + let relative_path = service.convert_to_relative_path(unexpected_path); + + assert_eq!(relative_path, "/some/other/path/Documents/"); +} + +#[tokio::test] +async fn test_url_construction_validation() { + let service = create_nextcloud_webdav_service(); + + // Test that we can identify the problem that caused the bug + // This simulates what was happening before the fix + + // What we get from XML parser (full WebDAV path) + let full_webdav_path = "/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/"; + + // What the old code would do (WRONG - double construction) + let base_url = "https://nas.example.com/remote.php/dav/files/testuser"; + let wrong_url = format!("{}{}", base_url, full_webdav_path); + + // This would create a malformed URL + assert_eq!(wrong_url, "https://nas.example.com/remote.php/dav/files/testuser/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/"); + + // What the new code does (CORRECT) + let relative_path = service.convert_to_relative_path(full_webdav_path); + let correct_url = format!("{}{}", base_url, relative_path); + + assert_eq!(correct_url, "https://nas.example.com/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/"); + + // Verify they're different (this is the bug we fixed) + assert_ne!(wrong_url, correct_url); +} + +#[tokio::test] +async fn test_real_world_nextcloud_paths() { + let service = create_nextcloud_webdav_service(); + + // Test real-world paths that would come from 
Nextcloud XML responses + let real_world_paths = vec![ + "/remote.php/dav/files/testuser/", + "/remote.php/dav/files/testuser/Documents/", + "/remote.php/dav/files/testuser/FullerDocuments/", + "/remote.php/dav/files/testuser/FullerDocuments/JonDocuments/", + "/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/", + "/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/Maranatha%20Work/", + "/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/Melanie%20Martinez%20June%207%202023/", + "/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/Misc/", + "/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/Nicole-Barakat-Website/", + "/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/RDP/", + ]; + + let expected_relative_paths = vec![ + "/", + "/Documents/", + "/FullerDocuments/", + "/FullerDocuments/JonDocuments/", + "/FullerDocuments/NicoleDocuments/", + "/FullerDocuments/NicoleDocuments/Maranatha%20Work/", + "/FullerDocuments/NicoleDocuments/Melanie%20Martinez%20June%207%202023/", + "/FullerDocuments/NicoleDocuments/Misc/", + "/FullerDocuments/NicoleDocuments/Nicole-Barakat-Website/", + "/FullerDocuments/NicoleDocuments/RDP/", + ]; + + for (full_path, expected_relative) in real_world_paths.iter().zip(expected_relative_paths.iter()) { + let result = service.convert_to_relative_path(full_path); + assert_eq!(&result, expected_relative, + "Failed to convert {} to {}, got {}", full_path, expected_relative, result); + } +} + +#[tokio::test] +async fn test_url_construction_end_to_end() { + let service = create_nextcloud_webdav_service(); + + // Test the complete URL construction process + let base_webdav_url = "https://nas.example.com/remote.php/dav/files/testuser"; + + // Simulate a path that would cause 404 with the old bug + let problematic_path = "/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/Melanie%20Martinez%20June%207%202023/"; + + // Convert to relative path + let relative_path = 
service.convert_to_relative_path(problematic_path); + + // Construct final URL + let final_url = format!("{}{}", base_webdav_url, relative_path); + + // Verify the URL is correctly constructed + assert_eq!(final_url, "https://nas.example.com/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/Melanie%20Martinez%20June%207%202023/"); + + // Verify it doesn't contain double paths + assert!(!final_url.contains("/remote.php/dav/files/testuser/remote.php/dav/files/testuser/")); +} + +#[tokio::test] +async fn test_different_usernames() { + // Test with different usernames to ensure the path conversion works correctly + let usernames = vec!["testuser", "perf3ct", "admin", "user123", "user.name"]; + + for username in usernames { + let config = WebDAVConfig { + server_url: "https://nas.example.com".to_string(), + username: username.to_string(), + password: "testpass".to_string(), + watch_folders: vec!["/Documents".to_string()], + file_extensions: vec!["pdf".to_string()], + timeout_seconds: 30, + server_type: Some("nextcloud".to_string()), + }; + + let service = WebDAVService::new(config).unwrap(); + + let full_path = format!("/remote.php/dav/files/{}/Documents/TestFolder/", username); + let relative_path = service.convert_to_relative_path(&full_path); + + assert_eq!(relative_path, "/Documents/TestFolder/", + "Failed for username: {}", username); + } +} + +// Test that validates the fix prevents the exact error scenario +#[tokio::test] +async fn test_fix_prevents_original_bug() { + // Create service with the same username as in the problematic path + let config = WebDAVConfig { + server_url: "https://nas.jonathonfuller.com".to_string(), + username: "perf3ct".to_string(), + password: "testpass".to_string(), + watch_folders: vec!["/Documents".to_string()], + file_extensions: vec!["pdf".to_string()], + timeout_seconds: 30, + server_type: Some("nextcloud".to_string()), + }; + let service = WebDAVService::new(config).unwrap(); + + // This is the exact path from the 
error logs that was causing 404s + let problematic_path = "/remote.php/dav/files/perf3ct/FullerDocuments/NicoleDocuments/Melanie%20Martinez%20June%207%202023/"; + + // Before fix: This would have been used directly, causing double path construction + let base_url = "https://nas.jonathonfuller.com/remote.php/dav/files/perf3ct"; + let old_buggy_url = format!("{}{}", base_url, problematic_path); + + // After fix: Convert to relative path first + let relative_path = service.convert_to_relative_path(problematic_path); + let fixed_url = format!("{}{}", base_url, relative_path); + + // Debug: print what we got + println!("Original path: {}", problematic_path); + println!("Relative path: {}", relative_path); + println!("Old buggy URL: {}", old_buggy_url); + println!("Fixed URL: {}", fixed_url); + + // The old URL would have been malformed (causing 404) + assert!(old_buggy_url.contains("/remote.php/dav/files/perf3ct/remote.php/dav/files/perf3ct/")); + + // The new URL should be properly formed + assert_eq!(fixed_url, "https://nas.jonathonfuller.com/remote.php/dav/files/perf3ct/FullerDocuments/NicoleDocuments/Melanie%20Martinez%20June%207%202023/"); + assert!(!fixed_url.contains("/remote.php/dav/files/perf3ct/remote.php/dav/files/perf3ct/")); + + // Most importantly, they should be different (proving the bug was fixed) + assert_ne!(old_buggy_url, fixed_url, "The fix should produce different URLs than the buggy version"); +} \ No newline at end of file