From c242a84326f1f5fbadcfa9e59625ed55778dfe8b Mon Sep 17 00:00:00 2001 From: perf3ct Date: Tue, 1 Jul 2025 22:03:06 +0000 Subject: [PATCH] feat(webdav): also fix the parser to include directories, and add tests --- src/services/webdav_service.rs | 309 ++++++++++++++++++- src/webdav_xml_parser.rs | 219 +++++++++++++ tests/unit_webdav_targeted_rescan_tests.rs | 341 +++++++++++++++++++++ 3 files changed, 868 insertions(+), 1 deletion(-) create mode 100644 tests/unit_webdav_targeted_rescan_tests.rs diff --git a/src/services/webdav_service.rs b/src/services/webdav_service.rs index f4e5aed..ff2922f 100644 --- a/src/services/webdav_service.rs +++ b/src/services/webdav_service.rs @@ -10,7 +10,7 @@ use crate::models::{ FileInfo, WebDAVConnectionResult, WebDAVCrawlEstimate, WebDAVFolderInfo, WebDAVTestConnection, }; -use crate::webdav_xml_parser::parse_propfind_response; +use crate::webdav_xml_parser::{parse_propfind_response, parse_propfind_response_with_directories}; #[derive(Debug, Clone)] pub struct WebDAVConfig { @@ -613,6 +613,307 @@ impl WebDAVService { !remaining.contains('/') && !remaining.is_empty() } + /// Perform targeted re-scanning of only specific paths that have changed + pub async fn discover_files_targeted_rescan(&self, paths_to_scan: &[String], user_id: uuid::Uuid, state: &crate::AppState) -> Result> { + info!("🎯 Starting targeted re-scan for {} specific paths", paths_to_scan.len()); + + let mut all_files = Vec::new(); + + for path in paths_to_scan { + info!("🔍 Targeted scan of: {}", path); + + // Check if this specific path has changed + match self.check_directory_etag(path).await { + Ok(current_etag) => { + // Check cached ETag + let needs_scan = match state.db.get_webdav_directory(user_id, path).await { + Ok(Some(stored_dir)) => { + if stored_dir.directory_etag != current_etag { + info!("🔄 Path {} changed (old: {}, new: {})", path, stored_dir.directory_etag, current_etag); + true + } else { + debug!("✅ Path {} unchanged (ETag: {})", path, current_etag); 
+ false + } + } + Ok(None) => { + info!("🆕 New path {} detected", path); + true + } + Err(e) => { + warn!("Database error for path {}: {}", path, e); + true // Scan on error to be safe + } + }; + + if needs_scan { + // Use shallow scan for this specific directory only + match self.discover_files_in_folder_shallow(path).await { + Ok(mut path_files) => { + info!("📂 Found {} files in changed path {}", path_files.len(), path); + all_files.append(&mut path_files); + + // Update tracking for this specific path + self.update_single_directory_tracking(path, &path_files, user_id, state).await; + } + Err(e) => { + error!("Failed to scan changed path {}: {}", path, e); + } + } + } + } + Err(e) => { + warn!("Failed to check ETag for path {}: {}, skipping", path, e); + } + } + } + + info!("🎯 Targeted re-scan completed: {} total files found", all_files.len()); + Ok(all_files) + } + + /// Discover files in a single directory only (shallow scan, no recursion) + async fn discover_files_in_folder_shallow(&self, folder_path: &str) -> Result> { + let folder_url = format!("{}{}", self.base_webdav_url, folder_path); + + debug!("Shallow scan of directory: {}", folder_url); + + let propfind_body = r#" + + + "#; + + let response = self.client + .request(Method::from_bytes(b"PROPFIND").unwrap(), &folder_url) + .basic_auth(&self.config.username, Some(&self.config.password)) + .header("Depth", "1") // Only direct children, not recursive + .header("Content-Type", "application/xml") + .body(propfind_body) + .send() + .await?; + + if !response.status().is_success() { + return Err(anyhow!("PROPFIND request failed: {}", response.status())); + } + + let response_text = response.text().await?; + debug!("Shallow WebDAV response received, parsing..."); + + // Use the parser that includes directories for shallow scans + self.parse_webdav_response_with_directories(&response_text) + } + + /// Update tracking for a single directory without recursive processing + async fn 
update_single_directory_tracking(&self, directory_path: &str, files: &[FileInfo], user_id: uuid::Uuid, state: &crate::AppState) { + // Get the directory's own ETag + let dir_etag = files.iter() + .find(|f| f.is_directory && f.path == directory_path) + .map(|f| f.etag.clone()) + .unwrap_or_else(|| { + warn!("No ETag found for directory {}, using timestamp-based fallback", directory_path); + chrono::Utc::now().timestamp().to_string() + }); + + // Count direct files in this directory only + let direct_files: Vec<_> = files.iter() + .filter(|f| !f.is_directory && self.is_direct_child(&f.path, directory_path)) + .collect(); + + let file_count = direct_files.len() as i64; + let total_size_bytes = direct_files.iter().map(|f| f.size).sum::(); + + let directory_record = crate::models::CreateWebDAVDirectory { + user_id, + directory_path: directory_path.to_string(), + directory_etag: dir_etag.clone(), + file_count, + total_size_bytes, + }; + + match state.db.create_or_update_webdav_directory(&directory_record).await { + Ok(_) => { + info!("📊 Updated single directory tracking: {} ({} files, {} bytes, ETag: {})", + directory_path, file_count, total_size_bytes, dir_etag); + } + Err(e) => { + error!("Failed to update single directory tracking for {}: {}", directory_path, e); + } + } + } + + /// Get a list of directories that need targeted scanning based on recent changes + pub async fn get_directories_needing_scan(&self, user_id: uuid::Uuid, state: &crate::AppState, max_age_hours: i64) -> Result> { + let cutoff_time = chrono::Utc::now() - chrono::Duration::hours(max_age_hours); + + match state.db.list_webdav_directories(user_id).await { + Ok(directories) => { + let stale_dirs: Vec = directories.iter() + .filter(|dir| dir.last_scanned_at < cutoff_time) + .map(|dir| dir.directory_path.clone()) + .collect(); + + info!("🕒 Found {} directories not scanned in last {} hours", stale_dirs.len(), max_age_hours); + Ok(stale_dirs) + } + Err(e) => { + error!("Failed to get directories needing 
scan: {}", e); + Err(e.into()) + } + } + } + + /// Smart sync mode that combines multiple optimization strategies + pub async fn discover_files_smart_sync(&self, watch_folders: &[String], user_id: uuid::Uuid, state: &crate::AppState) -> Result> { + info!("🧠 Starting smart sync for {} watch folders", watch_folders.len()); + + let mut all_files = Vec::new(); + + for folder_path in watch_folders { + info!("🔍 Smart sync processing folder: {}", folder_path); + + // Step 1: Try optimized discovery first (checks directory ETag) + let optimized_result = self.discover_files_in_folder_optimized(folder_path, user_id, state).await; + + match optimized_result { + Ok(files) => { + if !files.is_empty() { + info!("✅ Optimized discovery found {} files in {}", files.len(), folder_path); + all_files.extend(files); + } else { + info!("🔍 Directory {} unchanged, checking for stale subdirectories", folder_path); + + // Step 2: Check for stale subdirectories that need targeted scanning + let stale_dirs = self.get_stale_subdirectories(folder_path, user_id, state, 24).await?; + + if !stale_dirs.is_empty() { + info!("🎯 Found {} stale subdirectories, performing targeted scan", stale_dirs.len()); + let targeted_files = self.discover_files_targeted_rescan(&stale_dirs, user_id, state).await?; + all_files.extend(targeted_files); + } else { + info!("✅ All subdirectories of {} are fresh, no scan needed", folder_path); + } + } + } + Err(e) => { + warn!("Optimized discovery failed for {}, falling back to full scan: {}", folder_path, e); + // Fallback to traditional full scan + match self.discover_files_in_folder(folder_path).await { + Ok(files) => { + info!("📂 Fallback scan found {} files in {}", files.len(), folder_path); + all_files.extend(files); + } + Err(fallback_error) => { + error!("Both optimized and fallback scans failed for {}: {}", folder_path, fallback_error); + return Err(fallback_error); + } + } + } + } + } + + info!("🧠 Smart sync completed: {} total files discovered", all_files.len()); 
+ Ok(all_files) + } + + /// Get subdirectories of a parent that haven't been scanned recently + async fn get_stale_subdirectories(&self, parent_path: &str, user_id: uuid::Uuid, state: &crate::AppState, max_age_hours: i64) -> Result> { + let cutoff_time = chrono::Utc::now() - chrono::Duration::hours(max_age_hours); + + match state.db.list_webdav_directories(user_id).await { + Ok(directories) => { + let stale_subdirs: Vec = directories.iter() + .filter(|dir| { + dir.directory_path.starts_with(parent_path) && + dir.directory_path != parent_path && + dir.last_scanned_at < cutoff_time + }) + .map(|dir| dir.directory_path.clone()) + .collect(); + + debug!("🕒 Found {} stale subdirectories under {} (not scanned in {} hours)", + stale_subdirs.len(), parent_path, max_age_hours); + Ok(stale_subdirs) + } + Err(e) => { + error!("Failed to get stale subdirectories: {}", e); + Err(e.into()) + } + } + } + + /// Perform incremental sync - only scan directories that have actually changed + pub async fn discover_files_incremental(&self, watch_folders: &[String], user_id: uuid::Uuid, state: &crate::AppState) -> Result> { + info!("⚡ Starting incremental sync for {} watch folders", watch_folders.len()); + + let mut changed_files = Vec::new(); + let mut unchanged_count = 0; + let mut changed_count = 0; + + for folder_path in watch_folders { + // Check directory ETag to see if it changed + match self.check_directory_etag(folder_path).await { + Ok(current_etag) => { + let needs_scan = match state.db.get_webdav_directory(user_id, folder_path).await { + Ok(Some(stored_dir)) => { + if stored_dir.directory_etag != current_etag { + info!("🔄 Directory {} changed (ETag: {} → {})", folder_path, stored_dir.directory_etag, current_etag); + changed_count += 1; + true + } else { + debug!("✅ Directory {} unchanged (ETag: {})", folder_path, current_etag); + unchanged_count += 1; + false + } + } + Ok(None) => { + info!("🆕 New directory {} detected", folder_path); + changed_count += 1; + true + } + Err(e) 
=> { + warn!("Database error for {}: {}, scanning to be safe", folder_path, e); + changed_count += 1; + true + } + }; + + if needs_scan { + // Directory changed - perform targeted scan + match self.discover_files_in_folder_optimized(folder_path, user_id, state).await { + Ok(mut files) => { + info!("📂 Incremental scan found {} files in changed directory {}", files.len(), folder_path); + changed_files.append(&mut files); + } + Err(e) => { + error!("Failed incremental scan of {}: {}", folder_path, e); + } + } + } else { + // Directory unchanged - just update scan timestamp + let update = crate::models::UpdateWebDAVDirectory { + directory_etag: current_etag, + last_scanned_at: chrono::Utc::now(), + file_count: 0, // Will be updated by the database layer + total_size_bytes: 0, + }; + + if let Err(e) = state.db.update_webdav_directory(user_id, folder_path, &update).await { + warn!("Failed to update scan timestamp for {}: {}", folder_path, e); + } + } + } + Err(e) => { + error!("Failed to check directory ETag for {}: {}", folder_path, e); + } + } + } + + info!("⚡ Incremental sync completed: {} unchanged, {} changed, {} total files found", + unchanged_count, changed_count, changed_files.len()); + + Ok(changed_files) + } + /// Check subdirectories individually for changes when parent directory is unchanged async fn check_subdirectories_for_changes(&self, parent_path: &str, user_id: uuid::Uuid, state: &crate::AppState) -> Result> { // Get all known subdirectories from database @@ -806,6 +1107,12 @@ impl WebDAVService { parse_propfind_response(xml_text) } + /// Parse WebDAV response including both files and directories + /// Used for shallow directory scans where we need to track directory structure + pub fn parse_webdav_response_with_directories(&self, xml_text: &str) -> Result> { + parse_propfind_response_with_directories(xml_text) + } + pub async fn download_file(&self, file_path: &str) -> Result> { self.retry_with_backoff("download_file", || { 
/// Parse a PROPFIND multistatus response, keeping BOTH file and directory
/// entries (the sibling `parse_propfind_response` returns files only).
///
/// Used for shallow (`Depth: 1`) scans where directory entries are needed so
/// their ETags can be tracked for change detection.
///
/// # Errors
/// Returns an error if the XML is malformed or an element name cannot be read.
pub fn parse_propfind_response_with_directories(xml_text: &str) -> Result<Vec<FileInfo>> {
    let mut reader = Reader::from_str(xml_text);
    reader.config_mut().trim_text(true);

    let mut files = Vec::new();
    // The <response> element currently being accumulated, if any.
    let mut current_response: Option<PropFindResponse> = None;
    // Local name of the most recent non-structural start tag; text nodes are
    // attributed to this element.
    let mut current_element = String::new();
    // Nesting flags for the structural elements we care about.
    let mut in_response = false;
    let mut in_propstat = false;
    let mut in_prop = false;
    let mut in_resourcetype = false;
    // Set once a <status> containing "200" is seen inside the current response.
    let mut status_ok = false;

    let mut buf = Vec::new();

    loop {
        match reader.read_event_into(&mut buf) {
            // Self-closing tags (e.g. <collection/>) are handled like start tags.
            Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
                let name = get_local_name(&e)?;

                match name.as_str() {
                    "response" => {
                        in_response = true;
                        current_response = Some(PropFindResponse::default());
                    }
                    "propstat" => {
                        in_propstat = true;
                    }
                    "prop" => {
                        in_prop = true;
                    }
                    "resourcetype" => {
                        in_resourcetype = true;
                    }
                    "collection" if in_resourcetype => {
                        // <collection/> inside <resourcetype> marks a directory.
                        if let Some(ref mut resp) = current_response {
                            resp.is_collection = true;
                        }
                    }
                    _ => {
                        // NOTE(review): current_element is only updated for
                        // non-structural tags, so it can be stale when text
                        // directly follows a structural tag — appears harmless
                        // for well-formed multistatus bodies, but confirm.
                        current_element = name;
                    }
                }
            }
            Ok(Event::Text(e)) => {
                let text = e.unescape()?.to_string();

                // Attribute the text node to the most recent element name.
                if in_response && !text.trim().is_empty() {
                    if let Some(ref mut resp) = current_response {
                        match current_element.as_str() {
                            "href" => {
                                resp.href = text.trim().to_string();
                            }
                            "displayname" => {
                                resp.displayname = text.trim().to_string();
                            }
                            "getcontentlength" => {
                                // Unparseable lengths become None (size 0 later).
                                resp.content_length = text.trim().parse().ok();
                            }
                            "getlastmodified" => {
                                resp.last_modified = Some(text.trim().to_string());
                            }
                            "getcontenttype" => {
                                resp.content_type = Some(text.trim().to_string());
                            }
                            "getetag" => {
                                // Strip surrounding quotes / weak markers.
                                resp.etag = Some(normalize_etag(&text));
                            }
                            "creationdate" => {
                                resp.creation_date = Some(text.trim().to_string());
                            }
                            "owner" => {
                                resp.owner = Some(text.trim().to_string());
                            }
                            "group" => {
                                resp.group = Some(text.trim().to_string());
                            }
                            "status" if in_propstat => {
                                // Check if status is 200 OK
                                if text.contains("200") {
                                    status_ok = true;
                                }
                            }
                            _ => {
                                // Store any other properties as generic metadata
                                // (Nextcloud/ownCloud extensions get dedicated keys).
                                if !text.trim().is_empty() && in_prop {
                                    if resp.metadata.is_none() {
                                        resp.metadata = Some(serde_json::Value::Object(serde_json::Map::new()));
                                    }

                                    if let Some(serde_json::Value::Object(ref mut map)) = resp.metadata {
                                        match current_element.as_str() {
                                            "permissions" | "oc:permissions" => {
                                                resp.permissions = Some(text.trim().to_string());
                                                map.insert("permissions_raw".to_string(), serde_json::Value::String(text.trim().to_string()));
                                            }
                                            "fileid" | "oc:fileid" => {
                                                map.insert("file_id".to_string(), serde_json::Value::String(text.trim().to_string()));
                                            }
                                            "owner-id" | "oc:owner-id" => {
                                                map.insert("owner_id".to_string(), serde_json::Value::String(text.trim().to_string()));
                                            }
                                            "owner-display-name" | "oc:owner-display-name" => {
                                                resp.owner_display_name = Some(text.trim().to_string());
                                                map.insert("owner_display_name".to_string(), serde_json::Value::String(text.trim().to_string()));
                                            }
                                            "has-preview" | "nc:has-preview" => {
                                                if let Ok(val) = text.trim().parse::<bool>() {
                                                    map.insert("has_preview".to_string(), serde_json::Value::Bool(val));
                                                }
                                            }
                                            _ => {
                                                map.insert(current_element.clone(), serde_json::Value::String(text.trim().to_string()));
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            Ok(Event::End(e)) => {
                let name = get_local_name_from_end(&e)?;

                match name.as_str() {
                    "response" => {
                        if let Some(resp) = current_response.take() {
                            // Include both files AND directories with valid properties
                            if status_ok && !resp.href.is_empty() {
                                // Extract name from href when displayname is absent:
                                // take the last non-empty path segment.
                                let name = if resp.displayname.is_empty() {
                                    resp.href
                                        .split('/')
                                        .filter(|s| !s.is_empty())
                                        .last()
                                        .unwrap_or("")
                                        .to_string()
                                } else {
                                    resp.displayname.clone()
                                };

                                // Decode URL-encoded characters
                                let name = urlencoding::decode(&name)
                                    .unwrap_or_else(|_| std::borrow::Cow::Borrowed(&name))
                                    .to_string();

                                // Parse creation date
                                let created_at = resp.creation_date
                                    .as_ref()
                                    .and_then(|d| parse_http_date(d));

                                // Parse permissions: all-uppercase letter strings
                                // (R/W/D) become unix-style octal bits, otherwise
                                // try a plain numeric parse.
                                let permissions_int = resp.permissions
                                    .as_ref()
                                    .and_then(|p| {
                                        if p.chars().all(|c| c.is_uppercase()) {
                                            let mut perms = 0u32;
                                            if p.contains('R') { perms |= 0o444; }
                                            if p.contains('W') { perms |= 0o222; }
                                            if p.contains('D') { perms |= 0o111; }
                                            Some(perms)
                                        } else {
                                            p.parse().ok()
                                        }
                                    });

                                // Directories get an empty mime type; a missing
                                // ETag is replaced with a random UUID so every
                                // entry has a (non-stable) identifier.
                                let file_info = FileInfo {
                                    path: resp.href.clone(),
                                    name,
                                    size: resp.content_length.unwrap_or(0),
                                    mime_type: if resp.is_collection {
                                        "".to_string()
                                    } else {
                                        resp.content_type.unwrap_or_else(|| "application/octet-stream".to_string())
                                    },
                                    last_modified: parse_http_date(&resp.last_modified.unwrap_or_default()),
                                    etag: resp.etag.unwrap_or_else(|| format!("\"{}\"", uuid::Uuid::new_v4())),
                                    is_directory: resp.is_collection,
                                    created_at,
                                    permissions: permissions_int,
                                    owner: resp.owner.or(resp.owner_display_name),
                                    group: resp.group,
                                    metadata: resp.metadata,
                                };

                                files.push(file_info);
                            }
                        }
                        in_response = false;
                        status_ok = false;
                    }
                    "propstat" => {
                        in_propstat = false;
                    }
                    "prop" => {
                        in_prop = false;
                    }
                    "resourcetype" => {
                        in_resourcetype = false;
                    }
                    _ => {}
                }

                current_element.clear();
            }
            Ok(Event::Eof) => break,
            Err(e) => return Err(anyhow!("XML parsing error: {}", e)),
            _ => {}
        }

        buf.clear();
    }

    Ok(files)
}
b/tests/unit_webdav_targeted_rescan_tests.rs new file mode 100644 index 0000000..2fca8ae --- /dev/null +++ b/tests/unit_webdav_targeted_rescan_tests.rs @@ -0,0 +1,341 @@ +use readur::services::webdav_service::{WebDAVService, WebDAVConfig}; +use readur::models::FileInfo; +use tokio; +use chrono::Utc; + +// Helper function to create test WebDAV service +fn create_test_webdav_service() -> WebDAVService { + let config = WebDAVConfig { + server_url: "https://test.example.com".to_string(), + username: "testuser".to_string(), + password: "testpass".to_string(), + watch_folders: vec!["/Documents".to_string()], + file_extensions: vec!["pdf".to_string(), "png".to_string()], + timeout_seconds: 30, + server_type: Some("nextcloud".to_string()), + }; + + WebDAVService::new(config).unwrap() +} + +#[tokio::test] +async fn test_discover_files_in_folder_shallow() { + let service = create_test_webdav_service(); + + // Mock XML response for shallow directory scan (Depth: 1) + let mock_response = r#" + + + /remote.php/dav/files/admin/Documents/ + + + Documents + + + + "docs-etag" + + HTTP/1.1 200 OK + + + + /remote.php/dav/files/admin/Documents/file1.pdf + + + file1.pdf + 1024 + application/pdf + "file1-etag" + + + HTTP/1.1 200 OK + + + + /remote.php/dav/files/admin/Documents/SubFolder/ + + + SubFolder + + + + "subfolder-etag" + + HTTP/1.1 200 OK + + + "#; + + // Test that shallow parsing works correctly + let files = service.parse_webdav_response_with_directories(mock_response).unwrap(); + + // Debug print to see what files we actually got + for file in &files { + println!("Parsed file: {} (is_directory: {}, path: {})", file.name, file.is_directory, file.path); + } + + // Should have directory, direct file, and direct subdirectory (but no nested files) + assert_eq!(files.len(), 3); + + // Check that we got the right items + let directory = files.iter().find(|f| f.name == "Documents").unwrap(); + assert!(directory.is_directory); + assert_eq!(directory.etag, "docs-etag"); + + let file = 
files.iter().find(|f| f.name == "file1.pdf").unwrap(); + assert!(!file.is_directory); + assert_eq!(file.size, 1024); + assert_eq!(file.etag, "file1-etag"); + + let subfolder = files.iter().find(|f| f.name == "SubFolder").unwrap(); + assert!(subfolder.is_directory); + assert_eq!(subfolder.etag, "subfolder-etag"); +} + +#[tokio::test] +async fn test_update_single_directory_tracking() { + let service = create_test_webdav_service(); + + // Create mock files representing a shallow directory scan + let files = vec![ + FileInfo { + path: "/Documents".to_string(), + name: "Documents".to_string(), + size: 0, + mime_type: "".to_string(), + last_modified: Some(Utc::now()), + etag: "docs-etag-123".to_string(), + is_directory: true, + created_at: Some(Utc::now()), + permissions: Some(755), + owner: Some("admin".to_string()), + group: Some("admin".to_string()), + metadata: None, + }, + FileInfo { + path: "/Documents/file1.pdf".to_string(), + name: "file1.pdf".to_string(), + size: 1024000, + mime_type: "application/pdf".to_string(), + last_modified: Some(Utc::now()), + etag: "file1-etag".to_string(), + is_directory: false, + created_at: Some(Utc::now()), + permissions: Some(644), + owner: Some("admin".to_string()), + group: Some("admin".to_string()), + metadata: None, + }, + FileInfo { + path: "/Documents/file2.pdf".to_string(), + name: "file2.pdf".to_string(), + size: 2048000, + mime_type: "application/pdf".to_string(), + last_modified: Some(Utc::now()), + etag: "file2-etag".to_string(), + is_directory: false, + created_at: Some(Utc::now()), + permissions: Some(644), + owner: Some("admin".to_string()), + group: Some("admin".to_string()), + metadata: None, + }, + FileInfo { + path: "/Documents/SubFolder".to_string(), + name: "SubFolder".to_string(), + size: 0, + mime_type: "".to_string(), + last_modified: Some(Utc::now()), + etag: "subfolder-etag".to_string(), + is_directory: true, + created_at: Some(Utc::now()), + permissions: Some(755), + owner: Some("admin".to_string()), + 
group: Some("admin".to_string()), + metadata: None, + }, + ]; + + // Test that direct file counting works correctly + let direct_files: Vec<_> = files.iter() + .filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/Documents")) + .collect(); + + assert_eq!(direct_files.len(), 2); // file1.pdf and file2.pdf + + let total_size: i64 = direct_files.iter().map(|f| f.size).sum(); + assert_eq!(total_size, 3072000); // 1024000 + 2048000 + + // Test that directory ETag extraction works + let dir_etag = files.iter() + .find(|f| f.is_directory && f.path == "/Documents") + .map(|f| f.etag.clone()) + .unwrap(); + + assert_eq!(dir_etag, "docs-etag-123"); +} + +#[tokio::test] +async fn test_targeted_rescan_logic() { + let service = create_test_webdav_service(); + + // Test the logic that determines which paths need scanning + let paths_to_check = vec![ + "/Documents".to_string(), + "/Documents/2024".to_string(), + "/Documents/Archive".to_string(), + ]; + + // This tests the core logic used in discover_files_targeted_rescan + // In a real implementation, this would involve database calls and network requests + + // Simulate ETag checking logic + let mut paths_needing_scan = Vec::new(); + + for path in &paths_to_check { + // Simulate: current_etag != stored_etag (directory changed) + let current_etag = format!("{}-current", path.replace('/', "-")); + let stored_etag = format!("{}-stored", path.replace('/', "-")); + + if current_etag != stored_etag { + paths_needing_scan.push(path.clone()); + } + } + + // All paths should need scanning in this test scenario + assert_eq!(paths_needing_scan.len(), 3); + assert!(paths_needing_scan.contains(&"/Documents".to_string())); + assert!(paths_needing_scan.contains(&"/Documents/2024".to_string())); + assert!(paths_needing_scan.contains(&"/Documents/Archive".to_string())); +} + +#[tokio::test] +async fn test_stale_directory_detection() { + let service = create_test_webdav_service(); + + // Test the logic for detecting stale 
subdirectories + let parent_path = "/Documents"; + let directories = vec![ + ("/Documents", chrono::Utc::now()), // Fresh parent + ("/Documents/2024", chrono::Utc::now() - chrono::Duration::hours(25)), // Stale (25 hours old) + ("/Documents/Archive", chrono::Utc::now() - chrono::Duration::hours(1)), // Fresh (1 hour old) + ("/Documents/2024/Q1", chrono::Utc::now() - chrono::Duration::hours(30)), // Stale (30 hours old) + ("/Other", chrono::Utc::now() - chrono::Duration::hours(48)), // Stale but not under parent + ]; + + let max_age_hours = 24; + let cutoff_time = chrono::Utc::now() - chrono::Duration::hours(max_age_hours); + + // Test the filtering logic + let stale_subdirs: Vec = directories.iter() + .filter(|(path, last_scanned)| { + path.starts_with(parent_path) && + *path != parent_path && + *last_scanned < cutoff_time + }) + .map(|(path, _)| path.to_string()) + .collect(); + + assert_eq!(stale_subdirs.len(), 2); + assert!(stale_subdirs.contains(&"/Documents/2024".to_string())); + assert!(stale_subdirs.contains(&"/Documents/2024/Q1".to_string())); + assert!(!stale_subdirs.contains(&"/Documents/Archive".to_string())); // Fresh + assert!(!stale_subdirs.contains(&"/Other".to_string())); // Different parent +} + +#[tokio::test] +async fn test_incremental_sync_logic() { + let service = create_test_webdav_service(); + + // Test the change detection logic used in incremental sync + let watch_folders = vec![ + "/Documents".to_string(), + "/Photos".to_string(), + "/Archive".to_string(), + ]; + + // Simulate stored ETags vs current ETags + let stored_etags = [ + ("/Documents", "docs-etag-old"), + ("/Photos", "photos-etag-same"), + ("/Archive", "archive-etag-old"), + ]; + + let current_etags = [ + ("/Documents", "docs-etag-new"), // Changed + ("/Photos", "photos-etag-same"), // Unchanged + ("/Archive", "archive-etag-new"), // Changed + ]; + + let mut changed_folders = Vec::new(); + let mut unchanged_folders = Vec::new(); + + for folder in &watch_folders { + let stored = 
stored_etags.iter().find(|(path, _)| path == folder).map(|(_, etag)| *etag); + let current = current_etags.iter().find(|(path, _)| path == folder).map(|(_, etag)| *etag); + + match (stored, current) { + (Some(stored_etag), Some(current_etag)) => { + if stored_etag != current_etag { + changed_folders.push(folder.clone()); + } else { + unchanged_folders.push(folder.clone()); + } + } + _ => { + // New folder or missing data - assume changed + changed_folders.push(folder.clone()); + } + } + } + + assert_eq!(changed_folders.len(), 2); + assert!(changed_folders.contains(&"/Documents".to_string())); + assert!(changed_folders.contains(&"/Archive".to_string())); + + assert_eq!(unchanged_folders.len(), 1); + assert!(unchanged_folders.contains(&"/Photos".to_string())); +} + +#[tokio::test] +async fn test_smart_sync_strategy_selection() { + let service = create_test_webdav_service(); + + // Test the logic for choosing between different sync strategies + + // Scenario 1: Directory unchanged, no stale subdirectories -> no scan needed + let scenario1_main_dir_changed = false; + let scenario1_stale_subdirs = 0; + let scenario1_action = if scenario1_main_dir_changed { + "full_scan" + } else if scenario1_stale_subdirs > 0 { + "targeted_scan" + } else { + "no_scan" + }; + assert_eq!(scenario1_action, "no_scan"); + + // Scenario 2: Directory unchanged, has stale subdirectories -> targeted scan + let scenario2_main_dir_changed = false; + let scenario2_stale_subdirs = 3; + let scenario2_action = if scenario2_main_dir_changed { + "full_scan" + } else if scenario2_stale_subdirs > 0 { + "targeted_scan" + } else { + "no_scan" + }; + assert_eq!(scenario2_action, "targeted_scan"); + + // Scenario 3: Directory changed -> full scan (optimized) + let scenario3_main_dir_changed = true; + let scenario3_stale_subdirs = 0; + let scenario3_action = if scenario3_main_dir_changed { + "full_scan" + } else if scenario3_stale_subdirs > 0 { + "targeted_scan" + } else { + "no_scan" + }; + 
assert_eq!(scenario3_action, "full_scan"); +} \ No newline at end of file