diff --git a/src/routes/sources.rs b/src/routes/sources.rs
index 9ae895e..fca8c2f 100644
--- a/src/routes/sources.rs
+++ b/src/routes/sources.rs
@@ -479,31 +479,16 @@ async fn trigger_deep_scan(
     tokio::spawn(async move {
         let start_time = chrono::Utc::now();

-        // Clear existing directory tracking to force full rescan
-        if let Err(e) = state_clone.db.clear_webdav_directories(user_id).await {
-            error!("Failed to clear WebDAV directories for deep scan: {}", e);
-        }
-
-        // Use traditional discovery for deep scan to avoid borrowing issues
-        let mut all_discovered_files = Vec::new();
-        for folder in &config_clone.watch_folders {
-            match webdav_service.discover_files_in_folder(folder).await {
-                Ok(mut folder_files) => {
-                    info!("Deep scan discovered {} files in folder {}", folder_files.len(), folder);
-                    all_discovered_files.append(&mut folder_files);
-                }
-                Err(e) => {
-                    error!("Deep scan failed to discover files in folder {}: {}", folder, e);
-                    // Continue with other folders
-                }
-            }
-        }
-
-        if !all_discovered_files.is_empty() {
-            info!("Deep scan discovery completed for source {}: {} files found", source_id_clone, all_discovered_files.len());
-
-            // Filter files by extensions and process them
-            let files_to_process: Vec<_> = all_discovered_files.into_iter()
+        // Use guaranteed completeness deep scan method
+        match webdav_service.deep_scan_with_guaranteed_completeness(user_id, &state_clone).await {
+            Ok(all_discovered_files) => {
+                info!("Deep scan with guaranteed completeness discovered {} files", all_discovered_files.len());
+
+                if !all_discovered_files.is_empty() {
+                    info!("Deep scan discovery completed for source {}: {} files found", source_id_clone, all_discovered_files.len());
+
+                    // Filter files by extensions and process them
+                    let files_to_process: Vec<_> = all_discovered_files.into_iter()
                 .filter(|file_info| {
                     if file_info.is_directory {
                         return false;
@@ -596,16 +581,47 @@ async fn trigger_deep_scan(
             }
         }
-        } else {
-            info!("Deep scan found no files for source {}", source_id_clone);
-
-            // Update source status to idle even if no files found
-            if let Err(e) = state_clone.db.update_source_status(
-                source_id_clone,
-                crate::models::SourceStatus::Idle,
-                Some("Deep scan completed: no files found".to_string()),
-            ).await {
-                error!("Failed to update source status after empty deep scan: {}", e);
+                } else {
+                    info!("Deep scan found no files for source {}", source_id_clone);
+
+                    // Update source status to idle even if no files found
+                    if let Err(e) = state_clone.db.update_source_status(
+                        source_id_clone,
+                        crate::models::SourceStatus::Idle,
+                        Some("Deep scan completed: no files found".to_string()),
+                    ).await {
+                        error!("Failed to update source status after empty deep scan: {}", e);
+                    }
+                }
+            }
+            Err(e) => {
+                error!("Deep scan with guaranteed completeness failed for source {}: {}", source_id_clone, e);
+
+                // Update source status to error
+                if let Err(e2) = state_clone.db.update_source_status(
+                    source_id_clone,
+                    crate::models::SourceStatus::Error,
+                    Some(format!("Deep scan failed: {}", e)),
+                ).await {
+                    error!("Failed to update source status after deep scan error: {}", e2);
+                }
+
+                // Send error notification
+                let notification = crate::models::CreateNotification {
+                    notification_type: "error".to_string(),
+                    title: "Deep Scan Failed".to_string(),
+                    message: format!("Deep scan of {} failed: {}", source_name, e),
+                    action_url: Some("/sources".to_string()),
+                    metadata: Some(serde_json::json!({
+                        "source_id": source_id_clone,
+                        "scan_type": "deep_scan",
+                        "error": e.to_string()
+                    })),
+                };
+
+                if let Err(e) = state_clone.db.create_notification(user_id, &notification).await {
+                    error!("Failed to create deep scan error notification: {}", e);
+                }
             }
         }
     });
diff --git a/src/services/webdav_service.rs b/src/services/webdav_service.rs
index 2b5f55d..0da7b53 100644
--- a/src/services/webdav_service.rs
+++ b/src/services/webdav_service.rs
@@ -1,6 +1,6 @@
 use anyhow::{anyhow, Result};
-use chrono::{DateTime, Utc};
-use reqwest::{Client, Method, Url};
+use reqwest::{Client, Method};
+use serde::{Deserialize, Serialize};
 use std::collections::HashSet;
 use std::time::Duration;
 use tokio::time::sleep;
@@ -76,6 +76,77 @@ pub struct WebDAVService {
     concurrency_config: ConcurrencyConfig,
 }

+/// Report of ETag validation and directory integrity checks
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ValidationReport {
+    pub validation_id: uuid::Uuid,
+    pub user_id: uuid::Uuid,
+    pub started_at: chrono::DateTime<chrono::Utc>,
+    pub completed_at: Option<chrono::DateTime<chrono::Utc>>,
+    pub total_directories_checked: u32,
+    pub issues_found: Vec<ValidationIssue>,
+    pub recommendations: Vec<ValidationRecommendation>,
+    pub etag_support_verified: bool,
+    pub server_health_score: u8, // 0-100
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ValidationIssue {
+    pub issue_type: ValidationIssueType,
+    pub directory_path: String,
+    pub severity: ValidationSeverity,
+    pub description: String,
+    pub discovered_at: chrono::DateTime<chrono::Utc>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum ValidationIssueType {
+    /// Directory exists on server but not in our tracking
+    Untracked,
+    /// Directory in our tracking but missing on server
+    Missing,
+    /// ETag mismatch between server and our cache
+    ETagMismatch,
+    /// Directory hasn't been scanned in a very long time
+    Stale,
+    /// Server errors when accessing directory
+    Inaccessible,
+    /// ETag support seems unreliable for this directory
+    ETagUnreliable,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum ValidationSeverity {
+    Info,     // No action needed, just FYI
+    Warning,  // Should investigate but not urgent
+    Error,    // Needs immediate attention
+    Critical, // System integrity at risk
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ValidationRecommendation {
+    pub action: ValidationAction,
+    pub reason: String,
+    pub affected_directories: Vec<String>,
+    pub priority: ValidationSeverity,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum ValidationAction {
+    /// Run a deep scan of specific directories
+    DeepScanRequired,
+    /// Clear and rebuild directory tracking
+    RebuildTracking,
+    /// ETag support is unreliable, switch to periodic scans
+    DisableETagOptimization,
+    /// Clean up orphaned database entries
+    CleanupDatabase,
+    /// Server configuration issue needs attention
+    CheckServerConfiguration,
+    /// No action needed, system is healthy
+    NoActionRequired,
+}
+
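For illustration (not part of the patch): the Serialize/Deserialize derives let a ValidationReport travel as JSON, e.g. out of an API route or into an audit log. A minimal round-trip sketch, assuming the serde features of the uuid and chrono crates are enabled (the derives above require them); the function name is hypothetical:

    fn demo_report_roundtrip() -> anyhow::Result<()> {
        // Empty report with illustrative values; in practice validate_etag_tracking() builds this.
        let report = ValidationReport {
            validation_id: uuid::Uuid::new_v4(),
            user_id: uuid::Uuid::new_v4(),
            started_at: chrono::Utc::now(),
            completed_at: None,
            total_directories_checked: 0,
            issues_found: Vec::new(),
            recommendations: Vec::new(),
            etag_support_verified: true,
            server_health_score: 100,
        };
        let json = serde_json::to_string_pretty(&report)?;             // e.g. an HTTP response body
        let restored: ValidationReport = serde_json::from_str(&json)?; // e.g. reading an audit log
        assert_eq!(restored.server_health_score, report.server_health_score);
        Ok(())
    }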
 impl WebDAVService {
     pub fn new(config: WebDAVConfig) -> Result<Self> {
         Self::new_with_configs(config, RetryConfig::default(), ConcurrencyConfig::default())
@@ -1925,4 +1996,635 @@ pub async fn test_webdav_connection(
         .await?;

     Ok(response.status().is_success())
 }
+
+impl WebDAVService {
+    /// Validate ETag tracking integrity and directory consistency
+    /// This replaces the need for periodic deep scans with intelligent validation
+    pub async fn validate_etag_tracking(&self, user_id: uuid::Uuid, state: &crate::AppState) -> Result<ValidationReport> {
+        let validation_id = uuid::Uuid::new_v4();
+        let started_at = chrono::Utc::now();
+
+        info!("🔍 Starting ETag validation for user {} (validation_id: {})", user_id, validation_id);
+
+        let mut report = ValidationReport {
+            validation_id,
+            user_id,
+            started_at,
+            completed_at: None,
+            total_directories_checked: 0,
+            issues_found: Vec::new(),
+            recommendations: Vec::new(),
+            etag_support_verified: false,
+            server_health_score: 100,
+        };
+
+        // Step 1: Verify ETag support is still working
+        match self.test_recursive_etag_support().await {
+            Ok(supports_etags) => {
+                report.etag_support_verified = supports_etags;
+                if !supports_etags {
+                    report.issues_found.push(ValidationIssue {
+                        issue_type: ValidationIssueType::ETagUnreliable,
+                        directory_path: "server".to_string(),
+                        severity: ValidationSeverity::Critical,
+                        description: "Server no longer supports recursive ETags reliably".to_string(),
+                        discovered_at: chrono::Utc::now(),
+                    });
+                    report.server_health_score = 30;
+                }
+            }
+            Err(e) => {
+                warn!("Failed to test ETag support: {}", e);
+                report.issues_found.push(ValidationIssue {
+                    issue_type: ValidationIssueType::ETagUnreliable,
+                    directory_path: "server".to_string(),
+                    severity: ValidationSeverity::Error,
+                    description: format!("Cannot verify ETag support: {}", e),
+                    discovered_at: chrono::Utc::now(),
+                });
+                report.server_health_score = 50;
+            }
+        }
+
+        // Step 2: Check tracked directories for issues
+        match state.db.list_webdav_directories(user_id).await {
+            Ok(tracked_dirs) => {
+                report.total_directories_checked = tracked_dirs.len() as u32;
+
+                for tracked_dir in tracked_dirs {
+                    self.validate_single_directory(&tracked_dir, &mut report, state).await;
+                }
+            }
+            Err(e) => {
+                error!("Failed to load tracked directories: {}", e);
+                report.issues_found.push(ValidationIssue {
+                    issue_type: ValidationIssueType::Missing,
+                    directory_path: "database".to_string(),
+                    severity: ValidationSeverity::Critical,
+                    description: format!("Cannot access directory tracking database: {}", e),
+                    discovered_at: chrono::Utc::now(),
+                });
+                report.server_health_score = 10;
+            }
+        }
+
+        // Step 3: Sample a few watch directories to check for untracked directories
+        for watch_folder in &self.config.watch_folders {
+            if let Err(e) = self.check_for_untracked_directories(watch_folder, &mut report, user_id, state).await {
+                warn!("Failed to check for untracked directories in {}: {}", watch_folder, e);
+            }
+        }
+
+        // Step 4: Generate recommendations based on issues found
+        self.generate_validation_recommendations(&mut report);
+
+        report.completed_at = Some(chrono::Utc::now());
+        let duration = report.completed_at.unwrap() - report.started_at;
+
+        info!("✅ ETag validation completed in {:.2}s. Health score: {}/100, {} issues found",
+            duration.num_milliseconds() as f64 / 1000.0,
+            report.server_health_score,
+            report.issues_found.len());
+
+        Ok(report)
+    }
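A sketch of how a route or scheduler might consume this method; `run_validation` is hypothetical, and the tracing macros are assumed to be in scope as they are elsewhere in this module:

    async fn run_validation(service: &WebDAVService, user_id: uuid::Uuid, state: &crate::AppState) -> anyhow::Result<()> {
        let report = service.validate_etag_tracking(user_id, state).await?;
        // Surface the most severe findings; the ordering here is illustrative.
        for issue in &report.issues_found {
            match issue.severity {
                ValidationSeverity::Critical | ValidationSeverity::Error =>
                    error!("{}: {}", issue.directory_path, issue.description),
                ValidationSeverity::Warning => warn!("{}: {}", issue.directory_path, issue.description),
                ValidationSeverity::Info => info!("{}: {}", issue.directory_path, issue.description),
            }
        }
        Ok(())
    }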
+
+    /// Validate a single tracked directory
+    async fn validate_single_directory(
+        &self,
+        tracked_dir: &crate::models::WebDAVDirectory,
+        report: &mut ValidationReport,
+        state: &crate::AppState
+    ) {
+        let relative_path = self.convert_to_relative_path(&tracked_dir.directory_path);
+
+        // Check if directory still exists and get current ETag
+        match self.check_directory_etag(&relative_path).await {
+            Ok(current_etag) => {
+                // Check for ETag mismatch
+                if current_etag != tracked_dir.directory_etag {
+                    report.issues_found.push(ValidationIssue {
+                        issue_type: ValidationIssueType::ETagMismatch,
+                        directory_path: tracked_dir.directory_path.clone(),
+                        severity: ValidationSeverity::Warning,
+                        description: format!("ETag changed from '{}' to '{}' - directory may need rescanning",
+                            tracked_dir.directory_etag, current_etag),
+                        discovered_at: chrono::Utc::now(),
+                    });
+                    report.server_health_score = report.server_health_score.saturating_sub(5);
+                }
+
+                // Check for stale directories (not scanned in >7 days)
+                let last_scanned = tracked_dir.last_scanned_at;
+                let duration = chrono::Utc::now() - last_scanned;
+                let days_old = duration.num_days();
+                if days_old > 7 {
+                    report.issues_found.push(ValidationIssue {
+                        issue_type: ValidationIssueType::Stale,
+                        directory_path: tracked_dir.directory_path.clone(),
+                        severity: if days_old > 30 { ValidationSeverity::Warning } else { ValidationSeverity::Info },
+                        description: format!("Directory not scanned for {} days", days_old),
+                        discovered_at: chrono::Utc::now(),
+                    });
+                    if days_old > 30 {
+                        report.server_health_score = report.server_health_score.saturating_sub(3);
+                    }
+                }
+            }
+            Err(e) => {
+                // Directory inaccessible or missing
+                report.issues_found.push(ValidationIssue {
+                    issue_type: ValidationIssueType::Inaccessible,
+                    directory_path: tracked_dir.directory_path.clone(),
+                    severity: ValidationSeverity::Error,
+                    description: format!("Cannot access directory: {}", e),
+                    discovered_at: chrono::Utc::now(),
+                });
+                report.server_health_score = report.server_health_score.saturating_sub(10);
+            }
+        }
+    }
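Note that the score penalties above use u8::saturating_sub, so many accumulated deductions bottom out at 0 rather than underflowing. A quick illustration of why that matters for a 0-100 score:

    let mut score: u8 = 10;
    score = score.saturating_sub(5);  // 5
    score = score.saturating_sub(10); // 0: clamped, whereas a plain `-` would panic in debug builds
    assert_eq!(score, 0);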
+
+    /// Check for directories that exist on server but aren't tracked
+    async fn check_for_untracked_directories(
+        &self,
+        watch_folder: &str,
+        report: &mut ValidationReport,
+        user_id: uuid::Uuid,
+        state: &crate::AppState
+    ) -> Result<()> {
+        let relative_watch_folder = self.convert_to_relative_path(watch_folder);
+
+        // Get shallow listing of watch folder
+        match self.discover_files_in_folder_shallow(&relative_watch_folder).await {
+            Ok(entries) => {
+                // Find directories
+                let server_dirs: Vec<_> = entries.iter()
+                    .filter(|e| e.is_directory)
+                    .collect();
+
+                // Check if each directory is tracked
+                for server_dir in server_dirs {
+                    match state.db.get_webdav_directory(user_id, &server_dir.path).await {
+                        Ok(None) => {
+                            // Directory exists on server but not tracked
+                            report.issues_found.push(ValidationIssue {
+                                issue_type: ValidationIssueType::Untracked,
+                                directory_path: server_dir.path.clone(),
+                                severity: ValidationSeverity::Info,
+                                description: "Directory exists on server but not in tracking database".to_string(),
+                                discovered_at: chrono::Utc::now(),
+                            });
+                            report.server_health_score = report.server_health_score.saturating_sub(2);
+                        }
+                        Ok(Some(_)) => {
+                            // Directory is tracked, all good
+                        }
+                        Err(e) => {
+                            warn!("Database error checking directory {}: {}", server_dir.path, e);
+                        }
+                    }
+                }
+            }
+            Err(e) => {
+                return Err(anyhow!("Failed to list watch folder {}: {}", watch_folder, e));
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Generate actionable recommendations based on validation issues
+    fn generate_validation_recommendations(&self, report: &mut ValidationReport) {
+        let mut etag_mismatches = Vec::new();
+        let mut untracked_dirs = Vec::new();
+        let mut inaccessible_dirs = Vec::new();
+        let mut stale_dirs = Vec::new();
+        let mut etag_unreliable = false;
+
+        // Categorize issues
+        for issue in &report.issues_found {
+            match issue.issue_type {
+                ValidationIssueType::ETagMismatch => etag_mismatches.push(issue.directory_path.clone()),
+                ValidationIssueType::Untracked => untracked_dirs.push(issue.directory_path.clone()),
+                ValidationIssueType::Inaccessible => inaccessible_dirs.push(issue.directory_path.clone()),
+                ValidationIssueType::Stale => stale_dirs.push(issue.directory_path.clone()),
+                ValidationIssueType::ETagUnreliable => etag_unreliable = true,
+                _ => {}
+            }
+        }
+
+        // Generate recommendations
+        if etag_unreliable {
+            report.recommendations.push(ValidationRecommendation {
+                action: ValidationAction::DisableETagOptimization,
+                reason: "ETag support is unreliable, consider switching to periodic deep scans".to_string(),
+                affected_directories: vec!["all".to_string()],
+                priority: ValidationSeverity::Critical,
+            });
+        } else if !etag_mismatches.is_empty() {
+            report.recommendations.push(ValidationRecommendation {
+                action: ValidationAction::DeepScanRequired,
+                reason: format!("{} directories have ETag mismatches and need rescanning", etag_mismatches.len()),
+                affected_directories: etag_mismatches,
+                priority: ValidationSeverity::Warning,
+            });
+        }
+
+        if !untracked_dirs.is_empty() {
+            report.recommendations.push(ValidationRecommendation {
+                action: ValidationAction::DeepScanRequired,
+                reason: format!("{} untracked directories found on server", untracked_dirs.len()),
+                affected_directories: untracked_dirs,
+                priority: ValidationSeverity::Info,
+            });
+        }
+
+        if !inaccessible_dirs.is_empty() {
+            report.recommendations.push(ValidationRecommendation {
+                action: ValidationAction::CheckServerConfiguration,
+                reason: format!("{} directories are inaccessible", inaccessible_dirs.len()),
+                affected_directories: inaccessible_dirs,
+                priority: ValidationSeverity::Error,
+            });
+        }
+
+        if !stale_dirs.is_empty() && stale_dirs.len() > 10 {
+            report.recommendations.push(ValidationRecommendation {
+                action: ValidationAction::DeepScanRequired,
+                reason: format!("{} directories haven't been scanned recently", stale_dirs.len()),
+                affected_directories: stale_dirs,
+                priority: ValidationSeverity::Info,
+            });
+        }
+
+        // If no major issues, everything is healthy
+        if report.recommendations.is_empty() {
+            report.recommendations.push(ValidationRecommendation {
+                action: ValidationAction::NoActionRequired,
+                reason: "ETag tracking system is healthy and working correctly".to_string(),
+                affected_directories: Vec::new(),
+                priority: ValidationSeverity::Info,
+            });
+        }
+    }
+
+    /// Check if we should trigger a deep scan based on validation results
+    pub fn should_trigger_deep_scan(&self, report: &ValidationReport) -> (bool, String) {
+        // Critical issues always trigger deep scan
+        let critical_issues = report.issues_found.iter()
+            .filter(|issue| matches!(issue.severity, ValidationSeverity::Critical))
+            .count();
+
+        if critical_issues > 0 {
+            return (true, format!("{} critical issues detected", critical_issues));
+        }
+
+        // Multiple ETag mismatches suggest systematic issues
+        let etag_mismatches = report.issues_found.iter()
+            .filter(|issue| matches!(issue.issue_type, ValidationIssueType::ETagMismatch))
+            .count();
+
+        if etag_mismatches > 5 {
+            return (true, format!("{} ETag mismatches suggest synchronization issues", etag_mismatches));
+        }
+
+        // Many untracked directories suggest incomplete initial scan
+        let untracked = report.issues_found.iter()
+            .filter(|issue| matches!(issue.issue_type, ValidationIssueType::Untracked))
+            .count();
+
+        if untracked > 10 {
+            return (true, format!("{} untracked directories found", untracked));
+        }
+
+        // Low health score indicates general problems
+        if report.server_health_score < 70 {
+            return (true, format!("Low server health score: {}/100", report.server_health_score));
+        }
+
+        (false, "System appears healthy, no deep scan needed".to_string())
+    }
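Taken together, validate_etag_tracking and should_trigger_deep_scan are meant to gate the expensive scan. A rough sketch of a maintenance pass wiring them up (the function name and surrounding plumbing are assumptions, not part of the patch):

    async fn maintenance_pass(service: &WebDAVService, user_id: uuid::Uuid, state: &crate::AppState) -> anyhow::Result<()> {
        let report = service.validate_etag_tracking(user_id, state).await?;
        let (scan_needed, reason) = service.should_trigger_deep_scan(&report);
        if scan_needed {
            info!("Escalating to deep scan: {}", reason);
            let files = service.deep_scan_with_guaranteed_completeness(user_id, state).await?;
            info!("Deep scan recovered {} files", files.len());
        } else {
            info!("Skipping deep scan: {}", reason);
        }
        Ok(())
    }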
+
+    /// Ensure complete directory tree discovery before marking deep scan as complete
+    /// This is the MOST CRITICAL function - guarantees we've found ALL subdirectories
+    pub async fn ensure_complete_directory_discovery(&self, user_id: uuid::Uuid, state: &crate::AppState) -> Result<DirectoryDiscoveryReport> {
+        info!("🔍 Starting complete directory tree discovery verification");
+
+        let mut report = DirectoryDiscoveryReport {
+            discovery_id: uuid::Uuid::new_v4(),
+            user_id,
+            started_at: chrono::Utc::now(),
+            completed_at: None,
+            watch_folders_processed: Vec::new(),
+            total_directories_discovered: 0,
+            new_directories_found: 0,
+            missing_directories_detected: 0,
+            is_complete: false,
+            issues: Vec::new(),
+        };
+
+        // Process each watch folder to ensure complete discovery
+        for watch_folder in &self.config.watch_folders {
+            info!("📂 Ensuring complete discovery for watch folder: {}", watch_folder);
+
+            match self.ensure_watch_folder_complete_discovery(watch_folder, user_id, state, &mut report).await {
+                Ok(folder_report) => {
+                    report.watch_folders_processed.push(folder_report);
+                }
+                Err(e) => {
+                    error!("❌ Failed to ensure complete discovery for {}: {}", watch_folder, e);
+                    report.issues.push(format!("Failed to process {}: {}", watch_folder, e));
+                }
+            }
+        }
+
+        // Verify completeness by checking for any gaps
+        self.verify_directory_tree_completeness(&mut report, user_id, state).await?;
+
+        report.completed_at = Some(chrono::Utc::now());
+        let duration = report.completed_at.unwrap() - report.started_at;
+
+        if report.is_complete {
+            info!("✅ Complete directory discovery verified in {:.2}s. {} total directories, {} newly discovered",
+                duration.num_milliseconds() as f64 / 1000.0,
+                report.total_directories_discovered,
+                report.new_directories_found);
+        } else {
+            warn!("⚠️ Directory discovery incomplete after {:.2}s. {} issues found",
+                duration.num_milliseconds() as f64 / 1000.0,
+                report.issues.len());
+        }
+
+        Ok(report)
+    }
{} issues found", + duration.num_milliseconds() as f64 / 1000.0, + report.issues.len()); + } + + Ok(report) + } + + /// Ensure a single watch folder has complete n-depth directory discovery + async fn ensure_watch_folder_complete_discovery( + &self, + watch_folder: &str, + user_id: uuid::Uuid, + state: &crate::AppState, + main_report: &mut DirectoryDiscoveryReport + ) -> Result { + let mut folder_report = WatchFolderDiscoveryReport { + watch_folder: watch_folder.to_string(), + total_directories: 0, + new_directories: 0, + depth_levels_scanned: 0, + is_complete: false, + }; + + // Use PROPFIND with Depth: infinity to get COMPLETE directory tree + let relative_watch_folder = self.convert_to_relative_path(watch_folder); + let all_entries = self.discover_files_in_folder_impl(&relative_watch_folder).await?; + + // Extract ALL directories from the complete scan + let all_server_directories: Vec<_> = all_entries.iter() + .filter(|entry| entry.is_directory) + .collect(); + + folder_report.total_directories = all_server_directories.len(); + main_report.total_directories_discovered += all_server_directories.len(); + + // Calculate depth levels + let max_depth = all_server_directories.iter() + .map(|dir| dir.path.chars().filter(|&c| c == '/').count()) + .max() + .unwrap_or(0); + folder_report.depth_levels_scanned = max_depth; + + info!("📊 Found {} directories across {} depth levels in {}", + all_server_directories.len(), max_depth, watch_folder); + + // Check each directory against our tracking database + for server_dir in &all_server_directories { + match state.db.get_webdav_directory(user_id, &server_dir.path).await { + Ok(Some(tracked_dir)) => { + // Directory is already tracked - verify ETag is current + if tracked_dir.directory_etag != server_dir.etag { + debug!("🔄 Updating ETag for tracked directory: {}", server_dir.path); + let update = crate::models::UpdateWebDAVDirectory { + directory_etag: server_dir.etag.clone(), + last_scanned_at: chrono::Utc::now(), + file_count: 0, // Will be calculated separately + total_size_bytes: 0, + }; + if let Err(e) = state.db.update_webdav_directory(user_id, &server_dir.path, &update).await { + warn!("Failed to update directory {}: {}", server_dir.path, e); + } + } + } + Ok(None) => { + // NEW DIRECTORY DISCOVERED - this is critical to track + info!("🆕 NEW directory discovered: {}", server_dir.path); + folder_report.new_directories += 1; + main_report.new_directories_found += 1; + + // Immediately add to tracking database + let new_dir = crate::models::CreateWebDAVDirectory { + user_id, + directory_path: server_dir.path.clone(), + directory_etag: server_dir.etag.clone(), + file_count: 0, // Will be calculated when files are processed + total_size_bytes: 0, + }; + + if let Err(e) = state.db.create_or_update_webdav_directory(&new_dir).await { + error!("❌ CRITICAL: Failed to track new directory {}: {}", server_dir.path, e); + main_report.issues.push(format!("Failed to track new directory {}: {}", server_dir.path, e)); + } else { + debug!("✅ Successfully tracking new directory: {}", server_dir.path); + } + } + Err(e) => { + error!("Database error checking directory {}: {}", server_dir.path, e); + main_report.issues.push(format!("Database error for {}: {}", server_dir.path, e)); + } + } + } + + // Check for orphaned tracking entries (directories we track but don't exist on server) + match state.db.list_webdav_directories(user_id).await { + Ok(tracked_dirs) => { + let server_paths: HashSet = all_server_directories.iter() + .map(|d| d.path.clone()) + .collect(); + + 
+
+    /// Final verification that directory tree coverage is complete
+    async fn verify_directory_tree_completeness(
+        &self,
+        report: &mut DirectoryDiscoveryReport,
+        user_id: uuid::Uuid,
+        state: &crate::AppState
+    ) -> Result<()> {
+        info!("🔍 Performing final completeness verification");
+
+        // Check that we have no scan_in_progress flags left over
+        match state.db.get_incomplete_webdav_scans(user_id).await {
+            Ok(incomplete) => {
+                if !incomplete.is_empty() {
+                    warn!("⚠️ Found {} incomplete scans still in progress", incomplete.len());
+                    report.issues.push(format!("{} scans still marked as in progress", incomplete.len()));
+                    report.is_complete = false;
+                    return Ok(());
+                }
+            }
+            Err(e) => {
+                error!("Failed to check incomplete scans: {}", e);
+                report.issues.push(format!("Cannot verify scan completeness: {}", e));
+                report.is_complete = false;
+                return Ok(());
+            }
+        }
+
+        // Verify each watch folder has at least some tracked directories
+        for watch_folder in &self.config.watch_folders {
+            match state.db.list_webdav_directories(user_id).await {
+                Ok(dirs) => {
+                    let watch_folder_dirs = dirs.iter()
+                        .filter(|d| d.directory_path.starts_with(watch_folder))
+                        .count();
+
+                    if watch_folder_dirs == 0 {
+                        warn!("⚠️ No directories tracked for watch folder: {}", watch_folder);
+                        report.issues.push(format!("No directories tracked for watch folder: {}", watch_folder));
+                        report.is_complete = false;
+                    } else {
+                        debug!("✅ Watch folder {} has {} tracked directories", watch_folder, watch_folder_dirs);
+                    }
+                }
+                Err(e) => {
+                    error!("Failed to verify watch folder {}: {}", watch_folder, e);
+                    report.issues.push(format!("Cannot verify watch folder {}: {}", watch_folder, e));
+                    report.is_complete = false;
+                }
+            }
+        }
+
+        // If no issues found, mark as complete
+        if report.issues.is_empty() {
+            report.is_complete = true;
+            info!("✅ Directory tree completeness verified - all {} watch folders fully discovered", self.config.watch_folders.len());
+        } else {
+            warn!("❌ Directory tree completeness verification failed: {} issues", report.issues.len());
+        }
+
+        Ok(())
+    }
+
+    /// Modified deep scan that REQUIRES complete directory discovery
+    pub async fn deep_scan_with_guaranteed_completeness(&self, user_id: uuid::Uuid, state: &crate::AppState) -> Result<Vec<FileIngestionInfo>> {
+        info!("🚀 Starting deep scan with guaranteed directory completeness");
+
+        let scan_id = uuid::Uuid::new_v4();
+        let started_at = chrono::Utc::now();
+
+        // STEP 1: CRITICAL - Ensure complete directory discovery FIRST
+        let discovery_report = self.ensure_complete_directory_discovery(user_id, state).await?;
+
+        if !discovery_report.is_complete {
+            return Err(anyhow!("Cannot proceed with deep scan: Directory discovery incomplete. {} issues found: {:?}",
+                discovery_report.issues.len(), discovery_report.issues));
+        }
+
+        info!("✅ Directory discovery complete - proceeding with file processing");
+
+        // STEP 2: Only now process files, knowing we have complete directory coverage
+        let mut all_files = Vec::new();
+        for watch_folder in &self.config.watch_folders {
+            match self.smart_directory_scan_with_checkpoints(watch_folder, None, user_id, state).await {
+                Ok(mut files) => {
+                    info!("📁 Processed {} files from {}", files.len(), watch_folder);
+                    all_files.append(&mut files);
+                }
+                Err(e) => {
+                    error!("Failed to process files in {}: {}", watch_folder, e);
+                    return Err(anyhow!("File processing failed for {}: {}", watch_folder, e));
+                }
+            }
+        }
+
+        // STEP 3: Final verification that nothing was missed
+        let final_verification = self.ensure_complete_directory_discovery(user_id, state).await?;
+        let is_complete = final_verification.is_complete && final_verification.new_directories_found == 0;
+
+        if final_verification.new_directories_found > 0 {
+            warn!("⚠️ Found {} additional directories during final verification - scan may need to restart",
+                final_verification.new_directories_found);
+        }
+
+        let completed_at = chrono::Utc::now();
+        let duration = completed_at - started_at;
+
+        if is_complete {
+            info!("🎉 DEEP SCAN COMPLETE WITH GUARANTEED COMPLETENESS: {} files processed, {} directories tracked in {:.2}s",
+                all_files.len(),
+                discovery_report.total_directories_discovered,
+                duration.num_milliseconds() as f64 / 1000.0);
+        } else {
+            warn!("⚠️ Deep scan completed but completeness not guaranteed: {:.2}s",
+                duration.num_milliseconds() as f64 / 1000.0);
+        }
+
+        Ok(all_files)
+    }
+}
{} issues found: {:?}", + discovery_report.issues.len(), discovery_report.issues)); + } + + info!("✅ Directory discovery complete - proceeding with file processing"); + + // STEP 2: Only now process files, knowing we have complete directory coverage + let mut all_files = Vec::new(); + for watch_folder in &self.config.watch_folders { + match self.smart_directory_scan_with_checkpoints(watch_folder, None, user_id, state).await { + Ok(mut files) => { + info!("📁 Processed {} files from {}", files.len(), watch_folder); + all_files.append(&mut files); + } + Err(e) => { + error!("Failed to process files in {}: {}", watch_folder, e); + return Err(anyhow!("File processing failed for {}: {}", watch_folder, e)); + } + } + } + + // STEP 3: Final verification that nothing was missed + let final_verification = self.ensure_complete_directory_discovery(user_id, state).await?; + let is_complete = final_verification.is_complete && final_verification.new_directories_found == 0; + + if final_verification.new_directories_found > 0 { + warn!("⚠️ Found {} additional directories during final verification - scan may need to restart", + final_verification.new_directories_found); + } + + let completed_at = chrono::Utc::now(); + let duration = completed_at - started_at; + + if is_complete { + info!("🎉 DEEP SCAN COMPLETE WITH GUARANTEED COMPLETENESS: {} files processed, {} directories tracked in {:.2}s", + all_files.len(), + discovery_report.total_directories_discovered, + duration.num_milliseconds() as f64 / 1000.0); + } else { + warn!("⚠️ Deep scan completed but completeness not guaranteed: {:.2}s", + duration.num_milliseconds() as f64 / 1000.0); + } + + Ok(all_files) + } +} + +/// Report of complete directory tree discovery +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DirectoryDiscoveryReport { + pub discovery_id: uuid::Uuid, + pub user_id: uuid::Uuid, + pub started_at: chrono::DateTime, + pub completed_at: Option>, + pub watch_folders_processed: Vec, + pub total_directories_discovered: usize, + pub new_directories_found: usize, + pub missing_directories_detected: usize, + pub is_complete: bool, + pub issues: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WatchFolderDiscoveryReport { + pub watch_folder: String, + pub total_directories: usize, + pub new_directories: usize, + pub depth_levels_scanned: usize, + pub is_complete: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CompleteDeepScanReport { + pub scan_id: uuid::Uuid, + pub user_id: uuid::Uuid, + pub started_at: chrono::DateTime, + pub completed_at: chrono::DateTime, + pub directory_discovery_report: DirectoryDiscoveryReport, + pub final_verification_report: DirectoryDiscoveryReport, + pub total_files_processed: usize, + pub scan_duration_seconds: i64, + pub is_guaranteed_complete: bool, } \ No newline at end of file