feat(server): I feel like I'm going to have to come back and fix this later
parent 4aa3d77e40
commit 6898d85981
@@ -218,21 +218,25 @@ async fn process_single_file(
     info!("Processing file: {}", file_info.path);
 
     // Check if we've already processed this file
+    info!("Checking WebDAV tracking for: {}", file_info.path);
     match state.db.get_webdav_file_by_path(user_id, &file_info.path).await {
         Ok(Some(existing_file)) => {
+            info!("Found existing WebDAV file record: {} (current ETag: {}, remote ETag: {})",
+                file_info.path, existing_file.etag, file_info.etag);
 
             // Check if file has changed (compare ETags)
             if existing_file.etag == file_info.etag {
-                info!("Skipping unchanged file: {} (ETag: {})", file_info.path, file_info.etag);
+                info!("Skipping unchanged WebDAV file: {} (ETag: {})", file_info.path, file_info.etag);
                 return Ok(false); // Not processed (no change)
             }
-            info!("File has changed: {} (old ETag: {}, new ETag: {})",
+            info!("WebDAV file has changed: {} (old ETag: {}, new ETag: {})",
                 file_info.path, existing_file.etag, file_info.etag);
         }
         Ok(None) => {
-            info!("New file found: {}", file_info.path);
+            info!("New WebDAV file detected: {}", file_info.path);
         }
         Err(e) => {
-            warn!("Error checking existing file {}: {}", file_info.path, e);
+            warn!("Error checking existing WebDAV file {}: {}", file_info.path, e);
         }
     }
 
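Note: the change detection above boils down to comparing the stored ETag against the one the WebDAV server reports. A minimal sketch of that decision (StoredFile and RemoteFile are hypothetical stand-ins for the tracked DB record and the remote listing entry used in the real code):

// Hypothetical stand-ins for the tracked DB row and the remote listing entry.
struct StoredFile { etag: String }
struct RemoteFile { etag: String }

/// True when the remote file should be (re)downloaded and processed.
fn needs_sync(stored: Option<&StoredFile>, remote: &RemoteFile) -> bool {
    match stored {
        Some(existing) => existing.etag != remote.etag, // ETag changed on the server
        None => true,                                   // never seen this path before
    }
}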
@@ -245,20 +249,29 @@ async fn process_single_file(
     // Calculate file hash for deduplication
     let file_hash = calculate_file_hash(&file_data);
 
-    // Check if this exact file content already exists in the system
+    // Check if this exact file content already exists for this user
     // This prevents downloading and processing duplicate files from WebDAV
+    info!("Checking for duplicate content for user {}: {} (hash: {}, size: {} bytes)",
+        user_id, file_info.name, &file_hash[..8], file_data.len());
+
+    // Query documents with the same file size for this user only
+    let size_filter = file_data.len() as i64;
     if let Ok(existing_docs) = state.db.get_documents_by_user_with_role(user_id, crate::models::UserRole::User, 1000, 0).await {
-        for existing_doc in existing_docs {
-            // Quick size check first (much faster than hash comparison)
-            if existing_doc.file_size == file_data.len() as i64 {
+        let matching_docs: Vec<_> = existing_docs.into_iter()
+            .filter(|doc| doc.file_size == size_filter)
+            .collect();
+
+        info!("Found {} documents with same size for user {}", matching_docs.len(), user_id);
+
+        for existing_doc in matching_docs {
             // Read the existing file and compare hashes
             if let Ok(existing_file_data) = tokio::fs::read(&existing_doc.file_path).await {
                 let existing_hash = calculate_file_hash(&existing_file_data);
                 if file_hash == existing_hash {
-                    info!("Skipping duplicate WebDAV file content: {} (hash: {}, already exists as: {})",
-                        file_info.name, &file_hash[..8], existing_doc.original_filename);
+                    info!("Found duplicate content for user {}: {} matches existing document {}",
+                        user_id, file_info.name, existing_doc.original_filename);
 
-                    // Still record this WebDAV file in the tracking table to prevent re-downloading
+                    // Record this WebDAV file as a duplicate but link to existing document
                    let webdav_file = CreateWebDAVFile {
                        user_id,
                        webdav_path: file_info.path.clone(),
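Note: calculate_file_hash itself is not part of this diff. A plausible implementation, assuming a hex-encoded SHA-256 digest via the sha2 and hex crates (the real helper may differ), which would also explain the &file_hash[..8] prefix used in the log messages:

use sha2::{Digest, Sha256};

// Hex-encoded SHA-256 of the raw file bytes; &hash[..8] then yields the
// first eight hex characters shown in the log lines above.
fn calculate_file_hash(data: &[u8]) -> String {
    let mut hasher = Sha256::new();
    hasher.update(data);
    hex::encode(hasher.finalize())
}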
@@ -275,9 +288,11 @@ async fn process_single_file(
                        error!("Failed to record duplicate WebDAV file: {}", e);
                    }
 
+                    info!("WebDAV file marked as duplicate_content, skipping processing");
                    return Ok(false); // Not processed (duplicate)
                }
-            }
+            } else {
+                warn!("Could not read existing file for hash comparison: {}", existing_doc.file_path);
            }
        }
    }
@@ -325,8 +340,10 @@ async fn process_single_file(
 
    // Queue for OCR processing if enabled
    if enable_background_ocr {
+        info!("Background OCR is enabled, queueing document {} for processing", created_document.id);
+
        match state.db.pool.acquire().await {
-            Ok(conn) => {
+            Ok(_conn) => {
                let queue_service = crate::ocr_queue::OcrQueueService::new(
                    state.db.clone(),
                    state.db.pool.clone(),
@@ -350,6 +367,8 @@ async fn process_single_file(
                error!("Failed to connect to database for OCR queueing: {}", e);
            }
        }
+    } else {
+        info!("Background OCR is disabled, skipping OCR queue for document {}", created_document.id);
    }
 
    Ok(true) // Successfully processed
@@ -13,6 +13,16 @@ use crate::{config::Config, db::Database, file_service::FileService, ocr_queue::
 
 pub async fn start_folder_watcher(config: Config, db: Database) -> Result<()> {
     info!("Starting hybrid folder watcher on: {}", config.watch_folder);
+    info!("Upload path configured as: {}", config.upload_path);
+
+    // Debug: Check if paths resolve correctly
+    let watch_canonical = std::path::Path::new(&config.watch_folder).canonicalize()
+        .unwrap_or_else(|_| std::path::PathBuf::from(&config.watch_folder));
+    let upload_canonical = std::path::Path::new(&config.upload_path).canonicalize()
+        .unwrap_or_else(|_| std::path::PathBuf::from(&config.upload_path));
+
+    info!("Watch folder canonical path: {:?}", watch_canonical);
+    info!("Upload folder canonical path: {:?}", upload_canonical);
 
     // Initialize services with shared database
     let file_service = FileService::new(config.upload_path.clone());
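Note: Path::canonicalize resolves symlinks and relative components but returns an error for paths that do not exist yet, which is why the new debug logging falls back to the raw configured value. A standalone illustration of that fallback:

use std::path::{Path, PathBuf};

// Resolve a configured path to its canonical form, or keep the raw string
// when the path cannot be resolved (e.g. it does not exist yet).
fn canonical_or_raw(configured: &str) -> PathBuf {
    Path::new(configured)
        .canonicalize()
        .unwrap_or_else(|_| PathBuf::from(configured))
}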
@@ -167,6 +177,7 @@ async fn scan_directory(
    {
        if entry.file_type().is_file() {
            let path = entry.path().to_path_buf();
+            debug!("Found file during scan: {:?}", path);
 
            if let Ok(metadata) = entry.metadata() {
                if let Ok(modified) = metadata.modified() {
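Note: the entry.file_type().is_file() / entry.metadata() calls in this scan follow the shape of the walkdir crate's API. A minimal sketch of that kind of traversal, assuming walkdir (the crate actually used is not visible in this hunk):

use std::path::PathBuf;
use std::time::SystemTime;
use walkdir::WalkDir;

// Collect (path, mtime) for every regular file under `root`, skipping
// entries whose metadata cannot be read.
fn scan(root: &str) -> Vec<(PathBuf, SystemTime)> {
    WalkDir::new(root)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file())
        .filter_map(|entry| {
            let modified = entry.metadata().ok()?.modified().ok()?;
            Some((entry.path().to_path_buf(), modified))
        })
        .collect()
}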
@@ -307,12 +318,19 @@ async fn process_file(
    // Calculate file hash for deduplication
    let file_hash = calculate_file_hash(&file_data);
 
-    // Check if this exact file content already exists in the system by comparing
-    // against existing files with the same size (performance optimization)
+    // Check if this exact file content already exists for the admin user
+    debug!("Checking for duplicate content for admin user: {} (hash: {}, size: {} bytes)",
+        filename, &file_hash[..8], file_size);
+
+    // Query documents with the same file size for the admin user only
    if let Ok(existing_docs) = db.get_documents_by_user_with_role(admin_user_id, crate::models::UserRole::Admin, 1000, 0).await {
-        for existing_doc in existing_docs {
-            // Quick size check first (much faster than hash comparison)
-            if existing_doc.file_size == file_size {
+        let matching_docs: Vec<_> = existing_docs.into_iter()
+            .filter(|doc| doc.file_size == file_size)
+            .collect();
+
+        debug!("Found {} documents with same size for admin user", matching_docs.len());
+
+        for existing_doc in matching_docs {
            // Read the existing file and compare hashes
            if let Ok(existing_file_data) = tokio::fs::read(&existing_doc.file_path).await {
                let existing_hash = calculate_file_hash(&existing_file_data);
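Note: both duplicate checks (the per-user WebDAV one above and this admin-user one) share the same pattern: filter candidates by file size first, and only hash the few that match, since hashing requires reading the whole file from disk. A consolidated sketch of that pattern (the Doc struct is a hypothetical stand-in for the document rows; calculate_file_hash is the helper sketched earlier):

struct Doc { file_path: String, file_size: i64 }

// Return the path of the first existing document whose contents match `data`.
// The size filter rejects most candidates without any disk I/O.
async fn find_duplicate(data: &[u8], docs: &[Doc]) -> Option<String> {
    let hash = calculate_file_hash(data);
    let size = data.len() as i64;
    for doc in docs.iter().filter(|d| d.file_size == size) {
        if let Ok(bytes) = tokio::fs::read(&doc.file_path).await {
            if calculate_file_hash(&bytes) == hash {
                return Some(doc.file_path.clone());
            }
        }
    }
    None
}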
@@ -324,7 +342,6 @@ async fn process_file(
            }
        }
    }
-    }
 
    debug!("File content is unique: {} (hash: {})", filename, &file_hash[..8]);
 
@@ -143,11 +143,21 @@ impl WebDAVScheduler {
                let elapsed_minutes = elapsed.num_minutes();
 
                if elapsed_minutes < sync_interval_minutes as i64 {
+                    // Only log this occasionally to avoid spam
+                    if elapsed_minutes % 10 == 0 {
                        info!("Sync not due for user {} (last sync {} minutes ago, interval {} minutes)",
                            user_settings.user_id, elapsed_minutes, sync_interval_minutes);
+                    }
                    return Ok(false);
                }
 
+                info!("Sync is due for user {} (last sync {} minutes ago, interval {} minutes)",
+                    user_settings.user_id, elapsed_minutes, sync_interval_minutes);
+            } else {
+                info!("No previous sync found for user {}, sync is due", user_settings.user_id);
            }
+        } else {
+            info!("No sync state found for user {}, sync is due", user_settings.user_id);
        }
 
        // Sync is due
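Note: the scheduler's due-check reduces to comparing elapsed minutes against the configured interval, with a missing sync record treated as immediately due. A compact sketch, assuming chrono's DateTime<Utc> for the stored last-sync timestamp:

use chrono::{DateTime, Utc};

// True when enough time has passed since the last completed sync;
// a user with no recorded sync is always due.
fn sync_is_due(last_sync: Option<DateTime<Utc>>, interval_minutes: i64) -> bool {
    match last_sync {
        None => true,
        Some(ts) => (Utc::now() - ts).num_minutes() >= interval_minutes,
    }
}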