Merge pull request #37 from readur/feat/webdav/ignore-duplicates

feat(server): normalize etags from webdav to properly check for file …
2025-06-23 13:20:49 -07:00 · 2025-06-23 13:20:49 -07:00 · 1661138d29
parent 3d93c96c31 de45300c7a
commit 1661138d29
4 changed files with 90 additions and 5 deletions
--- a/migrations/20250623000001_normalize_existing_etags.sql
+++ b/migrations/20250623000001_normalize_existing_etags.sql
@ -0,0 +1,11 @@
+-- Normalize existing ETags in webdav_files table to match new normalization format
+-- This migration ensures that existing ETag values are normalized to prevent
+-- unnecessary re-downloads of unchanged files after the ETag normalization fix
+
+-- Update ETags to remove quotes and W/ prefixes
+UPDATE webdav_files 
+SET etag = TRIM(BOTH '"' FROM TRIM(LEADING 'W/' FROM etag))
+WHERE etag LIKE '"%"' OR etag LIKE 'W/%';
+
+-- Add a comment to document this normalization
+COMMENT ON COLUMN webdav_files.etag IS 'Normalized ETag without quotes or W/ prefix (since migration 20250623000001)';
--- a/src/webdav_xml_parser.rs
+++ b/src/webdav_xml_parser.rs
@ -83,7 +83,7 @@ pub fn parse_propfind_response(xml_text: &str) -> Result<Vec<FileInfo>> {
                                resp.content_type = Some(text.trim().to_string());
                            }
                            "getetag" => {
-                                resp.etag = Some(text.trim().to_string());
+                                resp.etag = Some(normalize_etag(&text));
                            }
                            "status" if in_propstat => {
                                // Check if status is 200 OK
@ -200,6 +200,20 @@ fn parse_http_date(date_str: &str) -> Option<DateTime<Utc>> {
        })
 }

+/// Normalize ETag by removing quotes and weak ETag prefix
+/// This ensures consistent ETag comparison across different WebDAV servers
+/// 
+/// Examples:
+/// - `"abc123"` → `abc123`
+/// - `W/"abc123"` → `abc123`
+/// - `abc123` → `abc123`
+fn normalize_etag(etag: &str) -> String {
+    etag.trim()
+        .trim_start_matches("W/")
+        .trim_matches('"')
+        .to_string()
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@ -231,7 +245,7 @@ mod tests {
        assert_eq!(file.name, "test.pdf");
        assert_eq!(file.size, 1024);
        assert_eq!(file.mime_type, "application/pdf");
-        assert_eq!(file.etag, "\"abc123\"");
+        assert_eq!(file.etag, "abc123");
        assert!(!file.is_directory);
    }

@ -300,6 +314,7 @@ mod tests {
        assert_eq!(file.name, "report.pdf");
        assert_eq!(file.path, "/remote.php/dav/files/admin/Documents/report.pdf");
        assert_eq!(file.size, 2048000);
+        assert_eq!(file.etag, "pdf123"); // ETag should be normalized (quotes removed)
        assert!(file.last_modified.is_some());
    }

@ -337,4 +352,17 @@ mod tests {
        let files = parse_propfind_response(xml).unwrap();
        assert_eq!(files.len(), 0);
    }
+
+    #[test]
+    fn test_normalize_etag() {
+        // Test various ETag formats that WebDAV servers might return
+        assert_eq!(normalize_etag("abc123"), "abc123");
+        assert_eq!(normalize_etag("\"abc123\""), "abc123");
+        assert_eq!(normalize_etag("W/\"abc123\""), "abc123");
+        assert_eq!(normalize_etag("  \"abc123\"  "), "abc123");
+        assert_eq!(normalize_etag("W/\"abc-123-def\""), "abc-123-def");
+        assert_eq!(normalize_etag(""), "");
+        assert_eq!(normalize_etag("\"\""), "");
+        assert_eq!(normalize_etag("W/\"\""), "");
+    }
 }
--- a/tests/webdav_enhanced_unit_tests.rs
+++ b/tests/webdav_enhanced_unit_tests.rs
@ -201,21 +201,21 @@ fn test_webdav_response_parsing_comprehensive() {
    let pdf_file = files.iter().find(|f| f.name == "report.pdf").unwrap();
    assert_eq!(pdf_file.size, 2048000);
    assert_eq!(pdf_file.mime_type, "application/pdf");
-    assert_eq!(pdf_file.etag, "\"pdf123\"");
+    assert_eq!(pdf_file.etag, "pdf123"); // ETag should be normalized (quotes removed)
    assert!(!pdf_file.is_directory);

    // Verify second file (photo.png)
    let png_file = files.iter().find(|f| f.name == "photo.png").unwrap();
    assert_eq!(png_file.size, 768000);
    assert_eq!(png_file.mime_type, "image/png");
-    assert_eq!(png_file.etag, "\"png456\"");
+    assert_eq!(png_file.etag, "png456"); // ETag should be normalized (quotes removed)
    assert!(!png_file.is_directory);

    // Verify third file (unsupported.docx)
    let docx_file = files.iter().find(|f| f.name == "unsupported.docx").unwrap();
    assert_eq!(docx_file.size, 102400);
    assert_eq!(docx_file.mime_type, "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
-    assert_eq!(docx_file.etag, "\"docx789\"");
+    assert_eq!(docx_file.etag, "docx789"); // ETag should be normalized (quotes removed)
    assert!(!docx_file.is_directory);
 }

--- a/tests/webdav_sync_tests.rs
+++ b/tests/webdav_sync_tests.rs
@ -182,6 +182,52 @@ fn test_etag_change_detection() {
    assert_eq!(normalized_etag, old_etag);
 }

+#[test]
+fn test_etag_normalization() {
+    // Test various ETag formats that WebDAV servers might return
+    let test_cases = vec![
+        ("abc123", "abc123"),                    // Plain ETag
+        ("\"abc123\"", "abc123"),                // Quoted ETag
+        ("W/\"abc123\"", "abc123"),              // Weak ETag
+        ("\"abc-123-def\"", "abc-123-def"),      // Quoted with dashes
+        ("W/\"abc-123-def\"", "abc-123-def"),    // Weak ETag with dashes
+    ];
+    
+    for (input, expected) in test_cases {
+        let normalized = input
+            .trim_start_matches("W/")
+            .trim_matches('"');
+        assert_eq!(normalized, expected, 
+            "Failed to normalize ETag: {} -> expected {}", input, expected);
+    }
+}
+
+#[test]
+fn test_etag_comparison_fixes_duplicate_downloads() {
+    // This test demonstrates how ETag normalization prevents unnecessary downloads
+    
+    // Simulate a WebDAV server that returns quoted ETags 
+    let server_etag = "\"file-hash-123\"";
+    
+    // Before fix: stored ETag would have quotes, server ETag would have quotes
+    // After fix: both should be normalized (no quotes)
+    let normalized_server = server_etag.trim_start_matches("W/").trim_matches('"');
+    let normalized_stored = "file-hash-123"; // What would be stored after normalization
+    
+    // These should match after normalization, preventing redownload
+    assert_eq!(normalized_server, normalized_stored, 
+        "Normalized ETags should match to prevent unnecessary redownloads");
+    
+    // Demonstrate the issue that was fixed
+    let old_behavior_would_mismatch = (server_etag != normalized_stored);
+    assert!(old_behavior_would_mismatch, 
+        "Before fix: quoted vs unquoted ETags would cause unnecessary downloads");
+    
+    let new_behavior_matches = (normalized_server == normalized_stored);
+    assert!(new_behavior_matches, 
+        "After fix: normalized ETags match, preventing unnecessary downloads");
+}
+
 #[test]
 fn test_path_normalization() {
    let test_paths = vec![