diff --git a/migrations/20250623000001_normalize_existing_etags.sql b/migrations/20250623000001_normalize_existing_etags.sql
new file mode 100644
index 0000000..3198d35
--- /dev/null
+++ b/migrations/20250623000001_normalize_existing_etags.sql
@@ -0,0 +1,15 @@
+-- Normalize existing ETags in webdav_files table to match new normalization format
+-- This migration ensures that existing ETag values are normalized to prevent
+-- unnecessary re-downloads of unchanged files after the ETag normalization fix
+
+-- Update ETags to remove quotes and W/ prefixes.
+-- In PostgreSQL, TRIM(LEADING 'W/' FROM etag) strips any run of the characters
+-- 'W' and '/', not the literal prefix (e.g. 'W/Wabc' would become 'abc'), so an
+-- anchored regexp is used to drop only whole "W/" prefixes. The WHERE clause also
+-- catches values with a quote at just one end, which TRIM(BOTH ...) strips too.
+UPDATE webdav_files
+SET etag = TRIM(BOTH '"' FROM regexp_replace(etag, '^(W/)+', ''))
+WHERE etag LIKE '"%' OR etag LIKE '%"' OR etag LIKE 'W/%';
+
+-- Add a comment to document this normalization
+COMMENT ON COLUMN webdav_files.etag IS 'Normalized ETag without quotes or W/ prefix (since migration 20250623000001)';
diff --git a/src/webdav_xml_parser.rs b/src/webdav_xml_parser.rs
index 45ed279..328a09c 100644
--- a/src/webdav_xml_parser.rs
+++ b/src/webdav_xml_parser.rs
@@ -83,7 +83,7 @@ pub fn parse_propfind_response(xml_text: &str) -> Result> {
                                 resp.content_type = Some(text.trim().to_string());
                             }
                             "getetag" => {
-                                resp.etag = Some(text.trim().to_string());
+                                resp.etag = Some(normalize_etag(&text));
                             }
                             "status" if in_propstat => {
                                 // Check if status is 200 OK
@@ -200,6 +200,20 @@ fn parse_http_date(date_str: &str) -> Option> {
         })
 }
 
+/// Normalize ETag by removing quotes and weak ETag prefix
+/// This ensures consistent ETag comparison across different WebDAV servers
+///
+/// Examples:
+/// - `"abc123"` → `abc123`
+/// - `W/"abc123"` → `abc123`
+/// - `abc123` → `abc123`
+fn normalize_etag(etag: &str) -> String {
+    etag.trim()
+        .trim_start_matches("W/")
+        .trim_matches('"')
+        .to_string()
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -231,7 +245,7 @@ mod tests {
         assert_eq!(file.name, "test.pdf");
         assert_eq!(file.size, 1024);
         assert_eq!(file.mime_type, "application/pdf");
-        assert_eq!(file.etag, "\"abc123\"");
+        assert_eq!(file.etag, "abc123");
         assert!(!file.is_directory);
     }
 
@@ -300,6 +314,7 @@ mod tests {
         assert_eq!(file.name, "report.pdf");
         assert_eq!(file.path, "/remote.php/dav/files/admin/Documents/report.pdf");
         assert_eq!(file.size, 2048000);
+        assert_eq!(file.etag, "pdf123"); // ETag should be normalized (quotes removed)
         assert!(file.last_modified.is_some());
     }
 
@@ -337,4 +352,17 @@ mod tests {
         let files = parse_propfind_response(xml).unwrap();
         assert_eq!(files.len(), 0);
     }
+
+    #[test]
+    fn test_normalize_etag() {
+        // Test various ETag formats that WebDAV servers might return
+        assert_eq!(normalize_etag("abc123"), "abc123");
+        assert_eq!(normalize_etag("\"abc123\""), "abc123");
+        assert_eq!(normalize_etag("W/\"abc123\""), "abc123");
+        assert_eq!(normalize_etag(" \"abc123\" "), "abc123");
+        assert_eq!(normalize_etag("W/\"abc-123-def\""), "abc-123-def");
+        assert_eq!(normalize_etag(""), "");
+        assert_eq!(normalize_etag("\"\""), "");
+        assert_eq!(normalize_etag("W/\"\""), "");
+    }
 }
\ No newline at end of file
diff --git a/tests/webdav_enhanced_unit_tests.rs b/tests/webdav_enhanced_unit_tests.rs
index 112c0f6..d229205 100644
--- a/tests/webdav_enhanced_unit_tests.rs
+++ b/tests/webdav_enhanced_unit_tests.rs
@@ -201,21 +201,21 @@ fn test_webdav_response_parsing_comprehensive() {
     let pdf_file = files.iter().find(|f| f.name == "report.pdf").unwrap();
     assert_eq!(pdf_file.size, 2048000);
     assert_eq!(pdf_file.mime_type, "application/pdf");
-    assert_eq!(pdf_file.etag, "\"pdf123\"");
+    assert_eq!(pdf_file.etag, "pdf123"); // ETag should be normalized (quotes removed)
     assert!(!pdf_file.is_directory);
 
     // Verify second file (photo.png)
     let png_file = files.iter().find(|f| f.name == "photo.png").unwrap();
     assert_eq!(png_file.size, 768000);
     assert_eq!(png_file.mime_type, "image/png");
-    assert_eq!(png_file.etag, "\"png456\"");
+    assert_eq!(png_file.etag, "png456"); // ETag should be normalized (quotes removed)
     assert!(!png_file.is_directory);
 
     // Verify third file (unsupported.docx)
     let docx_file = files.iter().find(|f| f.name == "unsupported.docx").unwrap();
     assert_eq!(docx_file.size, 102400);
     assert_eq!(docx_file.mime_type, "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
-    assert_eq!(docx_file.etag, "\"docx789\"");
+    assert_eq!(docx_file.etag, "docx789"); // ETag should be normalized (quotes removed)
     assert!(!docx_file.is_directory);
 }
 
diff --git a/tests/webdav_sync_tests.rs b/tests/webdav_sync_tests.rs
index 24fae0c..ed343ab 100644
--- a/tests/webdav_sync_tests.rs
+++ b/tests/webdav_sync_tests.rs
@@ -182,6 +182,52 @@ fn test_etag_change_detection() {
     assert_eq!(normalized_etag, old_etag);
 }
 
+#[test]
+fn test_etag_normalization() {
+    // Test various ETag formats that WebDAV servers might return
+    let test_cases = vec![
+        ("abc123", "abc123"), // Plain ETag
+        ("\"abc123\"", "abc123"), // Quoted ETag
+        ("W/\"abc123\"", "abc123"), // Weak ETag
+        ("\"abc-123-def\"", "abc-123-def"), // Quoted with dashes
+        ("W/\"abc-123-def\"", "abc-123-def"), // Weak ETag with dashes
+    ];
+
+    for (input, expected) in test_cases {
+        let normalized = input
+            .trim_start_matches("W/")
+            .trim_matches('"');
+        assert_eq!(normalized, expected,
+            "Failed to normalize ETag: {} -> expected {}", input, expected);
+    }
+}
+
+#[test]
+fn test_etag_comparison_fixes_duplicate_downloads() {
+    // This test demonstrates how ETag normalization prevents unnecessary downloads
+
+    // Simulate a WebDAV server that returns quoted ETags
+    let server_etag = "\"file-hash-123\"";
+
+    // Before fix: stored ETag would have quotes, server ETag would have quotes
+    // After fix: both should be normalized (no quotes)
+    let normalized_server = server_etag.trim_start_matches("W/").trim_matches('"');
+    let normalized_stored = "file-hash-123"; // What would be stored after normalization
+
+    // These should match after normalization, preventing redownload
+    assert_eq!(normalized_server, normalized_stored,
+        "Normalized ETags should match to prevent unnecessary redownloads");
+
+    // Demonstrate the issue that was fixed
+    let old_behavior_would_mismatch = server_etag != normalized_stored;
+    assert!(old_behavior_would_mismatch,
+        "Before fix: quoted vs unquoted ETags would cause unnecessary downloads");
+
+    let new_behavior_matches = normalized_server == normalized_stored;
+    assert!(new_behavior_matches,
+        "After fix: normalized ETags match, preventing unnecessary downloads");
+}
+
 #[test]
 fn test_path_normalization() {
     let test_paths = vec![