Merge pull request #37 from readur/feat/webdav/ignore-duplicates

feat(server): normalize etags from webdav to properly check for file …
This commit is contained in:
Jon Fuller 2025-06-23 13:20:49 -07:00 committed by GitHub
commit 1661138d29
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 90 additions and 5 deletions

View File

@ -0,0 +1,11 @@
-- Normalize existing ETags in webdav_files table to match new normalization format
-- This migration ensures that existing ETag values are normalized to prevent
-- unnecessary re-downloads of unchanged files after the ETag normalization fix
--
-- Update ETags to remove the weak-validator prefix (W/) and surrounding quotes.
-- The prefix is stripped with an anchored regular expression rather than
-- TRIM(LEADING 'W/' FROM ...): TRIM treats its first argument as a *set of
-- characters*, so it would also remove leading 'W' or '/' characters that are
-- part of the tag itself (e.g. W/Wabc would become abc instead of Wabc).
-- The WHERE clause selects any value carrying a leading quote, a trailing quote,
-- or a W/ prefix, so every row the expression would change is visited.
UPDATE webdav_files
SET etag = TRIM(BOTH '"' FROM regexp_replace(etag, '^(W/)+', ''))
WHERE etag LIKE '"%' OR etag LIKE '%"' OR etag LIKE 'W/%';
-- Add a comment to document this normalization
COMMENT ON COLUMN webdav_files.etag IS 'Normalized ETag without quotes or W/ prefix (since migration 20250623000001)';

View File

@ -83,7 +83,7 @@ pub fn parse_propfind_response(xml_text: &str) -> Result<Vec<FileInfo>> {
resp.content_type = Some(text.trim().to_string());
}
"getetag" => {
resp.etag = Some(text.trim().to_string());
resp.etag = Some(normalize_etag(&text));
}
"status" if in_propstat => {
// Check if status is 200 OK
@ -200,6 +200,20 @@ fn parse_http_date(date_str: &str) -> Option<DateTime<Utc>> {
})
}
/// Reduce an ETag to its bare opaque value so that tags reported by different
/// WebDAV servers can be compared directly.
///
/// Normalization happens in three steps, in this order:
/// 1. surrounding whitespace is trimmed,
/// 2. every leading `W/` (weak validator marker) is removed,
/// 3. every leading and trailing `"` is removed.
///
/// Examples:
/// - `"abc123"` → `abc123`
/// - `W/"abc123"` → `abc123`
/// - `abc123` → `abc123`
fn normalize_etag(etag: &str) -> String {
    let without_padding = etag.trim();
    let without_weak_marker = without_padding.trim_start_matches("W/");
    let bare_value = without_weak_marker.trim_matches('"');
    String::from(bare_value)
}
#[cfg(test)]
mod tests {
use super::*;
@ -231,7 +245,7 @@ mod tests {
assert_eq!(file.name, "test.pdf");
assert_eq!(file.size, 1024);
assert_eq!(file.mime_type, "application/pdf");
assert_eq!(file.etag, "\"abc123\"");
assert_eq!(file.etag, "abc123");
assert!(!file.is_directory);
}
@ -300,6 +314,7 @@ mod tests {
assert_eq!(file.name, "report.pdf");
assert_eq!(file.path, "/remote.php/dav/files/admin/Documents/report.pdf");
assert_eq!(file.size, 2048000);
assert_eq!(file.etag, "pdf123"); // ETag should be normalized (quotes removed)
assert!(file.last_modified.is_some());
}
@ -337,4 +352,17 @@ mod tests {
let files = parse_propfind_response(xml).unwrap();
assert_eq!(files.len(), 0);
}
#[test]
fn test_normalize_etag() {
    // Each pair is (raw ETag as a WebDAV server might return it, expected normalized value).
    let cases = [
        ("abc123", "abc123"),
        ("\"abc123\"", "abc123"),
        ("W/\"abc123\"", "abc123"),
        (" \"abc123\" ", "abc123"),
        ("W/\"abc-123-def\"", "abc-123-def"),
        ("", ""),
        ("\"\"", ""),
        ("W/\"\"", ""),
    ];

    for (raw, expected) in cases {
        assert_eq!(normalize_etag(raw), expected);
    }
}
}

View File

@ -201,21 +201,21 @@ fn test_webdav_response_parsing_comprehensive() {
let pdf_file = files.iter().find(|f| f.name == "report.pdf").unwrap();
assert_eq!(pdf_file.size, 2048000);
assert_eq!(pdf_file.mime_type, "application/pdf");
assert_eq!(pdf_file.etag, "\"pdf123\"");
assert_eq!(pdf_file.etag, "pdf123"); // ETag should be normalized (quotes removed)
assert!(!pdf_file.is_directory);
// Verify second file (photo.png)
let png_file = files.iter().find(|f| f.name == "photo.png").unwrap();
assert_eq!(png_file.size, 768000);
assert_eq!(png_file.mime_type, "image/png");
assert_eq!(png_file.etag, "\"png456\"");
assert_eq!(png_file.etag, "png456"); // ETag should be normalized (quotes removed)
assert!(!png_file.is_directory);
// Verify third file (unsupported.docx)
let docx_file = files.iter().find(|f| f.name == "unsupported.docx").unwrap();
assert_eq!(docx_file.size, 102400);
assert_eq!(docx_file.mime_type, "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
assert_eq!(docx_file.etag, "\"docx789\"");
assert_eq!(docx_file.etag, "docx789"); // ETag should be normalized (quotes removed)
assert!(!docx_file.is_directory);
}

View File

@ -182,6 +182,52 @@ fn test_etag_change_detection() {
assert_eq!(normalized_etag, old_etag);
}
#[test]
fn test_etag_normalization() {
    // Pairs of (ETag as a WebDAV server might return it, value expected after normalization).
    let cases: [(&str, &str); 5] = [
        // Plain ETag
        ("abc123", "abc123"),
        // Quoted ETag
        ("\"abc123\"", "abc123"),
        // Weak ETag
        ("W/\"abc123\"", "abc123"),
        // Quoted with dashes
        ("\"abc-123-def\"", "abc-123-def"),
        // Weak ETag with dashes
        ("W/\"abc-123-def\"", "abc-123-def"),
    ];

    cases.iter().for_each(|&(raw, want)| {
        // Strip the weak-validator marker first, then the surrounding quotes.
        let without_weak_marker = raw.trim_start_matches("W/");
        let got = without_weak_marker.trim_matches('"');
        assert_eq!(got, want,
            "Failed to normalize ETag: {} -> expected {}", raw, want);
    });
}
#[test]
fn test_etag_comparison_fixes_duplicate_downloads() {
    // This test demonstrates how ETag normalization prevents unnecessary downloads.
    // Simulate a WebDAV server that returns quoted ETags.
    let server_etag = "\"file-hash-123\"";

    // Normalize the server value: drop any weak-validator marker, then the quotes.
    let normalized_server = server_etag.trim_start_matches("W/").trim_matches('"');
    // What would be stored after normalization.
    let normalized_stored = "file-hash-123";

    // These should match after normalization, preventing redownload.
    assert_eq!(normalized_server, normalized_stored,
        "Normalized ETags should match to prevent unnecessary redownloads");

    // Demonstrate the issue that was fixed: the raw, quoted server value differs
    // from the unquoted stored value. `assert_ne!` is used directly (instead of a
    // parenthesized boolean temporary) so a failure prints both operands.
    assert_ne!(server_etag, normalized_stored,
        "Before fix: quoted vs unquoted ETags would cause unnecessary downloads");

    // After the fix the comparison is made on normalized values on both sides.
    assert_eq!(normalized_server, normalized_stored,
        "After fix: normalized ETags match, preventing unnecessary downloads");
}
#[test]
fn test_path_normalization() {
let test_paths = vec![