use readur::services::webdav::{WebDAVService, WebDAVConfig};
use readur::models::FileIngestionInfo;
use tokio;
use chrono::Utc;

// Helper function to create test WebDAV service
fn create_test_webdav_service() -> WebDAVService {
    let config = WebDAVConfig {
        server_url: "https://test.example.com".to_string(),
        username: "testuser".to_string(),
        password: "testpass".to_string(),
        watch_folders: vec!["/Documents".to_string()],
        file_extensions: vec!["pdf".to_string(), "png".to_string()],
        timeout_seconds: 30,
        server_type: Some("nextcloud".to_string()),
    };

    WebDAVService::new(config).unwrap()
}

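// The tests below construct many near-identical `FileIngestionInfo` literals.
// A small builder along these lines could cut that repetition; it is a sketch
// only (not part of the readur API), and its name and defaults are assumptions
// based on the field set used in this file.
// Usage: make_entry("/Documents/Empty1", "Empty1", "empty1-etag", true, 0)
#[allow(dead_code)]
fn make_entry(path: &str, name: &str, etag: &str, is_directory: bool, size: i64) -> FileIngestionInfo {
    FileIngestionInfo {
        relative_path: path.to_string(),
        full_path: path.to_string(),
        #[allow(deprecated)]
        path: path.to_string(),
        name: name.to_string(),
        size,
        // Directories carry no MIME type in these fixtures; files default to PDF.
        mime_type: if is_directory { String::new() } else { "application/pdf".to_string() },
        last_modified: Some(Utc::now()),
        etag: etag.to_string(),
        is_directory,
        created_at: Some(Utc::now()),
        permissions: Some(if is_directory { 755 } else { 644 }),
        owner: Some("admin".to_string()),
        group: Some("admin".to_string()),
        metadata: None,
    }
}
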
#[tokio::test]
async fn test_empty_directory_tracking() {
    // The service is not needed for this test's inline logic; the leading
    // underscore silences the unused-variable warning.
    let _service = create_test_webdav_service();

    // Test completely empty directory
    let empty_files: Vec<FileIngestionInfo> = vec![];

    // Test the directory extraction logic that happens in track_subdirectories_recursively
    let mut all_directories = std::collections::BTreeSet::new();

    for file in &empty_files {
        if file.is_directory {
            all_directories.insert(file.path.clone());
        } else {
            let mut path_parts: Vec<&str> = file.path.split('/').collect();
            path_parts.pop();

            let mut current_path = String::new();
            for part in path_parts {
                if !part.is_empty() {
                    if !current_path.is_empty() {
                        current_path.push('/');
                    }
                    current_path.push_str(part);
                    all_directories.insert(current_path.clone());
                }
            }
        }
    }

    assert!(all_directories.is_empty(), "Empty file list should result in no directories");
}

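// The ancestor-extraction loop above reappears verbatim in several tests
// below. A shared helper with the same behavior (sketched here; the name is
// hypothetical) would keep those copies in sync. Note that, like the inline
// loops, it yields prefixes without the leading slash ("Documents",
// "Documents/L1", ...), which is why extracted prefixes never collide with
// the slash-prefixed directory entries inserted directly.
#[allow(dead_code)]
fn extract_parent_directories(file_path: &str) -> Vec<String> {
    let mut parts: Vec<&str> = file_path.split('/').collect();
    parts.pop(); // Drop the filename component

    let mut ancestors = Vec::new();
    let mut current = String::new();
    for part in parts {
        if !part.is_empty() {
            if !current.is_empty() {
                current.push('/');
            }
            current.push_str(part);
            ancestors.push(current.clone());
        }
    }
    ancestors
}
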
#[tokio::test]
async fn test_directory_only_structure() {
    let service = create_test_webdav_service();

    // Test structure with only directories, no files
    let directory_only_files = vec![
        FileIngestionInfo {
            relative_path: "/Documents".to_string(),
            full_path: "/Documents".to_string(),
            #[allow(deprecated)]
            path: "/Documents".to_string(),
            name: "Documents".to_string(),
            size: 0,
            mime_type: "".to_string(),
            last_modified: Some(Utc::now()),
            etag: "docs-etag".to_string(),
            is_directory: true,
            created_at: Some(Utc::now()),
            permissions: Some(755),
            owner: Some("admin".to_string()),
            group: Some("admin".to_string()),
            metadata: None,
        },
        FileIngestionInfo {
            relative_path: "/Documents/Empty1".to_string(),
            full_path: "/Documents/Empty1".to_string(),
            #[allow(deprecated)]
            path: "/Documents/Empty1".to_string(),
            name: "Empty1".to_string(),
            size: 0,
            mime_type: "".to_string(),
            last_modified: Some(Utc::now()),
            etag: "empty1-etag".to_string(),
            is_directory: true,
            created_at: Some(Utc::now()),
            permissions: Some(755),
            owner: Some("admin".to_string()),
            group: Some("admin".to_string()),
            metadata: None,
        },
        FileIngestionInfo {
            relative_path: "/Documents/Empty2".to_string(),
            full_path: "/Documents/Empty2".to_string(),
            #[allow(deprecated)]
            path: "/Documents/Empty2".to_string(),
            name: "Empty2".to_string(),
            size: 0,
            mime_type: "".to_string(),
            last_modified: Some(Utc::now()),
            etag: "empty2-etag".to_string(),
            is_directory: true,
            created_at: Some(Utc::now()),
            permissions: Some(755),
            owner: Some("admin".to_string()),
            group: Some("admin".to_string()),
            metadata: None,
        },
    ];

    // Test file counting for empty directories
    let root_files: Vec<_> = directory_only_files.iter()
        .filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/Documents"))
        .collect();
    assert_eq!(root_files.len(), 0, "Root directory should have no files");

    let empty1_files: Vec<_> = directory_only_files.iter()
        .filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/Documents/Empty1"))
        .collect();
    assert_eq!(empty1_files.len(), 0, "Empty1 directory should have no files");

    // Test subdirectory counting
    let root_subdirs: Vec<_> = directory_only_files.iter()
        .filter(|f| f.is_directory && service.is_direct_child(&f.path, "/Documents"))
        .collect();
    assert_eq!(root_subdirs.len(), 2, "Root should have 2 subdirectories");

    // Test size calculation for empty directories
    let root_size: i64 = directory_only_files.iter()
        .filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/Documents"))
        .map(|f| f.size)
        .sum();
    assert_eq!(root_size, 0, "Empty directory should have zero total size");
}

#[tokio::test]
async fn test_very_deep_nesting() {
    let service = create_test_webdav_service();

    // Create a very deeply nested structure (10 levels deep)
    let deep_path = "/Documents/L1/L2/L3/L4/L5/L6/L7/L8/L9/L10";
    let file_path = format!("{}/deep-file.pdf", deep_path);

    let deep_files = vec![
        // Root of the watched tree
        FileIngestionInfo {
            relative_path: "/Documents".to_string(),
            full_path: "/Documents".to_string(),
            #[allow(deprecated)]
            path: "/Documents".to_string(),
            name: "Documents".to_string(),
            size: 0,
            mime_type: "".to_string(),
            last_modified: Some(Utc::now()),
            etag: "docs-etag".to_string(),
            is_directory: true,
            created_at: Some(Utc::now()),
            permissions: Some(755),
            owner: Some("admin".to_string()),
            group: Some("admin".to_string()),
            metadata: None,
        },
        // Intermediate directories L1-L3 are listed explicitly; L4-L9 are
        // deliberately omitted and only recovered from the file path below
        FileIngestionInfo {
            relative_path: "/Documents/L1".to_string(),
            full_path: "/Documents/L1".to_string(),
            #[allow(deprecated)]
            path: "/Documents/L1".to_string(),
            name: "L1".to_string(),
            size: 0,
            mime_type: "".to_string(),
            last_modified: Some(Utc::now()),
            etag: "l1-etag".to_string(),
            is_directory: true,
            created_at: Some(Utc::now()),
            permissions: Some(755),
            owner: Some("admin".to_string()),
            group: Some("admin".to_string()),
            metadata: None,
        },
        FileIngestionInfo {
            relative_path: "/Documents/L1/L2".to_string(),
            full_path: "/Documents/L1/L2".to_string(),
            #[allow(deprecated)]
            path: "/Documents/L1/L2".to_string(),
            name: "L2".to_string(),
            size: 0,
            mime_type: "".to_string(),
            last_modified: Some(Utc::now()),
            etag: "l2-etag".to_string(),
            is_directory: true,
            created_at: Some(Utc::now()),
            permissions: Some(755),
            owner: Some("admin".to_string()),
            group: Some("admin".to_string()),
            metadata: None,
        },
        FileIngestionInfo {
            relative_path: "/Documents/L1/L2/L3".to_string(),
            full_path: "/Documents/L1/L2/L3".to_string(),
            #[allow(deprecated)]
            path: "/Documents/L1/L2/L3".to_string(),
            name: "L3".to_string(),
            size: 0,
            mime_type: "".to_string(),
            last_modified: Some(Utc::now()),
            etag: "l3-etag".to_string(),
            is_directory: true,
            created_at: Some(Utc::now()),
            permissions: Some(755),
            owner: Some("admin".to_string()),
            group: Some("admin".to_string()),
            metadata: None,
        },
        FileIngestionInfo {
            relative_path: deep_path.to_string(),
            full_path: deep_path.to_string(),
            #[allow(deprecated)]
            path: deep_path.to_string(),
            name: "L10".to_string(),
            size: 0,
            mime_type: "".to_string(),
            last_modified: Some(Utc::now()),
            etag: "l10-etag".to_string(),
            is_directory: true,
            created_at: Some(Utc::now()),
            permissions: Some(755),
            owner: Some("admin".to_string()),
            group: Some("admin".to_string()),
            metadata: None,
        },
        // File at the deepest level
        FileIngestionInfo {
            relative_path: file_path.clone(),
            full_path: file_path.clone(),
            #[allow(deprecated)]
            path: file_path.clone(),
            name: "deep-file.pdf".to_string(),
            size: 1024000,
            mime_type: "application/pdf".to_string(),
            last_modified: Some(Utc::now()),
            etag: "deep-file-etag".to_string(),
            is_directory: false,
            created_at: Some(Utc::now()),
            permissions: Some(644),
            owner: Some("admin".to_string()),
            group: Some("admin".to_string()),
            metadata: None,
        },
    ];

    // Test is_direct_child for deep paths
    assert!(service.is_direct_child(&file_path, deep_path), "File should be direct child of deepest directory");
    assert!(!service.is_direct_child(&file_path, "/Documents"), "File should not be direct child of root");
    assert!(!service.is_direct_child(&file_path, "/Documents/L1"), "File should not be direct child of L1");

    // Test directory extraction from deep file path
    let mut all_directories = std::collections::BTreeSet::new();

    for file in &deep_files {
        if file.is_directory {
            all_directories.insert(file.path.clone());
        } else {
            let mut path_parts: Vec<&str> = file.path.split('/').collect();
            path_parts.pop(); // Remove filename

            let mut current_path = String::new();
            for part in path_parts {
                if !part.is_empty() {
                    if !current_path.is_empty() {
                        current_path.push('/');
                    }
                    current_path.push_str(part);
                    all_directories.insert(current_path.clone());
                }
            }
        }
    }

    // Should extract all intermediate directories
    assert!(all_directories.contains("/Documents"));
    assert!(all_directories.contains("/Documents/L1"));
    assert!(all_directories.contains("/Documents/L1/L2"));
    assert!(all_directories.contains(deep_path));
    // 5 slash-prefixed directory entries plus 11 slash-less prefixes extracted
    // from the file path give 16 entries in total, so the bound holds
    assert!(all_directories.len() >= 11, "Should track all intermediate directories");
}

#[tokio::test]
async fn test_special_characters_in_paths() {
    let service = create_test_webdav_service();

    // Test paths with special characters, spaces, unicode
    let special_files = vec![
        FileIngestionInfo {
            relative_path: "/Documents/Folder with spaces".to_string(),
            full_path: "/Documents/Folder with spaces".to_string(),
            #[allow(deprecated)]
            path: "/Documents/Folder with spaces".to_string(),
            name: "Folder with spaces".to_string(),
            size: 0,
            mime_type: "".to_string(),
            last_modified: Some(Utc::now()),
            etag: "spaces-etag".to_string(),
            is_directory: true,
            created_at: Some(Utc::now()),
            permissions: Some(755),
            owner: Some("admin".to_string()),
            group: Some("admin".to_string()),
            metadata: None,
        },
        FileIngestionInfo {
            relative_path: "/Documents/Folder-with-dashes".to_string(),
            full_path: "/Documents/Folder-with-dashes".to_string(),
            #[allow(deprecated)]
            path: "/Documents/Folder-with-dashes".to_string(),
            name: "Folder-with-dashes".to_string(),
            size: 0,
            mime_type: "".to_string(),
            last_modified: Some(Utc::now()),
            etag: "dashes-etag".to_string(),
            is_directory: true,
            created_at: Some(Utc::now()),
            permissions: Some(755),
            owner: Some("admin".to_string()),
            group: Some("admin".to_string()),
            metadata: None,
        },
        FileIngestionInfo {
            relative_path: "/Documents/Документы".to_string(), // Cyrillic
            full_path: "/Documents/Документы".to_string(),
            #[allow(deprecated)]
            path: "/Documents/Документы".to_string(),
            name: "Документы".to_string(),
            size: 0,
            mime_type: "".to_string(),
            last_modified: Some(Utc::now()),
            etag: "cyrillic-etag".to_string(),
            is_directory: true,
            created_at: Some(Utc::now()),
            permissions: Some(755),
            owner: Some("admin".to_string()),
            group: Some("admin".to_string()),
            metadata: None,
        },
        FileIngestionInfo {
            relative_path: "/Documents/Folder with spaces/file with spaces.pdf".to_string(),
            full_path: "/Documents/Folder with spaces/file with spaces.pdf".to_string(),
            #[allow(deprecated)]
            path: "/Documents/Folder with spaces/file with spaces.pdf".to_string(),
            name: "file with spaces.pdf".to_string(),
            size: 1024000,
            mime_type: "application/pdf".to_string(),
            last_modified: Some(Utc::now()),
            etag: "space-file-etag".to_string(),
            is_directory: false,
            created_at: Some(Utc::now()),
            permissions: Some(644),
            owner: Some("admin".to_string()),
            group: Some("admin".to_string()),
            metadata: None,
        },
    ];

    // Test is_direct_child with special characters
    assert!(service.is_direct_child("/Documents/Folder with spaces/file with spaces.pdf", "/Documents/Folder with spaces"));
    assert!(service.is_direct_child("/Documents/Folder with spaces", "/Documents"));
    assert!(service.is_direct_child("/Documents/Документы", "/Documents"));

    // Test file counting with special characters
    let spaces_folder_files: Vec<_> = special_files.iter()
        .filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/Documents/Folder with spaces"))
        .collect();
    assert_eq!(spaces_folder_files.len(), 1);
    assert_eq!(spaces_folder_files[0].name, "file with spaces.pdf");
}

#[tokio::test]
async fn test_edge_case_path_patterns() {
    let service = create_test_webdav_service();

    // Test various edge case paths
    let edge_case_tests = vec![
        // (child_path, parent_path, expected_result)
        ("/Documents/file.pdf", "/Documents", true),
        ("/Documents/", "/Documents", false), // Same path
        ("/Documents", "/Documents", false), // Same path
        ("/Documents/subfolder/", "/Documents", true), // Trailing slash
        ("/Documents/subfolder", "/Documents/", true), // Parent with trailing slash
        ("/Documenting/file.pdf", "/Documents", false), // Prefix but not parent
        ("/Documents/file.pdf", "/Doc", false), // Partial parent match
        ("", "/Documents", false), // Empty child
        ("/Documents/file.pdf", "", false), // Not direct child of root (nested in Documents)
        ("/file.pdf", "", true), // Root level file
        ("/Documents/file.pdf", "/", false), // Not direct child of root (nested in Documents)
        ("/file.pdf", "/", true), // Root level file with slash parent
        ("//Documents//file.pdf", "/Documents", false), // Double slashes (malformed)
        ("/Documents/./file.pdf", "/Documents", false), // Dot notation (should be normalized first)
        ("/Documents/../file.pdf", "", false), // Parent notation (should be normalized first)
    ];

    for (child, parent, expected) in edge_case_tests {
        let result = service.is_direct_child(child, parent);
        assert_eq!(
            result, expected,
            "is_direct_child('{}', '{}') expected {}, got {}",
            child, parent, expected, result
        );
    }
}

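// For reference, the table above pins down is_direct_child's contract. The
// sketch below satisfies every row in that table; it is illustrative only,
// not the service's actual implementation, and assumes pre-normalized
// absolute paths (rows with "//", ".", ".." are rejected rather than resolved).
#[allow(dead_code)]
fn is_direct_child_sketch(child: &str, parent: &str) -> bool {
    let child = child.trim_end_matches('/');
    let parent = parent.trim_end_matches('/'); // "" and "/" both denote the root
    if child.is_empty() || !child.starts_with('/') {
        return false;
    }
    match child.strip_prefix(parent) {
        // The remainder must be exactly one clean path segment.
        Some(rest) => {
            let segment = match rest.strip_prefix('/') {
                Some(s) => s,
                None => return false, // Same path, or parent matched mid-segment
            };
            !segment.is_empty() && !segment.contains('/') && segment != "." && segment != ".."
        }
        None => false,
    }
}
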
#[tokio::test]
async fn test_etag_normalization_edge_cases() {
    use readur::webdav_xml_parser::{normalize_etag, ParsedETag, ETagFormat};

    // Test comprehensive ETag normalization with our custom parser
    let etag_test_cases = vec![
        // Standard formats
        (r#""simple-etag""#, "simple-etag", ETagFormat::Simple),
        (r#"W/"weak-etag""#, "weak-etag", ETagFormat::Simple),
        (r#"no-quotes"#, "no-quotes", ETagFormat::Simple),

        // Microsoft Exchange/Outlook ETags
        (r#""1*SPReplicationID{GUID}*1*#ReplDigest{digest}""#, r#"1*SPReplicationID{GUID}*1*#ReplDigest{digest}"#, ETagFormat::Complex),
        (r#""CQAAABYAAABi2uhEGy3pQaAw2GZp2vhOAAAP1234""#, "CQAAABYAAABi2uhEGy3pQaAw2GZp2vhOAAAP1234", ETagFormat::Complex),

        // Apache/nginx server ETags (hex hashes)
        (r#""5f9c2a3b-1a2b""#, "5f9c2a3b-1a2b", ETagFormat::Simple),
        (r#""deadbeef-cafe-babe""#, "deadbeef-cafe-babe", ETagFormat::Simple),
        (r#""0x7fffffff""#, "0x7fffffff", ETagFormat::Simple),

        // NextCloud/ownCloud ETags (often UUIDs or complex strings)
        (r#""8f7e3d2c1b0a9e8d7c6b5a49382716e5""#, "8f7e3d2c1b0a9e8d7c6b5a49382716e5", ETagFormat::Hash),
        (r#""mtime:1234567890size:1024""#, "mtime:1234567890size:1024", ETagFormat::Timestamp),
        (r#""59a8b0c7:1648483200:123456""#, "59a8b0c7:1648483200:123456", ETagFormat::Timestamp),

        // Google Drive ETags (base64-like)
        (r#""MTY0ODQ4MzIwMA==""#, "MTY0ODQ4MzIwMA==", ETagFormat::Encoded),
        (r#""BwKBCgMEBQYHCAkKCwwNDg8Q""#, "BwKBCgMEBQYHCAkKCwwNDg8Q", ETagFormat::Unknown),

        // AWS S3 ETags (MD5 hashes, sometimes with part info)
        (r#""d41d8cd98f00b204e9800998ecf8427e""#, "d41d8cd98f00b204e9800998ecf8427e", ETagFormat::Hash),
        (r#""098f6bcd4621d373cade4e832627b4f6-1""#, "098f6bcd4621d373cade4e832627b4f6-1", ETagFormat::Unknown),
        (r#""e1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6-128""#, "e1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6-128", ETagFormat::Unknown),

        // Dropbox ETags (custom format)
        (r#""rev:a1b2c3d4e5f6""#, "rev:a1b2c3d4e5f6", ETagFormat::Versioned),
        (r#""dbid:12345:67890""#, "dbid:12345:67890", ETagFormat::Unknown),

        // SharePoint ETags (complex Microsoft format)
        (r#""{BB31-4321-ABCD-EFGH-1234567890AB},4""#, "{BB31-4321-ABCD-EFGH-1234567890AB},4", ETagFormat::UUID),
        (r#""1*SPFileVersion{12345}*1*#ChangeKey{ABCD}""#, "1*SPFileVersion{12345}*1*#ChangeKey{ABCD}", ETagFormat::Complex),

        // Box.com ETags
        (r#""v12345678""#, "v12345678", ETagFormat::Versioned),
        (r#""etag_abc123def456""#, "etag_abc123def456", ETagFormat::Simple),

        // Weird whitespace and formatting
        (r#" " spaced-etag " "#, " spaced-etag ", ETagFormat::Simple),
        (r#"W/ "weak-with-spaces" "#, "weak-with-spaces", ETagFormat::Simple),

        // Special characters and escaping
        (r#""etag+with+plus+signs""#, "etag+with+plus+signs", ETagFormat::Unknown),
        (r#""etag&with&ampersands""#, "etag&with&ampersands", ETagFormat::Unknown),
        (r#""etag<with>brackets""#, "etag<with>brackets", ETagFormat::XMLLike),

        // Unicode and international characters
        (r#""unicode-ж-etag""#, "unicode-ж-etag", ETagFormat::Unknown),
        (r#""unicode-日本語-etag""#, "unicode-日本語-etag", ETagFormat::Unknown),
        (r#""unicode-🚀-emoji""#, "unicode-🚀-emoji", ETagFormat::Unknown),

        // Version-based ETags
        (r#""v1.2.3.4""#, "v1.2.3.4", ETagFormat::Versioned),
        (r#""revision-12345-branch-main""#, "revision-12345-branch-main", ETagFormat::Versioned),
        (r#""commit-sha256-abcdef1234567890""#, "commit-sha256-abcdef1234567890", ETagFormat::Versioned),

        // Timestamp-based ETags
        (r#""ts:1648483200""#, "ts:1648483200", ETagFormat::Timestamp),
        (r#""2024-01-15T10:30:00Z""#, "2024-01-15T10:30:00Z", ETagFormat::Timestamp),
        (r#""epoch-1648483200-nanos-123456789""#, "epoch-1648483200-nanos-123456789", ETagFormat::Timestamp),

        // Compressed/encoded ETags
        (r#""gzip:d41d8cd98f00b204e9800998ecf8427e""#, "gzip:d41d8cd98f00b204e9800998ecf8427e", ETagFormat::Encoded),
        (r#""base64:VGVzdCBjb250ZW50""#, "base64:VGVzdCBjb250ZW50", ETagFormat::Encoded),
        (r#""url-encoded:Hello%20World%21""#, "url-encoded:Hello%20World%21", ETagFormat::Encoded),

        // Security/cryptographic ETags
        (r#""2aae6c35c94fcfb415dbe95f408b9ce91ee846ed""#, "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed", ETagFormat::Hash),
        (r#""315f5bdb76d078c43b8ac0064e4a0164612b1fce77c869345bfc94c75894edd3""#, "315f5bdb76d078c43b8ac0064e4a0164612b1fce77c869345bfc94c75894edd3", ETagFormat::Hash),
        (r#""hmac-sha256:abcdef1234567890""#, "hmac-sha256:abcdef1234567890", ETagFormat::Unknown),

        // Mixed case and variations
        (r#"W/"Mixed-Case-ETAG""#, "Mixed-Case-ETAG", ETagFormat::Simple),
        (r#""UPPERCASE-ETAG""#, "UPPERCASE-ETAG", ETagFormat::Simple),
        (r#""lowercase-etag""#, "lowercase-etag", ETagFormat::Simple),
        (r#""CamelCaseEtag""#, "CamelCaseEtag", ETagFormat::Simple),

        // Numeric ETags
        (r#""12345""#, "12345", ETagFormat::Simple),
        (r#""1.23456789""#, "1.23456789", ETagFormat::Unknown),
        (r#""-42""#, "-42", ETagFormat::Unknown),
        (r#""0""#, "0", ETagFormat::Simple),

        // Path-like ETags (some servers include path info)
        (r#""/path/to/file.txt:v123""#, "/path/to/file.txt:v123", ETagFormat::PathBased),
        (r#""./relative/path/file.pdf""#, "./relative/path/file.pdf", ETagFormat::PathBased),
        (r#""file://localhost/tmp/test.doc""#, "file://localhost/tmp/test.doc", ETagFormat::PathBased),

        // JSON-like ETags (some APIs embed JSON)
        (r#""{"version":1,"modified":"2024-01-15"}""#, r#"{"version":1,"modified":"2024-01-15"}"#, ETagFormat::JSONLike),
        (r#""[1,2,3,4,5]""#, "[1,2,3,4,5]", ETagFormat::Unknown),

        // XML-like ETags
        (r#""<etag version=\"1\">abc123</etag>""#, r#"<etag version=\"1\">abc123</etag>"#, ETagFormat::XMLLike),

        // Query parameter style ETags
        (r#""?v=123&t=1648483200&u=admin""#, "?v=123&t=1648483200&u=admin", ETagFormat::Unknown),

        // Multiple weak indicators (malformed but seen in the wild)
        (r#"W/W/"double-weak""#, "double-weak", ETagFormat::Simple),
        (r#"w/"lowercase-weak""#, "lowercase-weak", ETagFormat::Simple),
    ];

    println!("Testing {} ETag cases with our comprehensive parser...", etag_test_cases.len());

    for (input_etag, expected_normalized, expected_format) in etag_test_cases {
        // Test direct normalization
        let normalized = normalize_etag(input_etag);
        assert_eq!(
            normalized, expected_normalized,
            "ETag normalization failed for input '{}': expected '{}', got '{}'",
            input_etag, expected_normalized, normalized
        );

        // Test full parsing with classification
        let parsed = ParsedETag::parse(input_etag);
        assert_eq!(
            parsed.normalized, expected_normalized,
            "ParsedETag normalization failed for input '{}': expected '{}', got '{}'",
            input_etag, expected_normalized, parsed.normalized
        );

        // Check if weak detection works
        let expected_weak = input_etag.trim().starts_with("W/") || input_etag.trim().starts_with("w/");
        assert_eq!(
            parsed.is_weak, expected_weak,
            "Weak ETag detection failed for input '{}': expected weak={}, got weak={}",
            input_etag, expected_weak, parsed.is_weak
        );

        // Verify format classification (allow some flexibility for complex cases)
        if parsed.format_type != expected_format {
            println!("Format classification differs for '{}': expected {:?}, got {:?} (this may be acceptable)",
                     input_etag, expected_format, parsed.format_type);
        }
    }

    println!("✅ All ETag normalization tests passed!");
}

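// The cases above pin down the normalization rules: trim outer whitespace,
// peel any repeated weak markers ("W/" or "w/", with optional whitespace
// after them), then strip one pair of surrounding quotes while preserving
// interior whitespace. The sketch below implements exactly those rules and is
// illustrative only, not the actual readur::webdav_xml_parser implementation.
#[allow(dead_code)]
fn normalize_etag_sketch(raw: &str) -> String {
    let mut s = raw.trim();
    // Peel repeated weak markers, e.g. `W/W/"double-weak"`.
    while s.starts_with("W/") || s.starts_with("w/") {
        s = s[2..].trim_start();
    }
    // Strip exactly one pair of surrounding double quotes, if present.
    if s.len() >= 2 && s.starts_with('"') && s.ends_with('"') {
        s[1..s.len() - 1].to_string()
    } else {
        s.to_string()
    }
}
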
#[tokio::test]
async fn test_etag_parser_equivalence_and_comparison() {
    use readur::webdav_xml_parser::{ParsedETag, ETagFormat};

    println!("Testing ETag parser equivalence detection...");

    // Test ETag equivalence (ignoring weak/strong differences)
    let test_cases = vec![
        // Same ETag in different formats should be equivalent
        (r#""abc123""#, r#"W/"abc123""#, true),
        (r#"W/"weak-etag""#, r#""weak-etag""#, true),
        (r#"w/"lowercase-weak""#, r#"W/"lowercase-weak""#, true),

        // Different ETags should not be equivalent
        (r#""abc123""#, r#""def456""#, false),
        (r#"W/"weak1""#, r#"W/"weak2""#, false),

        // Complex ETags should work
        (r#""8f7e3d2c1b0a9e8d7c6b5a49382716e5""#, r#"W/"8f7e3d2c1b0a9e8d7c6b5a49382716e5""#, true),
        (r#""mtime:1234567890size:1024""#, r#""mtime:1234567890size:1024""#, true),
    ];

    for (etag1, etag2, should_be_equivalent) in test_cases {
        let parsed1 = ParsedETag::parse(etag1);
        let parsed2 = ParsedETag::parse(etag2);

        let is_equivalent = parsed1.is_equivalent(&parsed2);
        assert_eq!(
            is_equivalent, should_be_equivalent,
            "ETag equivalence test failed for '{}' vs '{}': expected {}, got {}",
            etag1, etag2, should_be_equivalent, is_equivalent
        );

        println!("  ✓ '{}' {} '{}' = {}", etag1,
                 if is_equivalent { "≡" } else { "≢" }, etag2, is_equivalent);
    }

    // Test format classification accuracy
    let format_tests = vec![
        (r#""d41d8cd98f00b204e9800998ecf8427e""#, ETagFormat::Hash), // MD5
        (r#""2aae6c35c94fcfb415dbe95f408b9ce91ee846ed""#, ETagFormat::Hash), // SHA1
        (r#""315f5bdb76d078c43b8ac0064e4a0164612b1fce77c869345bfc94c75894edd3""#, ETagFormat::Hash), // SHA256
        (r#""BB31-4321-ABCD-EFGH-1234567890AB""#, ETagFormat::UUID),
        (r#""1*SPReplicationID{GUID}*1*#ReplDigest{digest}""#, ETagFormat::Complex),
        (r#""rev:a1b2c3d4e5f6""#, ETagFormat::Versioned),
        (r#""mtime:1648483200size:1024""#, ETagFormat::Timestamp),
        (r#""MTY0ODQ4MzIwMA==""#, ETagFormat::Encoded),
        (r#""/path/to/file.txt:v123""#, ETagFormat::PathBased),
        (r#""{"version":1}""#, ETagFormat::JSONLike),
        (r#""<etag>abc</etag>""#, ETagFormat::XMLLike),
        (r#""simple-etag""#, ETagFormat::Simple),
    ];

    println!("\nTesting ETag format classification...");
    for (etag, expected_format) in format_tests {
        let parsed = ParsedETag::parse(etag);
        if parsed.format_type == expected_format {
            println!("  ✓ '{}' correctly classified as {:?}", etag, expected_format);
        } else {
            println!("  ⚠ '{}' classified as {:?}, expected {:?}", etag, parsed.format_type, expected_format);
        }
    }

    // Test comparison string generation (for fuzzy matching)
    let comparison_tests = vec![
        (r#""etag-with-\"internal\"-quotes""#, "etag-with-internal-quotes"),
        (r#"" spaced-etag ""#, "spaced-etag"),
        (r#"W/"weak-etag""#, "weak-etag"),
    ];

    println!("\nTesting ETag comparison string generation...");
    for (etag, expected_comparison) in comparison_tests {
        let parsed = ParsedETag::parse(etag);
        let comparison = parsed.comparison_string();
        assert_eq!(
            comparison, expected_comparison,
            "Comparison string failed for '{}': expected '{}', got '{}'",
            etag, expected_comparison, comparison
        );
        println!("  ✓ '{}' → '{}'", etag, comparison);
    }

    println!("✅ All ETag parser advanced tests passed!");
}

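// The equivalence rows above match RFC 7232's weak comparison: ignore the
// weak indicator and compare the opaque values character by character. Over
// the normalization sketch defined earlier, that reduces to a one-liner
// (again illustrative, not the parser's actual is_equivalent method):
#[allow(dead_code)]
fn etags_equivalent_sketch(a: &str, b: &str) -> bool {
    normalize_etag_sketch(a) == normalize_etag_sketch(b)
}
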
#[tokio::test]
async fn test_malformed_xml_responses() {
    // The parser is exercised directly below, so the service itself is unused;
    // the leading underscore silences the warning.
    let _service = create_test_webdav_service();

    // Use the actual XML parser function instead of a non-existent service method
    use readur::webdav_xml_parser::parse_propfind_response;

    // Test various malformed XML responses
    let malformed_xml_cases = vec![
        // Empty response
        "",
        // Not XML
        "not xml at all",
        // Incomplete XML
        "<?xml version=\"1.0\"?><d:multistatus",
        // Missing ETag
        r#"<?xml version="1.0"?>
        <d:multistatus xmlns:d="DAV:">
            <d:response>
                <d:href>/remote.php/dav/files/admin/Documents/</d:href>
                <d:propstat>
                    <d:prop>
                        <d:displayname>Documents</d:displayname>
                    </d:prop>
                    <d:status>HTTP/1.1 200 OK</d:status>
                </d:propstat>
            </d:response>
        </d:multistatus>"#,
        // Empty ETag
        r#"<?xml version="1.0"?>
        <d:multistatus xmlns:d="DAV:">
            <d:response>
                <d:href>/remote.php/dav/files/admin/Documents/</d:href>
                <d:propstat>
                    <d:prop>
                        <d:getetag></d:getetag>
                    </d:prop>
                    <d:status>HTTP/1.1 200 OK</d:status>
                </d:propstat>
            </d:response>
        </d:multistatus>"#,
        // Invalid XML characters
        r#"<?xml version="1.0"?>
        <d:multistatus xmlns:d="DAV:">
            <d:response>
                <d:href>/remote.php/dav/files/admin/Documents/</d:href>
                <d:propstat>
                    <d:prop>
                        <d:getetag>"invalid-xml--char"</d:getetag>
                    </d:prop>
                </d:propstat>
            </d:response>
        </d:multistatus>"#,
    ];

    for (i, malformed_xml) in malformed_xml_cases.iter().enumerate() {
        let result = parse_propfind_response(malformed_xml);
        // Some malformed XML might still be parsed successfully by the robust parser.
        // The key is that it doesn't crash - either error or success is acceptable.
        match result {
            Ok(files) => {
                if let Some(file) = files.first() {
                    println!("Malformed XML case {} parsed successfully with ETag: {}", i, file.etag);
                } else {
                    println!("Malformed XML case {} parsed successfully but no files found", i);
                }
            }
            Err(e) => {
                println!("Malformed XML case {} failed as expected: {}", i, e);
            }
        }
    }
}

#[tokio::test]
async fn test_large_directory_structures() {
    let service = create_test_webdav_service();

    // Generate a large directory structure: 1 root + 100 level-1 + 1,000
    // level-2 directories (1,101 in total) holding 5,000 files
    let mut large_files = Vec::new();

    // Add root directory
    large_files.push(FileIngestionInfo {
        relative_path: "/Documents".to_string(),
        full_path: "/Documents".to_string(),
        #[allow(deprecated)]
        path: "/Documents".to_string(),
        name: "Documents".to_string(),
        size: 0,
        mime_type: "".to_string(),
        last_modified: Some(Utc::now()),
        etag: "root-etag".to_string(),
        is_directory: true,
        created_at: Some(Utc::now()),
        permissions: Some(755),
        owner: Some("admin".to_string()),
        group: Some("admin".to_string()),
        metadata: None,
    });

    // Generate 100 level-1 directories, each with 10 subdirectories holding
    // 5 files apiece (50 files per level-1 directory)
    for i in 0..100 {
        let level1_path = format!("/Documents/Dir{:03}", i);

        // Add level-1 directory
        large_files.push(FileIngestionInfo {
            relative_path: level1_path.clone(),
            full_path: level1_path.clone(),
            #[allow(deprecated)]
            path: level1_path.clone(),
            name: format!("Dir{:03}", i),
            size: 0,
            mime_type: "".to_string(),
            last_modified: Some(Utc::now()),
            etag: format!("dir{}-etag", i),
            is_directory: true,
            created_at: Some(Utc::now()),
            permissions: Some(755),
            owner: Some("admin".to_string()),
            group: Some("admin".to_string()),
            metadata: None,
        });

        // Add 10 subdirectories
        for j in 0..10 {
            let level2_path = format!("{}/SubDir{:02}", level1_path, j);
            large_files.push(FileIngestionInfo {
                relative_path: level2_path.clone(),
                full_path: level2_path.clone(),
                #[allow(deprecated)]
                path: level2_path.clone(),
                name: format!("SubDir{:02}", j),
                size: 0,
                mime_type: "".to_string(),
                last_modified: Some(Utc::now()),
                etag: format!("subdir{}-{}-etag", i, j),
                is_directory: true,
                created_at: Some(Utc::now()),
                permissions: Some(755),
                owner: Some("admin".to_string()),
                group: Some("admin".to_string()),
                metadata: None,
            });

            // Add 5 files in each subdirectory
            for k in 0..5 {
                let file_path = format!("{}/file{:02}.pdf", level2_path, k);
                large_files.push(FileIngestionInfo {
                    relative_path: file_path.clone(),
                    full_path: file_path.clone(),
                    #[allow(deprecated)]
                    path: file_path,
                    name: format!("file{:02}.pdf", k),
                    size: 1024 * (k + 1) as i64,
                    mime_type: "application/pdf".to_string(),
                    last_modified: Some(Utc::now()),
                    etag: format!("file{}-{}-{}-etag", i, j, k),
                    is_directory: false,
                    created_at: Some(Utc::now()),
                    permissions: Some(644),
                    owner: Some("admin".to_string()),
                    group: Some("admin".to_string()),
                    metadata: None,
                });
            }
        }
    }

    println!("Generated {} files and directories", large_files.len());

    // Test performance of directory extraction
    let start_time = std::time::Instant::now();
    let mut all_directories = std::collections::BTreeSet::new();

    for file in &large_files {
        if file.is_directory {
            all_directories.insert(file.path.clone());
        } else {
            let mut path_parts: Vec<&str> = file.path.split('/').collect();
            path_parts.pop();

            let mut current_path = String::new();
            for part in path_parts {
                if !part.is_empty() {
                    if !current_path.is_empty() {
                        current_path.push('/');
                    }
                    current_path.push_str(part);
                    all_directories.insert(current_path.clone());
                }
            }
        }
    }

    let extraction_time = start_time.elapsed();
    println!("Extracted {} directories in {:?}", all_directories.len(), extraction_time);

    // Verify structure - the actual count also includes the slash-less
    // prefixes extracted from file paths
    assert!(all_directories.len() >= 1101, "Should have at least 1101 directories"); // 1 root + 100 level-1 + 1000 level-2, plus extracted prefixes
    assert!(all_directories.contains("/Documents"));
    assert!(all_directories.contains("/Documents/Dir000"));
    assert!(all_directories.contains("/Documents/Dir099/SubDir09"));

    // Test performance of file counting for a specific directory
    let count_start = std::time::Instant::now();
    let test_dir = "/Documents/Dir050";
    let direct_files: Vec<_> = large_files.iter()
        .filter(|f| !f.is_directory && service.is_direct_child(&f.path, test_dir))
        .collect();
    let count_time = count_start.elapsed();

    println!("Counted {} direct files in {} in {:?}", direct_files.len(), test_dir, count_time);

    // Performance assertions
    assert!(extraction_time.as_millis() < 1000, "Directory extraction too slow: {:?}", extraction_time);
    assert!(count_time.as_millis() < 100, "File counting too slow: {:?}", count_time);
}
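
// A note on the linear scans above: filtering the whole list is
// O(total_files) for every directory queried. If counts were needed for many
// directories at once, grouping files by parent first, as in this
// illustrative sketch (hypothetical helper, not part of the service), makes
// each subsequent lookup O(1):
// e.g. group_files_by_parent(&large_files).get("/Documents/Dir050").map_or(0, |v| v.len())
#[allow(dead_code)]
#[allow(deprecated)] // The fixtures above key lookups on the deprecated `path` field
fn group_files_by_parent(files: &[FileIngestionInfo]) -> std::collections::HashMap<String, Vec<&FileIngestionInfo>> {
    let mut by_parent: std::collections::HashMap<String, Vec<&FileIngestionInfo>> =
        std::collections::HashMap::new();
    for file in files.iter().filter(|f| !f.is_directory) {
        // Parent is everything before the final '/' (empty for root-level files,
        // matching the root convention in the is_direct_child table above).
        if let Some(idx) = file.path.rfind('/') {
            by_parent.entry(file.path[..idx].to_string()).or_default().push(file);
        }
    }
    by_parent
}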