feat(webdav): also fix the parser to include directories, and add tests
parent c1dbd06df2
commit a381cdd12c
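
Note: every sync path added below rests on the same ETag comparison: a directory is rescanned only when its current ETag differs from the stored one, or when it has never been seen before. A minimal standalone sketch of that decision (illustrative only, not code from this commit):

    use std::collections::HashMap;

    // A folder needs a scan if its server-side ETag changed or it is unknown.
    fn needs_scan(stored: &HashMap<&str, &str>, path: &str, current_etag: &str) -> bool {
        match stored.get(path) {
            Some(old) => *old != current_etag, // changed on the server
            None => true,                      // new directory, never scanned
        }
    }

    fn main() {
        let stored = HashMap::from([("/Documents", "etag-1"), ("/Photos", "etag-2")]);
        assert!(needs_scan(&stored, "/Documents", "etag-9")); // changed
        assert!(!needs_scan(&stored, "/Photos", "etag-2"));   // unchanged
        assert!(needs_scan(&stored, "/Archive", "etag-3"));   // new
    }
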
@@ -10,7 +10,7 @@ use crate::models::{
     FileInfo, WebDAVConnectionResult, WebDAVCrawlEstimate, WebDAVFolderInfo,
     WebDAVTestConnection,
 };
-use crate::webdav_xml_parser::parse_propfind_response;
+use crate::webdav_xml_parser::{parse_propfind_response, parse_propfind_response_with_directories};
 
 #[derive(Debug, Clone)]
 pub struct WebDAVConfig {
@@ -613,6 +613,307 @@ impl WebDAVService {
         !remaining.contains('/') && !remaining.is_empty()
     }
 
+    /// Perform targeted re-scanning of only specific paths that have changed
+    pub async fn discover_files_targeted_rescan(&self, paths_to_scan: &[String], user_id: uuid::Uuid, state: &crate::AppState) -> Result<Vec<FileInfo>> {
+        info!("🎯 Starting targeted re-scan for {} specific paths", paths_to_scan.len());
+
+        let mut all_files = Vec::new();
+
+        for path in paths_to_scan {
+            info!("🔍 Targeted scan of: {}", path);
+
+            // Check if this specific path has changed
+            match self.check_directory_etag(path).await {
+                Ok(current_etag) => {
+                    // Check cached ETag
+                    let needs_scan = match state.db.get_webdav_directory(user_id, path).await {
+                        Ok(Some(stored_dir)) => {
+                            if stored_dir.directory_etag != current_etag {
+                                info!("🔄 Path {} changed (old: {}, new: {})", path, stored_dir.directory_etag, current_etag);
+                                true
+                            } else {
+                                debug!("✅ Path {} unchanged (ETag: {})", path, current_etag);
+                                false
+                            }
+                        }
+                        Ok(None) => {
+                            info!("🆕 New path {} detected", path);
+                            true
+                        }
+                        Err(e) => {
+                            warn!("Database error for path {}: {}", path, e);
+                            true // Scan on error to be safe
+                        }
+                    };
+
+                    if needs_scan {
+                        // Use shallow scan for this specific directory only
+                        match self.discover_files_in_folder_shallow(path).await {
+                            Ok(mut path_files) => {
+                                info!("📂 Found {} files in changed path {}", path_files.len(), path);
+
+                                // Update tracking for this specific path before the
+                                // entries are drained into the aggregate result, so
+                                // the directory ETag and file counts are still visible
+                                self.update_single_directory_tracking(path, &path_files, user_id, state).await;
+                                all_files.append(&mut path_files);
+                            }
+                            Err(e) => {
+                                error!("Failed to scan changed path {}: {}", path, e);
+                            }
+                        }
+                    }
+                }
+                Err(e) => {
+                    warn!("Failed to check ETag for path {}: {}, skipping", path, e);
+                }
+            }
+        }
+
+        info!("🎯 Targeted re-scan completed: {} total files found", all_files.len());
+        Ok(all_files)
+    }
+
+    /// Discover files in a single directory only (shallow scan, no recursion)
+    async fn discover_files_in_folder_shallow(&self, folder_path: &str) -> Result<Vec<FileInfo>> {
+        let folder_url = format!("{}{}", self.base_webdav_url, folder_path);
+
+        debug!("Shallow scan of directory: {}", folder_url);
+
+        let propfind_body = r#"<?xml version="1.0"?>
+<d:propfind xmlns:d="DAV:">
+    <d:allprop/>
+</d:propfind>"#;
+
+        let response = self.client
+            .request(Method::from_bytes(b"PROPFIND").unwrap(), &folder_url)
+            .basic_auth(&self.config.username, Some(&self.config.password))
+            .header("Depth", "1") // Only direct children, not recursive
+            .header("Content-Type", "application/xml")
+            .body(propfind_body)
+            .send()
+            .await?;
+
+        if !response.status().is_success() {
+            return Err(anyhow!("PROPFIND request failed: {}", response.status()));
+        }
+
+        let response_text = response.text().await?;
+        debug!("Shallow WebDAV response received, parsing...");
+
+        // Use the parser that includes directories for shallow scans
+        self.parse_webdav_response_with_directories(&response_text)
+    }
+
+    /// Update tracking for a single directory without recursive processing
+    async fn update_single_directory_tracking(&self, directory_path: &str, files: &[FileInfo], user_id: uuid::Uuid, state: &crate::AppState) {
+        // Get the directory's own ETag
+        let dir_etag = files.iter()
+            .find(|f| f.is_directory && f.path == directory_path)
+            .map(|f| f.etag.clone())
+            .unwrap_or_else(|| {
+                warn!("No ETag found for directory {}, using timestamp-based fallback", directory_path);
+                chrono::Utc::now().timestamp().to_string()
+            });
+
+        // Count direct files in this directory only
+        let direct_files: Vec<_> = files.iter()
+            .filter(|f| !f.is_directory && self.is_direct_child(&f.path, directory_path))
+            .collect();
+
+        let file_count = direct_files.len() as i64;
+        let total_size_bytes = direct_files.iter().map(|f| f.size).sum::<i64>();
+
+        let directory_record = crate::models::CreateWebDAVDirectory {
+            user_id,
+            directory_path: directory_path.to_string(),
+            directory_etag: dir_etag.clone(),
+            file_count,
+            total_size_bytes,
+        };
+
+        match state.db.create_or_update_webdav_directory(&directory_record).await {
+            Ok(_) => {
+                info!("📊 Updated single directory tracking: {} ({} files, {} bytes, ETag: {})",
+                      directory_path, file_count, total_size_bytes, dir_etag);
+            }
+            Err(e) => {
+                error!("Failed to update single directory tracking for {}: {}", directory_path, e);
+            }
+        }
+    }
+
+    /// Get a list of directories that need targeted scanning based on recent changes
+    pub async fn get_directories_needing_scan(&self, user_id: uuid::Uuid, state: &crate::AppState, max_age_hours: i64) -> Result<Vec<String>> {
+        let cutoff_time = chrono::Utc::now() - chrono::Duration::hours(max_age_hours);
+
+        match state.db.list_webdav_directories(user_id).await {
+            Ok(directories) => {
+                let stale_dirs: Vec<String> = directories.iter()
+                    .filter(|dir| dir.last_scanned_at < cutoff_time)
+                    .map(|dir| dir.directory_path.clone())
+                    .collect();
+
+                info!("🕒 Found {} directories not scanned in last {} hours", stale_dirs.len(), max_age_hours);
+                Ok(stale_dirs)
+            }
+            Err(e) => {
+                error!("Failed to get directories needing scan: {}", e);
+                Err(e.into())
+            }
+        }
+    }
+
+    /// Smart sync mode that combines multiple optimization strategies
+    pub async fn discover_files_smart_sync(&self, watch_folders: &[String], user_id: uuid::Uuid, state: &crate::AppState) -> Result<Vec<FileInfo>> {
+        info!("🧠 Starting smart sync for {} watch folders", watch_folders.len());
+
+        let mut all_files = Vec::new();
+
+        for folder_path in watch_folders {
+            info!("🔍 Smart sync processing folder: {}", folder_path);
+
+            // Step 1: Try optimized discovery first (checks directory ETag)
+            let optimized_result = self.discover_files_in_folder_optimized(folder_path, user_id, state).await;
+
+            match optimized_result {
+                Ok(files) => {
+                    if !files.is_empty() {
+                        info!("✅ Optimized discovery found {} files in {}", files.len(), folder_path);
+                        all_files.extend(files);
+                    } else {
+                        info!("🔍 Directory {} unchanged, checking for stale subdirectories", folder_path);
+
+                        // Step 2: Check for stale subdirectories that need targeted scanning
+                        let stale_dirs = self.get_stale_subdirectories(folder_path, user_id, state, 24).await?;
+
+                        if !stale_dirs.is_empty() {
+                            info!("🎯 Found {} stale subdirectories, performing targeted scan", stale_dirs.len());
+                            let targeted_files = self.discover_files_targeted_rescan(&stale_dirs, user_id, state).await?;
+                            all_files.extend(targeted_files);
+                        } else {
+                            info!("✅ All subdirectories of {} are fresh, no scan needed", folder_path);
+                        }
+                    }
+                }
+                Err(e) => {
+                    warn!("Optimized discovery failed for {}, falling back to full scan: {}", folder_path, e);
+                    // Fallback to traditional full scan
+                    match self.discover_files_in_folder(folder_path).await {
+                        Ok(files) => {
+                            info!("📂 Fallback scan found {} files in {}", files.len(), folder_path);
+                            all_files.extend(files);
+                        }
+                        Err(fallback_error) => {
+                            error!("Both optimized and fallback scans failed for {}: {}", folder_path, fallback_error);
+                            return Err(fallback_error);
+                        }
+                    }
+                }
+            }
+        }
+
+        info!("🧠 Smart sync completed: {} total files discovered", all_files.len());
+        Ok(all_files)
+    }
+
+    /// Get subdirectories of a parent that haven't been scanned recently
+    async fn get_stale_subdirectories(&self, parent_path: &str, user_id: uuid::Uuid, state: &crate::AppState, max_age_hours: i64) -> Result<Vec<String>> {
+        let cutoff_time = chrono::Utc::now() - chrono::Duration::hours(max_age_hours);
+
+        match state.db.list_webdav_directories(user_id).await {
+            Ok(directories) => {
+                let stale_subdirs: Vec<String> = directories.iter()
+                    .filter(|dir| {
+                        dir.directory_path.starts_with(parent_path) &&
+                        dir.directory_path != parent_path &&
+                        dir.last_scanned_at < cutoff_time
+                    })
+                    .map(|dir| dir.directory_path.clone())
+                    .collect();
+
+                debug!("🕒 Found {} stale subdirectories under {} (not scanned in {} hours)",
+                       stale_subdirs.len(), parent_path, max_age_hours);
+                Ok(stale_subdirs)
+            }
+            Err(e) => {
+                error!("Failed to get stale subdirectories: {}", e);
+                Err(e.into())
+            }
+        }
+    }
+
+    /// Perform incremental sync - only scan directories that have actually changed
+    pub async fn discover_files_incremental(&self, watch_folders: &[String], user_id: uuid::Uuid, state: &crate::AppState) -> Result<Vec<FileInfo>> {
+        info!("⚡ Starting incremental sync for {} watch folders", watch_folders.len());
+
+        let mut changed_files = Vec::new();
+        let mut unchanged_count = 0;
+        let mut changed_count = 0;
+
+        for folder_path in watch_folders {
+            // Check directory ETag to see if it changed
+            match self.check_directory_etag(folder_path).await {
+                Ok(current_etag) => {
+                    let needs_scan = match state.db.get_webdav_directory(user_id, folder_path).await {
+                        Ok(Some(stored_dir)) => {
+                            if stored_dir.directory_etag != current_etag {
+                                info!("🔄 Directory {} changed (ETag: {} → {})", folder_path, stored_dir.directory_etag, current_etag);
+                                changed_count += 1;
+                                true
+                            } else {
+                                debug!("✅ Directory {} unchanged (ETag: {})", folder_path, current_etag);
+                                unchanged_count += 1;
+                                false
+                            }
+                        }
+                        Ok(None) => {
+                            info!("🆕 New directory {} detected", folder_path);
+                            changed_count += 1;
+                            true
+                        }
+                        Err(e) => {
+                            warn!("Database error for {}: {}, scanning to be safe", folder_path, e);
+                            changed_count += 1;
+                            true
+                        }
+                    };
+
+                    if needs_scan {
+                        // Directory changed - perform targeted scan
+                        match self.discover_files_in_folder_optimized(folder_path, user_id, state).await {
+                            Ok(mut files) => {
+                                info!("📂 Incremental scan found {} files in changed directory {}", files.len(), folder_path);
+                                changed_files.append(&mut files);
+                            }
+                            Err(e) => {
+                                error!("Failed incremental scan of {}: {}", folder_path, e);
+                            }
+                        }
+                    } else {
+                        // Directory unchanged - just update scan timestamp
+                        let update = crate::models::UpdateWebDAVDirectory {
+                            directory_etag: current_etag,
+                            last_scanned_at: chrono::Utc::now(),
+                            file_count: 0, // Will be updated by the database layer
+                            total_size_bytes: 0,
+                        };
+
+                        if let Err(e) = state.db.update_webdav_directory(user_id, folder_path, &update).await {
+                            warn!("Failed to update scan timestamp for {}: {}", folder_path, e);
+                        }
+                    }
+                }
+                Err(e) => {
+                    error!("Failed to check directory ETag for {}: {}", folder_path, e);
+                }
+            }
+        }
+
+        info!("⚡ Incremental sync completed: {} unchanged, {} changed, {} total files found",
+              unchanged_count, changed_count, changed_files.len());
+
+        Ok(changed_files)
+    }
 
     /// Check subdirectories individually for changes when parent directory is unchanged
     async fn check_subdirectories_for_changes(&self, parent_path: &str, user_id: uuid::Uuid, state: &crate::AppState) -> Result<Vec<FileInfo>> {
         // Get all known subdirectories from database
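
Note: `update_single_directory_tracking` above counts only direct children via `self.is_direct_child`, whose tail (`!remaining.contains('/') && !remaining.is_empty()`) appears in the hunk context. A standalone sketch of what such a check plausibly looks like (an assumption for illustration; the real helper lives elsewhere in this file):

    // A path is a direct child of `parent` when, after stripping the parent
    // prefix and a leading slash, no further '/' remains.
    fn is_direct_child(path: &str, parent: &str) -> bool {
        match path.strip_prefix(parent) {
            Some(rest) => {
                let remaining = rest.trim_start_matches('/');
                !remaining.contains('/') && !remaining.is_empty()
            }
            None => false,
        }
    }

    fn main() {
        assert!(is_direct_child("/Documents/file1.pdf", "/Documents"));
        assert!(!is_direct_child("/Documents/SubFolder/nested.pdf", "/Documents"));
        assert!(!is_direct_child("/Other/file.pdf", "/Documents"));
    }
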
@@ -806,6 +1107,12 @@ impl WebDAVService {
         parse_propfind_response(xml_text)
     }
 
+    /// Parse WebDAV response including both files and directories
+    /// Used for shallow directory scans where we need to track directory structure
+    pub fn parse_webdav_response_with_directories(&self, xml_text: &str) -> Result<Vec<FileInfo>> {
+        parse_propfind_response_with_directories(xml_text)
+    }
+
     pub async fn download_file(&self, file_path: &str) -> Result<Vec<u8>> {
         self.retry_with_backoff("download_file", || {
             self.download_file_impl(file_path)
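
Note: a standalone sketch of the staleness filter that `get_stale_subdirectories` applies in the service hunk above, runnable on its own (the tuple-slice shape is mine for illustration, not the repo's directory model):

    use chrono::{Duration, Utc};

    // Keep paths strictly under `parent` whose last scan predates the cutoff.
    fn stale_subdirs<'a>(
        dirs: &'a [(&'a str, chrono::DateTime<Utc>)],
        parent: &str,
        max_age_hours: i64,
    ) -> Vec<&'a str> {
        let cutoff = Utc::now() - Duration::hours(max_age_hours);
        dirs.iter()
            .filter(|(path, last)| path.starts_with(parent) && *path != parent && *last < cutoff)
            .map(|(path, _)| *path)
            .collect()
    }

    fn main() {
        let now = Utc::now();
        let dirs = [
            ("/Documents", now),
            ("/Documents/2024", now - Duration::hours(25)),
            ("/Documents/Archive", now - Duration::hours(1)),
        ];
        assert_eq!(stale_subdirs(&dirs, "/Documents", 24), vec!["/Documents/2024"]);
    }
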
@@ -246,6 +246,225 @@ pub fn parse_propfind_response(xml_text: &str) -> Result<Vec<FileInfo>> {
     Ok(files)
 }
 
+/// Parse PROPFIND response including both files and directories
+/// This is used for shallow directory scans where we need to track directory structure
+pub fn parse_propfind_response_with_directories(xml_text: &str) -> Result<Vec<FileInfo>> {
+    let mut reader = Reader::from_str(xml_text);
+    reader.config_mut().trim_text(true);
+
+    let mut files = Vec::new();
+    let mut current_response: Option<PropFindResponse> = None;
+    let mut current_element = String::new();
+    let mut in_response = false;
+    let mut in_propstat = false;
+    let mut in_prop = false;
+    let mut in_resourcetype = false;
+    let mut status_ok = false;
+
+    let mut buf = Vec::new();
+
+    loop {
+        match reader.read_event_into(&mut buf) {
+            Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
+                let name = get_local_name(&e)?;
+
+                match name.as_str() {
+                    "response" => {
+                        in_response = true;
+                        current_response = Some(PropFindResponse::default());
+                    }
+                    "propstat" => {
+                        in_propstat = true;
+                    }
+                    "prop" => {
+                        in_prop = true;
+                    }
+                    "resourcetype" => {
+                        in_resourcetype = true;
+                    }
+                    "collection" if in_resourcetype => {
+                        if let Some(ref mut resp) = current_response {
+                            resp.is_collection = true;
+                        }
+                    }
+                    _ => {
+                        current_element = name;
+                    }
+                }
+            }
+            Ok(Event::Text(e)) => {
+                let text = e.unescape()?.to_string();
+
+                if in_response && !text.trim().is_empty() {
+                    if let Some(ref mut resp) = current_response {
+                        match current_element.as_str() {
+                            "href" => {
+                                resp.href = text.trim().to_string();
+                            }
+                            "displayname" => {
+                                resp.displayname = text.trim().to_string();
+                            }
+                            "getcontentlength" => {
+                                resp.content_length = text.trim().parse().ok();
+                            }
+                            "getlastmodified" => {
+                                resp.last_modified = Some(text.trim().to_string());
+                            }
+                            "getcontenttype" => {
+                                resp.content_type = Some(text.trim().to_string());
+                            }
+                            "getetag" => {
+                                resp.etag = Some(normalize_etag(&text));
+                            }
+                            "creationdate" => {
+                                resp.creation_date = Some(text.trim().to_string());
+                            }
+                            "owner" => {
+                                resp.owner = Some(text.trim().to_string());
+                            }
+                            "group" => {
+                                resp.group = Some(text.trim().to_string());
+                            }
+                            "status" if in_propstat => {
+                                // Check if status is 200 OK
+                                if text.contains("200") {
+                                    status_ok = true;
+                                }
+                            }
+                            _ => {
+                                // Store any other properties as generic metadata
+                                if !text.trim().is_empty() && in_prop {
+                                    if resp.metadata.is_none() {
+                                        resp.metadata = Some(serde_json::Value::Object(serde_json::Map::new()));
+                                    }
+
+                                    if let Some(serde_json::Value::Object(ref mut map)) = resp.metadata {
+                                        match current_element.as_str() {
+                                            "permissions" | "oc:permissions" => {
+                                                resp.permissions = Some(text.trim().to_string());
+                                                map.insert("permissions_raw".to_string(), serde_json::Value::String(text.trim().to_string()));
+                                            }
+                                            "fileid" | "oc:fileid" => {
+                                                map.insert("file_id".to_string(), serde_json::Value::String(text.trim().to_string()));
+                                            }
+                                            "owner-id" | "oc:owner-id" => {
+                                                map.insert("owner_id".to_string(), serde_json::Value::String(text.trim().to_string()));
+                                            }
+                                            "owner-display-name" | "oc:owner-display-name" => {
+                                                resp.owner_display_name = Some(text.trim().to_string());
+                                                map.insert("owner_display_name".to_string(), serde_json::Value::String(text.trim().to_string()));
+                                            }
+                                            "has-preview" | "nc:has-preview" => {
+                                                if let Ok(val) = text.trim().parse::<bool>() {
+                                                    map.insert("has_preview".to_string(), serde_json::Value::Bool(val));
+                                                }
+                                            }
+                                            _ => {
+                                                map.insert(current_element.clone(), serde_json::Value::String(text.trim().to_string()));
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            Ok(Event::End(e)) => {
+                let name = get_local_name_from_end(&e)?;
+
+                match name.as_str() {
+                    "response" => {
+                        if let Some(resp) = current_response.take() {
+                            // Include both files AND directories with valid properties
+                            if status_ok && !resp.href.is_empty() {
+                                // Extract name from href
+                                let name = if resp.displayname.is_empty() {
+                                    resp.href
+                                        .split('/')
+                                        .filter(|s| !s.is_empty())
+                                        .last()
+                                        .unwrap_or("")
+                                        .to_string()
+                                } else {
+                                    resp.displayname.clone()
+                                };
+
+                                // Decode URL-encoded characters
+                                let name = urlencoding::decode(&name)
+                                    .unwrap_or_else(|_| std::borrow::Cow::Borrowed(&name))
+                                    .to_string();
+
+                                // Parse creation date
+                                let created_at = resp.creation_date
+                                    .as_ref()
+                                    .and_then(|d| parse_http_date(d));
+
+                                // Parse permissions
+                                let permissions_int = resp.permissions
+                                    .as_ref()
+                                    .and_then(|p| {
+                                        if p.chars().all(|c| c.is_uppercase()) {
+                                            let mut perms = 0u32;
+                                            if p.contains('R') { perms |= 0o444; }
+                                            if p.contains('W') { perms |= 0o222; }
+                                            if p.contains('D') { perms |= 0o111; }
+                                            Some(perms)
+                                        } else {
+                                            p.parse().ok()
+                                        }
+                                    });
+
+                                let file_info = FileInfo {
+                                    path: resp.href.clone(),
+                                    name,
+                                    size: resp.content_length.unwrap_or(0),
+                                    mime_type: if resp.is_collection {
+                                        "".to_string()
+                                    } else {
+                                        resp.content_type.unwrap_or_else(|| "application/octet-stream".to_string())
+                                    },
+                                    last_modified: parse_http_date(&resp.last_modified.unwrap_or_default()),
+                                    etag: resp.etag.unwrap_or_else(|| format!("\"{}\"", uuid::Uuid::new_v4())),
+                                    is_directory: resp.is_collection,
+                                    created_at,
+                                    permissions: permissions_int,
+                                    owner: resp.owner.or(resp.owner_display_name),
+                                    group: resp.group,
+                                    metadata: resp.metadata,
+                                };
+
+                                files.push(file_info);
+                            }
+                        }
+                        in_response = false;
+                        status_ok = false;
+                    }
+                    "propstat" => {
+                        in_propstat = false;
+                    }
+                    "prop" => {
+                        in_prop = false;
+                    }
+                    "resourcetype" => {
+                        in_resourcetype = false;
+                    }
+                    _ => {}
+                }
+
+                current_element.clear();
+            }
+            Ok(Event::Eof) => break,
+            Err(e) => return Err(anyhow!("XML parsing error: {}", e)),
+            _ => {}
+        }
+
+        buf.clear();
+    }
+
+    Ok(files)
+}
 
 fn get_local_name(e: &BytesStart) -> Result<String> {
     let qname = e.name();
     let local = qname.local_name();
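
Note: the parser normalizes ETags via `normalize_etag`, whose body is not part of this diff. The tests below expect an XML value of `"docs-etag"` (with quotes) to compare equal to `docs-etag`, so a plausible sketch is quote and weak-validator stripping (hypothetical; the repo's actual helper may differ):

    // Strip a weak-validator prefix and surrounding quotes so that
    // W/"abc" and "abc" both normalize to abc.
    fn normalize_etag(raw: &str) -> String {
        raw.trim()
            .trim_start_matches("W/")
            .trim_matches('"')
            .to_string()
    }

    fn main() {
        assert_eq!(normalize_etag("W/\"abc-123\""), "abc-123");
        assert_eq!(normalize_etag("\"abc-123\""), "abc-123");
        assert_eq!(normalize_etag("abc-123"), "abc-123");
    }
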
@@ -0,0 +1,341 @@
+use readur::services::webdav_service::{WebDAVService, WebDAVConfig};
+use readur::models::FileInfo;
+use tokio;
+use chrono::Utc;
+
+// Helper function to create test WebDAV service
+fn create_test_webdav_service() -> WebDAVService {
+    let config = WebDAVConfig {
+        server_url: "https://test.example.com".to_string(),
+        username: "testuser".to_string(),
+        password: "testpass".to_string(),
+        watch_folders: vec!["/Documents".to_string()],
+        file_extensions: vec!["pdf".to_string(), "png".to_string()],
+        timeout_seconds: 30,
+        server_type: Some("nextcloud".to_string()),
+    };
+
+    WebDAVService::new(config).unwrap()
+}
+
+#[tokio::test]
+async fn test_discover_files_in_folder_shallow() {
+    let service = create_test_webdav_service();
+
+    // Mock XML response for shallow directory scan (Depth: 1)
+    let mock_response = r#"<?xml version="1.0"?>
+<d:multistatus xmlns:d="DAV:">
+    <d:response>
+        <d:href>/remote.php/dav/files/admin/Documents/</d:href>
+        <d:propstat>
+            <d:prop>
+                <d:displayname>Documents</d:displayname>
+                <d:resourcetype>
+                    <d:collection/>
+                </d:resourcetype>
+                <d:getetag>"docs-etag"</d:getetag>
+            </d:prop>
+            <d:status>HTTP/1.1 200 OK</d:status>
+        </d:propstat>
+    </d:response>
+    <d:response>
+        <d:href>/remote.php/dav/files/admin/Documents/file1.pdf</d:href>
+        <d:propstat>
+            <d:prop>
+                <d:displayname>file1.pdf</d:displayname>
+                <d:getcontentlength>1024</d:getcontentlength>
+                <d:getcontenttype>application/pdf</d:getcontenttype>
+                <d:getetag>"file1-etag"</d:getetag>
+                <d:resourcetype/>
+            </d:prop>
+            <d:status>HTTP/1.1 200 OK</d:status>
+        </d:propstat>
+    </d:response>
+    <d:response>
+        <d:href>/remote.php/dav/files/admin/Documents/SubFolder/</d:href>
+        <d:propstat>
+            <d:prop>
+                <d:displayname>SubFolder</d:displayname>
+                <d:resourcetype>
+                    <d:collection/>
+                </d:resourcetype>
+                <d:getetag>"subfolder-etag"</d:getetag>
+            </d:prop>
+            <d:status>HTTP/1.1 200 OK</d:status>
+        </d:propstat>
+    </d:response>
+</d:multistatus>"#;
+
+    // Test that shallow parsing works correctly
+    let files = service.parse_webdav_response_with_directories(mock_response).unwrap();
+
+    // Debug print to see what files we actually got
+    for file in &files {
+        println!("Parsed file: {} (is_directory: {}, path: {})", file.name, file.is_directory, file.path);
+    }
+
+    // Should have directory, direct file, and direct subdirectory (but no nested files)
+    assert_eq!(files.len(), 3);
+
+    // Check that we got the right items
+    let directory = files.iter().find(|f| f.name == "Documents").unwrap();
+    assert!(directory.is_directory);
+    assert_eq!(directory.etag, "docs-etag");
+
+    let file = files.iter().find(|f| f.name == "file1.pdf").unwrap();
+    assert!(!file.is_directory);
+    assert_eq!(file.size, 1024);
+    assert_eq!(file.etag, "file1-etag");
+
+    let subfolder = files.iter().find(|f| f.name == "SubFolder").unwrap();
+    assert!(subfolder.is_directory);
+    assert_eq!(subfolder.etag, "subfolder-etag");
+}
+
+#[tokio::test]
+async fn test_update_single_directory_tracking() {
+    let service = create_test_webdav_service();
+
+    // Create mock files representing a shallow directory scan
+    let files = vec![
+        FileInfo {
+            path: "/Documents".to_string(),
+            name: "Documents".to_string(),
+            size: 0,
+            mime_type: "".to_string(),
+            last_modified: Some(Utc::now()),
+            etag: "docs-etag-123".to_string(),
+            is_directory: true,
+            created_at: Some(Utc::now()),
+            permissions: Some(755),
+            owner: Some("admin".to_string()),
+            group: Some("admin".to_string()),
+            metadata: None,
+        },
+        FileInfo {
+            path: "/Documents/file1.pdf".to_string(),
+            name: "file1.pdf".to_string(),
+            size: 1024000,
+            mime_type: "application/pdf".to_string(),
+            last_modified: Some(Utc::now()),
+            etag: "file1-etag".to_string(),
+            is_directory: false,
+            created_at: Some(Utc::now()),
+            permissions: Some(644),
+            owner: Some("admin".to_string()),
+            group: Some("admin".to_string()),
+            metadata: None,
+        },
+        FileInfo {
+            path: "/Documents/file2.pdf".to_string(),
+            name: "file2.pdf".to_string(),
+            size: 2048000,
+            mime_type: "application/pdf".to_string(),
+            last_modified: Some(Utc::now()),
+            etag: "file2-etag".to_string(),
+            is_directory: false,
+            created_at: Some(Utc::now()),
+            permissions: Some(644),
+            owner: Some("admin".to_string()),
+            group: Some("admin".to_string()),
+            metadata: None,
+        },
+        FileInfo {
+            path: "/Documents/SubFolder".to_string(),
+            name: "SubFolder".to_string(),
+            size: 0,
+            mime_type: "".to_string(),
+            last_modified: Some(Utc::now()),
+            etag: "subfolder-etag".to_string(),
+            is_directory: true,
+            created_at: Some(Utc::now()),
+            permissions: Some(755),
+            owner: Some("admin".to_string()),
+            group: Some("admin".to_string()),
+            metadata: None,
+        },
+    ];
+
+    // Test that direct file counting works correctly
+    let direct_files: Vec<_> = files.iter()
+        .filter(|f| !f.is_directory && service.is_direct_child(&f.path, "/Documents"))
+        .collect();
+
+    assert_eq!(direct_files.len(), 2); // file1.pdf and file2.pdf
+
+    let total_size: i64 = direct_files.iter().map(|f| f.size).sum();
+    assert_eq!(total_size, 3072000); // 1024000 + 2048000
+
+    // Test that directory ETag extraction works
+    let dir_etag = files.iter()
+        .find(|f| f.is_directory && f.path == "/Documents")
+        .map(|f| f.etag.clone())
+        .unwrap();
+
+    assert_eq!(dir_etag, "docs-etag-123");
+}
+
+#[tokio::test]
+async fn test_targeted_rescan_logic() {
+    let service = create_test_webdav_service();
+
+    // Test the logic that determines which paths need scanning
+    let paths_to_check = vec![
+        "/Documents".to_string(),
+        "/Documents/2024".to_string(),
+        "/Documents/Archive".to_string(),
+    ];
+
+    // This tests the core logic used in discover_files_targeted_rescan
+    // In a real implementation, this would involve database calls and network requests
+
+    // Simulate ETag checking logic
+    let mut paths_needing_scan = Vec::new();
+
+    for path in &paths_to_check {
+        // Simulate: current_etag != stored_etag (directory changed)
+        let current_etag = format!("{}-current", path.replace('/', "-"));
+        let stored_etag = format!("{}-stored", path.replace('/', "-"));
+
+        if current_etag != stored_etag {
+            paths_needing_scan.push(path.clone());
+        }
+    }
+
+    // All paths should need scanning in this test scenario
+    assert_eq!(paths_needing_scan.len(), 3);
+    assert!(paths_needing_scan.contains(&"/Documents".to_string()));
+    assert!(paths_needing_scan.contains(&"/Documents/2024".to_string()));
+    assert!(paths_needing_scan.contains(&"/Documents/Archive".to_string()));
+}
+
+#[tokio::test]
+async fn test_stale_directory_detection() {
+    let service = create_test_webdav_service();
+
+    // Test the logic for detecting stale subdirectories
+    let parent_path = "/Documents";
+    let directories = vec![
+        ("/Documents", chrono::Utc::now()),                                       // Fresh parent
+        ("/Documents/2024", chrono::Utc::now() - chrono::Duration::hours(25)),    // Stale (25 hours old)
+        ("/Documents/Archive", chrono::Utc::now() - chrono::Duration::hours(1)),  // Fresh (1 hour old)
+        ("/Documents/2024/Q1", chrono::Utc::now() - chrono::Duration::hours(30)), // Stale (30 hours old)
+        ("/Other", chrono::Utc::now() - chrono::Duration::hours(48)),             // Stale but not under parent
+    ];
+
+    let max_age_hours = 24;
+    let cutoff_time = chrono::Utc::now() - chrono::Duration::hours(max_age_hours);
+
+    // Test the filtering logic
+    let stale_subdirs: Vec<String> = directories.iter()
+        .filter(|(path, last_scanned)| {
+            path.starts_with(parent_path) &&
+            *path != parent_path &&
+            *last_scanned < cutoff_time
+        })
+        .map(|(path, _)| path.to_string())
+        .collect();
+
+    assert_eq!(stale_subdirs.len(), 2);
+    assert!(stale_subdirs.contains(&"/Documents/2024".to_string()));
+    assert!(stale_subdirs.contains(&"/Documents/2024/Q1".to_string()));
+    assert!(!stale_subdirs.contains(&"/Documents/Archive".to_string())); // Fresh
+    assert!(!stale_subdirs.contains(&"/Other".to_string())); // Different parent
+}
+
+#[tokio::test]
+async fn test_incremental_sync_logic() {
+    let service = create_test_webdav_service();
+
+    // Test the change detection logic used in incremental sync
+    let watch_folders = vec![
+        "/Documents".to_string(),
+        "/Photos".to_string(),
+        "/Archive".to_string(),
+    ];
+
+    // Simulate stored ETags vs current ETags
+    let stored_etags = [
+        ("/Documents", "docs-etag-old"),
+        ("/Photos", "photos-etag-same"),
+        ("/Archive", "archive-etag-old"),
+    ];
+
+    let current_etags = [
+        ("/Documents", "docs-etag-new"),  // Changed
+        ("/Photos", "photos-etag-same"),  // Unchanged
+        ("/Archive", "archive-etag-new"), // Changed
+    ];
+
+    let mut changed_folders = Vec::new();
+    let mut unchanged_folders = Vec::new();
+
+    for folder in &watch_folders {
+        let stored = stored_etags.iter().find(|(path, _)| path == folder).map(|(_, etag)| *etag);
+        let current = current_etags.iter().find(|(path, _)| path == folder).map(|(_, etag)| *etag);
+
+        match (stored, current) {
+            (Some(stored_etag), Some(current_etag)) => {
+                if stored_etag != current_etag {
+                    changed_folders.push(folder.clone());
+                } else {
+                    unchanged_folders.push(folder.clone());
+                }
+            }
+            _ => {
+                // New folder or missing data - assume changed
+                changed_folders.push(folder.clone());
+            }
+        }
+    }
+
+    assert_eq!(changed_folders.len(), 2);
+    assert!(changed_folders.contains(&"/Documents".to_string()));
+    assert!(changed_folders.contains(&"/Archive".to_string()));
+
+    assert_eq!(unchanged_folders.len(), 1);
+    assert!(unchanged_folders.contains(&"/Photos".to_string()));
+}
+
+#[tokio::test]
+async fn test_smart_sync_strategy_selection() {
+    let service = create_test_webdav_service();
+
+    // Test the logic for choosing between different sync strategies
+
+    // Scenario 1: Directory unchanged, no stale subdirectories -> no scan needed
+    let scenario1_main_dir_changed = false;
+    let scenario1_stale_subdirs = 0;
+    let scenario1_action = if scenario1_main_dir_changed {
+        "full_scan"
+    } else if scenario1_stale_subdirs > 0 {
+        "targeted_scan"
+    } else {
+        "no_scan"
+    };
+    assert_eq!(scenario1_action, "no_scan");
+
+    // Scenario 2: Directory unchanged, has stale subdirectories -> targeted scan
+    let scenario2_main_dir_changed = false;
+    let scenario2_stale_subdirs = 3;
+    let scenario2_action = if scenario2_main_dir_changed {
+        "full_scan"
+    } else if scenario2_stale_subdirs > 0 {
+        "targeted_scan"
+    } else {
+        "no_scan"
+    };
+    assert_eq!(scenario2_action, "targeted_scan");
+
+    // Scenario 3: Directory changed -> full scan (optimized)
+    let scenario3_main_dir_changed = true;
+    let scenario3_stale_subdirs = 0;
+    let scenario3_action = if scenario3_main_dir_changed {
+        "full_scan"
+    } else if scenario3_stale_subdirs > 0 {
+        "targeted_scan"
+    } else {
+        "no_scan"
+    };
+    assert_eq!(scenario3_action, "full_scan");
+}
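
Note: the last test encodes the strategy selection that `discover_files_smart_sync` implements: full scan when the watched directory's ETag changed, targeted scan when only subdirectories are stale, otherwise skip. The same decision as a standalone sketch (illustrative only; the service inlines this logic rather than exposing such a function):

    #[derive(Debug, PartialEq)]
    enum ScanAction { FullScan, TargetedScan, NoScan }

    // Pick the cheapest scan that still covers every possible change.
    fn choose_action(main_dir_changed: bool, stale_subdirs: usize) -> ScanAction {
        if main_dir_changed {
            ScanAction::FullScan
        } else if stale_subdirs > 0 {
            ScanAction::TargetedScan
        } else {
            ScanAction::NoScan
        }
    }

    fn main() {
        assert_eq!(choose_action(false, 0), ScanAction::NoScan);
        assert_eq!(choose_action(false, 3), ScanAction::TargetedScan);
        assert_eq!(choose_action(true, 0), ScanAction::FullScan);
    }
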