use anyhow::{anyhow, Result}; use chrono::{DateTime, Utc}; use quick_xml::events::{BytesStart, Event}; use quick_xml::reader::Reader; use std::str; use serde_json; use crate::models::FileInfo; #[derive(Debug, Default)] struct PropFindResponse { href: String, displayname: String, content_length: Option, last_modified: Option, content_type: Option, etag: Option, is_collection: bool, creation_date: Option, owner: Option, group: Option, permissions: Option, owner_display_name: Option, metadata: Option, } pub fn parse_propfind_response(xml_text: &str) -> Result> { let mut reader = Reader::from_str(xml_text); reader.config_mut().trim_text(true); let mut files = Vec::new(); let mut current_response: Option = None; let mut current_element = String::new(); let mut in_response = false; let mut in_propstat = false; let mut in_prop = false; let mut in_resourcetype = false; let mut status_ok = false; let mut buf = Vec::new(); loop { match reader.read_event_into(&mut buf) { Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { let name = get_local_name(&e)?; match name.as_str() { "response" => { in_response = true; current_response = Some(PropFindResponse::default()); } "propstat" => { in_propstat = true; } "prop" => { in_prop = true; } "resourcetype" => { in_resourcetype = true; } "collection" if in_resourcetype => { if let Some(ref mut resp) = current_response { resp.is_collection = true; } } _ => { current_element = name; } } } Ok(Event::Text(e)) => { let text = e.unescape()?.to_string(); if in_response && !text.trim().is_empty() { if let Some(ref mut resp) = current_response { match current_element.as_str() { "href" => { resp.href = text.trim().to_string(); } "displayname" => { resp.displayname = text.trim().to_string(); } "getcontentlength" => { resp.content_length = text.trim().parse().ok(); } "getlastmodified" => { resp.last_modified = Some(text.trim().to_string()); } "getcontenttype" => { resp.content_type = Some(text.trim().to_string()); } "getetag" => { resp.etag = Some(normalize_etag(&text)); } "creationdate" => { resp.creation_date = Some(text.trim().to_string()); } "owner" => { resp.owner = Some(text.trim().to_string()); } "group" => { resp.group = Some(text.trim().to_string()); } "status" if in_propstat => { // Check if status is 200 OK if text.contains("200") { status_ok = true; } } _ => { // Store any other properties as generic metadata // This handles vendor-specific properties from any WebDAV server if !text.trim().is_empty() && in_prop { if resp.metadata.is_none() { resp.metadata = Some(serde_json::Value::Object(serde_json::Map::new())); } if let Some(serde_json::Value::Object(ref mut map)) = resp.metadata { // Special handling for known properties match current_element.as_str() { "permissions" | "oc:permissions" => { resp.permissions = Some(text.trim().to_string()); map.insert("permissions_raw".to_string(), serde_json::Value::String(text.trim().to_string())); } "fileid" | "oc:fileid" => { map.insert("file_id".to_string(), serde_json::Value::String(text.trim().to_string())); } "owner-id" | "oc:owner-id" => { map.insert("owner_id".to_string(), serde_json::Value::String(text.trim().to_string())); } "owner-display-name" | "oc:owner-display-name" => { resp.owner_display_name = Some(text.trim().to_string()); map.insert("owner_display_name".to_string(), serde_json::Value::String(text.trim().to_string())); } "has-preview" | "nc:has-preview" => { if let Ok(val) = text.trim().parse::() { map.insert("has_preview".to_string(), serde_json::Value::Bool(val)); } } _ => { // Store any other property as-is map.insert(current_element.clone(), serde_json::Value::String(text.trim().to_string())); } } } } } } } } } Ok(Event::End(e)) => { let name = get_local_name_from_end(&e)?; match name.as_str() { "response" => { if let Some(resp) = current_response.take() { // Only add files (not directories) with valid properties if !resp.is_collection && status_ok && !resp.href.is_empty() { // Extract filename from href let name = if resp.displayname.is_empty() { resp.href .split('/') .last() .unwrap_or("") .to_string() } else { resp.displayname.clone() }; // Decode URL-encoded characters let name = urlencoding::decode(&name) .unwrap_or_else(|_| std::borrow::Cow::Borrowed(&name)) .to_string(); // Parse creation date let created_at = resp.creation_date .as_ref() .and_then(|d| parse_http_date(d)); // Parse permissions (Nextcloud/ownCloud format) let permissions_int = resp.permissions .as_ref() .and_then(|p| { // Nextcloud permissions are a string like "RGDNVW" // Convert to Unix-style octal permissions if p.chars().all(|c| c.is_uppercase()) { // This is Nextcloud format let mut perms = 0u32; if p.contains('R') { perms |= 0o444; } // Read if p.contains('W') { perms |= 0o222; } // Write if p.contains('D') { perms |= 0o111; } // Delete (execute-like) Some(perms) } else { // Try to parse as numeric p.parse().ok() } }); // Use the metadata collected during parsing let metadata = resp.metadata; let file_info = FileInfo { path: resp.href.clone(), name, size: resp.content_length.unwrap_or(0), mime_type: resp.content_type.unwrap_or_else(|| "application/octet-stream".to_string()), last_modified: parse_http_date(&resp.last_modified.unwrap_or_default()), etag: resp.etag.unwrap_or_else(|| format!("\"{}\"", uuid::Uuid::new_v4())), is_directory: false, created_at, permissions: permissions_int, owner: resp.owner.or(resp.owner_display_name), group: resp.group, metadata, }; files.push(file_info); } } in_response = false; status_ok = false; } "propstat" => { in_propstat = false; } "prop" => { in_prop = false; } "resourcetype" => { in_resourcetype = false; } _ => {} } current_element.clear(); } Ok(Event::Eof) => break, Err(e) => return Err(anyhow!("XML parsing error: {}", e)), _ => {} } buf.clear(); } Ok(files) } /// Parse PROPFIND response including both files and directories /// This is used for shallow directory scans where we need to track directory structure pub fn parse_propfind_response_with_directories(xml_text: &str) -> Result> { let mut reader = Reader::from_str(xml_text); reader.config_mut().trim_text(true); let mut files = Vec::new(); let mut current_response: Option = None; let mut current_element = String::new(); let mut in_response = false; let mut in_propstat = false; let mut in_prop = false; let mut in_resourcetype = false; let mut status_ok = false; let mut buf = Vec::new(); loop { match reader.read_event_into(&mut buf) { Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { let name = get_local_name(&e)?; match name.as_str() { "response" => { in_response = true; current_response = Some(PropFindResponse::default()); } "propstat" => { in_propstat = true; } "prop" => { in_prop = true; } "resourcetype" => { in_resourcetype = true; } "collection" if in_resourcetype => { if let Some(ref mut resp) = current_response { resp.is_collection = true; } } _ => { current_element = name; } } } Ok(Event::Text(e)) => { let text = e.unescape()?.to_string(); if in_response && !text.trim().is_empty() { if let Some(ref mut resp) = current_response { match current_element.as_str() { "href" => { resp.href = text.trim().to_string(); } "displayname" => { resp.displayname = text.trim().to_string(); } "getcontentlength" => { resp.content_length = text.trim().parse().ok(); } "getlastmodified" => { resp.last_modified = Some(text.trim().to_string()); } "getcontenttype" => { resp.content_type = Some(text.trim().to_string()); } "getetag" => { resp.etag = Some(normalize_etag(&text)); } "creationdate" => { resp.creation_date = Some(text.trim().to_string()); } "owner" => { resp.owner = Some(text.trim().to_string()); } "group" => { resp.group = Some(text.trim().to_string()); } "status" if in_propstat => { // Check if status is 200 OK if text.contains("200") { status_ok = true; } } _ => { // Store any other properties as generic metadata if !text.trim().is_empty() && in_prop { if resp.metadata.is_none() { resp.metadata = Some(serde_json::Value::Object(serde_json::Map::new())); } if let Some(serde_json::Value::Object(ref mut map)) = resp.metadata { match current_element.as_str() { "permissions" | "oc:permissions" => { resp.permissions = Some(text.trim().to_string()); map.insert("permissions_raw".to_string(), serde_json::Value::String(text.trim().to_string())); } "fileid" | "oc:fileid" => { map.insert("file_id".to_string(), serde_json::Value::String(text.trim().to_string())); } "owner-id" | "oc:owner-id" => { map.insert("owner_id".to_string(), serde_json::Value::String(text.trim().to_string())); } "owner-display-name" | "oc:owner-display-name" => { resp.owner_display_name = Some(text.trim().to_string()); map.insert("owner_display_name".to_string(), serde_json::Value::String(text.trim().to_string())); } "has-preview" | "nc:has-preview" => { if let Ok(val) = text.trim().parse::() { map.insert("has_preview".to_string(), serde_json::Value::Bool(val)); } } _ => { map.insert(current_element.clone(), serde_json::Value::String(text.trim().to_string())); } } } } } } } } } Ok(Event::End(e)) => { let name = get_local_name_from_end(&e)?; match name.as_str() { "response" => { if let Some(resp) = current_response.take() { // Include both files AND directories with valid properties if status_ok && !resp.href.is_empty() { // Extract name from href let name = if resp.displayname.is_empty() { resp.href .split('/') .filter(|s| !s.is_empty()) .last() .unwrap_or("") .to_string() } else { resp.displayname.clone() }; // Decode URL-encoded characters let name = urlencoding::decode(&name) .unwrap_or_else(|_| std::borrow::Cow::Borrowed(&name)) .to_string(); // Parse creation date let created_at = resp.creation_date .as_ref() .and_then(|d| parse_http_date(d)); // Parse permissions let permissions_int = resp.permissions .as_ref() .and_then(|p| { if p.chars().all(|c| c.is_uppercase()) { let mut perms = 0u32; if p.contains('R') { perms |= 0o444; } if p.contains('W') { perms |= 0o222; } if p.contains('D') { perms |= 0o111; } Some(perms) } else { p.parse().ok() } }); let file_info = FileInfo { path: resp.href.clone(), name, size: resp.content_length.unwrap_or(0), mime_type: if resp.is_collection { "".to_string() } else { resp.content_type.unwrap_or_else(|| "application/octet-stream".to_string()) }, last_modified: parse_http_date(&resp.last_modified.unwrap_or_default()), etag: resp.etag.unwrap_or_else(|| format!("\"{}\"", uuid::Uuid::new_v4())), is_directory: resp.is_collection, created_at, permissions: permissions_int, owner: resp.owner.or(resp.owner_display_name), group: resp.group, metadata: resp.metadata, }; files.push(file_info); } } in_response = false; status_ok = false; } "propstat" => { in_propstat = false; } "prop" => { in_prop = false; } "resourcetype" => { in_resourcetype = false; } _ => {} } current_element.clear(); } Ok(Event::Eof) => break, Err(e) => return Err(anyhow!("XML parsing error: {}", e)), _ => {} } buf.clear(); } Ok(files) } fn get_local_name(e: &BytesStart) -> Result { let qname = e.name(); let local = qname.local_name(); let name = str::from_utf8(local.as_ref()) .map_err(|e| anyhow!("Invalid UTF-8 in element name: {}", e))?; Ok(name.to_string()) } fn get_local_name_from_end(e: &quick_xml::events::BytesEnd) -> Result { let qname = e.name(); let local = qname.local_name(); let name = str::from_utf8(local.as_ref()) .map_err(|e| anyhow!("Invalid UTF-8 in element name: {}", e))?; Ok(name.to_string()) } fn parse_http_date(date_str: &str) -> Option> { if date_str.is_empty() { return None; } // Try to parse RFC 2822 format (used by WebDAV) DateTime::parse_from_rfc2822(date_str) .ok() .map(|dt| dt.with_timezone(&Utc)) .or_else(|| { // Try RFC 3339 as fallback DateTime::parse_from_rfc3339(date_str) .ok() .map(|dt| dt.with_timezone(&Utc)) }) .or_else(|| { // Try a custom format as last resort chrono::NaiveDateTime::parse_from_str(date_str, "%a, %d %b %Y %H:%M:%S GMT") .ok() .map(|ndt| DateTime::from_naive_utc_and_offset(ndt, Utc)) }) } /// Normalize ETag by removing quotes and weak ETag prefix /// This ensures consistent ETag comparison across different WebDAV servers /// /// Examples: /// - `"abc123"` → `abc123` /// - `W/"abc123"` → `abc123` /// - `abc123` → `abc123` pub fn normalize_etag(etag: &str) -> String { etag.trim() .trim_start_matches("W/") .trim() .trim_matches('"') .to_string() } #[cfg(test)] mod tests { use super::*; #[test] fn test_parse_simple_propfind() { let xml = r#" /webdav/test.pdf test.pdf 1024 Mon, 01 Jan 2024 12:00:00 GMT application/pdf "abc123" HTTP/1.1 200 OK "#; let files = parse_propfind_response(xml).unwrap(); assert_eq!(files.len(), 1); let file = &files[0]; assert_eq!(file.name, "test.pdf"); assert_eq!(file.size, 1024); assert_eq!(file.mime_type, "application/pdf"); assert_eq!(file.etag, "abc123"); assert!(!file.is_directory); } #[test] fn test_parse_propfind_with_directory() { let xml = r#" /webdav/Documents/ Documents HTTP/1.1 200 OK /webdav/Documents/file.txt file.txt 256 text/plain HTTP/1.1 200 OK "#; let files = parse_propfind_response(xml).unwrap(); assert_eq!(files.len(), 1); // Only the file, not the directory let file = &files[0]; assert_eq!(file.name, "file.txt"); assert_eq!(file.size, 256); } #[test] fn test_parse_nextcloud_response() { let xml = r#" /remote.php/dav/files/admin/Documents/report.pdf report.pdf 2048000 Mon, 15 Jan 2024 14:30:00 GMT application/pdf "pdf123" HTTP/1.1 200 OK "#; let files = parse_propfind_response(xml).unwrap(); assert_eq!(files.len(), 1); let file = &files[0]; assert_eq!(file.name, "report.pdf"); assert_eq!(file.path, "/remote.php/dav/files/admin/Documents/report.pdf"); assert_eq!(file.size, 2048000); assert_eq!(file.etag, "pdf123"); // ETag should be normalized (quotes removed) assert!(file.last_modified.is_some()); } #[test] fn test_parse_url_encoded_filenames() { let xml = r#" /webdav/File%20with%20spaces.pdf File with spaces.pdf 1024 application/pdf HTTP/1.1 200 OK "#; let files = parse_propfind_response(xml).unwrap(); assert_eq!(files.len(), 1); let file = &files[0]; assert_eq!(file.name, "File with spaces.pdf"); } #[test] fn test_empty_response() { let xml = r#" "#; let files = parse_propfind_response(xml).unwrap(); assert_eq!(files.len(), 0); } #[test] fn test_normalize_etag() { // Test various ETag formats that WebDAV servers might return assert_eq!(normalize_etag("abc123"), "abc123"); assert_eq!(normalize_etag("\"abc123\""), "abc123"); assert_eq!(normalize_etag("W/\"abc123\""), "abc123"); assert_eq!(normalize_etag(" \"abc123\" "), "abc123"); assert_eq!(normalize_etag("W/\"abc-123-def\""), "abc-123-def"); assert_eq!(normalize_etag(""), ""); assert_eq!(normalize_etag("\"\""), ""); assert_eq!(normalize_etag("W/\"\""), ""); } }