feat(server): webdav integration nearly done
This commit is contained in:
parent
5b67232266
commit
aa45cd06e0
|
|
@ -2288,6 +2288,16 @@ dependencies = [
|
|||
"bytemuck",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.40"
|
||||
|
|
@ -2434,6 +2444,7 @@ dependencies = [
|
|||
"mime_guess",
|
||||
"notify",
|
||||
"pdf-extract",
|
||||
"quick-xml",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"serde",
|
||||
|
|
@ -2449,6 +2460,7 @@ dependencies = [
|
|||
"tower-http",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"urlencoding",
|
||||
"utoipa",
|
||||
"utoipa-swagger-ui",
|
||||
"uuid",
|
||||
|
|
|
|||
|
|
@ -37,6 +37,8 @@ pdf-extract = { version = "0.7", optional = true }
|
|||
image = { version = "0.24", features = ["png", "jpeg", "tiff", "bmp"], optional = true }
|
||||
imageproc = { version = "0.23", optional = true }
|
||||
reqwest = { version = "0.11", features = ["json", "multipart"] }
|
||||
quick-xml = { version = "0.31", features = ["serialize"] }
|
||||
urlencoding = "2.1"
|
||||
dotenvy = "0.15"
|
||||
hostname = "0.4"
|
||||
walkdir = "2"
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ pub mod seed;
|
|||
pub mod watcher;
|
||||
pub mod webdav_service;
|
||||
pub mod webdav_scheduler;
|
||||
pub mod webdav_xml_parser;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ mod swagger;
|
|||
mod watcher;
|
||||
mod webdav_service;
|
||||
mod webdav_scheduler;
|
||||
mod webdav_xml_parser;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
|
|
|||
|
|
@ -610,3 +610,14 @@ pub struct CreateWebDAVFile {
|
|||
pub sync_status: String,
|
||||
pub sync_error: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FileInfo {
|
||||
pub path: String,
|
||||
pub name: String,
|
||||
pub size: i64,
|
||||
pub mime_type: String,
|
||||
pub last_modified: Option<DateTime<Utc>>,
|
||||
pub etag: String,
|
||||
pub is_directory: bool,
|
||||
}
|
||||
|
|
@ -1,16 +1,16 @@
|
|||
use anyhow::{anyhow, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use reqwest::{Client, Method};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashSet;
|
||||
use std::time::Duration;
|
||||
use tokio::time::sleep;
|
||||
use tracing::{debug, error, info, warn};
|
||||
|
||||
use crate::models::{
|
||||
WebDAVConnectionResult, WebDAVCrawlEstimate, WebDAVFolderInfo,
|
||||
WebDAVSyncStatus, WebDAVTestConnection,
|
||||
FileInfo, WebDAVConnectionResult, WebDAVCrawlEstimate, WebDAVFolderInfo,
|
||||
WebDAVTestConnection,
|
||||
};
|
||||
use crate::webdav_xml_parser::parse_propfind_response;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct WebDAVConfig {
|
||||
|
|
@ -44,66 +44,7 @@ impl Default for RetryConfig {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct WebDAVResponse {
|
||||
#[serde(rename = "d:multistatus")]
|
||||
multistatus: MultiStatus,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct MultiStatus {
|
||||
#[serde(rename = "d:response")]
|
||||
responses: Vec<DAVResponse>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct DAVResponse {
|
||||
#[serde(rename = "d:href")]
|
||||
href: String,
|
||||
#[serde(rename = "d:propstat")]
|
||||
propstat: PropStat,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct PropStat {
|
||||
#[serde(rename = "d:prop")]
|
||||
prop: DAVProperties,
|
||||
#[serde(rename = "d:status")]
|
||||
status: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct DAVProperties {
|
||||
#[serde(rename = "d:displayname")]
|
||||
displayname: Option<String>,
|
||||
#[serde(rename = "d:getcontentlength")]
|
||||
contentlength: Option<String>,
|
||||
#[serde(rename = "d:getlastmodified")]
|
||||
lastmodified: Option<String>,
|
||||
#[serde(rename = "d:getcontenttype")]
|
||||
contenttype: Option<String>,
|
||||
#[serde(rename = "d:getetag")]
|
||||
etag: Option<String>,
|
||||
#[serde(rename = "d:resourcetype")]
|
||||
resourcetype: Option<ResourceType>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct ResourceType {
|
||||
#[serde(rename = "d:collection")]
|
||||
collection: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FileInfo {
|
||||
pub path: String,
|
||||
pub name: String,
|
||||
pub size: i64,
|
||||
pub mime_type: String,
|
||||
pub last_modified: Option<DateTime<Utc>>,
|
||||
pub etag: String,
|
||||
pub is_directory: bool,
|
||||
}
|
||||
|
||||
pub struct WebDAVService {
|
||||
client: Client,
|
||||
|
|
@ -438,75 +379,7 @@ impl WebDAVService {
|
|||
}
|
||||
|
||||
pub fn parse_webdav_response(&self, xml_text: &str) -> Result<Vec<FileInfo>> {
|
||||
// For now, we'll do simple string parsing
|
||||
// In a production system, you'd want to use a proper XML parser like quick-xml
|
||||
let mut files = Vec::new();
|
||||
|
||||
// This is a simplified parser - in practice you'd want proper XML parsing
|
||||
let lines: Vec<&str> = xml_text.lines().collect();
|
||||
let mut current_file: Option<FileInfo> = None;
|
||||
let mut in_response = false;
|
||||
|
||||
for line in lines {
|
||||
let line = line.trim();
|
||||
|
||||
if line.contains("<d:response>") {
|
||||
in_response = true;
|
||||
current_file = Some(FileInfo {
|
||||
path: String::new(),
|
||||
name: String::new(),
|
||||
size: 0,
|
||||
mime_type: String::new(),
|
||||
last_modified: None,
|
||||
etag: String::new(),
|
||||
is_directory: false,
|
||||
});
|
||||
} else if line.contains("</d:response>") && in_response {
|
||||
if let Some(file) = current_file.take() {
|
||||
if !file.path.is_empty() && !file.path.ends_with('/') {
|
||||
files.push(file);
|
||||
}
|
||||
}
|
||||
in_response = false;
|
||||
} else if in_response {
|
||||
if let Some(ref mut file) = current_file {
|
||||
if line.contains("<d:href>") {
|
||||
if let Some(start) = line.find("<d:href>") {
|
||||
if let Some(end) = line.find("</d:href>") {
|
||||
let href = &line[start + 8..end];
|
||||
file.path = href.to_string();
|
||||
file.name = href.split('/').last().unwrap_or("").to_string();
|
||||
}
|
||||
}
|
||||
} else if line.contains("<d:getcontentlength>") {
|
||||
if let Some(start) = line.find("<d:getcontentlength>") {
|
||||
if let Some(end) = line.find("</d:getcontentlength>") {
|
||||
if let Ok(size) = line[start + 20..end].parse::<i64>() {
|
||||
file.size = size;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if line.contains("<d:getcontenttype>") {
|
||||
if let Some(start) = line.find("<d:getcontenttype>") {
|
||||
if let Some(end) = line.find("</d:getcontenttype>") {
|
||||
file.mime_type = line[start + 18..end].to_string();
|
||||
}
|
||||
}
|
||||
} else if line.contains("<d:getetag>") {
|
||||
if let Some(start) = line.find("<d:getetag>") {
|
||||
if let Some(end) = line.find("</d:getetag>") {
|
||||
file.etag = line[start + 11..end].to_string();
|
||||
}
|
||||
}
|
||||
} else if line.contains("<d:collection") {
|
||||
file.is_directory = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!("Parsed {} files from WebDAV response", files.len());
|
||||
Ok(files)
|
||||
parse_propfind_response(xml_text)
|
||||
}
|
||||
|
||||
pub async fn download_file(&self, file_path: &str) -> Result<Vec<u8>> {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,340 @@
|
|||
use anyhow::{anyhow, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use quick_xml::events::{BytesStart, Event};
|
||||
use quick_xml::reader::Reader;
|
||||
use std::str;
|
||||
|
||||
use crate::models::FileInfo;
|
||||
|
||||
/// Scratch record for one `<response>` element while a PROPFIND body is parsed.
///
/// Every field starts out empty (`Default`) and is filled in as the matching
/// child elements are encountered.
#[derive(Default, Debug)]
struct PropFindResponse {
    /// Text of the `href` element.
    href: String,
    /// Text of the `displayname` property; empty when absent.
    displayname: String,
    /// Parsed `getcontentlength` value, if present and numeric.
    content_length: Option<i64>,
    /// Raw `getlastmodified` text, if present.
    last_modified: Option<String>,
    /// Raw `getcontenttype` text, if present.
    content_type: Option<String>,
    /// Raw `getetag` text, if present.
    etag: Option<String>,
    /// Set when a `collection` element was seen inside `resourcetype`.
    is_collection: bool,
}
|
||||
|
||||
pub fn parse_propfind_response(xml_text: &str) -> Result<Vec<FileInfo>> {
|
||||
let mut reader = Reader::from_str(xml_text);
|
||||
reader.trim_text(true);
|
||||
|
||||
let mut files = Vec::new();
|
||||
let mut current_response: Option<PropFindResponse> = None;
|
||||
let mut current_element = String::new();
|
||||
let mut in_response = false;
|
||||
let mut in_propstat = false;
|
||||
let mut in_prop = false;
|
||||
let mut in_resourcetype = false;
|
||||
let mut status_ok = false;
|
||||
|
||||
let mut buf = Vec::new();
|
||||
|
||||
loop {
|
||||
match reader.read_event_into(&mut buf) {
|
||||
Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
|
||||
let name = get_local_name(&e)?;
|
||||
|
||||
match name.as_str() {
|
||||
"response" => {
|
||||
in_response = true;
|
||||
current_response = Some(PropFindResponse::default());
|
||||
}
|
||||
"propstat" => {
|
||||
in_propstat = true;
|
||||
}
|
||||
"prop" => {
|
||||
in_prop = true;
|
||||
}
|
||||
"resourcetype" => {
|
||||
in_resourcetype = true;
|
||||
}
|
||||
"collection" if in_resourcetype => {
|
||||
if let Some(ref mut resp) = current_response {
|
||||
resp.is_collection = true;
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
current_element = name;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Event::Text(e)) => {
|
||||
let text = e.unescape()?.to_string();
|
||||
|
||||
if in_response && !text.trim().is_empty() {
|
||||
if let Some(ref mut resp) = current_response {
|
||||
match current_element.as_str() {
|
||||
"href" => {
|
||||
resp.href = text.trim().to_string();
|
||||
}
|
||||
"displayname" => {
|
||||
resp.displayname = text.trim().to_string();
|
||||
}
|
||||
"getcontentlength" => {
|
||||
resp.content_length = text.trim().parse().ok();
|
||||
}
|
||||
"getlastmodified" => {
|
||||
resp.last_modified = Some(text.trim().to_string());
|
||||
}
|
||||
"getcontenttype" => {
|
||||
resp.content_type = Some(text.trim().to_string());
|
||||
}
|
||||
"getetag" => {
|
||||
resp.etag = Some(text.trim().to_string());
|
||||
}
|
||||
"status" if in_propstat => {
|
||||
// Check if status is 200 OK
|
||||
if text.contains("200") {
|
||||
status_ok = true;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Event::End(e)) => {
|
||||
let name = get_local_name_from_end(&e)?;
|
||||
|
||||
match name.as_str() {
|
||||
"response" => {
|
||||
if let Some(resp) = current_response.take() {
|
||||
// Only add files (not directories) with valid properties
|
||||
if !resp.is_collection && status_ok && !resp.href.is_empty() {
|
||||
// Extract filename from href
|
||||
let name = if resp.displayname.is_empty() {
|
||||
resp.href
|
||||
.split('/')
|
||||
.last()
|
||||
.unwrap_or("")
|
||||
.to_string()
|
||||
} else {
|
||||
resp.displayname.clone()
|
||||
};
|
||||
|
||||
// Decode URL-encoded characters
|
||||
let name = urlencoding::decode(&name)
|
||||
.unwrap_or_else(|_| std::borrow::Cow::Borrowed(&name))
|
||||
.to_string();
|
||||
|
||||
let file_info = FileInfo {
|
||||
path: resp.href.clone(),
|
||||
name,
|
||||
size: resp.content_length.unwrap_or(0),
|
||||
mime_type: resp.content_type.unwrap_or_else(|| "application/octet-stream".to_string()),
|
||||
last_modified: parse_http_date(&resp.last_modified.unwrap_or_default()),
|
||||
etag: resp.etag.unwrap_or_else(|| format!("\"{}\"", uuid::Uuid::new_v4())),
|
||||
is_directory: false,
|
||||
};
|
||||
|
||||
files.push(file_info);
|
||||
}
|
||||
}
|
||||
in_response = false;
|
||||
status_ok = false;
|
||||
}
|
||||
"propstat" => {
|
||||
in_propstat = false;
|
||||
}
|
||||
"prop" => {
|
||||
in_prop = false;
|
||||
}
|
||||
"resourcetype" => {
|
||||
in_resourcetype = false;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
current_element.clear();
|
||||
}
|
||||
Ok(Event::Eof) => break,
|
||||
Err(e) => return Err(anyhow!("XML parsing error: {}", e)),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
buf.clear();
|
||||
}
|
||||
|
||||
Ok(files)
|
||||
}
|
||||
|
||||
fn get_local_name(e: &BytesStart) -> Result<String> {
|
||||
let qname = e.name();
|
||||
let local = qname.local_name();
|
||||
let name = str::from_utf8(local.as_ref())
|
||||
.map_err(|e| anyhow!("Invalid UTF-8 in element name: {}", e))?;
|
||||
Ok(name.to_string())
|
||||
}
|
||||
|
||||
fn get_local_name_from_end(e: &quick_xml::events::BytesEnd) -> Result<String> {
|
||||
let qname = e.name();
|
||||
let local = qname.local_name();
|
||||
let name = str::from_utf8(local.as_ref())
|
||||
.map_err(|e| anyhow!("Invalid UTF-8 in element name: {}", e))?;
|
||||
Ok(name.to_string())
|
||||
}
|
||||
|
||||
fn parse_http_date(date_str: &str) -> Option<DateTime<Utc>> {
|
||||
if date_str.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Try to parse RFC 2822 format (used by WebDAV)
|
||||
DateTime::parse_from_rfc2822(date_str)
|
||||
.ok()
|
||||
.map(|dt| dt.with_timezone(&Utc))
|
||||
.or_else(|| {
|
||||
// Try RFC 3339 as fallback
|
||||
DateTime::parse_from_rfc3339(date_str)
|
||||
.ok()
|
||||
.map(|dt| dt.with_timezone(&Utc))
|
||||
})
|
||||
.or_else(|| {
|
||||
// Try a custom format as last resort
|
||||
chrono::NaiveDateTime::parse_from_str(date_str, "%a, %d %b %Y %H:%M:%S GMT")
|
||||
.ok()
|
||||
.map(|ndt| DateTime::from_naive_utc_and_offset(ndt, Utc))
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the parser and panics if it reports an error.
    fn parse_ok(xml: &str) -> Vec<FileInfo> {
        parse_propfind_response(xml).unwrap()
    }

    /// A single file response with every common property populated.
    #[test]
    fn test_parse_simple_propfind() {
        let xml = r#"<?xml version="1.0"?>
<d:multistatus xmlns:d="DAV:">
    <d:response>
        <d:href>/webdav/test.pdf</d:href>
        <d:propstat>
            <d:prop>
                <d:displayname>test.pdf</d:displayname>
                <d:getcontentlength>1024</d:getcontentlength>
                <d:getlastmodified>Mon, 01 Jan 2024 12:00:00 GMT</d:getlastmodified>
                <d:getcontenttype>application/pdf</d:getcontenttype>
                <d:getetag>"abc123"</d:getetag>
                <d:resourcetype/>
            </d:prop>
            <d:status>HTTP/1.1 200 OK</d:status>
        </d:propstat>
    </d:response>
</d:multistatus>"#;

        let parsed = parse_ok(xml);
        assert_eq!(parsed.len(), 1);

        let entry = &parsed[0];
        assert_eq!(entry.name, "test.pdf");
        assert_eq!(entry.size, 1024);
        assert_eq!(entry.mime_type, "application/pdf");
        assert_eq!(entry.etag, "\"abc123\"");
        assert!(!entry.is_directory);
    }

    /// Collections are dropped; only the contained file is returned.
    #[test]
    fn test_parse_propfind_with_directory() {
        let xml = r#"<?xml version="1.0"?>
<d:multistatus xmlns:d="DAV:">
    <d:response>
        <d:href>/webdav/Documents/</d:href>
        <d:propstat>
            <d:prop>
                <d:displayname>Documents</d:displayname>
                <d:resourcetype>
                    <d:collection/>
                </d:resourcetype>
            </d:prop>
            <d:status>HTTP/1.1 200 OK</d:status>
        </d:propstat>
    </d:response>
    <d:response>
        <d:href>/webdav/Documents/file.txt</d:href>
        <d:propstat>
            <d:prop>
                <d:displayname>file.txt</d:displayname>
                <d:getcontentlength>256</d:getcontentlength>
                <d:getcontenttype>text/plain</d:getcontenttype>
                <d:resourcetype/>
            </d:prop>
            <d:status>HTTP/1.1 200 OK</d:status>
        </d:propstat>
    </d:response>
</d:multistatus>"#;

        let parsed = parse_ok(xml);
        assert_eq!(parsed.len(), 1); // Only the file, not the directory

        let entry = &parsed[0];
        assert_eq!(entry.name, "file.txt");
        assert_eq!(entry.size, 256);
    }

    /// A Nextcloud-style body declaring additional namespaces.
    #[test]
    fn test_parse_nextcloud_response() {
        let xml = r#"<?xml version="1.0"?>
<d:multistatus xmlns:d="DAV:" xmlns:s="http://sabredav.org/ns" xmlns:oc="http://owncloud.org/ns">
    <d:response>
        <d:href>/remote.php/dav/files/admin/Documents/report.pdf</d:href>
        <d:propstat>
            <d:prop>
                <d:displayname>report.pdf</d:displayname>
                <d:getcontentlength>2048000</d:getcontentlength>
                <d:getlastmodified>Mon, 15 Jan 2024 14:30:00 GMT</d:getlastmodified>
                <d:getcontenttype>application/pdf</d:getcontenttype>
                <d:getetag>"pdf123"</d:getetag>
                <d:resourcetype/>
            </d:prop>
            <d:status>HTTP/1.1 200 OK</d:status>
        </d:propstat>
    </d:response>
</d:multistatus>"#;

        let parsed = parse_ok(xml);
        assert_eq!(parsed.len(), 1);

        let entry = &parsed[0];
        assert_eq!(entry.name, "report.pdf");
        assert_eq!(entry.path, "/remote.php/dav/files/admin/Documents/report.pdf");
        assert_eq!(entry.size, 2048000);
        assert!(entry.last_modified.is_some());
    }

    /// The href is percent-encoded while the display name is plain text.
    #[test]
    fn test_parse_url_encoded_filenames() {
        let xml = r#"<?xml version="1.0"?>
<d:multistatus xmlns:d="DAV:">
    <d:response>
        <d:href>/webdav/File%20with%20spaces.pdf</d:href>
        <d:propstat>
            <d:prop>
                <d:displayname>File with spaces.pdf</d:displayname>
                <d:getcontentlength>1024</d:getcontentlength>
                <d:getcontenttype>application/pdf</d:getcontenttype>
                <d:resourcetype/>
            </d:prop>
            <d:status>HTTP/1.1 200 OK</d:status>
        </d:propstat>
    </d:response>
</d:multistatus>"#;

        let parsed = parse_ok(xml);
        assert_eq!(parsed.len(), 1);

        let entry = &parsed[0];
        assert_eq!(entry.name, "File with spaces.pdf");
    }

    /// A multistatus element without responses yields no files.
    #[test]
    fn test_empty_response() {
        let xml = r#"<?xml version="1.0"?>
<d:multistatus xmlns:d="DAV:">
</d:multistatus>"#;

        let parsed = parse_ok(xml);
        assert_eq!(parsed.len(), 0);
    }
}
|
||||
Loading…
Reference in New Issue