feat(webdav): add deep scan button and fix unit tests

perf3ct 2025-07-03 04:24:26 +00:00
parent 4dbd89b81b
commit 915fe92993
7 changed files with 974 additions and 99 deletions

View File

@ -68,6 +68,7 @@ import {
TextSnippet as DocumentIcon,
Visibility as OcrIcon,
Block as BlockIcon,
FindInPage as DeepScanIcon,
} from '@mui/icons-material';
import { useNavigate } from 'react-router-dom';
import api, { queueService } from '../services/api';
@ -151,6 +152,7 @@ const SourcesPage: React.FC = () => {
const [testingConnection, setTestingConnection] = useState(false);
const [syncingSource, setSyncingSource] = useState<string | null>(null);
const [stoppingSync, setStoppingSync] = useState<string | null>(null);
const [deepScanning, setDeepScanning] = useState<string | null>(null);
const [autoRefreshing, setAutoRefreshing] = useState(false);
useEffect(() => {
@ -488,6 +490,31 @@ const SourcesPage: React.FC = () => {
}
};
const handleDeepScan = async (sourceId: string) => {
setDeepScanning(sourceId);
try {
const response = await api.post(`/sources/${sourceId}/deep-scan`);
if (response.data.success) {
showSnackbar(response.data.message || 'Deep scan started successfully', 'success');
setTimeout(loadSources, 1000);
} else {
showSnackbar(response.data.message || 'Failed to start deep scan', 'error');
}
} catch (error: any) {
console.error('Failed to trigger deep scan:', error);
if (error.response?.status === 409) {
showSnackbar('Source is already syncing', 'warning');
} else if (error.response?.status === 404) {
showSnackbar('Source not found', 'error');
} else {
const message = error.response?.data?.message || 'Failed to start deep scan';
showSnackbar(message, 'error');
}
} finally {
setDeepScanning(null);
}
};
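The handler above only inspects `response.data.success` and `response.data.message`, so the backend's JSON body is the real contract. A hedged sketch of that shape in Rust (field names are taken from the handler later in this commit, which builds the JSON inline with `serde_json::json!`; the named struct itself is hypothetical):
use serde::Serialize;
// Hypothetical named form of the inline json! bodies the deep-scan
// handler returns; `error` only appears on failure responses.
#[derive(Serialize)]
struct DeepScanResponse {
    success: bool,
    message: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<String>,
}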
// Utility functions for folder management
const addFolder = () => {
if (newFolder && !formData.watch_folders.includes(newFolder)) {
@ -837,6 +864,25 @@ const SourcesPage: React.FC = () => {
</span>
</Tooltip>
)}
<Tooltip title="Deep Scan">
<span>
<IconButton
onClick={() => handleDeepScan(source.id)}
disabled={deepScanning === source.id || source.status === 'syncing' || !source.enabled}
sx={{
bgcolor: alpha(theme.palette.secondary.main, 0.1),
'&:hover': { bgcolor: alpha(theme.palette.secondary.main, 0.2) },
color: theme.palette.secondary.main,
}}
>
{deepScanning === source.id ? (
<CircularProgress size={20} />
) : (
<DeepScanIcon />
)}
</IconButton>
</span>
</Tooltip>
<Tooltip title="Edit Source">
<IconButton
onClick={() => handleEditSource(source)}

View File

@ -339,4 +339,16 @@ impl Database {
Ok(directories)
}
/// Clear all WebDAV directory tracking for a user (used for deep scan)
pub async fn clear_webdav_directories(&self, user_id: Uuid) -> Result<i64> {
let result = sqlx::query(
r#"DELETE FROM webdav_directories WHERE user_id = $1"#
)
.bind(user_id)
.execute(&self.pool)
.await?;
Ok(result.rows_affected() as i64)
}
}
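The deep-scan handler later in this commit calls this method to defeat the ETag cache: with the tracking rows gone, every directory looks new on the next pass and gets rescanned. A hedged call-site sketch, assuming `db`, `user_id`, and the tracing macros are in scope:
// rows_affected distinguishes a real reset from a no-op: 0 just means
// nothing was cached yet, which is expected on a first-ever deep scan.
match db.clear_webdav_directories(user_id).await {
    Ok(0) => info!("No WebDAV directory cache to clear"),
    Ok(n) => info!("Cleared {} cached WebDAV directories", n),
    Err(e) => error!("Failed to clear WebDAV directory cache: {}", e),
}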

View File

@ -1120,7 +1120,7 @@ impl From<Source> for SourceResponse {
}
}
#[derive(Debug, Serialize, Deserialize, ToSchema)]
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct WebDAVSourceConfig {
pub server_url: String,
pub username: String,

View File

@ -8,6 +8,7 @@ use axum::{
use std::sync::Arc;
use uuid::Uuid;
use tracing::{error, info};
use anyhow::Result;
use crate::{
auth::AuthUser,
@ -21,6 +22,7 @@ pub fn router() -> Router<Arc<AppState>> {
.route("/{id}", get(get_source).put(update_source).delete(delete_source))
.route("/{id}/sync", post(trigger_sync))
.route("/{id}/sync/stop", post(stop_sync))
.route("/{id}/deep-scan", post(trigger_deep_scan))
.route("/{id}/test", post(test_connection))
.route("/{id}/estimate", post(estimate_crawl))
.route("/estimate", post(estimate_crawl_with_config))
@ -389,6 +391,241 @@ async fn trigger_sync(
Ok(StatusCode::OK)
}
#[utoipa::path(
post,
path = "/api/sources/{id}/deep-scan",
tag = "sources",
security(
("bearer_auth" = [])
),
params(
("id" = Uuid, Path, description = "Source ID")
),
responses(
(status = 200, description = "Deep scan started successfully"),
(status = 401, description = "Unauthorized"),
(status = 404, description = "Source not found"),
(status = 409, description = "Source is already syncing"),
(status = 500, description = "Internal server error")
)
)]
async fn trigger_deep_scan(
auth_user: AuthUser,
Path(source_id): Path<Uuid>,
State(state): State<Arc<AppState>>,
) -> Result<Json<serde_json::Value>, StatusCode> {
info!("Starting deep scan for source {} by user {}", source_id, auth_user.user.username);
let source = state
.db
.get_source(auth_user.user.id, source_id)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.ok_or(StatusCode::NOT_FOUND)?;
// Check if source is already syncing
if matches!(source.status, crate::models::SourceStatus::Syncing) {
return Ok(Json(serde_json::json!({
"success": false,
"error": "source_already_syncing",
"message": "Source is already syncing. Please wait for the current sync to complete before starting a deep scan."
})));
}
match source.source_type {
crate::models::SourceType::WebDAV => {
// Handle WebDAV deep scan
let config: crate::models::WebDAVSourceConfig = serde_json::from_value(source.config)
.map_err(|e| {
error!("Failed to parse WebDAV config for source {}: {}", source_id, e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
// Create WebDAV service
let webdav_config = crate::services::webdav_service::WebDAVConfig {
server_url: config.server_url.clone(),
username: config.username.clone(),
password: config.password.clone(),
watch_folders: config.watch_folders.clone(),
file_extensions: config.file_extensions.clone(),
timeout_seconds: 600, // 10 minutes for deep scan
server_type: config.server_type.clone(),
};
let webdav_service = crate::services::webdav_service::WebDAVService::new(webdav_config.clone())
.map_err(|e| {
error!("Failed to create WebDAV service for deep scan: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
// Update source status to syncing
state
.db
.update_source_status(
source_id,
crate::models::SourceStatus::Syncing,
Some("Deep scan in progress".to_string()),
)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
// Start deep scan in background
let state_clone = state.clone();
let user_id = auth_user.user.id;
let source_name = source.name.clone();
let source_id_clone = source_id;
let config_clone = config.clone();
tokio::spawn(async move {
let start_time = chrono::Utc::now();
// Clear existing directory tracking to force full rescan
if let Err(e) = state_clone.db.clear_webdav_directories(user_id).await {
error!("Failed to clear WebDAV directories for deep scan: {}", e);
}
// Use traditional discovery for deep scan to avoid borrowing issues
let mut all_discovered_files = Vec::new();
for folder in &config_clone.watch_folders {
match webdav_service.discover_files_in_folder(folder).await {
Ok(mut folder_files) => {
info!("Deep scan discovered {} files in folder {}", folder_files.len(), folder);
all_discovered_files.append(&mut folder_files);
}
Err(e) => {
error!("Deep scan failed to discover files in folder {}: {}", folder, e);
// Continue with other folders
}
}
}
if !all_discovered_files.is_empty() {
info!("Deep scan discovery completed for source {}: {} files found", source_id_clone, all_discovered_files.len());
// Filter files by extensions and process them
let files_to_process: Vec<_> = all_discovered_files.into_iter()
.filter(|file_info| {
if file_info.is_directory {
return false;
}
let file_extension = std::path::Path::new(&file_info.name)
.extension()
.and_then(|ext| ext.to_str())
.unwrap_or("")
.to_lowercase();
config_clone.file_extensions.contains(&file_extension)
})
.collect();
info!("Deep scan will process {} files for source {}", files_to_process.len(), source_id_clone);
// Process files using the existing sync mechanism
match crate::routes::webdav::webdav_sync::process_files_for_deep_scan(
state_clone.clone(),
user_id,
&webdav_service,
&files_to_process,
true, // enable background OCR
Some(source_id_clone)
).await {
Ok(files_processed) => {
let duration = chrono::Utc::now() - start_time;
info!("Deep scan completed for source {}: {} files processed in {:?}",
source_id_clone, files_processed, duration);
// Update source status to idle
if let Err(e) = state_clone.db.update_source_status(
source_id_clone,
crate::models::SourceStatus::Idle,
Some(format!("Deep scan completed: {} files processed", files_processed)),
).await {
error!("Failed to update source status after deep scan: {}", e);
}
// Send success notification
let notification = crate::models::CreateNotification {
notification_type: "success".to_string(),
title: "Deep Scan Completed".to_string(),
message: format!(
"Deep scan of {} completed successfully. {} files processed in {:.1} minutes.",
source_name,
files_processed,
duration.num_seconds() as f64 / 60.0
),
action_url: Some("/documents".to_string()),
metadata: Some(serde_json::json!({
"source_id": source_id_clone,
"scan_type": "deep_scan",
"files_processed": files_processed,
"duration_seconds": duration.num_seconds()
})),
};
if let Err(e) = state_clone.db.create_notification(user_id, &notification).await {
error!("Failed to create deep scan success notification: {}", e);
}
}
Err(e) => {
error!("Deep scan file processing failed for source {}: {}", source_id_clone, e);
// Update source status to error
if let Err(e2) = state_clone.db.update_source_status(
source_id_clone,
crate::models::SourceStatus::Error,
Some(format!("Deep scan failed: {}", e)),
).await {
error!("Failed to update source status after deep scan error: {}", e2);
}
// Send error notification
let notification = crate::models::CreateNotification {
notification_type: "error".to_string(),
title: "Deep Scan Failed".to_string(),
message: format!("Deep scan of {} failed: {}", source_name, e),
action_url: Some("/sources".to_string()),
metadata: Some(serde_json::json!({
"source_id": source_id_clone,
"scan_type": "deep_scan",
"error": e.to_string()
})),
};
if let Err(e) = state_clone.db.create_notification(user_id, &notification).await {
error!("Failed to create deep scan error notification: {}", e);
}
}
}
} else {
info!("Deep scan found no files for source {}", source_id_clone);
// Update source status to idle even if no files found
if let Err(e) = state_clone.db.update_source_status(
source_id_clone,
crate::models::SourceStatus::Idle,
Some("Deep scan completed: no files found".to_string()),
).await {
error!("Failed to update source status after empty deep scan: {}", e);
}
}
});
Ok(Json(serde_json::json!({
"success": true,
"message": format!("Deep scan started for source '{}'. This will perform a complete rescan of all configured folders.", source.name)
})))
}
_ => {
error!("Deep scan not supported for source type: {:?}", source.source_type);
Ok(Json(serde_json::json!({
"success": false,
"error": "unsupported_source_type",
"message": "Deep scan is currently only supported for WebDAV sources"
})))
}
}
}
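For reference, a minimal client sketch of this endpoint, assuming a readur instance at `base_url` and a valid bearer `token` (both hypothetical here). Note that the already-syncing case comes back as HTTP 200 with `success: false` in the body, so checking the status code alone is not enough:
use reqwest::Client;
// Hedged sketch: trigger a deep scan and inspect the JSON body.
async fn start_deep_scan(base_url: &str, token: &str, source_id: uuid::Uuid) -> anyhow::Result<()> {
    let body: serde_json::Value = Client::new()
        .post(format!("{}/api/sources/{}/deep-scan", base_url, source_id))
        .bearer_auth(token)
        .send()
        .await?
        .json()
        .await?;
    if body["success"].as_bool() == Some(true) {
        println!("{}", body["message"]);
    } else {
        eprintln!("Deep scan refused: {}", body["message"]);
    }
    Ok(())
}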
#[utoipa::path(
post,
path = "/api/sources/{id}/sync/stop",

View File

@ -379,3 +379,68 @@ async fn process_single_file(
Ok(true) // Successfully processed
}
/// Process files for deep scan - similar to regular sync but forces processing
pub async fn process_files_for_deep_scan(
state: Arc<AppState>,
user_id: uuid::Uuid,
webdav_service: &WebDAVService,
files_to_process: &[crate::models::FileInfo],
enable_background_ocr: bool,
webdav_source_id: Option<uuid::Uuid>,
) -> Result<usize, anyhow::Error> {
info!("Processing {} files for deep scan", files_to_process.len());
let concurrent_limit = 5; // Max 5 concurrent downloads
let semaphore = Arc::new(Semaphore::new(concurrent_limit));
let mut files_processed = 0;
let mut sync_errors = Vec::new();
// Create futures for processing each file concurrently
let mut file_futures = FuturesUnordered::new();
for file_info in files_to_process.iter() {
let state_clone = state.clone();
let webdav_service_clone = webdav_service.clone();
let file_info_clone = file_info.clone();
let semaphore_clone = semaphore.clone();
// Create a future for processing this file
let future = async move {
process_single_file(
state_clone,
user_id,
&webdav_service_clone,
&file_info_clone,
enable_background_ocr,
semaphore_clone,
webdav_source_id,
).await
};
file_futures.push(future);
}
// Process files concurrently and collect results
while let Some(result) = file_futures.next().await {
match result {
Ok(processed) => {
if processed {
files_processed += 1;
info!("Deep scan: Successfully processed file ({} completed)", files_processed);
}
}
Err(error) => {
error!("Deep scan file processing error: {}", error);
sync_errors.push(error);
}
}
}
if !sync_errors.is_empty() {
warn!("Deep scan completed with {} errors: {:?}", sync_errors.len(), sync_errors);
}
info!("Deep scan file processing completed: {} files processed successfully", files_processed);
Ok(files_processed)
}

View File

@ -4,6 +4,8 @@ use reqwest::{Client, Method, Url};
use std::collections::HashSet;
use std::time::Duration;
use tokio::time::sleep;
use tokio::sync::Semaphore;
use futures_util::stream::{self, StreamExt};
use tracing::{debug, error, info, warn};
use crate::models::{
@ -30,6 +32,14 @@ pub struct RetryConfig {
pub max_delay_ms: u64,
pub backoff_multiplier: f64,
pub timeout_seconds: u64,
pub rate_limit_backoff_ms: u64, // Additional backoff for 429 responses
}
#[derive(Debug, Clone)]
pub struct ConcurrencyConfig {
pub max_concurrent_scans: usize,
pub max_concurrent_downloads: usize,
pub adaptive_rate_limiting: bool,
}
impl Default for RetryConfig {
@ -40,6 +50,17 @@ impl Default for RetryConfig {
max_delay_ms: 30000, // 30 seconds
backoff_multiplier: 2.0,
timeout_seconds: 300, // 5 minutes total timeout for crawl operations
rate_limit_backoff_ms: 5000, // 5 seconds extra for rate limits
}
}
}
impl Default for ConcurrencyConfig {
fn default() -> Self {
Self {
max_concurrent_scans: 10,
max_concurrent_downloads: 5,
adaptive_rate_limiting: true,
}
}
}
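These defaults can be overridden through the new `new_with_configs` constructor introduced below; a hedged sketch, assuming a `WebDAVConfig` named `config` is already built and a more conservative profile is wanted for a rate-limited server:
let service = WebDAVService::new_with_configs(
    config,
    RetryConfig::default(),
    ConcurrencyConfig {
        max_concurrent_scans: 4,      // fewer PROPFINDs in flight
        max_concurrent_downloads: 2,  // gentler on slow servers
        adaptive_rate_limiting: true,
    },
)?;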
@ -52,14 +73,19 @@ pub struct WebDAVService {
config: WebDAVConfig,
base_webdav_url: String,
retry_config: RetryConfig,
concurrency_config: ConcurrencyConfig,
}
impl WebDAVService {
pub fn new(config: WebDAVConfig) -> Result<Self> {
Self::new_with_retry(config, RetryConfig::default())
Self::new_with_configs(config, RetryConfig::default(), ConcurrencyConfig::default())
}
pub fn new_with_retry(config: WebDAVConfig, retry_config: RetryConfig) -> Result<Self> {
Self::new_with_configs(config, retry_config, ConcurrencyConfig::default())
}
pub fn new_with_configs(config: WebDAVConfig, retry_config: RetryConfig, concurrency_config: ConcurrencyConfig) -> Result<Self> {
let client = Client::builder()
.timeout(Duration::from_secs(config.timeout_seconds))
.build()?;
@ -103,7 +129,7 @@ impl WebDAVService {
config.server_url.trim_end_matches('/'),
config.username
);
info!("🔗 Constructed Nextcloud/ownCloud WebDAV URL: {}", url);
debug!("🔗 Constructed Nextcloud/ownCloud WebDAV URL: {}", url);
url
},
_ => {
@ -111,7 +137,7 @@ impl WebDAVService {
"{}/webdav",
config.server_url.trim_end_matches('/')
);
info!("🔗 Constructed generic WebDAV URL: {}", url);
debug!("🔗 Constructed generic WebDAV URL: {}", url);
url
},
};
@ -121,6 +147,7 @@ impl WebDAVService {
config,
base_webdav_url,
retry_config,
concurrency_config,
})
}
@ -154,10 +181,19 @@ impl WebDAVService {
return Err(err);
}
warn!("{} failed (attempt {}), retrying in {}ms: {}",
operation_name, attempt, delay, err);
// Apply adaptive backoff for rate limiting
let actual_delay = if Self::is_rate_limit_error(&err) && self.concurrency_config.adaptive_rate_limiting {
let rate_limit_delay = delay + self.retry_config.rate_limit_backoff_ms;
warn!("{} rate limited (attempt {}), retrying in {}ms with extra backoff: {}",
operation_name, attempt, rate_limit_delay, err);
rate_limit_delay
} else {
warn!("{} failed (attempt {}), retrying in {}ms: {}",
operation_name, attempt, delay, err);
delay
};
sleep(Duration::from_millis(delay)).await;
sleep(Duration::from_millis(actual_delay)).await;
// Calculate next delay with exponential backoff
delay = ((delay as f64 * self.retry_config.backoff_multiplier) as u64)
@ -175,7 +211,13 @@ impl WebDAVService {
|| reqwest_error.is_connect()
|| reqwest_error.is_request()
|| reqwest_error.status()
.map(|s| s.is_server_error() || s == 429) // 429 = Too Many Requests
.map(|s| {
s.is_server_error() // 5xx errors (including server restart scenarios)
|| s == 429 // Too Many Requests
|| s == 502 // Bad Gateway (server restarting)
|| s == 503 // Service Unavailable (server restarting/overloaded)
|| s == 504 // Gateway Timeout (server slow to respond)
})
.unwrap_or(true);
}
@ -185,6 +227,44 @@ impl WebDAVService {
|| error_str.contains("connection")
|| error_str.contains("network")
|| error_str.contains("temporary")
|| error_str.contains("rate limit")
|| error_str.contains("too many requests")
|| error_str.contains("connection reset")
|| error_str.contains("connection aborted")
|| error_str.contains("server unavailable")
|| error_str.contains("bad gateway")
|| error_str.contains("service unavailable")
}
fn is_rate_limit_error(error: &anyhow::Error) -> bool {
if let Some(reqwest_error) = error.downcast_ref::<reqwest::Error>() {
return reqwest_error.status()
.map(|s| s == 429)
.unwrap_or(false);
}
let error_str = error.to_string().to_lowercase();
error_str.contains("rate limit") || error_str.contains("too many requests")
}
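To make the adaptive backoff concrete, here is a worked example of the schedule. The initial 1000 ms delay is an assumption (it is set outside this diff); the multiplier, cap, and 429 surcharge come from the Default impls above:
// Worked example of the retry schedule under the default config.
let (multiplier, cap_ms, rate_limit_extra_ms) = (2.0_f64, 30_000_u64, 5_000_u64);
let mut delay: u64 = 1_000; // assumed initial delay
for attempt in 1..=4 {
    println!(
        "attempt {attempt}: plain retry waits {delay} ms, a 429 waits {} ms",
        delay + rate_limit_extra_ms
    );
    // Exponential growth: 1000, 2000, 4000, 8000, ... capped at 30000 ms.
    delay = ((delay as f64 * multiplier) as u64).min(cap_ms);
}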
fn is_server_restart_error(&self, error: &anyhow::Error) -> bool {
if let Some(reqwest_error) = error.downcast_ref::<reqwest::Error>() {
if let Some(status) = reqwest_error.status() {
return status == 502 // Bad Gateway
|| status == 503 // Service Unavailable
|| status == 504; // Gateway Timeout
}
// Network-level connection issues often indicate server restart
return reqwest_error.is_connect() || reqwest_error.is_timeout();
}
let error_str = error.to_string().to_lowercase();
error_str.contains("connection reset")
|| error_str.contains("connection aborted")
|| error_str.contains("bad gateway")
|| error_str.contains("service unavailable")
|| error_str.contains("server unreachable")
}
pub async fn test_connection(&self, test_config: WebDAVTestConnection) -> Result<WebDAVConnectionResult> {
@ -243,7 +323,7 @@ impl WebDAVService {
),
};
info!("🔗 Constructed test URL: {}", test_url);
debug!("🔗 Constructed test URL: {}", test_url);
let resp = self.client
.request(Method::from_bytes(b"PROPFIND").unwrap(), &test_url)
@ -333,7 +413,7 @@ impl WebDAVService {
.collect();
for folder_path in folders {
info!("Analyzing folder: {}", folder_path);
debug!("Analyzing folder: {}", folder_path);
match self.analyze_folder(folder_path, &supported_extensions).await {
Ok(folder_info) => {
@ -418,16 +498,16 @@ impl WebDAVService {
/// Optimized discovery that checks directory ETag first to avoid unnecessary deep scans
pub async fn discover_files_in_folder_optimized(&self, folder_path: &str, user_id: uuid::Uuid, state: &crate::AppState) -> Result<Vec<FileInfo>> {
info!("🔍 Starting optimized discovery for folder: {}", folder_path);
debug!("🔍 Starting optimized discovery for folder: {}", folder_path);
// Check if we should use smart scanning
let use_smart_scan = match self.config.server_type.as_deref() {
Some("nextcloud") | Some("owncloud") => {
info!("🚀 Using smart scanning for Nextcloud/ownCloud server");
debug!("🚀 Using smart scanning for Nextcloud/ownCloud server");
true
}
_ => {
info!("📁 Using traditional scanning for generic WebDAV server");
debug!("📁 Using traditional scanning for generic WebDAV server");
false
}
};
@ -461,7 +541,7 @@ impl WebDAVService {
match state.db.get_webdav_directory(user_id, folder_path).await {
Ok(Some(stored_dir)) => {
if stored_dir.directory_etag == current_dir_etag {
info!("✅ Directory {} unchanged (ETag: {}), checking subdirectories individually", folder_path, current_dir_etag);
debug!("✅ Directory {} unchanged (ETag: {}), checking subdirectories individually", folder_path, current_dir_etag);
// Update last_scanned_at to show we checked
let update = crate::models::UpdateWebDAVDirectory {
@ -479,12 +559,12 @@ impl WebDAVService {
let changed_files = self.check_subdirectories_for_changes(folder_path, user_id, state).await?;
return Ok(changed_files);
} else {
info!("🔄 Directory {} changed (old ETag: {}, new ETag: {}), performing deep scan",
debug!("🔄 Directory {} changed (old ETag: {}, new ETag: {}), performing deep scan",
folder_path, stored_dir.directory_etag, current_dir_etag);
}
}
Ok(None) => {
info!("🆕 New directory {}, performing initial scan", folder_path);
debug!("🆕 New directory {}, performing initial scan", folder_path);
}
Err(e) => {
warn!("Database error checking directory {}: {}, proceeding with scan", folder_path, e);
@ -509,7 +589,7 @@ impl WebDAVService {
if let Err(e) = state.db.create_or_update_webdav_directory(&directory_record).await {
error!("Failed to update directory tracking for {}: {}", folder_path, e);
} else {
info!("📊 Updated directory tracking: {} files, {} bytes, ETag: {}",
debug!("📊 Updated directory tracking: {} files, {} bytes, ETag: {}",
file_count, total_size_bytes, current_dir_etag);
}
@ -549,7 +629,7 @@ impl WebDAVService {
}
}
info!("🗂️ Found {} unique directories at all levels", all_directories.len());
debug!("🗂️ Found {} unique directories at all levels", all_directories.len());
// Step 2: Create a mapping of directory -> ETag from the files list
let mut directory_etags: HashMap<String, String> = HashMap::new();
@ -608,7 +688,7 @@ impl WebDAVService {
}
}
info!("✅ Completed tracking {} directories at all depth levels", all_directories.len());
debug!("✅ Completed tracking {} directories at all depth levels", all_directories.len());
}
/// Check if a path is a direct child of a directory (not nested deeper)
@ -643,12 +723,12 @@ impl WebDAVService {
/// Perform targeted re-scanning of only specific paths that have changed
pub async fn discover_files_targeted_rescan(&self, paths_to_scan: &[String], user_id: uuid::Uuid, state: &crate::AppState) -> Result<Vec<FileInfo>> {
info!("🎯 Starting targeted re-scan for {} specific paths", paths_to_scan.len());
debug!("🎯 Starting targeted re-scan for {} specific paths", paths_to_scan.len());
let mut all_files = Vec::new();
for path in paths_to_scan {
info!("🔍 Targeted scan of: {}", path);
debug!("🔍 Targeted scan of: {}", path);
// Check if this specific path has changed
match self.check_directory_etag(path).await {
@ -657,7 +737,7 @@ impl WebDAVService {
let needs_scan = match state.db.get_webdav_directory(user_id, path).await {
Ok(Some(stored_dir)) => {
if stored_dir.directory_etag != current_etag {
info!("🔄 Path {} changed (old: {}, new: {})", path, stored_dir.directory_etag, current_etag);
debug!("🔄 Path {} changed (old: {}, new: {})", path, stored_dir.directory_etag, current_etag);
true
} else {
debug!("✅ Path {} unchanged (ETag: {})", path, current_etag);
@ -665,7 +745,7 @@ impl WebDAVService {
}
}
Ok(None) => {
info!("🆕 New path {} detected", path);
debug!("🆕 New path {} detected", path);
true
}
Err(e) => {
@ -678,7 +758,7 @@ impl WebDAVService {
// Use shallow scan for this specific directory only
match self.discover_files_in_folder_shallow(path).await {
Ok(mut path_files) => {
info!("📂 Found {} files in changed path {}", path_files.len(), path);
debug!("📂 Found {} files in changed path {}", path_files.len(), path);
all_files.append(&mut path_files);
// Update tracking for this specific path
@ -696,7 +776,7 @@ impl WebDAVService {
}
}
info!("🎯 Targeted re-scan completed: {} total files found", all_files.len());
debug!("🎯 Targeted re-scan completed: {} total files found", all_files.len());
Ok(all_files)
}
@ -760,7 +840,7 @@ impl WebDAVService {
match state.db.create_or_update_webdav_directory(&directory_record).await {
Ok(_) => {
info!("📊 Updated single directory tracking: {} ({} files, {} bytes, ETag: {})",
debug!("📊 Updated single directory tracking: {} ({} files, {} bytes, ETag: {})",
directory_path, file_count, total_size_bytes, dir_etag);
}
Err(e) => {
@ -780,7 +860,7 @@ impl WebDAVService {
.map(|dir| dir.directory_path.clone())
.collect();
info!("🕒 Found {} directories not scanned in last {} hours", stale_dirs.len(), max_age_hours);
debug!("🕒 Found {} directories not scanned in last {} hours", stale_dirs.len(), max_age_hours);
Ok(stale_dirs)
}
Err(e) => {
@ -792,12 +872,12 @@ impl WebDAVService {
/// Smart sync mode that combines multiple optimization strategies
pub async fn discover_files_smart_sync(&self, watch_folders: &[String], user_id: uuid::Uuid, state: &crate::AppState) -> Result<Vec<FileInfo>> {
info!("🧠 Starting smart sync for {} watch folders", watch_folders.len());
debug!("🧠 Starting smart sync for {} watch folders", watch_folders.len());
let mut all_files = Vec::new();
for folder_path in watch_folders {
info!("🔍 Smart sync processing folder: {}", folder_path);
debug!("🔍 Smart sync processing folder: {}", folder_path);
// Step 1: Try optimized discovery first (checks directory ETag)
let optimized_result = self.discover_files_in_folder_optimized(folder_path, user_id, state).await;
@ -805,20 +885,20 @@ impl WebDAVService {
match optimized_result {
Ok(files) => {
if !files.is_empty() {
info!("✅ Optimized discovery found {} files in {}", files.len(), folder_path);
debug!("✅ Optimized discovery found {} files in {}", files.len(), folder_path);
all_files.extend(files);
} else {
info!("🔍 Directory {} unchanged, checking for stale subdirectories", folder_path);
debug!("🔍 Directory {} unchanged, checking for stale subdirectories", folder_path);
// Step 2: Check for stale subdirectories that need targeted scanning
let stale_dirs = self.get_stale_subdirectories(folder_path, user_id, state, 24).await?;
if !stale_dirs.is_empty() {
info!("🎯 Found {} stale subdirectories, performing targeted scan", stale_dirs.len());
debug!("🎯 Found {} stale subdirectories, performing targeted scan", stale_dirs.len());
let targeted_files = self.discover_files_targeted_rescan(&stale_dirs, user_id, state).await?;
all_files.extend(targeted_files);
} else {
info!("✅ All subdirectories of {} are fresh, no scan needed", folder_path);
debug!("✅ All subdirectories of {} are fresh, no scan needed", folder_path);
}
}
}
@ -827,7 +907,7 @@ impl WebDAVService {
// Fallback to traditional full scan
match self.discover_files_in_folder(folder_path).await {
Ok(files) => {
info!("📂 Fallback scan found {} files in {}", files.len(), folder_path);
debug!("📂 Fallback scan found {} files in {}", files.len(), folder_path);
all_files.extend(files);
}
Err(fallback_error) => {
@ -839,7 +919,7 @@ impl WebDAVService {
}
}
info!("🧠 Smart sync completed: {} total files discovered", all_files.len());
debug!("🧠 Smart sync completed: {} total files discovered", all_files.len());
Ok(all_files)
}
@ -871,7 +951,7 @@ impl WebDAVService {
/// Perform incremental sync - only scan directories that have actually changed
pub async fn discover_files_incremental(&self, watch_folders: &[String], user_id: uuid::Uuid, state: &crate::AppState) -> Result<Vec<FileInfo>> {
info!("⚡ Starting incremental sync for {} watch folders", watch_folders.len());
debug!("⚡ Starting incremental sync for {} watch folders", watch_folders.len());
let mut changed_files = Vec::new();
let mut unchanged_count = 0;
@ -884,7 +964,7 @@ impl WebDAVService {
let needs_scan = match state.db.get_webdav_directory(user_id, folder_path).await {
Ok(Some(stored_dir)) => {
if stored_dir.directory_etag != current_etag {
info!("🔄 Directory {} changed (ETag: {} → {})", folder_path, stored_dir.directory_etag, current_etag);
debug!("🔄 Directory {} changed (ETag: {} → {})", folder_path, stored_dir.directory_etag, current_etag);
changed_count += 1;
true
} else {
@ -894,7 +974,7 @@ impl WebDAVService {
}
}
Ok(None) => {
info!("🆕 New directory {} detected", folder_path);
debug!("🆕 New directory {} detected", folder_path);
changed_count += 1;
true
}
@ -909,7 +989,7 @@ impl WebDAVService {
// Directory changed - perform targeted scan
match self.discover_files_in_folder_optimized(folder_path, user_id, state).await {
Ok(mut files) => {
info!("📂 Incremental scan found {} files in changed directory {}", files.len(), folder_path);
debug!("📂 Incremental scan found {} files in changed directory {}", files.len(), folder_path);
changed_files.append(&mut files);
}
Err(e) => {
@ -936,7 +1016,7 @@ impl WebDAVService {
}
}
info!("⚡ Incremental sync completed: {} unchanged, {} changed, {} total files found",
debug!("⚡ Incremental sync completed: {} unchanged, {} changed, {} total files found",
unchanged_count, changed_count, changed_files.len());
Ok(changed_files)
@ -952,12 +1032,12 @@ impl WebDAVService {
if supports_recursive_etags {
// With recursive ETags, if parent hasn't changed, nothing inside has changed
info!("🚀 Server supports recursive ETags - parent {} unchanged means all contents unchanged", parent_path);
debug!("🚀 Server supports recursive ETags - parent {} unchanged means all contents unchanged", parent_path);
return Ok(Vec::new());
}
// For servers without recursive ETags, fall back to checking each subdirectory
info!("📁 Server doesn't support recursive ETags, checking subdirectories individually");
debug!("📁 Server doesn't support recursive ETags, checking subdirectories individually");
// Get all known subdirectories from database
let known_directories = match state.db.list_webdav_directories(user_id).await {
@ -974,11 +1054,11 @@ impl WebDAVService {
.collect();
if subdirectories.is_empty() {
info!("📁 No known subdirectories for {}, performing initial scan to discover structure", parent_path);
debug!("📁 No known subdirectories for {}, performing initial scan to discover structure", parent_path);
return self.discover_files_in_folder_impl(parent_path).await;
}
info!("🔍 Checking {} known subdirectories for changes", subdirectories.len());
debug!("🔍 Checking {} known subdirectories for changes", subdirectories.len());
let mut changed_files = Vec::new();
let subdirectory_count = subdirectories.len();
@ -991,13 +1071,13 @@ impl WebDAVService {
match self.check_directory_etag(subdir_path).await {
Ok(current_etag) => {
if current_etag != subdir.directory_etag {
info!("🔄 Subdirectory {} changed (old: {}, new: {}), scanning recursively",
debug!("🔄 Subdirectory {} changed (old: {}, new: {}), scanning recursively",
subdir_path, subdir.directory_etag, current_etag);
// This subdirectory changed - get all its files recursively
match self.discover_files_in_folder_impl(subdir_path).await {
Ok(mut subdir_files) => {
info!("📂 Found {} files in changed subdirectory {}", subdir_files.len(), subdir_path);
debug!("📂 Found {} files in changed subdirectory {}", subdir_files.len(), subdir_path);
changed_files.append(&mut subdir_files);
// Update tracking for this subdirectory and its children
@ -1030,7 +1110,7 @@ impl WebDAVService {
}
}
info!("🎯 Found {} changed files across {} subdirectories", changed_files.len(), subdirectory_count);
debug!("🎯 Found {} changed files across {} subdirectories", changed_files.len(), subdirectory_count);
Ok(changed_files)
}
@ -1160,7 +1240,7 @@ impl WebDAVService {
/// (i.e., parent directory ETags change when child content changes)
/// This test is read-only and checks existing directory structures
pub async fn test_recursive_etag_support(&self) -> Result<bool> {
info!("🔬 Testing recursive ETag support using existing directory structure");
debug!("🔬 Testing recursive ETag support using existing directory structure");
// Find a directory with subdirectories from our watch folders
for watch_folder in &self.config.watch_folders {
@ -1178,7 +1258,7 @@ impl WebDAVService {
// Use the first subdirectory for testing
let test_subdir = &subdirs[0];
info!("Testing with directory: {} and subdirectory: {}", watch_folder, test_subdir.path);
debug!("Testing with directory: {} and subdirectory: {}", watch_folder, test_subdir.path);
// Step 1: Get parent directory ETag
let parent_etag = self.check_directory_etag(watch_folder).await?;
@ -1193,19 +1273,19 @@ impl WebDAVService {
// For now, we'll just check if the server provides ETags at all
if !parent_etag.is_empty() && !subdir_etag.is_empty() {
info!("✅ Server provides ETags for directories");
info!(" Parent ETag: {}", parent_etag);
info!(" Subdir ETag: {}", subdir_etag);
debug!("✅ Server provides ETags for directories");
debug!(" Parent ETag: {}", parent_etag);
debug!(" Subdir ETag: {}", subdir_etag);
// Without write access, we can't definitively test recursive propagation
// But we can make an educated guess based on the server type
let likely_supports_recursive = match self.config.server_type.as_deref() {
Some("nextcloud") | Some("owncloud") => {
info!(" Nextcloud/ownCloud servers typically support recursive ETags");
debug!(" Nextcloud/ownCloud servers typically support recursive ETags");
true
}
_ => {
info!(" Unknown server type - recursive ETag support uncertain");
debug!(" Unknown server type - recursive ETag support uncertain");
false
}
};
@ -1220,10 +1300,29 @@ impl WebDAVService {
}
}
info!("❓ Could not determine recursive ETag support - no suitable directories found");
debug!("❓ Could not determine recursive ETag support - no suitable directories found");
Ok(false)
}
/// Convert full WebDAV path to relative path for use with base_webdav_url
pub fn convert_to_relative_path(&self, full_webdav_path: &str) -> String {
// For Nextcloud/ownCloud paths like "/remote.php/dav/files/username/folder/subfolder/"
// We need to extract just the "folder/subfolder/" part
let webdav_prefix = match self.config.server_type.as_deref() {
Some("nextcloud") | Some("owncloud") => {
format!("/remote.php/dav/files/{}/", self.config.username)
},
_ => "/webdav/".to_string()
};
if let Some(relative_part) = full_webdav_path.strip_prefix(&webdav_prefix) {
format!("/{}", relative_part)
} else {
// If path doesn't match expected format, return as-is
full_webdav_path.to_string()
}
}
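The new unit tests at the end of this commit exercise this conversion; its core behavior, assuming a Nextcloud-type `service` built for user `testuser` as in those tests:
// Full DAV paths collapse to paths relative to the base WebDAV URL;
// unrecognized prefixes pass through unchanged.
assert_eq!(
    service.convert_to_relative_path("/remote.php/dav/files/testuser/Documents/"),
    "/Documents/"
);
assert_eq!(service.convert_to_relative_path("/some/other/path/"), "/some/other/path/");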
/// Smart directory scan that uses depth-1 traversal for efficient synchronization
/// Only scans directories whose ETags have changed, avoiding unnecessary deep scans
pub fn smart_directory_scan<'a>(
@ -1234,14 +1333,18 @@ impl WebDAVService {
state: &'a crate::AppState
) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<Vec<FileInfo>>> + Send + 'a>> {
Box::pin(async move {
info!("🧠 Smart scan starting for path: {}", path);
debug!("🧠 Smart scan starting for path: {}", path);
// Convert full WebDAV path to relative path for existing functions
let relative_path = self.convert_to_relative_path(path);
debug!("🔄 Converted {} to relative path: {}", path, relative_path);
// Step 1: Check current directory ETag
let current_etag = match self.check_directory_etag(path).await {
let current_etag = match self.check_directory_etag(&relative_path).await {
Ok(etag) => etag,
Err(e) => {
warn!("Failed to get directory ETag for {}, falling back to full scan: {}", path, e);
return self.discover_files_in_folder_impl(path).await;
return self.discover_files_in_folder_impl(&relative_path).await;
}
};
@ -1253,17 +1356,17 @@ impl WebDAVService {
};
if supports_recursive {
info!("✅ Directory {} unchanged (recursive ETag: {}), skipping scan", path, current_etag);
debug!("✅ Directory {} unchanged (recursive ETag: {}), skipping scan", path, current_etag);
return Ok(Vec::new());
} else {
info!("📁 Directory {} ETag unchanged but server doesn't support recursive ETags, checking subdirectories", path);
debug!("📁 Directory {} ETag unchanged but server doesn't support recursive ETags, checking subdirectories", path);
}
} else {
info!("🔄 Directory {} changed (old: {:?}, new: {})", path, known_etag, current_etag);
debug!("🔄 Directory {} changed (old: {:?}, new: {})", path, known_etag, current_etag);
}
// Step 3: Directory changed or we need to check subdirectories - do depth-1 scan
let entries = match self.discover_files_in_folder_shallow(path).await {
let entries = match self.discover_files_in_folder_shallow(&relative_path).await {
Ok(files) => files,
Err(e) => {
error!("Failed shallow scan of {}: {}", path, e);
@ -1301,57 +1404,215 @@ impl WebDAVService {
warn!("Failed to update directory tracking for {}: {}", path, e);
}
// Step 4: For each subdirectory, check if it needs scanning
for subdir in subdirs_to_scan {
// Get stored ETag for this subdirectory
let stored_etag = match state.db.get_webdav_directory(user_id, &subdir.path).await {
Ok(Some(dir)) => Some(dir.directory_etag),
Ok(None) => {
info!("🆕 New subdirectory discovered: {}", subdir.path);
None
}
Err(e) => {
warn!("Database error checking subdirectory {}: {}", subdir.path, e);
None
}
};
// Step 4: Process subdirectories concurrently with controlled parallelism
if !subdirs_to_scan.is_empty() {
let semaphore = std::sync::Arc::new(Semaphore::new(self.concurrency_config.max_concurrent_scans));
let subdirs_stream = stream::iter(subdirs_to_scan)
.map(|subdir| {
let semaphore = semaphore.clone();
let service = self.clone();
async move {
let _permit = semaphore.acquire().await.map_err(|e| anyhow!("Semaphore error: {}", e))?;
// Get stored ETag for this subdirectory
let stored_etag = match state.db.get_webdav_directory(user_id, &subdir.path).await {
Ok(Some(dir)) => Some(dir.directory_etag),
Ok(None) => {
debug!("🆕 New subdirectory discovered: {}", subdir.path);
None
}
Err(e) => {
warn!("Database error checking subdirectory {}: {}", subdir.path, e);
None
}
};
// If ETag changed or new directory, scan it recursively
if stored_etag.as_deref() != Some(&subdir.etag) {
debug!("🔄 Subdirectory {} needs scanning (old: {:?}, new: {})",
subdir.path, stored_etag, subdir.etag);
match service.smart_directory_scan(&subdir.path, stored_etag.as_deref(), user_id, state).await {
Ok(subdir_files) => {
debug!("📂 Found {} entries in subdirectory {}", subdir_files.len(), subdir.path);
Result::<Vec<FileInfo>, anyhow::Error>::Ok(subdir_files)
}
Err(e) => {
error!("Failed to scan subdirectory {}: {}", subdir.path, e);
Result::<Vec<FileInfo>, anyhow::Error>::Ok(Vec::new()) // Continue with other subdirectories
}
}
} else {
debug!("✅ Subdirectory {} unchanged (ETag: {})", subdir.path, subdir.etag);
// Update last_scanned_at
let update = crate::models::UpdateWebDAVDirectory {
directory_etag: subdir.etag.clone(),
last_scanned_at: chrono::Utc::now(),
file_count: 0, // Will be preserved by database
total_size_bytes: 0,
};
if let Err(e) = state.db.update_webdav_directory(user_id, &subdir.path, &update).await {
warn!("Failed to update scan time for {}: {}", subdir.path, e);
}
Result::<Vec<FileInfo>, anyhow::Error>::Ok(Vec::new())
}
}
})
.buffer_unordered(self.concurrency_config.max_concurrent_scans);
// If ETag changed or new directory, scan it recursively
if stored_etag.as_deref() != Some(&subdir.etag) {
info!("🔄 Subdirectory {} needs scanning (old: {:?}, new: {})",
subdir.path, stored_etag, subdir.etag);
match self.smart_directory_scan(&subdir.path, stored_etag.as_deref(), user_id, state).await {
// Collect all results concurrently
let mut subdirs_stream = std::pin::pin!(subdirs_stream);
while let Some(result) = subdirs_stream.next().await {
match result {
Ok(mut subdir_files) => {
info!("📂 Found {} entries in subdirectory {}", subdir_files.len(), subdir.path);
all_files.append(&mut subdir_files);
}
Err(e) => {
error!("Failed to scan subdirectory {}: {}", subdir.path, e);
// Continue with other subdirectories
warn!("Concurrent subdirectory scan error: {}", e);
// Continue processing other subdirectories
}
}
} else {
debug!("✅ Subdirectory {} unchanged (ETag: {})", subdir.path, subdir.etag);
// Update last_scanned_at
let update = crate::models::UpdateWebDAVDirectory {
directory_etag: subdir.etag.clone(),
last_scanned_at: chrono::Utc::now(),
file_count: 0, // Will be preserved by database
total_size_bytes: 0,
};
if let Err(e) = state.db.update_webdav_directory(user_id, &subdir.path, &update).await {
warn!("Failed to update scan time for {}: {}", subdir.path, e);
}
}
}
info!("🧠 Smart scan completed for {}: {} total entries found", path, all_files.len());
debug!("🧠 Smart scan completed for {}: {} total entries found", path, all_files.len());
Ok(all_files)
})
}
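The boxed-future signature on `smart_directory_scan` exists because the function recurses into itself, and a plain `async fn` cannot name its own (infinitely sized) future type. A minimal standalone sketch of the same pattern:
use std::{future::Future, pin::Pin};
// Recursive async needs an indirection so the future type stays finite;
// Box::pin provides it, mirroring smart_directory_scan's signature.
fn countdown(n: u32) -> Pin<Box<dyn Future<Output = u32> + Send>> {
    Box::pin(async move {
        if n == 0 { 0 } else { countdown(n - 1).await }
    })
}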
/// Resume a deep scan from a checkpoint after server restart/interruption
pub async fn resume_deep_scan(&self, checkpoint_path: &str, user_id: uuid::Uuid, state: &crate::AppState) -> Result<Vec<FileInfo>> {
info!("🔄 Resuming deep scan from checkpoint: {}", checkpoint_path);
// Check if the checkpoint directory is still accessible
match self.check_directory_etag(checkpoint_path).await {
Ok(current_etag) => {
info!("✅ Checkpoint directory accessible, resuming scan");
// Check if directory changed since checkpoint
match state.db.get_webdav_directory(user_id, checkpoint_path).await {
Ok(Some(stored_dir)) => {
if stored_dir.directory_etag != current_etag {
info!("🔄 Directory changed since checkpoint, performing full rescan");
} else {
info!("✅ Directory unchanged since checkpoint, can skip");
return Ok(Vec::new());
}
}
Ok(None) => {
info!("🆕 New checkpoint directory, performing full scan");
}
Err(e) => {
warn!("Database error checking checkpoint {}: {}, performing full scan", checkpoint_path, e);
}
}
// Resume with smart scanning from this point
self.discover_files_in_folder_optimized(checkpoint_path, user_id, state).await
}
Err(e) => {
warn!("Checkpoint directory {} inaccessible after restart: {}", checkpoint_path, e);
// Server might have restarted, wait a bit and retry
tokio::time::sleep(Duration::from_secs(5)).await;
match self.check_directory_etag(checkpoint_path).await {
Ok(_) => {
info!("🔄 Server recovered, resuming scan");
self.discover_files_in_folder_optimized(checkpoint_path, user_id, state).await
}
Err(e2) => {
error!("Failed to resume deep scan after server restart: {}", e2);
Err(anyhow!("Cannot resume deep scan: server unreachable after restart"))
}
}
}
}
}
/// Discover files in multiple folders concurrently with rate limiting
pub async fn discover_files_concurrent(&self, folders: &[String], user_id: uuid::Uuid, state: &crate::AppState) -> Result<Vec<FileInfo>> {
if folders.is_empty() {
return Ok(Vec::new());
}
info!("🚀 Starting concurrent discovery for {} folders", folders.len());
let semaphore = std::sync::Arc::new(Semaphore::new(self.concurrency_config.max_concurrent_scans));
let folders_stream = stream::iter(folders.iter())
.map(|folder_path| {
let semaphore = semaphore.clone();
let service = self.clone();
let folder_path = folder_path.clone();
async move {
let _permit = semaphore.acquire().await.map_err(|e| anyhow!("Semaphore error: {}", e))?;
info!("📂 Scanning folder: {}", folder_path);
let start_time = std::time::Instant::now();
// Save checkpoint for resumption after interruption
let checkpoint_record = crate::models::CreateWebDAVDirectory {
user_id,
directory_path: folder_path.clone(),
directory_etag: "scanning".to_string(), // Temporary marker
file_count: 0,
total_size_bytes: 0,
};
if let Err(e) = state.db.create_or_update_webdav_directory(&checkpoint_record).await {
warn!("Failed to save scan checkpoint for {}: {}", folder_path, e);
}
let result = service.discover_files_in_folder_optimized(&folder_path, user_id, state).await;
match &result {
Ok(files) => {
let duration = start_time.elapsed();
info!("✅ Completed folder {} in {:?}: {} files found",
folder_path, duration, files.len());
}
Err(e) => {
// Check if this was a server restart/connection issue
if service.is_server_restart_error(e) {
warn!("🔄 Server restart detected during scan of {}, will resume later", folder_path);
// Keep checkpoint for resumption
return Err(anyhow!("Server restart detected: {}", e));
} else {
error!("❌ Failed to scan folder {}: {}", folder_path, e);
}
}
}
result.map(|files| (folder_path, files))
}
})
.buffer_unordered(self.concurrency_config.max_concurrent_scans);
let mut all_files = Vec::new();
let mut success_count = 0;
let mut error_count = 0;
let mut folders_stream = std::pin::pin!(folders_stream);
while let Some(result) = folders_stream.next().await {
match result {
Ok((folder_path, mut files)) => {
debug!("📁 Folder {} contributed {} files", folder_path, files.len());
all_files.append(&mut files);
success_count += 1;
}
Err(e) => {
warn!("Folder scan error: {}", e);
error_count += 1;
}
}
}
info!("🎯 Concurrent discovery completed: {} folders successful, {} failed, {} total files",
success_count, error_count, all_files.len());
Ok(all_files)
}
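A hedged call-site sketch for this helper, assuming the deep-scan task were switched over from its current sequential per-folder loop (`webdav_service`, `config`, `user_id`, and `state` as in that task):
// Fan out over all watch folders, bounded by max_concurrent_scans.
let files = webdav_service
    .discover_files_concurrent(&config.watch_folders, user_id, &state)
    .await?;
info!("Concurrent discovery returned {} files", files.len());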
pub async fn download_file(&self, file_path: &str) -> Result<Vec<u8>> {
self.retry_with_backoff("download_file", || {
self.download_file_impl(file_path)

View File

@ -0,0 +1,254 @@
use readur::services::webdav_service::{WebDAVService, WebDAVConfig};
// Helper function to create test WebDAV service for Nextcloud
fn create_nextcloud_webdav_service() -> WebDAVService {
let config = WebDAVConfig {
server_url: "https://nas.example.com".to_string(),
username: "testuser".to_string(),
password: "testpass".to_string(),
watch_folders: vec!["/Documents".to_string()],
file_extensions: vec!["pdf".to_string(), "txt".to_string()],
timeout_seconds: 30,
server_type: Some("nextcloud".to_string()),
};
WebDAVService::new(config).unwrap()
}
// Helper function to create test WebDAV service for generic servers
fn create_generic_webdav_service() -> WebDAVService {
let config = WebDAVConfig {
server_url: "https://webdav.example.com".to_string(),
username: "testuser".to_string(),
password: "testpass".to_string(),
watch_folders: vec!["/Documents".to_string()],
file_extensions: vec!["pdf".to_string(), "txt".to_string()],
timeout_seconds: 30,
server_type: Some("generic".to_string()),
};
WebDAVService::new(config).unwrap()
}
#[tokio::test]
async fn test_nextcloud_path_conversion_basic() {
let service = create_nextcloud_webdav_service();
// Test basic path conversion
let full_webdav_path = "/remote.php/dav/files/testuser/Documents/";
let relative_path = service.convert_to_relative_path(full_webdav_path);
assert_eq!(relative_path, "/Documents/");
}
#[tokio::test]
async fn test_nextcloud_path_conversion_nested() {
let service = create_nextcloud_webdav_service();
// Test nested path conversion
let full_webdav_path = "/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/Projects/";
let relative_path = service.convert_to_relative_path(full_webdav_path);
assert_eq!(relative_path, "/FullerDocuments/NicoleDocuments/Projects/");
}
#[tokio::test]
async fn test_nextcloud_path_conversion_with_spaces() {
let service = create_nextcloud_webdav_service();
// Test path with URL-encoded spaces (the actual bug scenario)
let full_webdav_path = "/remote.php/dav/files/testuser/Documents/Melanie%20Martinez%20June%207%202023/";
let relative_path = service.convert_to_relative_path(full_webdav_path);
assert_eq!(relative_path, "/Documents/Melanie%20Martinez%20June%207%202023/");
}
#[tokio::test]
async fn test_nextcloud_path_conversion_with_special_chars() {
let service = create_nextcloud_webdav_service();
// Test path with various special characters
let full_webdav_path = "/remote.php/dav/files/testuser/Documents/Maranatha%20Work/";
let relative_path = service.convert_to_relative_path(full_webdav_path);
assert_eq!(relative_path, "/Documents/Maranatha%20Work/");
}
#[tokio::test]
async fn test_generic_webdav_path_conversion() {
let service = create_generic_webdav_service();
// Test generic WebDAV path conversion
let full_webdav_path = "/webdav/Documents/Projects/";
let relative_path = service.convert_to_relative_path(full_webdav_path);
assert_eq!(relative_path, "/Documents/Projects/");
}
#[tokio::test]
async fn test_path_conversion_with_mismatched_prefix() {
let service = create_nextcloud_webdav_service();
// Test path that doesn't match expected prefix (should return as-is)
let unexpected_path = "/some/other/path/Documents/";
let relative_path = service.convert_to_relative_path(unexpected_path);
assert_eq!(relative_path, "/some/other/path/Documents/");
}
#[tokio::test]
async fn test_url_construction_validation() {
let service = create_nextcloud_webdav_service();
// Test that we can identify the problem that caused the bug
// This simulates what was happening before the fix
// What we get from XML parser (full WebDAV path)
let full_webdav_path = "/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/";
// What the old code would do (WRONG - double construction)
let base_url = "https://nas.example.com/remote.php/dav/files/testuser";
let wrong_url = format!("{}{}", base_url, full_webdav_path);
// This would create a malformed URL
assert_eq!(wrong_url, "https://nas.example.com/remote.php/dav/files/testuser/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/");
// What the new code does (CORRECT)
let relative_path = service.convert_to_relative_path(full_webdav_path);
let correct_url = format!("{}{}", base_url, relative_path);
assert_eq!(correct_url, "https://nas.example.com/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/");
// Verify they're different (this is the bug we fixed)
assert_ne!(wrong_url, correct_url);
}
#[tokio::test]
async fn test_real_world_nextcloud_paths() {
let service = create_nextcloud_webdav_service();
// Test real-world paths that would come from Nextcloud XML responses
let real_world_paths = vec![
"/remote.php/dav/files/testuser/",
"/remote.php/dav/files/testuser/Documents/",
"/remote.php/dav/files/testuser/FullerDocuments/",
"/remote.php/dav/files/testuser/FullerDocuments/JonDocuments/",
"/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/",
"/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/Maranatha%20Work/",
"/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/Melanie%20Martinez%20June%207%202023/",
"/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/Misc/",
"/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/Nicole-Barakat-Website/",
"/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/RDP/",
];
let expected_relative_paths = vec![
"/",
"/Documents/",
"/FullerDocuments/",
"/FullerDocuments/JonDocuments/",
"/FullerDocuments/NicoleDocuments/",
"/FullerDocuments/NicoleDocuments/Maranatha%20Work/",
"/FullerDocuments/NicoleDocuments/Melanie%20Martinez%20June%207%202023/",
"/FullerDocuments/NicoleDocuments/Misc/",
"/FullerDocuments/NicoleDocuments/Nicole-Barakat-Website/",
"/FullerDocuments/NicoleDocuments/RDP/",
];
for (full_path, expected_relative) in real_world_paths.iter().zip(expected_relative_paths.iter()) {
let result = service.convert_to_relative_path(full_path);
assert_eq!(&result, expected_relative,
"Failed to convert {} to {}, got {}", full_path, expected_relative, result);
}
}
#[tokio::test]
async fn test_url_construction_end_to_end() {
let service = create_nextcloud_webdav_service();
// Test the complete URL construction process
let base_webdav_url = "https://nas.example.com/remote.php/dav/files/testuser";
// Simulate a path that would cause 404 with the old bug
let problematic_path = "/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/Melanie%20Martinez%20June%207%202023/";
// Convert to relative path
let relative_path = service.convert_to_relative_path(problematic_path);
// Construct final URL
let final_url = format!("{}{}", base_webdav_url, relative_path);
// Verify the URL is correctly constructed
assert_eq!(final_url, "https://nas.example.com/remote.php/dav/files/testuser/FullerDocuments/NicoleDocuments/Melanie%20Martinez%20June%207%202023/");
// Verify it doesn't contain double paths
assert!(!final_url.contains("/remote.php/dav/files/testuser/remote.php/dav/files/testuser/"));
}
#[tokio::test]
async fn test_different_usernames() {
// Test with different usernames to ensure the path conversion works correctly
let usernames = vec!["testuser", "perf3ct", "admin", "user123", "user.name"];
for username in usernames {
let config = WebDAVConfig {
server_url: "https://nas.example.com".to_string(),
username: username.to_string(),
password: "testpass".to_string(),
watch_folders: vec!["/Documents".to_string()],
file_extensions: vec!["pdf".to_string()],
timeout_seconds: 30,
server_type: Some("nextcloud".to_string()),
};
let service = WebDAVService::new(config).unwrap();
let full_path = format!("/remote.php/dav/files/{}/Documents/TestFolder/", username);
let relative_path = service.convert_to_relative_path(&full_path);
assert_eq!(relative_path, "/Documents/TestFolder/",
"Failed for username: {}", username);
}
}
// Test that validates the fix prevents the exact error scenario
#[tokio::test]
async fn test_fix_prevents_original_bug() {
// Create service with the same username as in the problematic path
let config = WebDAVConfig {
server_url: "https://nas.jonathonfuller.com".to_string(),
username: "perf3ct".to_string(),
password: "testpass".to_string(),
watch_folders: vec!["/Documents".to_string()],
file_extensions: vec!["pdf".to_string()],
timeout_seconds: 30,
server_type: Some("nextcloud".to_string()),
};
let service = WebDAVService::new(config).unwrap();
// This is the exact path from the error logs that was causing 404s
let problematic_path = "/remote.php/dav/files/perf3ct/FullerDocuments/NicoleDocuments/Melanie%20Martinez%20June%207%202023/";
// Before fix: This would have been used directly, causing double path construction
let base_url = "https://nas.jonathonfuller.com/remote.php/dav/files/perf3ct";
let old_buggy_url = format!("{}{}", base_url, problematic_path);
// After fix: Convert to relative path first
let relative_path = service.convert_to_relative_path(problematic_path);
let fixed_url = format!("{}{}", base_url, relative_path);
// Debug: print what we got
println!("Original path: {}", problematic_path);
println!("Relative path: {}", relative_path);
println!("Old buggy URL: {}", old_buggy_url);
println!("Fixed URL: {}", fixed_url);
// The old URL would have been malformed (causing 404)
assert!(old_buggy_url.contains("/remote.php/dav/files/perf3ct/remote.php/dav/files/perf3ct/"));
// The new URL should be properly formed
assert_eq!(fixed_url, "https://nas.jonathonfuller.com/remote.php/dav/files/perf3ct/FullerDocuments/NicoleDocuments/Melanie%20Martinez%20June%207%202023/");
assert!(!fixed_url.contains("/remote.php/dav/files/perf3ct/remote.php/dav/files/perf3ct/"));
// Most importantly, they should be different (proving the bug was fixed)
assert_ne!(old_buggy_url, fixed_url, "The fix should produce different URLs than the buggy version");
}