diff --git a/src/config.rs b/src/config.rs index c0c6f97..d47f63e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -26,70 +26,371 @@ pub struct Config { impl Config { pub fn from_env() -> Result { - dotenvy::dotenv().ok(); + // Load .env file if present + match dotenvy::dotenv() { + Ok(path) => println!("šŸ”§ Loaded environment variables from: {}", path.display()), + Err(_) => println!("šŸ”§ No .env file found, using system environment variables"), + } - let config = Config { - database_url: env::var("DATABASE_URL") - .unwrap_or_else(|_| "postgresql://readur:readur@localhost/readur".to_string()), - server_address: { - // Support both SERVER_ADDRESS (full address) and SERVER_PORT (just port) - if let Ok(addr) = env::var("SERVER_ADDRESS") { - addr + // Log all environment variable loading with detailed information + println!("\nšŸ“‹ CONFIGURATION LOADING:"); + println!("{}", "=".repeat(50)); + + // Database Configuration + let database_url = match env::var("DATABASE_URL") { + Ok(val) => { + // Mask sensitive parts of database URL for logging + let masked_url = if val.contains('@') { + let parts: Vec<&str> = val.split('@').collect(); + if parts.len() >= 2 { + let credentials_part = parts[0]; + let remaining_part = parts[1..].join("@"); + + // Extract just the username part before the password + if let Some(username_start) = credentials_part.rfind("://") { + let protocol = &credentials_part[..username_start + 3]; + let credentials = &credentials_part[username_start + 3..]; + if let Some(colon_pos) = credentials.find(':') { + let username = &credentials[..colon_pos]; + format!("{}{}:***@{}", protocol, username, remaining_part) + } else { + format!("{}***@{}", protocol, remaining_part) + } + } else { + "***masked***".to_string() + } + } else { + "***masked***".to_string() + } } else { - let host = env::var("SERVER_HOST").unwrap_or_else(|_| "0.0.0.0".to_string()); - let port = env::var("SERVER_PORT").unwrap_or_else(|_| "8000".to_string()); - format!("{}:{}", host, port) - } - }, - jwt_secret: env::var("JWT_SECRET") - .unwrap_or_else(|_| "your-secret-key".to_string()), - upload_path: env::var("UPLOAD_PATH") - .unwrap_or_else(|_| "./uploads".to_string()), - watch_folder: env::var("WATCH_FOLDER") - .unwrap_or_else(|_| "./watch".to_string()), - allowed_file_types: env::var("ALLOWED_FILE_TYPES") - .unwrap_or_else(|_| "pdf,txt,doc,docx,png,jpg,jpeg".to_string()) - .split(',') - .map(|s| s.trim().to_lowercase()) - .collect(), - watch_interval_seconds: env::var("WATCH_INTERVAL_SECONDS") - .ok() - .and_then(|s| s.parse().ok()), - file_stability_check_ms: env::var("FILE_STABILITY_CHECK_MS") - .ok() - .and_then(|s| s.parse().ok()), - max_file_age_hours: env::var("MAX_FILE_AGE_HOURS") - .ok() - .and_then(|s| s.parse().ok()), - - // OCR Configuration - ocr_language: env::var("OCR_LANGUAGE") - .unwrap_or_else(|_| "eng".to_string()), - concurrent_ocr_jobs: env::var("CONCURRENT_OCR_JOBS") - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(4), - ocr_timeout_seconds: env::var("OCR_TIMEOUT_SECONDS") - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(300), - max_file_size_mb: env::var("MAX_FILE_SIZE_MB") - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(50), - - // Performance - memory_limit_mb: env::var("MEMORY_LIMIT_MB") - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(512), - cpu_priority: env::var("CPU_PRIORITY") - .unwrap_or_else(|_| "normal".to_string()), + val.clone() + }; + println!("āœ… DATABASE_URL: {} (loaded from env)", masked_url); + val + } + Err(_) => { + let default_url = "postgresql://readur:readur@localhost/readur".to_string(); + println!("āš ļø DATABASE_URL: {} (using default - env var not set)", + "postgresql://readur:***@localhost/readur"); + default_url + } }; + let config = Config { + database_url, + server_address: { + // Support both SERVER_ADDRESS (full address) and SERVER_PORT (just port) + match env::var("SERVER_ADDRESS") { + Ok(addr) => { + println!("āœ… SERVER_ADDRESS: {} (loaded from env)", addr); + addr + } + Err(_) => { + let host = match env::var("SERVER_HOST") { + Ok(h) => { + println!("āœ… SERVER_HOST: {} (loaded from env)", h); + h + } + Err(_) => { + let default_host = "0.0.0.0".to_string(); + println!("āš ļø SERVER_HOST: {} (using default - env var not set)", default_host); + default_host + } + }; + + let port = match env::var("SERVER_PORT") { + Ok(p) => { + println!("āœ… SERVER_PORT: {} (loaded from env)", p); + p + } + Err(_) => { + let default_port = "8000".to_string(); + println!("āš ļø SERVER_PORT: {} (using default - env var not set)", default_port); + default_port + } + }; + + let combined_address = format!("{}:{}", host, port); + println!("šŸ”— Combined server_address: {}", combined_address); + combined_address + } + } + }, + jwt_secret: match env::var("JWT_SECRET") { + Ok(secret) => { + if secret == "your-secret-key" { + println!("āš ļø JWT_SECRET: Using default value (SECURITY RISK in production!)"); + } else { + println!("āœ… JWT_SECRET: ***hidden*** (loaded from env, {} chars)", secret.len()); + } + secret + } + Err(_) => { + let default_secret = "your-secret-key".to_string(); + println!("āš ļø JWT_SECRET: Using default value (SECURITY RISK - env var not set!)"); + default_secret + } + }, + upload_path: match env::var("UPLOAD_PATH") { + Ok(path) => { + println!("āœ… UPLOAD_PATH: {} (loaded from env)", path); + path + } + Err(_) => { + let default_path = "./uploads".to_string(); + println!("āš ļø UPLOAD_PATH: {} (using default - env var not set)", default_path); + default_path + } + }, + watch_folder: match env::var("WATCH_FOLDER") { + Ok(folder) => { + println!("āœ… WATCH_FOLDER: {} (loaded from env)", folder); + folder + } + Err(_) => { + let default_folder = "./watch".to_string(); + println!("āš ļø WATCH_FOLDER: {} (using default - env var not set)", default_folder); + default_folder + } + }, + allowed_file_types: { + let file_types_str = match env::var("ALLOWED_FILE_TYPES") { + Ok(types) => { + println!("āœ… ALLOWED_FILE_TYPES: {} (loaded from env)", types); + types + } + Err(_) => { + let default_types = "pdf,txt,doc,docx,png,jpg,jpeg".to_string(); + println!("āš ļø ALLOWED_FILE_TYPES: {} (using default - env var not set)", default_types); + default_types + } + }; + + let types_vec: Vec = file_types_str + .split(',') + .map(|s| s.trim().to_lowercase()) + .collect(); + + println!("šŸ“„ Parsed file types: {:?}", types_vec); + types_vec + }, + // Watcher Configuration + watch_interval_seconds: { + match env::var("WATCH_INTERVAL_SECONDS") { + Ok(val) => match val.parse::() { + Ok(parsed) => { + println!("āœ… WATCH_INTERVAL_SECONDS: {} (loaded from env)", parsed); + Some(parsed) + } + Err(e) => { + println!("āŒ WATCH_INTERVAL_SECONDS: Invalid value '{}' - {}, using default", val, e); + None + } + }, + Err(_) => { + println!("āš ļø WATCH_INTERVAL_SECONDS: Not set, using default behavior"); + None + } + } + }, + file_stability_check_ms: { + match env::var("FILE_STABILITY_CHECK_MS") { + Ok(val) => match val.parse::() { + Ok(parsed) => { + println!("āœ… FILE_STABILITY_CHECK_MS: {} (loaded from env)", parsed); + Some(parsed) + } + Err(e) => { + println!("āŒ FILE_STABILITY_CHECK_MS: Invalid value '{}' - {}, using default", val, e); + None + } + }, + Err(_) => { + println!("āš ļø FILE_STABILITY_CHECK_MS: Not set, using default behavior"); + None + } + } + }, + max_file_age_hours: { + match env::var("MAX_FILE_AGE_HOURS") { + Ok(val) => match val.parse::() { + Ok(parsed) => { + println!("āœ… MAX_FILE_AGE_HOURS: {} (loaded from env)", parsed); + Some(parsed) + } + Err(e) => { + println!("āŒ MAX_FILE_AGE_HOURS: Invalid value '{}' - {}, using unlimited", val, e); + None + } + }, + Err(_) => { + println!("āš ļø MAX_FILE_AGE_HOURS: Not set, files will not expire"); + None + } + } + }, + + // OCR Configuration + ocr_language: match env::var("OCR_LANGUAGE") { + Ok(lang) => { + println!("āœ… OCR_LANGUAGE: {} (loaded from env)", lang); + lang + } + Err(_) => { + let default_lang = "eng".to_string(); + println!("āš ļø OCR_LANGUAGE: {} (using default - env var not set)", default_lang); + default_lang + } + }, + concurrent_ocr_jobs: { + match env::var("CONCURRENT_OCR_JOBS") { + Ok(val) => match val.parse::() { + Ok(parsed) => { + println!("āœ… CONCURRENT_OCR_JOBS: {} (loaded from env)", parsed); + parsed + } + Err(e) => { + let default_jobs = 4; + println!("āŒ CONCURRENT_OCR_JOBS: Invalid value '{}' - {}, using default {}", val, e, default_jobs); + default_jobs + } + }, + Err(_) => { + let default_jobs = 4; + println!("āš ļø CONCURRENT_OCR_JOBS: {} (using default - env var not set)", default_jobs); + default_jobs + } + } + }, + ocr_timeout_seconds: { + match env::var("OCR_TIMEOUT_SECONDS") { + Ok(val) => match val.parse::() { + Ok(parsed) => { + println!("āœ… OCR_TIMEOUT_SECONDS: {} (loaded from env)", parsed); + parsed + } + Err(e) => { + let default_timeout = 300; + println!("āŒ OCR_TIMEOUT_SECONDS: Invalid value '{}' - {}, using default {}", val, e, default_timeout); + default_timeout + } + }, + Err(_) => { + let default_timeout = 300; + println!("āš ļø OCR_TIMEOUT_SECONDS: {} (using default - env var not set)", default_timeout); + default_timeout + } + } + }, + max_file_size_mb: { + match env::var("MAX_FILE_SIZE_MB") { + Ok(val) => match val.parse::() { + Ok(parsed) => { + println!("āœ… MAX_FILE_SIZE_MB: {} (loaded from env)", parsed); + parsed + } + Err(e) => { + let default_size = 50; + println!("āŒ MAX_FILE_SIZE_MB: Invalid value '{}' - {}, using default {}", val, e, default_size); + default_size + } + }, + Err(_) => { + let default_size = 50; + println!("āš ļø MAX_FILE_SIZE_MB: {} (using default - env var not set)", default_size); + default_size + } + } + }, + + // Performance Configuration + memory_limit_mb: { + match env::var("MEMORY_LIMIT_MB") { + Ok(val) => match val.parse::() { + Ok(parsed) => { + println!("āœ… MEMORY_LIMIT_MB: {} (loaded from env)", parsed); + parsed + } + Err(e) => { + let default_memory = 512; + println!("āŒ MEMORY_LIMIT_MB: Invalid value '{}' - {}, using default {}", val, e, default_memory); + default_memory + } + }, + Err(_) => { + let default_memory = 512; + println!("āš ļø MEMORY_LIMIT_MB: {} (using default - env var not set)", default_memory); + default_memory + } + } + }, + cpu_priority: match env::var("CPU_PRIORITY") { + Ok(priority) => { + println!("āœ… CPU_PRIORITY: {} (loaded from env)", priority); + priority + } + Err(_) => { + let default_priority = "normal".to_string(); + println!("āš ļø CPU_PRIORITY: {} (using default - env var not set)", default_priority); + default_priority + } + }, + }; + + println!("\nšŸ” CONFIGURATION VALIDATION:"); + println!("{}", "=".repeat(50)); + + // Validate server address format + if !config.server_address.contains(':') { + println!("āŒ SERVER_ADDRESS: Invalid format '{}' - missing port", config.server_address); + return Err(anyhow::anyhow!( + "Invalid server address format: '{}'. Expected format: 'host:port' (e.g., '0.0.0.0:8000')", + config.server_address + )); + } + + // Validate database URL format + if !config.database_url.starts_with("postgresql://") && !config.database_url.starts_with("postgres://") { + println!("āŒ DATABASE_URL: Invalid format - must start with 'postgresql://' or 'postgres://'"); + return Err(anyhow::anyhow!( + "Invalid database URL format. Must start with 'postgresql://' or 'postgres://'" + )); + } + // Validate configuration to prevent recursion issues + println!("šŸ” Validating directory paths for conflicts..."); config.validate_paths()?; + println!("\nšŸ“Š CONFIGURATION SUMMARY:"); + println!("{}", "=".repeat(50)); + println!("🌐 Server will bind to: {}", config.server_address); + println!("šŸ“ Upload directory: {}", config.upload_path); + println!("šŸ‘ļø Watch directory: {}", config.watch_folder); + println!("šŸ“„ Allowed file types: {:?}", config.allowed_file_types); + println!("🧠 OCR language: {}", config.ocr_language); + println!("āš™ļø Concurrent OCR jobs: {}", config.concurrent_ocr_jobs); + println!("ā±ļø OCR timeout: {}s", config.ocr_timeout_seconds); + println!("šŸ“ Max file size: {}MB", config.max_file_size_mb); + println!("šŸ’¾ Memory limit: {}MB", config.memory_limit_mb); + + // Warning checks + println!("\nāš ļø CONFIGURATION WARNINGS:"); + println!("{}", "=".repeat(50)); + if config.jwt_secret == "your-secret-key" { + println!("🚨 SECURITY WARNING: Using default JWT secret! Set JWT_SECRET environment variable in production!"); + } + if config.server_address.starts_with("0.0.0.0") { + println!("šŸŒ INFO: Server will listen on all interfaces (0.0.0.0)"); + } + if config.max_file_size_mb > 100 { + println!("šŸ“ INFO: Large file size limit ({}MB) may impact performance", config.max_file_size_mb); + } + if config.concurrent_ocr_jobs > 8 { + println!("āš™ļø INFO: High OCR concurrency ({}) may use significant CPU/memory", config.concurrent_ocr_jobs); + } + + println!("āœ… Configuration validation completed successfully!\n"); + Ok(config) } @@ -99,16 +400,52 @@ impl Config { let upload_path = Path::new(&self.upload_path); let watch_path = Path::new(&self.watch_folder); + println!("šŸ“ Checking upload directory: {}", self.upload_path); + println!("šŸ‘ļø Checking watch directory: {}", self.watch_folder); + + // Check if paths exist and are accessible + if !upload_path.exists() { + println!("āš ļø Upload directory does not exist yet: {}", self.upload_path); + } else if !upload_path.is_dir() { + println!("āŒ Upload path exists but is not a directory: {}", self.upload_path); + return Err(anyhow::anyhow!( + "Upload path '{}' exists but is not a directory", self.upload_path + )); + } else { + println!("āœ… Upload directory exists and is accessible"); + } + + if !watch_path.exists() { + println!("āš ļø Watch directory does not exist yet: {}", self.watch_folder); + } else if !watch_path.is_dir() { + println!("āŒ Watch path exists but is not a directory: {}", self.watch_folder); + return Err(anyhow::anyhow!( + "Watch folder '{}' exists but is not a directory", self.watch_folder + )); + } else { + println!("āœ… Watch directory exists and is accessible"); + } + // Normalize paths to handle relative paths and symlinks let upload_canonical = upload_path.canonicalize() - .unwrap_or_else(|_| upload_path.to_path_buf()); + .unwrap_or_else(|_| { + println!("āš ļø Could not canonicalize upload path, using as-is"); + upload_path.to_path_buf() + }); let watch_canonical = watch_path.canonicalize() - .unwrap_or_else(|_| watch_path.to_path_buf()); + .unwrap_or_else(|_| { + println!("āš ļø Could not canonicalize watch path, using as-is"); + watch_path.to_path_buf() + }); + + println!("šŸ“ Canonical upload path: {}", upload_canonical.display()); + println!("šŸ“ Canonical watch path: {}", watch_canonical.display()); // Check if paths are the same if upload_canonical == watch_canonical { + println!("āŒ CRITICAL ERROR: Upload and watch directories are the same!"); return Err(anyhow::anyhow!( - "Configuration Error: UPLOAD_PATH and WATCH_FOLDER cannot be the same directory.\n\ + "āŒ Configuration Error: UPLOAD_PATH and WATCH_FOLDER cannot be the same directory.\n\ This would cause infinite recursion where WebDAV files are downloaded to the upload \n\ directory and then immediately reprocessed by the watcher.\n\ Current config:\n\ @@ -121,8 +458,9 @@ impl Config { // Check if watch folder is inside upload folder if watch_canonical.starts_with(&upload_canonical) { + println!("āŒ CRITICAL ERROR: Watch folder is inside upload directory!"); return Err(anyhow::anyhow!( - "Configuration Error: WATCH_FOLDER cannot be inside UPLOAD_PATH.\n\ + "āŒ Configuration Error: WATCH_FOLDER cannot be inside UPLOAD_PATH.\n\ This would cause recursion where WebDAV files downloaded to uploads are \n\ detected by the watcher as new files.\n\ Current config:\n\ @@ -135,8 +473,9 @@ impl Config { // Check if upload folder is inside watch folder if upload_canonical.starts_with(&watch_canonical) { + println!("āŒ CRITICAL ERROR: Upload directory is inside watch folder!"); return Err(anyhow::anyhow!( - "Configuration Error: UPLOAD_PATH cannot be inside WATCH_FOLDER.\n\ + "āŒ Configuration Error: UPLOAD_PATH cannot be inside WATCH_FOLDER.\n\ This would cause recursion where files from the watch folder are \n\ copied to uploads (inside the watch folder) and reprocessed.\n\ Current config:\n\ @@ -147,6 +486,7 @@ impl Config { )); } + println!("āœ… Directory path validation passed - no conflicts detected"); Ok(()) } } \ No newline at end of file diff --git a/src/db/sources.rs b/src/db/sources.rs index 2b13ddf..40a9241 100644 --- a/src/db/sources.rs +++ b/src/db/sources.rs @@ -2,6 +2,7 @@ use anyhow::Result; use chrono::Utc; use sqlx::Row; use uuid::Uuid; +use tracing::{info, warn, error}; use super::Database; @@ -260,6 +261,8 @@ impl Database { } pub async fn get_sources_for_sync(&self) -> Result> { + info!("šŸ” Loading sources from database for sync check..."); + let rows = sqlx::query( r#"SELECT id, user_id, name, source_type, enabled, config, status, last_sync_at, last_error, last_error_at, total_files_synced, @@ -269,20 +272,57 @@ impl Database { ORDER BY last_sync_at ASC NULLS FIRST"# ) .fetch_all(&self.pool) - .await?; + .await + .map_err(|e| { + error!("āŒ Failed to load sources from database: {}", e); + e + })?; + + info!("šŸ“Š Database query returned {} sources for sync processing", rows.len()); let mut sources = Vec::new(); - for row in rows { - sources.push(crate::models::Source { - id: row.get("id"), + for (index, row) in rows.iter().enumerate() { + let source_id: uuid::Uuid = row.get("id"); + let source_name: String = row.get("name"); + let source_type_str: String = row.get("source_type"); + let config_json: serde_json::Value = row.get("config"); + + info!("šŸ“‹ Processing source {}: ID={}, Name='{}', Type={}", + index + 1, source_id, source_name, source_type_str); + + // Log config structure for debugging + if source_type_str == "WebDAV" { + if let Some(config_obj) = config_json.as_object() { + if let Some(server_url) = config_obj.get("server_url").and_then(|v| v.as_str()) { + info!(" šŸ”— WebDAV server_url: '{}'", server_url); + } else { + warn!(" āš ļø WebDAV config missing server_url field"); + } + } else { + warn!(" āš ļø WebDAV config is not a JSON object"); + } + + // Pretty print the config for debugging + if let Ok(pretty_config) = serde_json::to_string_pretty(&config_json) { + info!(" šŸ“„ Full config:\n{}", pretty_config); + } else { + warn!(" āš ļø Unable to serialize config JSON"); + } + } + + let source = crate::models::Source { + id: source_id, user_id: row.get("user_id"), - name: row.get("name"), - source_type: row.get::("source_type").try_into() - .map_err(|e| anyhow::anyhow!("Invalid source type: {}", e))?, + name: source_name.clone(), + source_type: source_type_str.clone().try_into() + .map_err(|e| anyhow::anyhow!("Invalid source type '{}' for source '{}': {}", source_type_str, source_name, e))?, enabled: row.get("enabled"), - config: row.get("config"), - status: row.get::("status").try_into() - .map_err(|e| anyhow::anyhow!("Invalid source status: {}", e))?, + config: config_json, + status: { + let status_str: String = row.get("status"); + status_str.clone().try_into() + .map_err(|e| anyhow::anyhow!("Invalid source status '{}' for source '{}': {}", status_str, source_name, e))? + }, last_sync_at: row.get("last_sync_at"), last_error: row.get("last_error"), last_error_at: row.get("last_error_at"), @@ -291,7 +331,9 @@ impl Database { total_size_bytes: row.get("total_size_bytes"), created_at: row.get("created_at"), updated_at: row.get("updated_at"), - }); + }; + + sources.push(source); } Ok(sources) diff --git a/src/main.rs b/src/main.rs index 17bfc50..2bb97a2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,6 +6,7 @@ use sqlx::Row; use std::sync::Arc; use tower_http::{cors::CorsLayer, services::{ServeDir, ServeFile}}; use tracing::{info, error, warn}; +use anyhow; use readur::{config::Config, db::Database, AppState, *}; @@ -50,10 +51,56 @@ fn determine_static_files_path() -> std::path::PathBuf { } #[tokio::main] -async fn main() -> Result<(), Box> { +async fn main() -> anyhow::Result<()> { tracing_subscriber::fmt::init(); - let config = Config::from_env()?; + println!("\nšŸš€ READUR APPLICATION STARTUP"); + println!("{}", "=".repeat(60)); + + // Load and validate configuration with comprehensive logging + let config = match Config::from_env() { + Ok(cfg) => { + println!("āœ… Configuration loaded and validated successfully"); + cfg + } + Err(e) => { + println!("āŒ CRITICAL: Configuration loading failed!"); + println!("Error: {}", e); + println!("\nšŸ”§ Please check your environment variables and fix the configuration issues above."); + return Err(e); + } + }; + + // Log critical configuration values that affect startup + println!("\nšŸ”— STARTUP CONFIGURATION:"); + println!("{}", "=".repeat(50)); + println!("🌐 Server will start on: {}", config.server_address); + // Parse database URL safely without exposing credentials + let db_info = if let Some(at_pos) = config.database_url.find('@') { + let host_part = &config.database_url[at_pos + 1..]; + let protocol = if config.database_url.starts_with("postgresql://") { "postgresql" } else { "postgres" }; + + // Extract just username from credentials part (before @) + let creds_part = &config.database_url[..at_pos]; + let username = if let Some(proto_end) = creds_part.find("://") { + let after_proto = &creds_part[proto_end + 3..]; + if let Some(colon_pos) = after_proto.find(':') { + &after_proto[..colon_pos] + } else { + after_proto + } + } else { + "unknown" + }; + + format!("{}://{}:***@{}", protocol, username, host_part) + } else { + "Invalid database URL format".to_string() + }; + + println!("šŸ—„ļø Database connection: {}", db_info); + println!("šŸ“ Upload directory: {}", config.upload_path); + println!("šŸ‘ļø Watch directory: {}", config.watch_folder); // Initialize upload directory structure info!("Initializing upload directory structure..."); @@ -72,8 +119,38 @@ async fn main() -> Result<(), Box> { } // Create separate database pools for different workloads - let web_db = Database::new_with_pool_config(&config.database_url, 20, 2).await?; // Web UI pool - let background_db = Database::new_with_pool_config(&config.database_url, 30, 3).await?; // Background operations pool + println!("\nšŸ—„ļø DATABASE CONNECTION:"); + println!("{}", "=".repeat(50)); + + let web_db = match Database::new_with_pool_config(&config.database_url, 20, 2).await { + Ok(db) => { + println!("āœ… Web database pool created (max: 20 connections, min idle: 2)"); + db + } + Err(e) => { + println!("āŒ CRITICAL: Failed to connect to database for web operations!"); + println!("Database URL: {}", db_info); // Use the already-masked URL + println!("Error: {}", e); + println!("\nšŸ”§ Please verify:"); + println!(" - Database server is running"); + println!(" - DATABASE_URL is correct"); + println!(" - Database credentials are valid"); + println!(" - Network connectivity to database"); + return Err(e.into()); + } + }; + + let background_db = match Database::new_with_pool_config(&config.database_url, 30, 3).await { + Ok(db) => { + println!("āœ… Background database pool created (max: 30 connections, min idle: 3)"); + db + } + Err(e) => { + println!("āŒ CRITICAL: Failed to connect to database for background operations!"); + println!("Error: {}", e); + return Err(e.into()); + } + }; // Don't run the old migration system - let SQLx handle everything // db.migrate().await?; @@ -275,10 +352,15 @@ async fn main() -> Result<(), Box> { }); // Create universal source scheduler with background state (handles WebDAV, Local, S3) + println!("\nšŸ“… SCHEDULER INITIALIZATION:"); + println!("{}", "=".repeat(50)); + let source_scheduler = Arc::new(readur::source_scheduler::SourceScheduler::new(background_state.clone())); + println!("āœ… Universal source scheduler created (handles WebDAV, Local, S3)"); // Keep WebDAV scheduler for backward compatibility with existing WebDAV endpoints let webdav_scheduler = Arc::new(readur::webdav_scheduler::WebDAVScheduler::new(background_state.clone())); + println!("āœ… Legacy WebDAV scheduler created (backward compatibility)"); // Update the web state to include scheduler references let updated_web_state = AppState { @@ -291,12 +373,13 @@ async fn main() -> Result<(), Box> { let web_state = Arc::new(updated_web_state); // Start universal source scheduler on background runtime + println!("ā° Scheduling background source sync to start in 30 seconds"); let scheduler_for_background = source_scheduler.clone(); background_runtime.spawn(async move { info!("Starting universal source sync scheduler with 30-second startup delay"); // Wait 30 seconds before starting scheduler to allow server to fully initialize tokio::time::sleep(tokio::time::Duration::from_secs(30)).await; - info!("Universal source sync scheduler starting after startup delay"); + info!("šŸ”„ Universal source sync scheduler starting after startup delay - this will check for WebDAV sources!"); scheduler_for_background.start().await; }); @@ -333,8 +416,35 @@ async fn main() -> Result<(), Box> { .layer(CorsLayer::permissive()) .with_state(web_state.clone()); - let listener = tokio::net::TcpListener::bind(&config.server_address).await?; - info!("Server starting on {}", config.server_address); + println!("\n🌐 STARTING HTTP SERVER:"); + println!("{}", "=".repeat(50)); + + let listener = match tokio::net::TcpListener::bind(&config.server_address).await { + Ok(listener) => { + println!("āœ… HTTP server bound to: {}", config.server_address); + listener + } + Err(e) => { + println!("āŒ CRITICAL: Failed to bind to address: {}", config.server_address); + println!("Error: {}", e); + println!("\nšŸ”§ Please check:"); + println!(" - Address {} is not already in use", config.server_address); + println!(" - SERVER_HOST and SERVER_PORT environment variables are correct"); + println!(" - You have permission to bind to this address"); + return Err(e.into()); + } + }; + + println!("\nšŸŽ‰ READUR APPLICATION READY!"); + println!("{}", "=".repeat(60)); + println!("🌐 Server: http://{}", config.server_address); + println!("šŸ“ Upload Directory: {}", config.upload_path); + println!("šŸ‘ļø Watch Directory: {}", config.watch_folder); + println!("šŸ”„ Source Scheduler: Will start in 30 seconds"); + println!("šŸ“‹ Check logs above for any configuration warnings"); + println!("{}", "=".repeat(60)); + + info!("šŸš€ Readur server is now running and accepting connections"); axum::serve(listener, app).await?; diff --git a/src/source_scheduler.rs b/src/source_scheduler.rs index 02c8662..50c5704 100644 --- a/src/source_scheduler.rs +++ b/src/source_scheduler.rs @@ -57,9 +57,29 @@ impl SourceScheduler { info!("Checking for interrupted source syncs to resume"); // Get all enabled sources that might have been interrupted - let sources = self.state.db.get_sources_for_sync().await?; + let sources = match self.state.db.get_sources_for_sync().await { + Ok(sources) => { + info!("Successfully loaded {} sources from database for sync check", sources.len()); + sources + } + Err(e) => { + error!("Failed to load sources from database during startup: {}", e); + return Err(e.into()); + } + }; for source in sources { + info!("Processing source during startup check: ID={}, Name='{}', Type={}, Status={}", + source.id, source.name, source.source_type.to_string(), source.status.to_string()); + + // Validate source configuration before attempting any operations + if let Err(e) = self.validate_source_config(&source) { + error!("āŒ CONFIGURATION ERROR for source '{}' (ID: {}): {}", + source.name, source.id, e); + error!("Source config JSON: {}", serde_json::to_string_pretty(&source.config).unwrap_or_else(|_| "Invalid JSON".to_string())); + continue; + } + // Check if this source was likely interrupted during sync // This is a simplified check - you might want to add specific interrupted tracking if source.status.to_string() == "syncing" { @@ -132,6 +152,26 @@ impl SourceScheduler { let sources = self.state.db.get_sources_for_sync().await?; for source in sources { + // Validate source configuration before checking if sync is due + if let Err(e) = self.validate_source_config(&source) { + error!("āŒ CONFIGURATION ERROR during background sync check for source '{}' (ID: {}): {}", + source.name, source.id, e); + error!("Source config JSON: {}", serde_json::to_string_pretty(&source.config).unwrap_or_else(|_| "Invalid JSON".to_string())); + + // Update source with error status + if let Err(update_err) = sqlx::query( + r#"UPDATE sources SET status = 'error', last_error = $1, last_error_at = NOW(), updated_at = NOW() WHERE id = $2"# + ) + .bind(format!("Configuration error: {}", e)) + .bind(source.id) + .execute(self.state.db.get_pool()) + .await { + error!("Failed to update source error status: {}", update_err); + } + + continue; + } + // Check if sync is due for this source if self.is_sync_due(&source).await? { info!("Starting background sync for source: {} ({})", source.name, source.source_type); @@ -378,4 +418,102 @@ impl SourceScheduler { Err("No running sync found for this source".into()) } } + + /// Validates a source configuration and provides detailed error messages for debugging + fn validate_source_config(&self, source: &crate::models::Source) -> Result<(), String> { + use crate::models::{SourceType, WebDAVSourceConfig, S3SourceConfig, LocalFolderSourceConfig}; + + match source.source_type { + SourceType::WebDAV => { + // Attempt to deserialize WebDAV config + let config: WebDAVSourceConfig = serde_json::from_value(source.config.clone()) + .map_err(|e| format!("Failed to parse WebDAV configuration JSON: {}", e))?; + + // Validate server URL format + self.validate_webdav_url(&config.server_url, &source.name)?; + + // Additional WebDAV validations + if config.username.trim().is_empty() { + return Err(format!("WebDAV username cannot be empty")); + } + + if config.password.trim().is_empty() { + return Err(format!("WebDAV password cannot be empty")); + } + + if config.watch_folders.is_empty() { + return Err(format!("WebDAV watch_folders cannot be empty")); + } + + Ok(()) + } + SourceType::S3 => { + let _config: S3SourceConfig = serde_json::from_value(source.config.clone()) + .map_err(|e| format!("Failed to parse S3 configuration JSON: {}", e))?; + Ok(()) + } + SourceType::LocalFolder => { + let _config: LocalFolderSourceConfig = serde_json::from_value(source.config.clone()) + .map_err(|e| format!("Failed to parse Local Folder configuration JSON: {}", e))?; + Ok(()) + } + } + } + + /// Validates WebDAV server URL and provides specific error messages + fn validate_webdav_url(&self, server_url: &str, source_name: &str) -> Result<(), String> { + if server_url.trim().is_empty() { + return Err(format!("WebDAV server_url is empty")); + } + + // Check if URL starts with a valid scheme + if !server_url.starts_with("http://") && !server_url.starts_with("https://") { + return Err(format!( + "WebDAV server_url must start with 'http://' or 'https://'. \ + Current value: '{}'. \ + Examples of valid URLs: \ + - https://cloud.example.com \ + - http://192.168.1.100:8080 \ + - https://nextcloud.mydomain.com:443", + server_url + )); + } + + // Try to parse as URL to catch other issues + match reqwest::Url::parse(server_url) { + Ok(url) => { + if url.scheme() != "http" && url.scheme() != "https" { + return Err(format!( + "WebDAV server_url has invalid scheme '{}'. Only 'http' and 'https' are supported. \ + Current URL: '{}'", + url.scheme(), server_url + )); + } + + if url.host_str().is_none() { + return Err(format!( + "WebDAV server_url is missing hostname. \ + Current URL: '{}'. \ + Example: https://cloud.example.com", + server_url + )); + } + + info!("āœ… WebDAV URL validation passed for source '{}': {}", source_name, server_url); + Ok(()) + } + Err(e) => { + Err(format!( + "WebDAV server_url is not a valid URL: {}. \ + Current value: '{}'. \ + The URL must be absolute and include the full domain. \ + Examples: \ + - https://cloud.example.com \ + - http://192.168.1.100:8080/webdav \ + - https://nextcloud.mydomain.com", + e, server_url + )) + } + } + } } \ No newline at end of file diff --git a/src/webdav_service.rs b/src/webdav_service.rs index bc47e30..2cc4302 100644 --- a/src/webdav_service.rs +++ b/src/webdav_service.rs @@ -64,17 +64,56 @@ impl WebDAVService { .timeout(Duration::from_secs(config.timeout_seconds)) .build()?; + // Validate server URL before constructing WebDAV URLs + if config.server_url.trim().is_empty() { + return Err(anyhow!("āŒ WebDAV Configuration Error: server_url is empty")); + } + + if !config.server_url.starts_with("http://") && !config.server_url.starts_with("https://") { + return Err(anyhow!( + "āŒ WebDAV Configuration Error: server_url must start with 'http://' or 'https://'. \ + Current value: '{}'. \ + Examples: \ + - https://cloud.example.com \ + - http://192.168.1.100:8080 \ + - https://nextcloud.mydomain.com", + config.server_url + )); + } + + // Validate that server_url can be parsed as a proper URL + if let Err(e) = reqwest::Url::parse(&config.server_url) { + return Err(anyhow!( + "āŒ WebDAV Configuration Error: server_url is not a valid URL: {}. \ + Current value: '{}'. \ + The URL must be absolute and include the full domain. \ + Examples: \ + - https://cloud.example.com \ + - http://192.168.1.100:8080/webdav \ + - https://nextcloud.mydomain.com", + e, config.server_url + )); + } + // Construct WebDAV URL based on server type let base_webdav_url = match config.server_type.as_deref() { - Some("nextcloud") | Some("owncloud") => format!( - "{}/remote.php/dav/files/{}", - config.server_url.trim_end_matches('/'), - config.username - ), - _ => format!( - "{}/webdav", - config.server_url.trim_end_matches('/') - ), + Some("nextcloud") | Some("owncloud") => { + let url = format!( + "{}/remote.php/dav/files/{}", + config.server_url.trim_end_matches('/'), + config.username + ); + info!("šŸ”— Constructed Nextcloud/ownCloud WebDAV URL: {}", url); + url + }, + _ => { + let url = format!( + "{}/webdav", + config.server_url.trim_end_matches('/') + ); + info!("šŸ”— Constructed generic WebDAV URL: {}", url); + url + }, }; Ok(Self { @@ -153,6 +192,45 @@ impl WebDAVService { test_config.server_url, test_config.server_type.as_deref().unwrap_or("generic")); + // Validate server URL before constructing test URL + if test_config.server_url.trim().is_empty() { + return Ok(WebDAVConnectionResult { + success: false, + message: "āŒ WebDAV server_url is empty".to_string(), + server_version: None, + server_type: None, + }); + } + + if !test_config.server_url.starts_with("http://") && !test_config.server_url.starts_with("https://") { + return Ok(WebDAVConnectionResult { + success: false, + message: format!( + "āŒ WebDAV server_url must start with 'http://' or 'https://'. \ + Current value: '{}'. \ + Examples: https://cloud.example.com, http://192.168.1.100:8080", + test_config.server_url + ), + server_version: None, + server_type: None, + }); + } + + // Validate URL can be parsed + if let Err(e) = reqwest::Url::parse(&test_config.server_url) { + return Ok(WebDAVConnectionResult { + success: false, + message: format!( + "āŒ WebDAV server_url is not a valid URL: {}. \ + Current value: '{}'. \ + Must be absolute URL like: https://cloud.example.com", + e, test_config.server_url + ), + server_version: None, + server_type: None, + }); + } + let test_url = match test_config.server_type.as_deref() { Some("nextcloud") | Some("owncloud") => format!( "{}/remote.php/dav/files/{}/", @@ -164,8 +242,10 @@ impl WebDAVService { test_config.server_url.trim_end_matches('/') ), }; + + info!("šŸ”— Constructed test URL: {}", test_url); - let response = self.client + let resp = self.client .request(Method::from_bytes(b"PROPFIND").unwrap(), &test_url) .basic_auth(&test_config.username, Some(&test_config.password)) .header("Depth", "0") @@ -176,42 +256,35 @@ impl WebDAVService { "#) .send() - .await; + .await + .map_err(|e| { + error!("āŒ WebDAV HTTP request failed for URL '{}': {}", test_url, e); + anyhow!("WebDAV HTTP request failed for URL '{}': {}. \ + This often indicates a URL configuration issue. \ + Verify the server_url is correct and accessible.", test_url, e) + })?; - match response { - Ok(resp) => { - if resp.status().is_success() { - info!("āœ… WebDAV connection successful"); - - // Try to get server info - let (version, server_type) = self.get_server_info(&test_config).await; - - Ok(WebDAVConnectionResult { - success: true, - message: format!("Successfully connected to WebDAV server ({})", - server_type.as_deref().unwrap_or("Generic WebDAV")), - server_version: version, - server_type, - }) - } else { - error!("āŒ WebDAV connection failed with status: {}", resp.status()); - Ok(WebDAVConnectionResult { - success: false, - message: format!("Connection failed: HTTP {}", resp.status()), - server_version: None, - server_type: None, - }) - } - } - Err(e) => { - error!("āŒ WebDAV connection error: {}", e); - Ok(WebDAVConnectionResult { - success: false, - message: format!("Connection error: {}", e), - server_version: None, - server_type: None, - }) - } + if resp.status().is_success() { + info!("āœ… WebDAV connection successful"); + + // Try to get server info + let (version, server_type) = self.get_server_info(&test_config).await; + + Ok(WebDAVConnectionResult { + success: true, + message: format!("Successfully connected to WebDAV server ({})", + server_type.as_deref().unwrap_or("Generic WebDAV")), + server_version: version, + server_type, + }) + } else { + error!("āŒ WebDAV connection failed with status: {} for URL: {}", resp.status(), test_url); + Ok(WebDAVConnectionResult { + success: false, + message: format!("Connection failed: HTTP {} for URL: {}", resp.status(), test_url), + server_version: None, + server_type: None, + }) } } diff --git a/tests/webdav_enhanced_unit_tests.rs b/tests/webdav_enhanced_unit_tests.rs index d229205..95f94df 100644 --- a/tests/webdav_enhanced_unit_tests.rs +++ b/tests/webdav_enhanced_unit_tests.rs @@ -117,7 +117,7 @@ fn test_webdav_config_validation() { assert!(WebDAVService::new(valid_config).is_ok()); - // Test config with empty server URL + // Test config with empty server URL - should fail with our enhanced validation let invalid_config = WebDAVConfig { server_url: "".to_string(), username: "testuser".to_string(), @@ -128,8 +128,34 @@ fn test_webdav_config_validation() { server_type: Some("nextcloud".to_string()), }; - // Should still create service, validation happens during actual requests - assert!(WebDAVService::new(invalid_config).is_ok()); + // Should fail early with enhanced validation + assert!(WebDAVService::new(invalid_config).is_err()); + + // Test config with invalid URL scheme - should also fail + let invalid_scheme_config = WebDAVConfig { + server_url: "ftp://cloud.example.com".to_string(), + username: "testuser".to_string(), + password: "testpass".to_string(), + watch_folders: vec!["/Documents".to_string()], + file_extensions: vec!["pdf".to_string()], + timeout_seconds: 30, + server_type: Some("nextcloud".to_string()), + }; + + assert!(WebDAVService::new(invalid_scheme_config).is_err()); + + // Test config with relative URL - should also fail + let relative_url_config = WebDAVConfig { + server_url: "/webdav".to_string(), + username: "testuser".to_string(), + password: "testpass".to_string(), + watch_folders: vec!["/Documents".to_string()], + file_extensions: vec!["pdf".to_string()], + timeout_seconds: 30, + server_type: Some("nextcloud".to_string()), + }; + + assert!(WebDAVService::new(relative_url_config).is_err()); } #[test]