Merge pull request #41 from readur/feat/better-config-erroring

feat(server): implement better error for configuration issues
This commit is contained in:
Jon Fuller 2025-06-25 13:58:35 -07:00 committed by GitHub
commit d837204bc9
6 changed files with 858 additions and 129 deletions

View File

@ -26,70 +26,371 @@ pub struct Config {
impl Config { impl Config {
pub fn from_env() -> Result<Self> { pub fn from_env() -> Result<Self> {
dotenvy::dotenv().ok(); // Load .env file if present
match dotenvy::dotenv() {
let config = Config { Ok(path) => println!("🔧 Loaded environment variables from: {}", path.display()),
database_url: env::var("DATABASE_URL") Err(_) => println!("🔧 No .env file found, using system environment variables"),
.unwrap_or_else(|_| "postgresql://readur:readur@localhost/readur".to_string()),
server_address: {
// Support both SERVER_ADDRESS (full address) and SERVER_PORT (just port)
if let Ok(addr) = env::var("SERVER_ADDRESS") {
addr
} else {
let host = env::var("SERVER_HOST").unwrap_or_else(|_| "0.0.0.0".to_string());
let port = env::var("SERVER_PORT").unwrap_or_else(|_| "8000".to_string());
format!("{}:{}", host, port)
} }
},
jwt_secret: env::var("JWT_SECRET")
.unwrap_or_else(|_| "your-secret-key".to_string()),
upload_path: env::var("UPLOAD_PATH")
.unwrap_or_else(|_| "./uploads".to_string()),
watch_folder: env::var("WATCH_FOLDER")
.unwrap_or_else(|_| "./watch".to_string()),
allowed_file_types: env::var("ALLOWED_FILE_TYPES")
.unwrap_or_else(|_| "pdf,txt,doc,docx,png,jpg,jpeg".to_string())
.split(',')
.map(|s| s.trim().to_lowercase())
.collect(),
watch_interval_seconds: env::var("WATCH_INTERVAL_SECONDS")
.ok()
.and_then(|s| s.parse().ok()),
file_stability_check_ms: env::var("FILE_STABILITY_CHECK_MS")
.ok()
.and_then(|s| s.parse().ok()),
max_file_age_hours: env::var("MAX_FILE_AGE_HOURS")
.ok()
.and_then(|s| s.parse().ok()),
// OCR Configuration // Log all environment variable loading with detailed information
ocr_language: env::var("OCR_LANGUAGE") println!("\n📋 CONFIGURATION LOADING:");
.unwrap_or_else(|_| "eng".to_string()), println!("{}", "=".repeat(50));
concurrent_ocr_jobs: env::var("CONCURRENT_OCR_JOBS")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(4),
ocr_timeout_seconds: env::var("OCR_TIMEOUT_SECONDS")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(300),
max_file_size_mb: env::var("MAX_FILE_SIZE_MB")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(50),
// Performance // Database Configuration
memory_limit_mb: env::var("MEMORY_LIMIT_MB") let database_url = match env::var("DATABASE_URL") {
.ok() Ok(val) => {
.and_then(|s| s.parse().ok()) // Mask sensitive parts of database URL for logging
.unwrap_or(512), let masked_url = if val.contains('@') {
cpu_priority: env::var("CPU_PRIORITY") let parts: Vec<&str> = val.split('@').collect();
.unwrap_or_else(|_| "normal".to_string()), if parts.len() >= 2 {
let credentials_part = parts[0];
let remaining_part = parts[1..].join("@");
// Extract just the username part before the password
if let Some(username_start) = credentials_part.rfind("://") {
let protocol = &credentials_part[..username_start + 3];
let credentials = &credentials_part[username_start + 3..];
if let Some(colon_pos) = credentials.find(':') {
let username = &credentials[..colon_pos];
format!("{}{}:***@{}", protocol, username, remaining_part)
} else {
format!("{}***@{}", protocol, remaining_part)
}
} else {
"***masked***".to_string()
}
} else {
"***masked***".to_string()
}
} else {
val.clone()
};
println!("✅ DATABASE_URL: {} (loaded from env)", masked_url);
val
}
Err(_) => {
let default_url = "postgresql://readur:readur@localhost/readur".to_string();
println!("⚠️ DATABASE_URL: {} (using default - env var not set)",
"postgresql://readur:***@localhost/readur");
default_url
}
}; };
let config = Config {
database_url,
server_address: {
// Support both SERVER_ADDRESS (full address) and SERVER_PORT (just port)
match env::var("SERVER_ADDRESS") {
Ok(addr) => {
println!("✅ SERVER_ADDRESS: {} (loaded from env)", addr);
addr
}
Err(_) => {
let host = match env::var("SERVER_HOST") {
Ok(h) => {
println!("✅ SERVER_HOST: {} (loaded from env)", h);
h
}
Err(_) => {
let default_host = "0.0.0.0".to_string();
println!("⚠️ SERVER_HOST: {} (using default - env var not set)", default_host);
default_host
}
};
let port = match env::var("SERVER_PORT") {
Ok(p) => {
println!("✅ SERVER_PORT: {} (loaded from env)", p);
p
}
Err(_) => {
let default_port = "8000".to_string();
println!("⚠️ SERVER_PORT: {} (using default - env var not set)", default_port);
default_port
}
};
let combined_address = format!("{}:{}", host, port);
println!("🔗 Combined server_address: {}", combined_address);
combined_address
}
}
},
jwt_secret: match env::var("JWT_SECRET") {
Ok(secret) => {
if secret == "your-secret-key" {
println!("⚠️ JWT_SECRET: Using default value (SECURITY RISK in production!)");
} else {
println!("✅ JWT_SECRET: ***hidden*** (loaded from env, {} chars)", secret.len());
}
secret
}
Err(_) => {
let default_secret = "your-secret-key".to_string();
println!("⚠️ JWT_SECRET: Using default value (SECURITY RISK - env var not set!)");
default_secret
}
},
upload_path: match env::var("UPLOAD_PATH") {
Ok(path) => {
println!("✅ UPLOAD_PATH: {} (loaded from env)", path);
path
}
Err(_) => {
let default_path = "./uploads".to_string();
println!("⚠️ UPLOAD_PATH: {} (using default - env var not set)", default_path);
default_path
}
},
watch_folder: match env::var("WATCH_FOLDER") {
Ok(folder) => {
println!("✅ WATCH_FOLDER: {} (loaded from env)", folder);
folder
}
Err(_) => {
let default_folder = "./watch".to_string();
println!("⚠️ WATCH_FOLDER: {} (using default - env var not set)", default_folder);
default_folder
}
},
allowed_file_types: {
let file_types_str = match env::var("ALLOWED_FILE_TYPES") {
Ok(types) => {
println!("✅ ALLOWED_FILE_TYPES: {} (loaded from env)", types);
types
}
Err(_) => {
let default_types = "pdf,txt,doc,docx,png,jpg,jpeg".to_string();
println!("⚠️ ALLOWED_FILE_TYPES: {} (using default - env var not set)", default_types);
default_types
}
};
let types_vec: Vec<String> = file_types_str
.split(',')
.map(|s| s.trim().to_lowercase())
.collect();
println!("📄 Parsed file types: {:?}", types_vec);
types_vec
},
// Watcher Configuration
watch_interval_seconds: {
match env::var("WATCH_INTERVAL_SECONDS") {
Ok(val) => match val.parse::<u64>() {
Ok(parsed) => {
println!("✅ WATCH_INTERVAL_SECONDS: {} (loaded from env)", parsed);
Some(parsed)
}
Err(e) => {
println!("❌ WATCH_INTERVAL_SECONDS: Invalid value '{}' - {}, using default", val, e);
None
}
},
Err(_) => {
println!("⚠️ WATCH_INTERVAL_SECONDS: Not set, using default behavior");
None
}
}
},
file_stability_check_ms: {
match env::var("FILE_STABILITY_CHECK_MS") {
Ok(val) => match val.parse::<u64>() {
Ok(parsed) => {
println!("✅ FILE_STABILITY_CHECK_MS: {} (loaded from env)", parsed);
Some(parsed)
}
Err(e) => {
println!("❌ FILE_STABILITY_CHECK_MS: Invalid value '{}' - {}, using default", val, e);
None
}
},
Err(_) => {
println!("⚠️ FILE_STABILITY_CHECK_MS: Not set, using default behavior");
None
}
}
},
max_file_age_hours: {
match env::var("MAX_FILE_AGE_HOURS") {
Ok(val) => match val.parse::<u64>() {
Ok(parsed) => {
println!("✅ MAX_FILE_AGE_HOURS: {} (loaded from env)", parsed);
Some(parsed)
}
Err(e) => {
println!("❌ MAX_FILE_AGE_HOURS: Invalid value '{}' - {}, using unlimited", val, e);
None
}
},
Err(_) => {
println!("⚠️ MAX_FILE_AGE_HOURS: Not set, files will not expire");
None
}
}
},
// OCR Configuration
ocr_language: match env::var("OCR_LANGUAGE") {
Ok(lang) => {
println!("✅ OCR_LANGUAGE: {} (loaded from env)", lang);
lang
}
Err(_) => {
let default_lang = "eng".to_string();
println!("⚠️ OCR_LANGUAGE: {} (using default - env var not set)", default_lang);
default_lang
}
},
concurrent_ocr_jobs: {
match env::var("CONCURRENT_OCR_JOBS") {
Ok(val) => match val.parse::<usize>() {
Ok(parsed) => {
println!("✅ CONCURRENT_OCR_JOBS: {} (loaded from env)", parsed);
parsed
}
Err(e) => {
let default_jobs = 4;
println!("❌ CONCURRENT_OCR_JOBS: Invalid value '{}' - {}, using default {}", val, e, default_jobs);
default_jobs
}
},
Err(_) => {
let default_jobs = 4;
println!("⚠️ CONCURRENT_OCR_JOBS: {} (using default - env var not set)", default_jobs);
default_jobs
}
}
},
ocr_timeout_seconds: {
match env::var("OCR_TIMEOUT_SECONDS") {
Ok(val) => match val.parse::<u64>() {
Ok(parsed) => {
println!("✅ OCR_TIMEOUT_SECONDS: {} (loaded from env)", parsed);
parsed
}
Err(e) => {
let default_timeout = 300;
println!("❌ OCR_TIMEOUT_SECONDS: Invalid value '{}' - {}, using default {}", val, e, default_timeout);
default_timeout
}
},
Err(_) => {
let default_timeout = 300;
println!("⚠️ OCR_TIMEOUT_SECONDS: {} (using default - env var not set)", default_timeout);
default_timeout
}
}
},
max_file_size_mb: {
match env::var("MAX_FILE_SIZE_MB") {
Ok(val) => match val.parse::<u64>() {
Ok(parsed) => {
println!("✅ MAX_FILE_SIZE_MB: {} (loaded from env)", parsed);
parsed
}
Err(e) => {
let default_size = 50;
println!("❌ MAX_FILE_SIZE_MB: Invalid value '{}' - {}, using default {}", val, e, default_size);
default_size
}
},
Err(_) => {
let default_size = 50;
println!("⚠️ MAX_FILE_SIZE_MB: {} (using default - env var not set)", default_size);
default_size
}
}
},
// Performance Configuration
memory_limit_mb: {
match env::var("MEMORY_LIMIT_MB") {
Ok(val) => match val.parse::<usize>() {
Ok(parsed) => {
println!("✅ MEMORY_LIMIT_MB: {} (loaded from env)", parsed);
parsed
}
Err(e) => {
let default_memory = 512;
println!("❌ MEMORY_LIMIT_MB: Invalid value '{}' - {}, using default {}", val, e, default_memory);
default_memory
}
},
Err(_) => {
let default_memory = 512;
println!("⚠️ MEMORY_LIMIT_MB: {} (using default - env var not set)", default_memory);
default_memory
}
}
},
cpu_priority: match env::var("CPU_PRIORITY") {
Ok(priority) => {
println!("✅ CPU_PRIORITY: {} (loaded from env)", priority);
priority
}
Err(_) => {
let default_priority = "normal".to_string();
println!("⚠️ CPU_PRIORITY: {} (using default - env var not set)", default_priority);
default_priority
}
},
};
println!("\n🔍 CONFIGURATION VALIDATION:");
println!("{}", "=".repeat(50));
// Validate server address format
if !config.server_address.contains(':') {
println!("❌ SERVER_ADDRESS: Invalid format '{}' - missing port", config.server_address);
return Err(anyhow::anyhow!(
"Invalid server address format: '{}'. Expected format: 'host:port' (e.g., '0.0.0.0:8000')",
config.server_address
));
}
// Validate database URL format
if !config.database_url.starts_with("postgresql://") && !config.database_url.starts_with("postgres://") {
println!("❌ DATABASE_URL: Invalid format - must start with 'postgresql://' or 'postgres://'");
return Err(anyhow::anyhow!(
"Invalid database URL format. Must start with 'postgresql://' or 'postgres://'"
));
}
// Validate configuration to prevent recursion issues // Validate configuration to prevent recursion issues
println!("🔍 Validating directory paths for conflicts...");
config.validate_paths()?; config.validate_paths()?;
println!("\n📊 CONFIGURATION SUMMARY:");
println!("{}", "=".repeat(50));
println!("🌐 Server will bind to: {}", config.server_address);
println!("📁 Upload directory: {}", config.upload_path);
println!("👁️ Watch directory: {}", config.watch_folder);
println!("📄 Allowed file types: {:?}", config.allowed_file_types);
println!("🧠 OCR language: {}", config.ocr_language);
println!("⚙️ Concurrent OCR jobs: {}", config.concurrent_ocr_jobs);
println!("⏱️ OCR timeout: {}s", config.ocr_timeout_seconds);
println!("📏 Max file size: {}MB", config.max_file_size_mb);
println!("💾 Memory limit: {}MB", config.memory_limit_mb);
// Warning checks
println!("\n⚠️ CONFIGURATION WARNINGS:");
println!("{}", "=".repeat(50));
if config.jwt_secret == "your-secret-key" {
println!("🚨 SECURITY WARNING: Using default JWT secret! Set JWT_SECRET environment variable in production!");
}
if config.server_address.starts_with("0.0.0.0") {
println!("🌍 INFO: Server will listen on all interfaces (0.0.0.0)");
}
if config.max_file_size_mb > 100 {
println!("📏 INFO: Large file size limit ({}MB) may impact performance", config.max_file_size_mb);
}
if config.concurrent_ocr_jobs > 8 {
println!("⚙️ INFO: High OCR concurrency ({}) may use significant CPU/memory", config.concurrent_ocr_jobs);
}
println!("✅ Configuration validation completed successfully!\n");
Ok(config) Ok(config)
} }
@ -99,16 +400,52 @@ impl Config {
let upload_path = Path::new(&self.upload_path); let upload_path = Path::new(&self.upload_path);
let watch_path = Path::new(&self.watch_folder); let watch_path = Path::new(&self.watch_folder);
println!("📁 Checking upload directory: {}", self.upload_path);
println!("👁️ Checking watch directory: {}", self.watch_folder);
// Check if paths exist and are accessible
if !upload_path.exists() {
println!("⚠️ Upload directory does not exist yet: {}", self.upload_path);
} else if !upload_path.is_dir() {
println!("❌ Upload path exists but is not a directory: {}", self.upload_path);
return Err(anyhow::anyhow!(
"Upload path '{}' exists but is not a directory", self.upload_path
));
} else {
println!("✅ Upload directory exists and is accessible");
}
if !watch_path.exists() {
println!("⚠️ Watch directory does not exist yet: {}", self.watch_folder);
} else if !watch_path.is_dir() {
println!("❌ Watch path exists but is not a directory: {}", self.watch_folder);
return Err(anyhow::anyhow!(
"Watch folder '{}' exists but is not a directory", self.watch_folder
));
} else {
println!("✅ Watch directory exists and is accessible");
}
// Normalize paths to handle relative paths and symlinks // Normalize paths to handle relative paths and symlinks
let upload_canonical = upload_path.canonicalize() let upload_canonical = upload_path.canonicalize()
.unwrap_or_else(|_| upload_path.to_path_buf()); .unwrap_or_else(|_| {
println!("⚠️ Could not canonicalize upload path, using as-is");
upload_path.to_path_buf()
});
let watch_canonical = watch_path.canonicalize() let watch_canonical = watch_path.canonicalize()
.unwrap_or_else(|_| watch_path.to_path_buf()); .unwrap_or_else(|_| {
println!("⚠️ Could not canonicalize watch path, using as-is");
watch_path.to_path_buf()
});
println!("📍 Canonical upload path: {}", upload_canonical.display());
println!("📍 Canonical watch path: {}", watch_canonical.display());
// Check if paths are the same // Check if paths are the same
if upload_canonical == watch_canonical { if upload_canonical == watch_canonical {
println!("❌ CRITICAL ERROR: Upload and watch directories are the same!");
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Configuration Error: UPLOAD_PATH and WATCH_FOLDER cannot be the same directory.\n\ "Configuration Error: UPLOAD_PATH and WATCH_FOLDER cannot be the same directory.\n\
This would cause infinite recursion where WebDAV files are downloaded to the upload \n\ This would cause infinite recursion where WebDAV files are downloaded to the upload \n\
directory and then immediately reprocessed by the watcher.\n\ directory and then immediately reprocessed by the watcher.\n\
Current config:\n\ Current config:\n\
@ -121,8 +458,9 @@ impl Config {
// Check if watch folder is inside upload folder // Check if watch folder is inside upload folder
if watch_canonical.starts_with(&upload_canonical) { if watch_canonical.starts_with(&upload_canonical) {
println!("❌ CRITICAL ERROR: Watch folder is inside upload directory!");
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Configuration Error: WATCH_FOLDER cannot be inside UPLOAD_PATH.\n\ "Configuration Error: WATCH_FOLDER cannot be inside UPLOAD_PATH.\n\
This would cause recursion where WebDAV files downloaded to uploads are \n\ This would cause recursion where WebDAV files downloaded to uploads are \n\
detected by the watcher as new files.\n\ detected by the watcher as new files.\n\
Current config:\n\ Current config:\n\
@ -135,8 +473,9 @@ impl Config {
// Check if upload folder is inside watch folder // Check if upload folder is inside watch folder
if upload_canonical.starts_with(&watch_canonical) { if upload_canonical.starts_with(&watch_canonical) {
println!("❌ CRITICAL ERROR: Upload directory is inside watch folder!");
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Configuration Error: UPLOAD_PATH cannot be inside WATCH_FOLDER.\n\ "Configuration Error: UPLOAD_PATH cannot be inside WATCH_FOLDER.\n\
This would cause recursion where files from the watch folder are \n\ This would cause recursion where files from the watch folder are \n\
copied to uploads (inside the watch folder) and reprocessed.\n\ copied to uploads (inside the watch folder) and reprocessed.\n\
Current config:\n\ Current config:\n\
@ -147,6 +486,7 @@ impl Config {
)); ));
} }
println!("✅ Directory path validation passed - no conflicts detected");
Ok(()) Ok(())
} }
} }

View File

@ -2,6 +2,7 @@ use anyhow::Result;
use chrono::Utc; use chrono::Utc;
use sqlx::Row; use sqlx::Row;
use uuid::Uuid; use uuid::Uuid;
use tracing::{info, warn, error};
use super::Database; use super::Database;
@ -260,6 +261,8 @@ impl Database {
} }
pub async fn get_sources_for_sync(&self) -> Result<Vec<crate::models::Source>> { pub async fn get_sources_for_sync(&self) -> Result<Vec<crate::models::Source>> {
info!("🔍 Loading sources from database for sync check...");
let rows = sqlx::query( let rows = sqlx::query(
r#"SELECT id, user_id, name, source_type, enabled, config, status, r#"SELECT id, user_id, name, source_type, enabled, config, status,
last_sync_at, last_error, last_error_at, total_files_synced, last_sync_at, last_error, last_error_at, total_files_synced,
@ -269,20 +272,57 @@ impl Database {
ORDER BY last_sync_at ASC NULLS FIRST"# ORDER BY last_sync_at ASC NULLS FIRST"#
) )
.fetch_all(&self.pool) .fetch_all(&self.pool)
.await?; .await
.map_err(|e| {
error!("❌ Failed to load sources from database: {}", e);
e
})?;
info!("📊 Database query returned {} sources for sync processing", rows.len());
let mut sources = Vec::new(); let mut sources = Vec::new();
for row in rows { for (index, row) in rows.iter().enumerate() {
sources.push(crate::models::Source { let source_id: uuid::Uuid = row.get("id");
id: row.get("id"), let source_name: String = row.get("name");
let source_type_str: String = row.get("source_type");
let config_json: serde_json::Value = row.get("config");
info!("📋 Processing source {}: ID={}, Name='{}', Type={}",
index + 1, source_id, source_name, source_type_str);
// Log config structure for debugging
if source_type_str == "WebDAV" {
if let Some(config_obj) = config_json.as_object() {
if let Some(server_url) = config_obj.get("server_url").and_then(|v| v.as_str()) {
info!(" 🔗 WebDAV server_url: '{}'", server_url);
} else {
warn!(" ⚠️ WebDAV config missing server_url field");
}
} else {
warn!(" ⚠️ WebDAV config is not a JSON object");
}
// Pretty print the config for debugging
if let Ok(pretty_config) = serde_json::to_string_pretty(&config_json) {
info!(" 📄 Full config:\n{}", pretty_config);
} else {
warn!(" ⚠️ Unable to serialize config JSON");
}
}
let source = crate::models::Source {
id: source_id,
user_id: row.get("user_id"), user_id: row.get("user_id"),
name: row.get("name"), name: source_name.clone(),
source_type: row.get::<String, _>("source_type").try_into() source_type: source_type_str.clone().try_into()
.map_err(|e| anyhow::anyhow!("Invalid source type: {}", e))?, .map_err(|e| anyhow::anyhow!("Invalid source type '{}' for source '{}': {}", source_type_str, source_name, e))?,
enabled: row.get("enabled"), enabled: row.get("enabled"),
config: row.get("config"), config: config_json,
status: row.get::<String, _>("status").try_into() status: {
.map_err(|e| anyhow::anyhow!("Invalid source status: {}", e))?, let status_str: String = row.get("status");
status_str.clone().try_into()
.map_err(|e| anyhow::anyhow!("Invalid source status '{}' for source '{}': {}", status_str, source_name, e))?
},
last_sync_at: row.get("last_sync_at"), last_sync_at: row.get("last_sync_at"),
last_error: row.get("last_error"), last_error: row.get("last_error"),
last_error_at: row.get("last_error_at"), last_error_at: row.get("last_error_at"),
@ -291,7 +331,9 @@ impl Database {
total_size_bytes: row.get("total_size_bytes"), total_size_bytes: row.get("total_size_bytes"),
created_at: row.get("created_at"), created_at: row.get("created_at"),
updated_at: row.get("updated_at"), updated_at: row.get("updated_at"),
}); };
sources.push(source);
} }
Ok(sources) Ok(sources)

View File

@ -6,6 +6,7 @@ use sqlx::Row;
use std::sync::Arc; use std::sync::Arc;
use tower_http::{cors::CorsLayer, services::{ServeDir, ServeFile}}; use tower_http::{cors::CorsLayer, services::{ServeDir, ServeFile}};
use tracing::{info, error, warn}; use tracing::{info, error, warn};
use anyhow;
use readur::{config::Config, db::Database, AppState, *}; use readur::{config::Config, db::Database, AppState, *};
@ -50,10 +51,56 @@ fn determine_static_files_path() -> std::path::PathBuf {
} }
#[tokio::main] #[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> { async fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt::init(); tracing_subscriber::fmt::init();
let config = Config::from_env()?; println!("\n🚀 READUR APPLICATION STARTUP");
println!("{}", "=".repeat(60));
// Load and validate configuration with comprehensive logging
let config = match Config::from_env() {
Ok(cfg) => {
println!("✅ Configuration loaded and validated successfully");
cfg
}
Err(e) => {
println!("❌ CRITICAL: Configuration loading failed!");
println!("Error: {}", e);
println!("\n🔧 Please check your environment variables and fix the configuration issues above.");
return Err(e);
}
};
// Log critical configuration values that affect startup
println!("\n🔗 STARTUP CONFIGURATION:");
println!("{}", "=".repeat(50));
println!("🌐 Server will start on: {}", config.server_address);
// Parse database URL safely without exposing credentials
let db_info = if let Some(at_pos) = config.database_url.find('@') {
let host_part = &config.database_url[at_pos + 1..];
let protocol = if config.database_url.starts_with("postgresql://") { "postgresql" } else { "postgres" };
// Extract just username from credentials part (before @)
let creds_part = &config.database_url[..at_pos];
let username = if let Some(proto_end) = creds_part.find("://") {
let after_proto = &creds_part[proto_end + 3..];
if let Some(colon_pos) = after_proto.find(':') {
&after_proto[..colon_pos]
} else {
after_proto
}
} else {
"unknown"
};
format!("{}://{}:***@{}", protocol, username, host_part)
} else {
"Invalid database URL format".to_string()
};
println!("🗄️ Database connection: {}", db_info);
println!("📁 Upload directory: {}", config.upload_path);
println!("👁️ Watch directory: {}", config.watch_folder);
// Initialize upload directory structure // Initialize upload directory structure
info!("Initializing upload directory structure..."); info!("Initializing upload directory structure...");
@ -72,8 +119,38 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
} }
// Create separate database pools for different workloads // Create separate database pools for different workloads
let web_db = Database::new_with_pool_config(&config.database_url, 20, 2).await?; // Web UI pool println!("\n🗄️ DATABASE CONNECTION:");
let background_db = Database::new_with_pool_config(&config.database_url, 30, 3).await?; // Background operations pool println!("{}", "=".repeat(50));
let web_db = match Database::new_with_pool_config(&config.database_url, 20, 2).await {
Ok(db) => {
println!("✅ Web database pool created (max: 20 connections, min idle: 2)");
db
}
Err(e) => {
println!("❌ CRITICAL: Failed to connect to database for web operations!");
println!("Database URL: {}", db_info); // Use the already-masked URL
println!("Error: {}", e);
println!("\n🔧 Please verify:");
println!(" - Database server is running");
println!(" - DATABASE_URL is correct");
println!(" - Database credentials are valid");
println!(" - Network connectivity to database");
return Err(e.into());
}
};
let background_db = match Database::new_with_pool_config(&config.database_url, 30, 3).await {
Ok(db) => {
println!("✅ Background database pool created (max: 30 connections, min idle: 3)");
db
}
Err(e) => {
println!("❌ CRITICAL: Failed to connect to database for background operations!");
println!("Error: {}", e);
return Err(e.into());
}
};
// Don't run the old migration system - let SQLx handle everything // Don't run the old migration system - let SQLx handle everything
// db.migrate().await?; // db.migrate().await?;
@ -275,10 +352,15 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
}); });
// Create universal source scheduler with background state (handles WebDAV, Local, S3) // Create universal source scheduler with background state (handles WebDAV, Local, S3)
println!("\n📅 SCHEDULER INITIALIZATION:");
println!("{}", "=".repeat(50));
let source_scheduler = Arc::new(readur::source_scheduler::SourceScheduler::new(background_state.clone())); let source_scheduler = Arc::new(readur::source_scheduler::SourceScheduler::new(background_state.clone()));
println!("✅ Universal source scheduler created (handles WebDAV, Local, S3)");
// Keep WebDAV scheduler for backward compatibility with existing WebDAV endpoints // Keep WebDAV scheduler for backward compatibility with existing WebDAV endpoints
let webdav_scheduler = Arc::new(readur::webdav_scheduler::WebDAVScheduler::new(background_state.clone())); let webdav_scheduler = Arc::new(readur::webdav_scheduler::WebDAVScheduler::new(background_state.clone()));
println!("✅ Legacy WebDAV scheduler created (backward compatibility)");
// Update the web state to include scheduler references // Update the web state to include scheduler references
let updated_web_state = AppState { let updated_web_state = AppState {
@ -291,12 +373,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let web_state = Arc::new(updated_web_state); let web_state = Arc::new(updated_web_state);
// Start universal source scheduler on background runtime // Start universal source scheduler on background runtime
println!("⏰ Scheduling background source sync to start in 30 seconds");
let scheduler_for_background = source_scheduler.clone(); let scheduler_for_background = source_scheduler.clone();
background_runtime.spawn(async move { background_runtime.spawn(async move {
info!("Starting universal source sync scheduler with 30-second startup delay"); info!("Starting universal source sync scheduler with 30-second startup delay");
// Wait 30 seconds before starting scheduler to allow server to fully initialize // Wait 30 seconds before starting scheduler to allow server to fully initialize
tokio::time::sleep(tokio::time::Duration::from_secs(30)).await; tokio::time::sleep(tokio::time::Duration::from_secs(30)).await;
info!("Universal source sync scheduler starting after startup delay"); info!("🔄 Universal source sync scheduler starting after startup delay - this will check for WebDAV sources!");
scheduler_for_background.start().await; scheduler_for_background.start().await;
}); });
@ -333,8 +416,35 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.layer(CorsLayer::permissive()) .layer(CorsLayer::permissive())
.with_state(web_state.clone()); .with_state(web_state.clone());
let listener = tokio::net::TcpListener::bind(&config.server_address).await?; println!("\n🌐 STARTING HTTP SERVER:");
info!("Server starting on {}", config.server_address); println!("{}", "=".repeat(50));
let listener = match tokio::net::TcpListener::bind(&config.server_address).await {
Ok(listener) => {
println!("✅ HTTP server bound to: {}", config.server_address);
listener
}
Err(e) => {
println!("❌ CRITICAL: Failed to bind to address: {}", config.server_address);
println!("Error: {}", e);
println!("\n🔧 Please check:");
println!(" - Address {} is not already in use", config.server_address);
println!(" - SERVER_HOST and SERVER_PORT environment variables are correct");
println!(" - You have permission to bind to this address");
return Err(e.into());
}
};
println!("\n🎉 READUR APPLICATION READY!");
println!("{}", "=".repeat(60));
println!("🌐 Server: http://{}", config.server_address);
println!("📁 Upload Directory: {}", config.upload_path);
println!("👁️ Watch Directory: {}", config.watch_folder);
println!("🔄 Source Scheduler: Will start in 30 seconds");
println!("📋 Check logs above for any configuration warnings");
println!("{}", "=".repeat(60));
info!("🚀 Readur server is now running and accepting connections");
axum::serve(listener, app).await?; axum::serve(listener, app).await?;

View File

@ -57,9 +57,29 @@ impl SourceScheduler {
info!("Checking for interrupted source syncs to resume"); info!("Checking for interrupted source syncs to resume");
// Get all enabled sources that might have been interrupted // Get all enabled sources that might have been interrupted
let sources = self.state.db.get_sources_for_sync().await?; let sources = match self.state.db.get_sources_for_sync().await {
Ok(sources) => {
info!("Successfully loaded {} sources from database for sync check", sources.len());
sources
}
Err(e) => {
error!("Failed to load sources from database during startup: {}", e);
return Err(e.into());
}
};
for source in sources { for source in sources {
info!("Processing source during startup check: ID={}, Name='{}', Type={}, Status={}",
source.id, source.name, source.source_type.to_string(), source.status.to_string());
// Validate source configuration before attempting any operations
if let Err(e) = self.validate_source_config(&source) {
error!("❌ CONFIGURATION ERROR for source '{}' (ID: {}): {}",
source.name, source.id, e);
error!("Source config JSON: {}", serde_json::to_string_pretty(&source.config).unwrap_or_else(|_| "Invalid JSON".to_string()));
continue;
}
// Check if this source was likely interrupted during sync // Check if this source was likely interrupted during sync
// This is a simplified check - you might want to add specific interrupted tracking // This is a simplified check - you might want to add specific interrupted tracking
if source.status.to_string() == "syncing" { if source.status.to_string() == "syncing" {
@ -132,6 +152,26 @@ impl SourceScheduler {
let sources = self.state.db.get_sources_for_sync().await?; let sources = self.state.db.get_sources_for_sync().await?;
for source in sources { for source in sources {
// Validate source configuration before checking if sync is due
if let Err(e) = self.validate_source_config(&source) {
error!("❌ CONFIGURATION ERROR during background sync check for source '{}' (ID: {}): {}",
source.name, source.id, e);
error!("Source config JSON: {}", serde_json::to_string_pretty(&source.config).unwrap_or_else(|_| "Invalid JSON".to_string()));
// Update source with error status
if let Err(update_err) = sqlx::query(
r#"UPDATE sources SET status = 'error', last_error = $1, last_error_at = NOW(), updated_at = NOW() WHERE id = $2"#
)
.bind(format!("Configuration error: {}", e))
.bind(source.id)
.execute(self.state.db.get_pool())
.await {
error!("Failed to update source error status: {}", update_err);
}
continue;
}
// Check if sync is due for this source // Check if sync is due for this source
if self.is_sync_due(&source).await? { if self.is_sync_due(&source).await? {
info!("Starting background sync for source: {} ({})", source.name, source.source_type); info!("Starting background sync for source: {} ({})", source.name, source.source_type);
@ -378,4 +418,102 @@ impl SourceScheduler {
Err("No running sync found for this source".into()) Err("No running sync found for this source".into())
} }
} }
/// Validates a source configuration and provides detailed error messages for debugging.
///
/// Deserializes the source's raw JSON `config` into the strongly-typed config
/// struct matching its `source_type`. For WebDAV sources, additionally checks
/// the server URL (via `validate_webdav_url`), credentials, and watch folders.
///
/// # Errors
/// Returns `Err(String)` with a human-readable message suitable for logging
/// when the JSON fails to deserialize or a WebDAV field check fails.
fn validate_source_config(&self, source: &crate::models::Source) -> Result<(), String> {
    use crate::models::{SourceType, WebDAVSourceConfig, S3SourceConfig, LocalFolderSourceConfig};
    match source.source_type {
        SourceType::WebDAV => {
            // Attempt to deserialize WebDAV config; a parse failure here is the
            // most common misconfiguration, so surface serde's error verbatim.
            let config: WebDAVSourceConfig = serde_json::from_value(source.config.clone())
                .map_err(|e| format!("Failed to parse WebDAV configuration JSON: {}", e))?;
            // Validate server URL format (scheme, hostname, parseability).
            self.validate_webdav_url(&config.server_url, &source.name)?;
            // Additional WebDAV validations: non-blank credentials and at least
            // one folder to watch. (`format!` with no interpolation replaced by
            // `.to_string()` — message text unchanged.)
            if config.username.trim().is_empty() {
                return Err("WebDAV username cannot be empty".to_string());
            }
            if config.password.trim().is_empty() {
                return Err("WebDAV password cannot be empty".to_string());
            }
            if config.watch_folders.is_empty() {
                return Err("WebDAV watch_folders cannot be empty".to_string());
            }
            Ok(())
        }
        SourceType::S3 => {
            // Shape-check only: verify the JSON deserializes into S3SourceConfig.
            let _config: S3SourceConfig = serde_json::from_value(source.config.clone())
                .map_err(|e| format!("Failed to parse S3 configuration JSON: {}", e))?;
            Ok(())
        }
        SourceType::LocalFolder => {
            // Shape-check only: verify the JSON deserializes into LocalFolderSourceConfig.
            let _config: LocalFolderSourceConfig = serde_json::from_value(source.config.clone())
                .map_err(|e| format!("Failed to parse Local Folder configuration JSON: {}", e))?;
            Ok(())
        }
    }
}
/// Validates a WebDAV server URL, returning a specific, actionable error
/// message when the URL is empty, missing an http/https scheme, lacking a
/// hostname, or otherwise unparseable.
///
/// Logs a success line (including `source_name`) when validation passes.
///
/// # Errors
/// Returns `Err(String)` describing the exact problem, including the offending
/// value and examples of valid URLs.
fn validate_webdav_url(&self, server_url: &str, source_name: &str) -> Result<(), String> {
    if server_url.trim().is_empty() {
        return Err("WebDAV server_url is empty".to_string());
    }

    // Cheap prefix check first so the most common mistake (missing scheme)
    // gets the clearest message, before falling through to full URL parsing.
    if !server_url.starts_with("http://") && !server_url.starts_with("https://") {
        return Err(format!(
            "WebDAV server_url must start with 'http://' or 'https://'. \
            Current value: '{}'. \
            Examples of valid URLs: \
            - https://cloud.example.com \
            - http://192.168.1.100:8080 \
            - https://nextcloud.mydomain.com:443",
            server_url
        ));
    }

    // Full parse catches issues the prefix check cannot (bad host, bad port, etc.).
    match reqwest::Url::parse(server_url) {
        Ok(url) => {
            if url.scheme() != "http" && url.scheme() != "https" {
                return Err(format!(
                    "WebDAV server_url has invalid scheme '{}'. Only 'http' and 'https' are supported. \
                    Current URL: '{}'",
                    url.scheme(), server_url
                ));
            }

            if url.host_str().is_none() {
                return Err(format!(
                    "WebDAV server_url is missing hostname. \
                    Current URL: '{}'. \
                    Example: https://cloud.example.com",
                    server_url
                ));
            }

            info!("✅ WebDAV URL validation passed for source '{}': {}", source_name, server_url);
            Ok(())
        }
        Err(e) => {
            Err(format!(
                "WebDAV server_url is not a valid URL: {}. \
                Current value: '{}'. \
                The URL must be absolute and include the full domain. \
                Examples: \
                - https://cloud.example.com \
                - http://192.168.1.100:8080/webdav \
                - https://nextcloud.mydomain.com",
                e, server_url
            ))
        }
    }
}
} }

View File

@ -64,17 +64,56 @@ impl WebDAVService {
.timeout(Duration::from_secs(config.timeout_seconds)) .timeout(Duration::from_secs(config.timeout_seconds))
.build()?; .build()?;
// Validate server URL before constructing WebDAV URLs
if config.server_url.trim().is_empty() {
return Err(anyhow!("❌ WebDAV Configuration Error: server_url is empty"));
}
if !config.server_url.starts_with("http://") && !config.server_url.starts_with("https://") {
return Err(anyhow!(
"❌ WebDAV Configuration Error: server_url must start with 'http://' or 'https://'. \
Current value: '{}'. \
Examples: \
- https://cloud.example.com \
- http://192.168.1.100:8080 \
- https://nextcloud.mydomain.com",
config.server_url
));
}
// Validate that server_url can be parsed as a proper URL
if let Err(e) = reqwest::Url::parse(&config.server_url) {
return Err(anyhow!(
"❌ WebDAV Configuration Error: server_url is not a valid URL: {}. \
Current value: '{}'. \
The URL must be absolute and include the full domain. \
Examples: \
- https://cloud.example.com \
- http://192.168.1.100:8080/webdav \
- https://nextcloud.mydomain.com",
e, config.server_url
));
}
// Construct WebDAV URL based on server type // Construct WebDAV URL based on server type
let base_webdav_url = match config.server_type.as_deref() { let base_webdav_url = match config.server_type.as_deref() {
Some("nextcloud") | Some("owncloud") => format!( Some("nextcloud") | Some("owncloud") => {
let url = format!(
"{}/remote.php/dav/files/{}", "{}/remote.php/dav/files/{}",
config.server_url.trim_end_matches('/'), config.server_url.trim_end_matches('/'),
config.username config.username
), );
_ => format!( info!("🔗 Constructed Nextcloud/ownCloud WebDAV URL: {}", url);
url
},
_ => {
let url = format!(
"{}/webdav", "{}/webdav",
config.server_url.trim_end_matches('/') config.server_url.trim_end_matches('/')
), );
info!("🔗 Constructed generic WebDAV URL: {}", url);
url
},
}; };
Ok(Self { Ok(Self {
@ -153,6 +192,45 @@ impl WebDAVService {
test_config.server_url, test_config.server_url,
test_config.server_type.as_deref().unwrap_or("generic")); test_config.server_type.as_deref().unwrap_or("generic"));
// Validate server URL before constructing test URL
if test_config.server_url.trim().is_empty() {
return Ok(WebDAVConnectionResult {
success: false,
message: "❌ WebDAV server_url is empty".to_string(),
server_version: None,
server_type: None,
});
}
if !test_config.server_url.starts_with("http://") && !test_config.server_url.starts_with("https://") {
return Ok(WebDAVConnectionResult {
success: false,
message: format!(
"❌ WebDAV server_url must start with 'http://' or 'https://'. \
Current value: '{}'. \
Examples: https://cloud.example.com, http://192.168.1.100:8080",
test_config.server_url
),
server_version: None,
server_type: None,
});
}
// Validate URL can be parsed
if let Err(e) = reqwest::Url::parse(&test_config.server_url) {
return Ok(WebDAVConnectionResult {
success: false,
message: format!(
"❌ WebDAV server_url is not a valid URL: {}. \
Current value: '{}'. \
Must be absolute URL like: https://cloud.example.com",
e, test_config.server_url
),
server_version: None,
server_type: None,
});
}
let test_url = match test_config.server_type.as_deref() { let test_url = match test_config.server_type.as_deref() {
Some("nextcloud") | Some("owncloud") => format!( Some("nextcloud") | Some("owncloud") => format!(
"{}/remote.php/dav/files/{}/", "{}/remote.php/dav/files/{}/",
@ -165,7 +243,9 @@ impl WebDAVService {
), ),
}; };
let response = self.client info!("🔗 Constructed test URL: {}", test_url);
let resp = self.client
.request(Method::from_bytes(b"PROPFIND").unwrap(), &test_url) .request(Method::from_bytes(b"PROPFIND").unwrap(), &test_url)
.basic_auth(&test_config.username, Some(&test_config.password)) .basic_auth(&test_config.username, Some(&test_config.password))
.header("Depth", "0") .header("Depth", "0")
@ -176,10 +256,14 @@ impl WebDAVService {
</d:prop> </d:prop>
</d:propfind>"#) </d:propfind>"#)
.send() .send()
.await; .await
.map_err(|e| {
error!("❌ WebDAV HTTP request failed for URL '{}': {}", test_url, e);
anyhow!("WebDAV HTTP request failed for URL '{}': {}. \
This often indicates a URL configuration issue. \
Verify the server_url is correct and accessible.", test_url, e)
})?;
match response {
Ok(resp) => {
if resp.status().is_success() { if resp.status().is_success() {
info!("✅ WebDAV connection successful"); info!("✅ WebDAV connection successful");
@ -194,26 +278,15 @@ impl WebDAVService {
server_type, server_type,
}) })
} else { } else {
error!("❌ WebDAV connection failed with status: {}", resp.status()); error!("❌ WebDAV connection failed with status: {} for URL: {}", resp.status(), test_url);
Ok(WebDAVConnectionResult { Ok(WebDAVConnectionResult {
success: false, success: false,
message: format!("Connection failed: HTTP {}", resp.status()), message: format!("Connection failed: HTTP {} for URL: {}", resp.status(), test_url),
server_version: None, server_version: None,
server_type: None, server_type: None,
}) })
} }
} }
Err(e) => {
error!("❌ WebDAV connection error: {}", e);
Ok(WebDAVConnectionResult {
success: false,
message: format!("Connection error: {}", e),
server_version: None,
server_type: None,
})
}
}
}
async fn get_server_info(&self, test_config: &WebDAVTestConnection) -> (Option<String>, Option<String>) { async fn get_server_info(&self, test_config: &WebDAVTestConnection) -> (Option<String>, Option<String>) {
// Try Nextcloud/ownCloud capabilities first // Try Nextcloud/ownCloud capabilities first

View File

@ -117,7 +117,7 @@ fn test_webdav_config_validation() {
assert!(WebDAVService::new(valid_config).is_ok()); assert!(WebDAVService::new(valid_config).is_ok());
// Test config with empty server URL // Test config with empty server URL - should fail with our enhanced validation
let invalid_config = WebDAVConfig { let invalid_config = WebDAVConfig {
server_url: "".to_string(), server_url: "".to_string(),
username: "testuser".to_string(), username: "testuser".to_string(),
@ -128,8 +128,34 @@ fn test_webdav_config_validation() {
server_type: Some("nextcloud".to_string()), server_type: Some("nextcloud".to_string()),
}; };
// Should still create service, validation happens during actual requests // Should fail early with enhanced validation
assert!(WebDAVService::new(invalid_config).is_ok()); assert!(WebDAVService::new(invalid_config).is_err());
// Test config with invalid URL scheme - should also fail
let invalid_scheme_config = WebDAVConfig {
server_url: "ftp://cloud.example.com".to_string(),
username: "testuser".to_string(),
password: "testpass".to_string(),
watch_folders: vec!["/Documents".to_string()],
file_extensions: vec!["pdf".to_string()],
timeout_seconds: 30,
server_type: Some("nextcloud".to_string()),
};
assert!(WebDAVService::new(invalid_scheme_config).is_err());
// Test config with relative URL - should also fail
let relative_url_config = WebDAVConfig {
server_url: "/webdav".to_string(),
username: "testuser".to_string(),
password: "testpass".to_string(),
watch_folders: vec!["/Documents".to_string()],
file_extensions: vec!["pdf".to_string()],
timeout_seconds: 30,
server_type: Some("nextcloud".to_string()),
};
assert!(WebDAVService::new(relative_url_config).is_err());
} }
#[test] #[test]