Readur/src/config.rs

576 lines
25 KiB
Rust

use anyhow::Result;
use std::env;
#[derive(Clone, Debug)]
pub struct Config {
pub database_url: String,
pub server_address: String,
pub jwt_secret: String,
pub upload_path: String,
pub watch_folder: String,
pub allowed_file_types: Vec<String>,
pub watch_interval_seconds: Option<u64>,
pub file_stability_check_ms: Option<u64>,
pub max_file_age_hours: Option<u64>,
// OCR Configuration
pub ocr_language: String,
pub concurrent_ocr_jobs: usize,
pub ocr_timeout_seconds: u64,
pub max_file_size_mb: u64,
// Performance
pub memory_limit_mb: usize,
pub cpu_priority: String,
// OIDC Configuration
pub oidc_enabled: bool,
pub oidc_client_id: Option<String>,
pub oidc_client_secret: Option<String>,
pub oidc_issuer_url: Option<String>,
pub oidc_redirect_uri: Option<String>,
}
impl Config {
pub fn from_env() -> Result<Self> {
// Load .env file if present
match dotenvy::dotenv() {
Ok(path) => println!("🔧 Loaded environment variables from: {}", path.display()),
Err(_) => println!("🔧 No .env file found, using system environment variables"),
}
// Log all environment variable loading with detailed information
println!("\n📋 CONFIGURATION LOADING:");
println!("{}", "=".repeat(50));
// Database Configuration
let database_url = match env::var("DATABASE_URL") {
Ok(val) => {
// Mask sensitive parts of database URL for logging
let masked_url = if val.contains('@') {
let parts: Vec<&str> = val.split('@').collect();
if parts.len() >= 2 {
let credentials_part = parts[0];
let remaining_part = parts[1..].join("@");
// Extract just the username part before the password
if let Some(username_start) = credentials_part.rfind("://") {
let protocol = &credentials_part[..username_start + 3];
let credentials = &credentials_part[username_start + 3..];
if let Some(colon_pos) = credentials.find(':') {
let username = &credentials[..colon_pos];
format!("{}{}:***@{}", protocol, username, remaining_part)
} else {
format!("{}***@{}", protocol, remaining_part)
}
} else {
"***masked***".to_string()
}
} else {
"***masked***".to_string()
}
} else {
val.clone()
};
println!("✅ DATABASE_URL: {} (loaded from env)", masked_url);
val
}
Err(_) => {
let default_url = "postgresql://readur:readur@localhost/readur".to_string();
println!("⚠️ DATABASE_URL: {} (using default - env var not set)",
"postgresql://readur:***@localhost/readur");
default_url
}
};
let config = Config {
database_url,
server_address: {
// Support both SERVER_ADDRESS (full address) and SERVER_PORT (just port)
match env::var("SERVER_ADDRESS") {
Ok(addr) => {
println!("✅ SERVER_ADDRESS: {} (loaded from env)", addr);
addr
}
Err(_) => {
let host = match env::var("SERVER_HOST") {
Ok(h) => {
println!("✅ SERVER_HOST: {} (loaded from env)", h);
h
}
Err(_) => {
let default_host = "0.0.0.0".to_string();
println!("⚠️ SERVER_HOST: {} (using default - env var not set)", default_host);
default_host
}
};
let port = match env::var("SERVER_PORT") {
Ok(p) => {
println!("✅ SERVER_PORT: {} (loaded from env)", p);
p
}
Err(_) => {
let default_port = "8000".to_string();
println!("⚠️ SERVER_PORT: {} (using default - env var not set)", default_port);
default_port
}
};
let combined_address = format!("{}:{}", host, port);
println!("🔗 Combined server_address: {}", combined_address);
combined_address
}
}
},
jwt_secret: match env::var("JWT_SECRET") {
Ok(secret) => {
if secret == "your-secret-key" {
println!("⚠️ JWT_SECRET: Using default value (SECURITY RISK in production!)");
} else {
println!("✅ JWT_SECRET: ***hidden*** (loaded from env, {} chars)", secret.len());
}
secret
}
Err(_) => {
let default_secret = "your-secret-key".to_string();
println!("⚠️ JWT_SECRET: Using default value (SECURITY RISK - env var not set!)");
default_secret
}
},
upload_path: match env::var("UPLOAD_PATH") {
Ok(path) => {
println!("✅ UPLOAD_PATH: {} (loaded from env)", path);
path
}
Err(_) => {
let default_path = "./uploads".to_string();
println!("⚠️ UPLOAD_PATH: {} (using default - env var not set)", default_path);
default_path
}
},
watch_folder: match env::var("WATCH_FOLDER") {
Ok(folder) => {
println!("✅ WATCH_FOLDER: {} (loaded from env)", folder);
folder
}
Err(_) => {
let default_folder = "./watch".to_string();
println!("⚠️ WATCH_FOLDER: {} (using default - env var not set)", default_folder);
default_folder
}
},
allowed_file_types: {
let file_types_str = match env::var("ALLOWED_FILE_TYPES") {
Ok(types) => {
println!("✅ ALLOWED_FILE_TYPES: {} (loaded from env)", types);
types
}
Err(_) => {
let default_types = "pdf,txt,doc,docx,png,jpg,jpeg".to_string();
println!("⚠️ ALLOWED_FILE_TYPES: {} (using default - env var not set)", default_types);
default_types
}
};
let types_vec: Vec<String> = file_types_str
.split(',')
.map(|s| s.trim().to_lowercase())
.collect();
println!("📄 Parsed file types: {:?}", types_vec);
types_vec
},
// Watcher Configuration
watch_interval_seconds: {
match env::var("WATCH_INTERVAL_SECONDS") {
Ok(val) => match val.parse::<u64>() {
Ok(parsed) => {
println!("✅ WATCH_INTERVAL_SECONDS: {} (loaded from env)", parsed);
Some(parsed)
}
Err(e) => {
println!("❌ WATCH_INTERVAL_SECONDS: Invalid value '{}' - {}, using default", val, e);
None
}
},
Err(_) => {
println!("⚠️ WATCH_INTERVAL_SECONDS: Not set, using default behavior");
None
}
}
},
file_stability_check_ms: {
match env::var("FILE_STABILITY_CHECK_MS") {
Ok(val) => match val.parse::<u64>() {
Ok(parsed) => {
println!("✅ FILE_STABILITY_CHECK_MS: {} (loaded from env)", parsed);
Some(parsed)
}
Err(e) => {
println!("❌ FILE_STABILITY_CHECK_MS: Invalid value '{}' - {}, using default", val, e);
None
}
},
Err(_) => {
println!("⚠️ FILE_STABILITY_CHECK_MS: Not set, using default behavior");
None
}
}
},
max_file_age_hours: {
match env::var("MAX_FILE_AGE_HOURS") {
Ok(val) => match val.parse::<u64>() {
Ok(parsed) => {
println!("✅ MAX_FILE_AGE_HOURS: {} (loaded from env)", parsed);
Some(parsed)
}
Err(e) => {
println!("❌ MAX_FILE_AGE_HOURS: Invalid value '{}' - {}, using unlimited", val, e);
None
}
},
Err(_) => {
println!("⚠️ MAX_FILE_AGE_HOURS: Not set, files will not expire");
None
}
}
},
// OCR Configuration
ocr_language: match env::var("OCR_LANGUAGE") {
Ok(lang) => {
println!("✅ OCR_LANGUAGE: {} (loaded from env)", lang);
lang
}
Err(_) => {
let default_lang = "eng".to_string();
println!("⚠️ OCR_LANGUAGE: {} (using default - env var not set)", default_lang);
default_lang
}
},
concurrent_ocr_jobs: {
match env::var("CONCURRENT_OCR_JOBS") {
Ok(val) => match val.parse::<usize>() {
Ok(parsed) => {
println!("✅ CONCURRENT_OCR_JOBS: {} (loaded from env)", parsed);
parsed
}
Err(e) => {
let default_jobs = 4;
println!("❌ CONCURRENT_OCR_JOBS: Invalid value '{}' - {}, using default {}", val, e, default_jobs);
default_jobs
}
},
Err(_) => {
let default_jobs = 4;
println!("⚠️ CONCURRENT_OCR_JOBS: {} (using default - env var not set)", default_jobs);
default_jobs
}
}
},
ocr_timeout_seconds: {
match env::var("OCR_TIMEOUT_SECONDS") {
Ok(val) => match val.parse::<u64>() {
Ok(parsed) => {
println!("✅ OCR_TIMEOUT_SECONDS: {} (loaded from env)", parsed);
parsed
}
Err(e) => {
let default_timeout = 300;
println!("❌ OCR_TIMEOUT_SECONDS: Invalid value '{}' - {}, using default {}", val, e, default_timeout);
default_timeout
}
},
Err(_) => {
let default_timeout = 300;
println!("⚠️ OCR_TIMEOUT_SECONDS: {} (using default - env var not set)", default_timeout);
default_timeout
}
}
},
max_file_size_mb: {
match env::var("MAX_FILE_SIZE_MB") {
Ok(val) => match val.parse::<u64>() {
Ok(parsed) => {
println!("✅ MAX_FILE_SIZE_MB: {} (loaded from env)", parsed);
parsed
}
Err(e) => {
let default_size = 50;
println!("❌ MAX_FILE_SIZE_MB: Invalid value '{}' - {}, using default {}", val, e, default_size);
default_size
}
},
Err(_) => {
let default_size = 50;
println!("⚠️ MAX_FILE_SIZE_MB: {} (using default - env var not set)", default_size);
default_size
}
}
},
// Performance Configuration
memory_limit_mb: {
match env::var("MEMORY_LIMIT_MB") {
Ok(val) => match val.parse::<usize>() {
Ok(parsed) => {
println!("✅ MEMORY_LIMIT_MB: {} (loaded from env)", parsed);
parsed
}
Err(e) => {
let default_memory = 512;
println!("❌ MEMORY_LIMIT_MB: Invalid value '{}' - {}, using default {}", val, e, default_memory);
default_memory
}
},
Err(_) => {
let default_memory = 512;
println!("⚠️ MEMORY_LIMIT_MB: {} (using default - env var not set)", default_memory);
default_memory
}
}
},
cpu_priority: match env::var("CPU_PRIORITY") {
Ok(priority) => {
println!("✅ CPU_PRIORITY: {} (loaded from env)", priority);
priority
}
Err(_) => {
let default_priority = "normal".to_string();
println!("⚠️ CPU_PRIORITY: {} (using default - env var not set)", default_priority);
default_priority
}
},
// OIDC Configuration
oidc_enabled: match env::var("OIDC_ENABLED") {
Ok(val) => match val.to_lowercase().as_str() {
"true" | "1" | "yes" | "on" => {
println!("✅ OIDC_ENABLED: true (loaded from env)");
true
}
_ => {
println!("✅ OIDC_ENABLED: false (loaded from env)");
false
}
},
Err(_) => {
println!("⚠️ OIDC_ENABLED: false (using default - env var not set)");
false
}
},
oidc_client_id: match env::var("OIDC_CLIENT_ID") {
Ok(client_id) => {
println!("✅ OIDC_CLIENT_ID: {} (loaded from env)", client_id);
Some(client_id)
}
Err(_) => {
println!("⚠️ OIDC_CLIENT_ID: Not set");
None
}
},
oidc_client_secret: match env::var("OIDC_CLIENT_SECRET") {
Ok(secret) => {
println!("✅ OIDC_CLIENT_SECRET: ***hidden*** (loaded from env, {} chars)", secret.len());
Some(secret)
}
Err(_) => {
println!("⚠️ OIDC_CLIENT_SECRET: Not set");
None
}
},
oidc_issuer_url: match env::var("OIDC_ISSUER_URL") {
Ok(url) => {
println!("✅ OIDC_ISSUER_URL: {} (loaded from env)", url);
Some(url)
}
Err(_) => {
println!("⚠️ OIDC_ISSUER_URL: Not set");
None
}
},
oidc_redirect_uri: match env::var("OIDC_REDIRECT_URI") {
Ok(uri) => {
println!("✅ OIDC_REDIRECT_URI: {} (loaded from env)", uri);
Some(uri)
}
Err(_) => {
println!("⚠️ OIDC_REDIRECT_URI: Not set");
None
}
},
};
println!("\n🔍 CONFIGURATION VALIDATION:");
println!("{}", "=".repeat(50));
// Validate server address format
if !config.server_address.contains(':') {
println!("❌ SERVER_ADDRESS: Invalid format '{}' - missing port", config.server_address);
return Err(anyhow::anyhow!(
"Invalid server address format: '{}'. Expected format: 'host:port' (e.g., '0.0.0.0:8000')",
config.server_address
));
}
// Validate database URL format
if !config.database_url.starts_with("postgresql://") && !config.database_url.starts_with("postgres://") {
println!("❌ DATABASE_URL: Invalid format - must start with 'postgresql://' or 'postgres://'");
return Err(anyhow::anyhow!(
"Invalid database URL format. Must start with 'postgresql://' or 'postgres://'"
));
}
// Validate configuration to prevent recursion issues
println!("🔍 Validating directory paths for conflicts...");
config.validate_paths()?;
println!("\n📊 CONFIGURATION SUMMARY:");
println!("{}", "=".repeat(50));
println!("🌐 Server will bind to: {}", config.server_address);
println!("📁 Upload directory: {}", config.upload_path);
println!("👁️ Watch directory: {}", config.watch_folder);
println!("📄 Allowed file types: {:?}", config.allowed_file_types);
println!("🧠 OCR language: {}", config.ocr_language);
println!("⚙️ Concurrent OCR jobs: {}", config.concurrent_ocr_jobs);
println!("⏱️ OCR timeout: {}s", config.ocr_timeout_seconds);
println!("📏 Max file size: {}MB", config.max_file_size_mb);
println!("💾 Memory limit: {}MB", config.memory_limit_mb);
// Warning checks
println!("\n⚠️ CONFIGURATION WARNINGS:");
println!("{}", "=".repeat(50));
if config.jwt_secret == "your-secret-key" {
println!("🚨 SECURITY WARNING: Using default JWT secret! Set JWT_SECRET environment variable in production!");
}
if config.server_address.starts_with("0.0.0.0") {
println!("🌍 INFO: Server will listen on all interfaces (0.0.0.0)");
}
if config.max_file_size_mb > 100 {
println!("📏 INFO: Large file size limit ({}MB) may impact performance", config.max_file_size_mb);
}
if config.concurrent_ocr_jobs > 8 {
println!("⚙️ INFO: High OCR concurrency ({}) may use significant CPU/memory", config.concurrent_ocr_jobs);
}
// OIDC validation
if config.oidc_enabled {
println!("🔐 OIDC is enabled");
if config.oidc_client_id.is_none() {
println!("❌ OIDC_CLIENT_ID is required when OIDC is enabled");
}
if config.oidc_client_secret.is_none() {
println!("❌ OIDC_CLIENT_SECRET is required when OIDC is enabled");
}
if config.oidc_issuer_url.is_none() {
println!("❌ OIDC_ISSUER_URL is required when OIDC is enabled");
}
if config.oidc_redirect_uri.is_none() {
println!("❌ OIDC_REDIRECT_URI is required when OIDC is enabled");
}
} else {
println!("🔐 OIDC is disabled");
}
println!("✅ Configuration validation completed successfully!\n");
Ok(config)
}
fn validate_paths(&self) -> Result<()> {
use std::path::Path;
let upload_path = Path::new(&self.upload_path);
let watch_path = Path::new(&self.watch_folder);
println!("📁 Checking upload directory: {}", self.upload_path);
println!("👁️ Checking watch directory: {}", self.watch_folder);
// Check if paths exist and are accessible
if !upload_path.exists() {
println!("⚠️ Upload directory does not exist yet: {}", self.upload_path);
} else if !upload_path.is_dir() {
println!("❌ Upload path exists but is not a directory: {}", self.upload_path);
return Err(anyhow::anyhow!(
"Upload path '{}' exists but is not a directory", self.upload_path
));
} else {
println!("✅ Upload directory exists and is accessible");
}
if !watch_path.exists() {
println!("⚠️ Watch directory does not exist yet: {}", self.watch_folder);
} else if !watch_path.is_dir() {
println!("❌ Watch path exists but is not a directory: {}", self.watch_folder);
return Err(anyhow::anyhow!(
"Watch folder '{}' exists but is not a directory", self.watch_folder
));
} else {
println!("✅ Watch directory exists and is accessible");
}
// Normalize paths to handle relative paths and symlinks
let upload_canonical = upload_path.canonicalize()
.unwrap_or_else(|_| {
println!("⚠️ Could not canonicalize upload path, using as-is");
upload_path.to_path_buf()
});
let watch_canonical = watch_path.canonicalize()
.unwrap_or_else(|_| {
println!("⚠️ Could not canonicalize watch path, using as-is");
watch_path.to_path_buf()
});
println!("📍 Canonical upload path: {}", upload_canonical.display());
println!("📍 Canonical watch path: {}", watch_canonical.display());
// Check if paths are the same
if upload_canonical == watch_canonical {
println!("❌ CRITICAL ERROR: Upload and watch directories are the same!");
return Err(anyhow::anyhow!(
"❌ Configuration Error: UPLOAD_PATH and WATCH_FOLDER cannot be the same directory.\n\
This would cause infinite recursion where WebDAV files are downloaded to the upload \n\
directory and then immediately reprocessed by the watcher.\n\
Current config:\n\
- UPLOAD_PATH: {}\n\
- WATCH_FOLDER: {}\n\
Please set them to different directories.",
self.upload_path, self.watch_folder
));
}
// Check if watch folder is inside upload folder
if watch_canonical.starts_with(&upload_canonical) {
println!("❌ CRITICAL ERROR: Watch folder is inside upload directory!");
return Err(anyhow::anyhow!(
"❌ Configuration Error: WATCH_FOLDER cannot be inside UPLOAD_PATH.\n\
This would cause recursion where WebDAV files downloaded to uploads are \n\
detected by the watcher as new files.\n\
Current config:\n\
- UPLOAD_PATH: {}\n\
- WATCH_FOLDER: {}\n\
Please move the watch folder outside the upload directory.",
self.upload_path, self.watch_folder
));
}
// Check if upload folder is inside watch folder
if upload_canonical.starts_with(&watch_canonical) {
println!("❌ CRITICAL ERROR: Upload directory is inside watch folder!");
return Err(anyhow::anyhow!(
"❌ Configuration Error: UPLOAD_PATH cannot be inside WATCH_FOLDER.\n\
This would cause recursion where files from the watch folder are \n\
copied to uploads (inside the watch folder) and reprocessed.\n\
Current config:\n\
- UPLOAD_PATH: {}\n\
- WATCH_FOLDER: {}\n\
Please move the upload directory outside the watch folder.",
self.upload_path, self.watch_folder
));
}
println!("✅ Directory path validation passed - no conflicts detected");
Ok(())
}
}