//! Security utilities for input validation and sanitization use anyhow::Result; use std::path::{Path, PathBuf, Component}; use tracing::{warn, debug}; /// Validate and sanitize file paths to prevent path traversal attacks pub fn validate_and_sanitize_path(input_path: &str) -> Result { // Check for null bytes (not allowed in file paths) if input_path.contains('\0') { return Err(anyhow::anyhow!("Path contains null bytes")); } // Check for excessively long paths if input_path.len() > 4096 { return Err(anyhow::anyhow!("Path too long (max 4096 characters)")); } // Convert to Path for normalization let path = Path::new(input_path); // Check for path traversal attempts for component in path.components() { match component { Component::ParentDir => { warn!("Path traversal attempt detected: {}", input_path); return Err(anyhow::anyhow!("Path traversal not allowed")); } Component::Normal(name) => { let name_str = name.to_string_lossy(); // Check for dangerous file names if is_dangerous_filename(&name_str) { return Err(anyhow::anyhow!("Potentially dangerous filename: {}", name_str)); } // Check for control characters (except newline and tab which might be in file content) for ch in name_str.chars() { if ch.is_control() && ch != '\n' && ch != '\t' { return Err(anyhow::anyhow!("Filename contains control characters")); } } } _ => {} // Allow root, current dir, and prefix components } } // Normalize the path to remove redundant components let normalized = normalize_path(path); Ok(normalized.to_string_lossy().to_string()) } /// Validate filename for document storage pub fn validate_filename(filename: &str) -> Result { // Basic length check if filename.is_empty() { return Err(anyhow::anyhow!("Filename cannot be empty")); } if filename.len() > 255 { return Err(anyhow::anyhow!("Filename too long (max 255 characters)")); } // Check for null bytes if filename.contains('\0') { return Err(anyhow::anyhow!("Filename contains null bytes")); } // Check for path separators (filenames should not contain them) if filename.contains('/') || filename.contains('\\') { return Err(anyhow::anyhow!("Filename cannot contain path separators")); } // Check for control characters for ch in filename.chars() { if ch.is_control() && ch != '\n' && ch != '\t' { return Err(anyhow::anyhow!("Filename contains control characters")); } } // Check for dangerous patterns if is_dangerous_filename(filename) { return Err(anyhow::anyhow!("Potentially dangerous filename: {}", filename)); } // Sanitize the filename by replacing problematic characters let sanitized = sanitize_filename(filename); Ok(sanitized) } /// Check if a filename is potentially dangerous fn is_dangerous_filename(filename: &str) -> bool { let filename_lower = filename.to_lowercase(); // Windows reserved names let reserved_names = [ "con", "prn", "aux", "nul", "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9", "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", ]; // Check if filename (without extension) matches reserved names let name_without_ext = filename_lower.split('.').next().unwrap_or(""); if reserved_names.contains(&name_without_ext) { return true; } // Check for suspicious patterns if filename_lower.starts_with('.') && filename_lower.len() > 1 { // Allow common hidden files but reject suspicious ones let allowed_hidden = [".env", ".gitignore", ".htaccess"]; if !allowed_hidden.iter().any(|&allowed| filename_lower.starts_with(allowed)) { // Be more permissive with document files that might have dots if !filename_lower.contains(&['.', 'd', 'o', 'c']) && !filename_lower.contains(&['.', 'p', 'd', 'f']) && !filename_lower.contains(&['.', 't', 'x', 't']) { return true; } } } false } /// Sanitize filename by replacing problematic characters fn sanitize_filename(filename: &str) -> String { let mut sanitized = String::new(); for ch in filename.chars() { match ch { // Replace problematic characters with underscores '<' | '>' | ':' | '"' | '|' | '?' | '*' => sanitized.push('_'), // Allow most other characters _ if !ch.is_control() || ch == '\n' || ch == '\t' => sanitized.push(ch), // Skip control characters _ => {} } } // Trim whitespace from ends sanitized.trim().to_string() } /// Normalize a path by resolving . and .. components without filesystem access fn normalize_path(path: &Path) -> PathBuf { let mut normalized = PathBuf::new(); for component in path.components() { match component { Component::Normal(_) | Component::RootDir | Component::Prefix(_) => { normalized.push(component); } Component::CurDir => { // Skip current directory references } Component::ParentDir => { // This should have been caught earlier, but handle it safely if normalized.parent().is_some() { normalized.pop(); } // If we can't go up, just ignore the .. component } } } normalized } /// Validate that a path is within the allowed base directory pub fn validate_path_within_base(path: &str, base_dir: &str) -> Result<()> { let path_buf = PathBuf::from(path); let base_buf = PathBuf::from(base_dir); // Convert both paths to absolute paths for consistent comparison let current_dir = std::env::current_dir().unwrap_or_default(); let absolute_base = if base_buf.is_absolute() { base_buf } else { current_dir.join(&base_buf) }; let absolute_path = if path_buf.is_absolute() { path_buf } else { current_dir.join(&path_buf) }; // Try to canonicalize both paths, with consistent fallback behavior let canonical_base = absolute_base.canonicalize().unwrap_or_else(|_| { // If canonicalization fails, use the absolute path through normalize_path normalize_path(&absolute_base) }); let canonical_path = if absolute_path.exists() { // If the file exists, canonicalize it absolute_path.canonicalize().unwrap_or_else(|_| normalize_path(&absolute_path)) } else { // If file doesn't exist, try to canonicalize its parent directory and append the filename if let Some(parent) = absolute_path.parent() { if let Some(filename) = absolute_path.file_name() { let canonical_parent = parent.canonicalize().unwrap_or_else(|_| normalize_path(parent)); canonical_parent.join(filename) } else { normalize_path(&absolute_path) } } else { normalize_path(&absolute_path) } }; // Add debug logging to diagnose path validation issues debug!("Path validation: input_path='{}', base_dir='{}'", path, base_dir); debug!("Path validation: absolute_path='{}', absolute_base='{}'", absolute_path.display(), absolute_base.display()); debug!("Path validation: canonical_path='{}', canonical_base='{}'", canonical_path.display(), canonical_base.display()); debug!("Path validation: starts_with_check={}", canonical_path.starts_with(&canonical_base)); if !canonical_path.starts_with(&canonical_base) { return Err(anyhow::anyhow!( "Path '{}' is not within allowed base directory '{}'", path, base_dir )); } Ok(()) } #[cfg(test)] mod tests { use super::*; #[test] fn test_validate_filename() { // Valid filenames assert!(validate_filename("document.pdf").is_ok()); assert!(validate_filename("my-file_2023.docx").is_ok()); assert!(validate_filename("report (final).txt").is_ok()); // Invalid filenames assert!(validate_filename("").is_err()); assert!(validate_filename("../etc/passwd").is_err()); assert!(validate_filename("file\0name.txt").is_err()); assert!(validate_filename("con.txt").is_err()); assert!(validate_filename("file/name.txt").is_err()); } #[test] fn test_validate_path() { // Valid paths assert!(validate_and_sanitize_path("documents/file.pdf").is_ok()); assert!(validate_and_sanitize_path("./uploads/document.txt").is_ok()); // Invalid paths assert!(validate_and_sanitize_path("../../../etc/passwd").is_err()); assert!(validate_and_sanitize_path("documents/../config.txt").is_err()); assert!(validate_and_sanitize_path("file\0name.txt").is_err()); } #[test] fn test_sanitize_filename() { assert_eq!(sanitize_filename("file<>name.txt"), "file__name.txt"); assert_eq!(sanitize_filename(" report.pdf "), "report.pdf"); assert_eq!(sanitize_filename("file:name|test.doc"), "file_name_test.doc"); } #[test] fn test_validate_path_within_base() { use std::fs; // Setup test directories let test_base = "test_uploads_validation"; let test_docs = format!("{}/documents", test_base); // Clean up any existing test directories fs::remove_dir_all(test_base).unwrap_or(()); // Test 1: Neither base nor parent exists let result = validate_path_within_base( "./test_uploads_validation/documents/test.txt", "./test_uploads_validation" ); assert!(result.is_ok(), "Should allow paths within base even when directories don't exist"); // Test 2: Base exists but parent doesn't (the problematic case) fs::create_dir_all(test_base).unwrap(); let result = validate_path_within_base( "./test_uploads_validation/documents/test.txt", "./test_uploads_validation" ); assert!(result.is_ok(), "Should allow subdirectory paths when base exists but parent doesn't"); // Test 3: Both base and parent exist fs::create_dir_all(&test_docs).unwrap(); let result = validate_path_within_base( "./test_uploads_validation/documents/test.txt", "./test_uploads_validation" ); assert!(result.is_ok(), "Should allow paths when both base and parent exist"); // Test 4: Path outside base directory should fail let result = validate_path_within_base( "../outside.txt", "./test_uploads_validation" ); assert!(result.is_err(), "Should reject paths outside base directory"); // Test 5: Absolute paths let current_dir = std::env::current_dir().unwrap(); let abs_base = current_dir.join(test_base); let abs_path = abs_base.join("documents/test.txt"); let result = validate_path_within_base( &abs_path.to_string_lossy(), &abs_base.to_string_lossy() ); assert!(result.is_ok(), "Should handle absolute paths correctly"); // Test 6: Mixed absolute and relative paths let result = validate_path_within_base( &abs_path.to_string_lossy(), "./test_uploads_validation" ); assert!(result.is_ok(), "Should handle mixed absolute/relative paths"); // Clean up fs::remove_dir_all(test_base).unwrap_or(()); } #[test] fn test_validate_path_within_base_traversal_attempts() { use std::fs; let test_base = "test_security_validation"; fs::create_dir_all(test_base).unwrap_or(()); // Test various path traversal attempts let traversal_attempts = vec![ "../../../etc/passwd", "./test_security_validation/../../../etc/passwd", "test_security_validation/../outside.txt", "./test_security_validation/documents/../../outside.txt", ]; for attempt in traversal_attempts { let result = validate_path_within_base(attempt, "./test_security_validation"); assert!(result.is_err(), "Should reject path traversal attempt: {}", attempt); } // Clean up fs::remove_dir_all(test_base).unwrap_or(()); } }