Readur/src/utils/security.rs

352 lines
13 KiB
Rust

//! Security utilities for input validation and sanitization
use anyhow::Result;
use std::path::{Path, PathBuf, Component};
use tracing::{warn, debug};
/// Checks an untrusted path string and returns it in normalized form.
///
/// The path is inspected component by component; nothing is read from the
/// filesystem. On success the result is the input with `.` components removed
/// (see `normalize_path`), converted lossily to a `String`.
///
/// # Errors
///
/// Returns an error when the input
/// * contains a NUL byte,
/// * is longer than 4096 bytes,
/// * contains a `..` component (also logged as a warning),
/// * has a component that `is_dangerous_filename` flags, or
/// * has a component containing a control character other than `\n` / `\t`.
pub fn validate_and_sanitize_path(input_path: &str) -> Result<String> {
    // NUL bytes can never be part of a legitimate path.
    if input_path.contains('\0') {
        anyhow::bail!("Path contains null bytes");
    }

    // Upper bound on the raw input size (measured in bytes).
    if input_path.len() > 4096 {
        anyhow::bail!("Path too long (max 4096 characters)");
    }

    let candidate = Path::new(input_path);

    for part in candidate.components() {
        // Any `..` is treated as a traversal attempt, wherever it appears.
        if matches!(part, Component::ParentDir) {
            warn!("Path traversal attempt detected: {}", input_path);
            anyhow::bail!("Path traversal not allowed");
        }

        // Root, prefix and `.` components carry no name to inspect.
        if let Component::Normal(raw_name) = part {
            let segment = raw_name.to_string_lossy();

            if is_dangerous_filename(&segment) {
                anyhow::bail!("Potentially dangerous filename: {}", segment);
            }

            // Newline and tab are tolerated; every other control character is not.
            let has_forbidden_control = segment
                .chars()
                .any(|c| c.is_control() && !matches!(c, '\n' | '\t'));
            if has_forbidden_control {
                anyhow::bail!("Filename contains control characters");
            }
        }
    }

    // Drop redundant components before handing the path back.
    Ok(normalize_path(candidate).to_string_lossy().into_owned())
}
/// Validates a bare filename for document storage and returns a sanitized copy.
///
/// The input must be a single name, not a path. After the raw input passes the
/// checks below it is run through `sanitize_filename`, and the *sanitized*
/// result is validated again, because sanitizing trims whitespace and can
/// therefore turn an acceptable-looking input into an empty, reserved or
/// directory-reference name.
///
/// # Errors
///
/// Returns an error when the filename
/// * is empty, or becomes empty once sanitized (e.g. whitespace only),
/// * is longer than 255 bytes,
/// * contains a NUL byte, `/` or `\`,
/// * contains a control character other than `\n` / `\t`,
/// * is flagged by `is_dangerous_filename` before or after sanitizing, or
/// * sanitizes to `.` or `..`.
pub fn validate_filename(filename: &str) -> Result<String> {
    // Basic length checks on the raw input (length is measured in bytes).
    if filename.is_empty() {
        return Err(anyhow::anyhow!("Filename cannot be empty"));
    }
    if filename.len() > 255 {
        return Err(anyhow::anyhow!("Filename too long (max 255 characters)"));
    }

    // NUL bytes are never valid in a filename.
    if filename.contains('\0') {
        return Err(anyhow::anyhow!("Filename contains null bytes"));
    }

    // A filename must not smuggle in directory structure.
    if filename.contains('/') || filename.contains('\\') {
        return Err(anyhow::anyhow!("Filename cannot contain path separators"));
    }

    // Newline and tab are tolerated; every other control character is rejected.
    if filename
        .chars()
        .any(|ch| ch.is_control() && ch != '\n' && ch != '\t')
    {
        return Err(anyhow::anyhow!("Filename contains control characters"));
    }

    if is_dangerous_filename(filename) {
        return Err(anyhow::anyhow!("Potentially dangerous filename: {}", filename));
    }

    let sanitized = sanitize_filename(filename);

    // Sanitizing trims surrounding whitespace, so the value we are about to
    // return may differ from the value checked above. Re-validate it.
    if sanitized.is_empty() {
        return Err(anyhow::anyhow!("Filename cannot be empty"));
    }
    // `.` and `..` are directory references, not names; joining them onto a
    // storage directory would address the directory itself or its parent.
    if sanitized == "." || sanitized == ".." {
        return Err(anyhow::anyhow!("Filename cannot be '.' or '..'"));
    }
    // e.g. " con.txt" passes the raw check but sanitizes to "con.txt".
    if sanitized != filename && is_dangerous_filename(&sanitized) {
        return Err(anyhow::anyhow!("Potentially dangerous filename: {}", sanitized));
    }

    Ok(sanitized)
}
/// Reports whether a single filename (not a path) looks dangerous.
///
/// The comparison is case-insensitive. A name is considered dangerous when
/// * the part before its first `.` is a Windows reserved device name
///   (`con`, `prn`, `aux`, `nul`, `com1`-`com9`, `lpt1`-`lpt9`), or
/// * it is a hidden name (starts with `.` and is longer than one character)
///   that neither starts with an allow-listed prefix (`.env`, `.gitignore`,
///   `.htaccess`) nor contains a document marker (`.doc`, `.pdf`, `.txt`).
///
/// Note that `..` falls into the second category and is reported as dangerous.
fn is_dangerous_filename(filename: &str) -> bool {
    /// Device names Windows reserves regardless of extension.
    const RESERVED_NAMES: &[&str] = &[
        "con", "prn", "aux", "nul",
        "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9",
        "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9",
    ];
    /// Hidden-file prefixes that are accepted as-is.
    const ALLOWED_HIDDEN_PREFIXES: &[&str] = &[".env", ".gitignore", ".htaccess"];
    /// Substrings that mark a hidden name as a document and therefore acceptable.
    const DOCUMENT_MARKERS: &[&str] = &[".doc", ".pdf", ".txt"];

    let filename_lower = filename.to_lowercase();

    // Reserved names apply to the stem, i.e. everything before the first dot
    // ("con.txt" is just as reserved as "con").
    let stem = filename_lower.split('.').next().unwrap_or("");
    if RESERVED_NAMES.contains(&stem) {
        return true;
    }

    // Hidden names: accept the allow-list and document-like names, reject the rest.
    if filename_lower.starts_with('.') && filename_lower.len() > 1 {
        let is_allow_listed = ALLOWED_HIDDEN_PREFIXES
            .iter()
            .any(|&prefix| filename_lower.starts_with(prefix));

        // These must be substring matches. A char-array pattern such as
        // `&['.', 'd', 'o', 'c']` matches any ONE of the listed characters, so
        // it is satisfied by the leading dot of every hidden name.
        let looks_like_document = DOCUMENT_MARKERS
            .iter()
            .any(|&marker| filename_lower.contains(marker));

        return !is_allow_listed && !looks_like_document;
    }

    false
}
/// Produces a cleaned-up copy of `filename`.
///
/// * `<`, `>`, `:`, `"`, `|`, `?` and `*` are each replaced by `_`.
/// * Control characters are dropped, except `\n` and `\t`, which are kept.
/// * Leading and trailing whitespace is trimmed from the result.
fn sanitize_filename(filename: &str) -> String {
    let cleaned: String = filename
        .chars()
        .filter_map(|c| match c {
            // Characters that are awkward or illegal in filenames on some platforms.
            '<' | '>' | ':' | '"' | '|' | '?' | '*' => Some('_'),
            // The two control characters that are deliberately preserved.
            '\n' | '\t' => Some(c),
            // Every other control character is silently removed.
            other if other.is_control() => None,
            other => Some(other),
        })
        .collect();

    // Whitespace at either end is never part of the stored name.
    cleaned.trim().to_owned()
}
/// Lexically normalizes `path` without touching the filesystem.
///
/// * Normal, root and prefix components are kept in order.
/// * `.` components are dropped.
/// * `..` removes the most recently kept component; when there is nothing
///   left to remove (empty result or filesystem root) it is ignored.
fn normalize_path(path: &Path) -> PathBuf {
    path.components().fold(PathBuf::new(), |mut built, part| {
        match part {
            // `.` contributes nothing.
            Component::CurDir => {}
            // `pop` does nothing when there is no parent, so a `..` that would
            // climb above the start of the path is simply ignored.
            Component::ParentDir => {
                built.pop();
            }
            // Normal / RootDir / Prefix are carried over unchanged.
            kept => built.push(kept),
        }
        built
    })
}
/// Verifies that `path` resolves to a location inside `base_dir`.
///
/// Relative inputs are anchored at the current working directory (if the
/// working directory cannot be determined they are left relative). Both the
/// base and the candidate are then resolved with the *same* strategy: the
/// deepest ancestor that exists on disk is canonicalized (following symlinks)
/// and the remaining, not-yet-existing components are folded in lexically.
/// Resolving both sides identically keeps the comparison meaningful when the
/// target file or its parent directories have not been created yet.
///
/// The containment test is a component-wise `Path::starts_with`, so
/// `/base-other` is not considered to be inside `/base`.
///
/// The check reflects the filesystem at call time only.
///
/// # Errors
///
/// Returns an error when the resolved path does not start with the resolved
/// base directory.
pub fn validate_path_within_base(path: &str, base_dir: &str) -> Result<()> {
    // `join` replaces the left-hand side when the argument is absolute, so this
    // anchors relative inputs and leaves absolute ones untouched.
    let current_dir = std::env::current_dir().unwrap_or_default();
    let absolute_base = current_dir.join(base_dir);
    let absolute_path = current_dir.join(path);

    let canonical_base = canonicalize_longest_existing_prefix(&absolute_base);
    let canonical_path = canonicalize_longest_existing_prefix(&absolute_path);

    let is_within_base = canonical_path.starts_with(&canonical_base);

    // Debug logging to diagnose path validation issues.
    debug!("Path validation: input_path='{}', base_dir='{}'", path, base_dir);
    debug!("Path validation: absolute_path='{}', absolute_base='{}'", absolute_path.display(), absolute_base.display());
    debug!("Path validation: canonical_path='{}', canonical_base='{}'", canonical_path.display(), canonical_base.display());
    debug!("Path validation: starts_with_check={}", is_within_base);

    if !is_within_base {
        return Err(anyhow::anyhow!(
            "Path '{}' is not within allowed base directory '{}'",
            path, base_dir
        ));
    }
    Ok(())
}

/// Resolves `path` as far as the filesystem allows.
///
/// Walks from `path` up through its ancestors and canonicalizes the first one
/// that can be canonicalized; the components below that ancestor are appended
/// and normalized lexically via `normalize_path`. When no ancestor can be
/// canonicalized the whole path is normalized lexically.
fn canonicalize_longest_existing_prefix(path: &Path) -> PathBuf {
    for ancestor in path.ancestors() {
        if let Ok(resolved) = ancestor.canonicalize() {
            return match path.strip_prefix(ancestor) {
                // The full path exists: the canonical form is the answer.
                Ok(rest) if rest.as_os_str().is_empty() => resolved,
                // Only a prefix exists: the tail may still contain `.`/`..`,
                // which can only be interpreted lexically.
                Ok(rest) => normalize_path(&resolved.join(rest)),
                // Not expected for values produced by `ancestors()`; fall back
                // to the purely lexical form rather than guessing.
                Err(_) => normalize_path(path),
            };
        }
    }
    normalize_path(path)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Bare filenames: a few that must pass and a few that must be refused.
    #[test]
    fn test_validate_filename() {
        // Valid filenames
        for &accepted in &["document.pdf", "my-file_2023.docx", "report (final).txt"] {
            assert!(validate_filename(accepted).is_ok());
        }

        // Invalid filenames
        for &rejected in &["", "../etc/passwd", "file\0name.txt", "con.txt", "file/name.txt"] {
            assert!(validate_filename(rejected).is_err());
        }
    }

    /// Relative paths: plain ones pass, traversal and NUL bytes are refused.
    #[test]
    fn test_validate_path() {
        // Valid paths
        for &accepted in &["documents/file.pdf", "./uploads/document.txt"] {
            assert!(validate_and_sanitize_path(accepted).is_ok());
        }

        // Invalid paths
        for &rejected in &["../../../etc/passwd", "documents/../config.txt", "file\0name.txt"] {
            assert!(validate_and_sanitize_path(rejected).is_err());
        }
    }

    /// Character replacement and trimming performed by the sanitizer.
    #[test]
    fn test_sanitize_filename() {
        let cases = [
            ("file<>name.txt", "file__name.txt"),
            (" report.pdf ", "report.pdf"),
            ("file:name|test.doc", "file_name_test.doc"),
        ];
        for &(raw, expected) in &cases {
            assert_eq!(sanitize_filename(raw), expected);
        }
    }

    /// Containment checks while the directory tree is created step by step.
    /// The steps are order-dependent because each one changes what exists on disk.
    #[test]
    fn test_validate_path_within_base() {
        // Setup test directories
        let test_base = "test_uploads_validation";
        let test_docs = format!("{}/documents", test_base);
        let relative_base = "./test_uploads_validation";
        let nested_file = "./test_uploads_validation/documents/test.txt";

        // Clean up any existing test directories
        let _ = fs::remove_dir_all(test_base);

        // Test 1: Neither base nor parent exists
        let outcome = validate_path_within_base(nested_file, relative_base);
        assert!(outcome.is_ok(), "Should allow paths within base even when directories don't exist");

        // Test 2: Base exists but parent doesn't (the problematic case)
        fs::create_dir_all(test_base).unwrap();
        let outcome = validate_path_within_base(nested_file, relative_base);
        assert!(outcome.is_ok(), "Should allow subdirectory paths when base exists but parent doesn't");

        // Test 3: Both base and parent exist
        fs::create_dir_all(&test_docs).unwrap();
        let outcome = validate_path_within_base(nested_file, relative_base);
        assert!(outcome.is_ok(), "Should allow paths when both base and parent exist");

        // Test 4: Path outside base directory should fail
        let outcome = validate_path_within_base("../outside.txt", relative_base);
        assert!(outcome.is_err(), "Should reject paths outside base directory");

        // Test 5: Absolute paths
        let abs_base = std::env::current_dir().unwrap().join(test_base);
        let abs_path = abs_base.join("documents/test.txt");
        let outcome = validate_path_within_base(
            &abs_path.to_string_lossy(),
            &abs_base.to_string_lossy(),
        );
        assert!(outcome.is_ok(), "Should handle absolute paths correctly");

        // Test 6: Mixed absolute and relative paths
        let outcome = validate_path_within_base(&abs_path.to_string_lossy(), relative_base);
        assert!(outcome.is_ok(), "Should handle mixed absolute/relative paths");

        // Clean up
        let _ = fs::remove_dir_all(test_base);
    }

    /// Several spellings of "escape the base directory" must all be refused.
    #[test]
    fn test_validate_path_within_base_traversal_attempts() {
        let test_base = "test_security_validation";
        let _ = fs::create_dir_all(test_base);

        // Test various path traversal attempts
        let traversal_attempts = [
            "../../../etc/passwd",
            "./test_security_validation/../../../etc/passwd",
            "test_security_validation/../outside.txt",
            "./test_security_validation/documents/../../outside.txt",
        ];
        for &attempt in &traversal_attempts {
            let outcome = validate_path_within_base(attempt, "./test_security_validation");
            assert!(outcome.is_err(), "Should reject path traversal attempt: {}", attempt);
        }

        // Clean up
        let _ = fs::remove_dir_all(test_base);
    }
}