use anyhow::Result; use chrono::Utc; use std::path::{Path, PathBuf}; use std::sync::Arc; use tokio::fs; use uuid::Uuid; use tracing::{info, warn, error}; use crate::models::Document; use crate::services::s3_service::S3Service; use crate::storage::{StorageBackend, StorageConfig, factory}; #[cfg(feature = "ocr")] use image::{DynamicImage, ImageFormat, imageops::FilterType}; #[derive(Clone)] pub struct FileService { upload_path: String, /// Storage backend for all file operations storage: Arc, /// Legacy S3 service reference for backward compatibility /// TODO: Remove this after all usage sites are migrated s3_service: Option>, } impl FileService { /// Create a new FileService with local storage (backward compatible) #[deprecated(note = "Use from_config() with storage factory pattern instead")] pub fn new(upload_path: String) -> Self { use crate::storage::local::LocalStorageBackend; let local_backend = LocalStorageBackend::new(upload_path.clone()); Self { upload_path, storage: Arc::new(local_backend), s3_service: None, } } /// Create a new FileService with S3 storage (backward compatible) #[deprecated(note = "Use from_config() with storage factory pattern instead")] pub fn new_with_s3(upload_path: String, s3_service: Arc) -> Self { let storage_backend = s3_service.clone() as Arc; Self { upload_path, storage: storage_backend, s3_service: Some(s3_service), } } /// Create a new FileService with a specific storage backend (new API) pub fn with_storage(upload_path: String, storage: Arc) -> Self { Self { upload_path, storage, s3_service: None, // New API doesn't need legacy S3 reference } } /// Create FileService from storage configuration (factory pattern) pub async fn from_config(config: StorageConfig, upload_path: String) -> Result { let storage = factory::create_storage_backend(config).await?; Ok(Self::with_storage(upload_path, storage)) } /// Check if S3 storage is enabled pub fn is_s3_enabled(&self) -> bool { // Check if storage backend is S3 type self.storage.storage_type() == "s3" || self.s3_service.is_some() } /// Get the storage backend type pub fn storage_type(&self) -> &'static str { self.storage.storage_type() } /// Initialize the storage backend and directory structure pub async fn initialize_storage(&self) -> Result<()> { // Initialize the storage backend first self.storage.initialize().await?; Ok(()) } /// Initialize the upload directory structure (legacy method for local storage) pub async fn initialize_directory_structure(&self) -> Result<()> { // For non-local storage, this is a no-op if self.storage.storage_type() != "local" { info!("Skipping directory structure initialization for {} storage", self.storage.storage_type()); return Ok(()); } let base_path = Path::new(&self.upload_path); // Create subdirectories for organized file storage let directories = [ "documents", // Final uploaded documents "thumbnails", // Document thumbnails "processed_images", // OCR processed images for review "temp", // Temporary files during processing "backups", // Document backups ]; for dir in directories.iter() { let dir_path = base_path.join(dir); if let Err(e) = fs::create_dir_all(&dir_path).await { error!("Failed to create directory {:?}: {}", dir_path, e); return Err(anyhow::anyhow!("Failed to create directory structure: {}", e)); } info!("Ensured directory exists: {:?}", dir_path); } Ok(()) } /// Get the path for a specific subdirectory pub fn get_subdirectory_path(&self, subdir: &str) -> PathBuf { Path::new(&self.upload_path).join(subdir) } /// Get the documents directory path pub fn get_documents_path(&self) -> PathBuf { self.get_subdirectory_path("documents") } /// Get the thumbnails directory path pub fn get_thumbnails_path(&self) -> PathBuf { self.get_subdirectory_path("thumbnails") } /// Get the processed images directory path pub fn get_processed_images_path(&self) -> PathBuf { self.get_subdirectory_path("processed_images") } /// Get the temp directory path pub fn get_temp_path(&self) -> PathBuf { self.get_subdirectory_path("temp") } /// Migrate existing files from the root upload directory to the structured format pub async fn migrate_existing_files(&self) -> Result<()> { let base_path = Path::new(&self.upload_path); let documents_dir = self.get_documents_path(); let thumbnails_dir = self.get_thumbnails_path(); info!("Starting migration of existing files to structured directories..."); let mut migrated_count = 0; let mut thumbnail_count = 0; // Read all files in the base upload directory let mut entries = fs::read_dir(base_path).await?; while let Some(entry) = entries.next_entry().await? { let file_path = entry.path(); // Skip directories and already structured subdirectories if file_path.is_dir() { continue; } if let Some(filename) = file_path.file_name().and_then(|n| n.to_str()) { // Handle thumbnail files if filename.ends_with("_thumb.jpg") { let new_path = thumbnails_dir.join(filename); if let Err(e) = fs::rename(&file_path, &new_path).await { warn!("Failed to migrate thumbnail {}: {}", filename, e); } else { thumbnail_count += 1; info!("Migrated thumbnail: {} -> {:?}", filename, new_path); } } // Handle regular document files else { let new_path = documents_dir.join(filename); if let Err(e) = fs::rename(&file_path, &new_path).await { warn!("Failed to migrate document {}: {}", filename, e); } else { migrated_count += 1; info!("Migrated document: {} -> {:?}", filename, new_path); } } } } info!("Migration completed: {} documents, {} thumbnails moved to structured directories", migrated_count, thumbnail_count); Ok(()) } pub async fn save_file(&self, filename: &str, data: &[u8]) -> Result { let file_id = Uuid::new_v4(); let extension = Path::new(filename) .extension() .and_then(|ext| ext.to_str()) .unwrap_or(""); let saved_filename = if extension.is_empty() { file_id.to_string() } else { format!("{}.{}", file_id, extension) }; // Save to documents subdirectory let documents_dir = self.get_documents_path(); let file_path = documents_dir.join(&saved_filename); // Ensure the documents directory exists if let Err(e) = fs::create_dir_all(&documents_dir).await { error!("Failed to create documents directory: {}", e); return Err(anyhow::anyhow!("Failed to create documents directory: {}", e)); } fs::write(&file_path, data).await?; Ok(file_path.to_string_lossy().to_string()) } /// Save file for a specific document (works with both local and S3) pub async fn save_document_file(&self, user_id: Uuid, document_id: Uuid, filename: &str, data: &[u8]) -> Result { let storage_path = self.storage.store_document(user_id, document_id, filename, data).await?; info!("Saved document via storage backend: {}", storage_path); Ok(storage_path) } /// Save thumbnail (works with both local and S3) pub async fn save_thumbnail(&self, user_id: Uuid, document_id: Uuid, data: &[u8]) -> Result { let storage_path = self.storage.store_thumbnail(user_id, document_id, data).await?; info!("Saved thumbnail via storage backend: {}", storage_path); Ok(storage_path) } /// Save processed image (works with both local and S3) pub async fn save_processed_image(&self, user_id: Uuid, document_id: Uuid, data: &[u8]) -> Result { let storage_path = self.storage.store_processed_image(user_id, document_id, data).await?; info!("Saved processed image via storage backend: {}", storage_path); Ok(storage_path) } pub fn create_document( &self, filename: &str, original_filename: &str, file_path: &str, file_size: i64, mime_type: &str, user_id: Uuid, file_hash: Option, original_created_at: Option>, original_modified_at: Option>, source_path: Option, source_type: Option, source_id: Option, file_permissions: Option, file_owner: Option, file_group: Option, source_metadata: Option, ) -> Document { self.create_document_with_id( Uuid::new_v4(), filename, original_filename, file_path, file_size, mime_type, user_id, file_hash, original_created_at, original_modified_at, source_path, source_type, source_id, file_permissions, file_owner, file_group, source_metadata, ) } pub fn create_document_with_id( &self, document_id: Uuid, filename: &str, original_filename: &str, file_path: &str, file_size: i64, mime_type: &str, user_id: Uuid, file_hash: Option, original_created_at: Option>, original_modified_at: Option>, source_path: Option, source_type: Option, source_id: Option, file_permissions: Option, file_owner: Option, file_group: Option, source_metadata: Option, ) -> Document { Document { id: document_id, filename: filename.to_string(), original_filename: original_filename.to_string(), file_path: file_path.to_string(), file_size, mime_type: mime_type.to_string(), content: None, ocr_text: None, ocr_confidence: None, ocr_word_count: None, ocr_processing_time_ms: None, ocr_status: Some("pending".to_string()), ocr_error: None, ocr_completed_at: None, ocr_retry_count: None, ocr_failure_reason: None, tags: Vec::new(), created_at: Utc::now(), updated_at: Utc::now(), user_id, file_hash, original_created_at, original_modified_at, source_path, source_type, source_id, file_permissions, file_owner, file_group, source_metadata, } } pub fn is_allowed_file_type(&self, filename: &str, allowed_types: &[String]) -> bool { if let Some(extension) = Path::new(filename) .extension() .and_then(|ext| ext.to_str()) { let ext_lower = extension.to_lowercase(); allowed_types.contains(&ext_lower) } else { false } } /// Resolve file path to actual location, handling both old and new directory structures pub async fn resolve_file_path(&self, file_path: &str) -> Result { // If the file exists at the given path, use it if Path::new(file_path).exists() { return Ok(file_path.to_string()); } // Try to find the file in the new structured directory if file_path.starts_with("./uploads/") && !file_path.contains("/documents/") { let new_path = file_path.replace("./uploads/", "./uploads/documents/"); if Path::new(&new_path).exists() { info!("Found file in new structured directory: {} -> {}", file_path, new_path); return Ok(new_path); } } // Try without the ./ prefix if file_path.starts_with("uploads/") && !file_path.contains("/documents/") { let new_path = file_path.replace("uploads/", "uploads/documents/"); if Path::new(&new_path).exists() { info!("Found file in new structured directory: {} -> {}", file_path, new_path); return Ok(new_path); } } // File not found in any expected location Err(anyhow::anyhow!("File not found: {} (checked original path and structured directory)", file_path)) } pub async fn read_file(&self, file_path: &str) -> Result> { // Check if this is a storage backend path (s3:// or other prefixes) if file_path.starts_with("s3://") { // Strip the s3:// prefix and delegate to storage backend let storage_key = file_path.strip_prefix("s3://").unwrap_or(file_path); return self.storage.retrieve_file(storage_key).await; } // For local files, we might need to use the storage backend or fall back to direct file access // Try storage backend first, then fall back to legacy file resolution match self.storage.retrieve_file(file_path).await { Ok(data) => Ok(data), Err(_) => { // Fall back to legacy file resolution for backward compatibility let resolved_path = self.resolve_file_path(file_path).await?; let data = fs::read(&resolved_path).await?; Ok(data) } } } #[cfg(feature = "ocr")] pub async fn get_or_generate_thumbnail(&self, file_path: &str, filename: &str) -> Result> { // Use the structured thumbnails directory let thumbnails_dir = self.get_thumbnails_path(); if !thumbnails_dir.exists() { if let Err(e) = fs::create_dir_all(&thumbnails_dir).await { error!("Failed to create thumbnails directory: {}", e); return Err(anyhow::anyhow!("Failed to create thumbnails directory: {}", e)); } } // Generate thumbnail filename based on original file path let file_stem = Path::new(file_path) .file_stem() .and_then(|s| s.to_str()) .unwrap_or("unknown"); let thumbnail_path = thumbnails_dir.join(format!("{}_thumb.jpg", file_stem)); // Check if thumbnail already exists if thumbnail_path.exists() { return self.read_file(&thumbnail_path.to_string_lossy()).await; } // Resolve file path and generate thumbnail let resolved_path = self.resolve_file_path(file_path).await?; let thumbnail_data = self.generate_thumbnail(&resolved_path, filename).await?; // Save thumbnail to cache fs::write(&thumbnail_path, &thumbnail_data).await?; Ok(thumbnail_data) } #[cfg(feature = "ocr")] async fn generate_thumbnail(&self, file_path: &str, filename: &str) -> Result> { let file_data = self.read_file(file_path).await?; // Determine file type from extension let extension = Path::new(filename) .extension() .and_then(|ext| ext.to_str()) .unwrap_or("") .to_lowercase(); match extension.as_str() { "jpg" | "jpeg" | "png" | "bmp" | "tiff" | "gif" => { self.generate_image_thumbnail(&file_data).await } "pdf" => { self.generate_pdf_thumbnail(&file_data).await } "txt" => { self.generate_text_thumbnail(&file_data).await } "doc" | "docx" => { self.generate_placeholder_thumbnail("DOC").await } _ => { // For other file types, generate a placeholder self.generate_placeholder_thumbnail(&extension.to_uppercase()).await } } } #[cfg(feature = "ocr")] async fn generate_image_thumbnail(&self, file_data: &[u8]) -> Result> { let img = image::load_from_memory(file_data)?; let thumbnail = img.resize(200, 200, FilterType::Lanczos3); // Convert to RGB if the image has an alpha channel (RGBA) // JPEG doesn't support transparency, so we need to remove the alpha channel let rgb_thumbnail = match thumbnail { image::DynamicImage::ImageRgba8(_) => { // Convert RGBA to RGB by compositing against a white background let rgb_img = image::DynamicImage::ImageRgb8( thumbnail.to_rgb8() ); rgb_img }, _ => thumbnail, // Already RGB or other compatible format }; let mut buffer = Vec::new(); let mut cursor = std::io::Cursor::new(&mut buffer); rgb_thumbnail.write_to(&mut cursor, ImageFormat::Jpeg)?; Ok(buffer) } #[cfg(feature = "ocr")] async fn generate_pdf_thumbnail(&self, file_data: &[u8]) -> Result> { use std::process::Command; use tokio::fs; use uuid::Uuid; // Create a temporary file for the PDF let temp_id = Uuid::new_v4(); let temp_pdf_path = format!("/tmp/pdf_thumb_{}.pdf", temp_id); let temp_png_path = format!("/tmp/pdf_thumb_{}.png", temp_id); // Write PDF data to temporary file if let Err(e) = fs::write(&temp_pdf_path, file_data).await { error!("Failed to write temporary PDF file: {}", e); return self.generate_placeholder_thumbnail("PDF").await; } // Use pdftoppm to convert first page to PNG let output = Command::new("pdftoppm") .arg("-f").arg("1") // First page only .arg("-l").arg("1") // Last page (same as first) .arg("-scale-to").arg("200") // Scale to 200px width .arg("-png") // Output as PNG .arg(&temp_pdf_path) .arg(&format!("/tmp/pdf_thumb_{}", temp_id)) // Output prefix .output(); // Clean up temporary PDF file let _ = fs::remove_file(&temp_pdf_path).await; match output { Ok(result) if result.status.success() => { // pdftoppm adds "-1" to the filename for the first page let actual_png_path = format!("/tmp/pdf_thumb_{}-1.png", temp_id); // Read the generated PNG file match fs::read(&actual_png_path).await { Ok(png_data) => { // Clean up temporary PNG file let _ = fs::remove_file(&actual_png_path).await; // Convert PNG to JPEG thumbnail match image::load_from_memory(&png_data) { Ok(img) => { // Resize to 200x200 maintaining aspect ratio let thumbnail = img.resize(200, 200, image::imageops::FilterType::Lanczos3); // Convert to JPEG let mut buffer = Vec::new(); let mut cursor = std::io::Cursor::new(&mut buffer); if thumbnail.write_to(&mut cursor, ImageFormat::Jpeg).is_ok() { Ok(buffer) } else { self.generate_placeholder_thumbnail("PDF").await } } Err(_) => self.generate_placeholder_thumbnail("PDF").await, } } Err(_) => { let _ = fs::remove_file(&actual_png_path).await; self.generate_placeholder_thumbnail("PDF").await } } } _ => { // Clean up any potential PNG files let _ = fs::remove_file(&temp_png_path).await; let _ = fs::remove_file(&format!("/tmp/pdf_thumb_{}-1.png", temp_id)).await; self.generate_placeholder_thumbnail("PDF").await } } } #[cfg(feature = "ocr")] async fn generate_text_thumbnail(&self, file_data: &[u8]) -> Result> { use image::Rgb; // Convert bytes to text let text = String::from_utf8_lossy(file_data); self.generate_text_based_thumbnail(&text, "TXT", Rgb([34, 139, 34])).await } #[cfg(feature = "ocr")] async fn generate_text_based_thumbnail(&self, text: &str, file_type: &str, bg_color: image::Rgb) -> Result> { use image::{RgbImage, Rgb, DynamicImage, ImageFormat}; let width = 200; let height = 200; let mut img = RgbImage::new(width, height); // Fill background for pixel in img.pixels_mut() { *pixel = bg_color; } // Add file type indicator at the top let text_color = Rgb([255, 255, 255]); // White text let preview_text = if text.len() > 300 { format!("{}\n{}", file_type, &text[..300].trim()) } else { format!("{}\n{}", file_type, text.trim()) }; // Simple text rendering - just place some characters as visual indicators // For a more sophisticated approach, you'd use a text rendering library let lines: Vec<&str> = preview_text.lines().take(15).collect(); for (line_idx, line) in lines.iter().enumerate() { let y_offset = 20 + (line_idx * 12); if y_offset >= height as usize - 10 { break; } // Simple character placement (very basic text rendering) for (char_idx, _) in line.chars().take(25).enumerate() { let x_offset = 10 + (char_idx * 7); if x_offset >= width as usize - 10 { break; } // Draw a simple "character" representation as white pixels if x_offset < width as usize && y_offset < height as usize { if let Some(pixel) = img.get_pixel_mut_checked(x_offset as u32, y_offset as u32) { *pixel = text_color; } // Add some thickness if let Some(pixel) = img.get_pixel_mut_checked(x_offset as u32 + 1, y_offset as u32) { *pixel = text_color; } } } } let dynamic_img = DynamicImage::ImageRgb8(img); let mut buffer = Vec::new(); let mut cursor = std::io::Cursor::new(&mut buffer); dynamic_img.write_to(&mut cursor, ImageFormat::Jpeg)?; Ok(buffer) } #[cfg(feature = "ocr")] async fn generate_placeholder_thumbnail(&self, file_type: &str) -> Result> { // Create a simple colored rectangle as placeholder use image::{RgbImage, Rgb}; let mut img = RgbImage::new(200, 200); // Different colors for different file types let color = match file_type { "PDF" => Rgb([220, 38, 27]), // Red for PDF "TXT" => Rgb([34, 139, 34]), // Green for text "DOC" | "DOCX" => Rgb([41, 128, 185]), // Blue for Word docs _ => Rgb([108, 117, 125]), // Gray for unknown }; // Fill with solid color for pixel in img.pixels_mut() { *pixel = color; } let dynamic_img = DynamicImage::ImageRgb8(img); let mut buffer = Vec::new(); let mut cursor = std::io::Cursor::new(&mut buffer); dynamic_img.write_to(&mut cursor, ImageFormat::Jpeg)?; Ok(buffer) } #[cfg(not(feature = "ocr"))] pub async fn get_or_generate_thumbnail(&self, _file_path: &str, _filename: &str) -> Result> { anyhow::bail!("Thumbnail generation requires OCR feature") } pub async fn delete_document_files(&self, document: &Document) -> Result<()> { // Use storage backend for deletion - it handles both S3 and local storage match self.storage.delete_document_files(document.user_id, document.id, &document.filename).await { Ok(_) => { info!("Successfully deleted files for document {} via storage backend", document.id); return Ok(()); } Err(e) => { warn!("Storage backend deletion failed for document {}: {}. Falling back to legacy deletion.", document.id, e); // Fall back to legacy deletion logic for backward compatibility } } // Handle local file deletion let mut deleted_files = Vec::new(); let mut serious_errors = Vec::new(); // Helper function to safely delete a file, handling concurrent deletion scenarios async fn safe_delete(path: &Path, serious_errors: &mut Vec) -> Option { match fs::remove_file(path).await { Ok(_) => { info!("Deleted file: {}", path.display()); Some(path.to_string_lossy().to_string()) } Err(e) => { match e.kind() { std::io::ErrorKind::NotFound => { // File already deleted (possibly by concurrent request) - this is fine info!("File already deleted: {}", path.display()); None } _ => { // Other errors (permissions, I/O errors, etc.) are serious warn!("Failed to delete file {}: {}", path.display(), e); serious_errors.push(format!("Failed to delete file {}: {}", path.display(), e)); None } } } } } // Delete main document file let main_file = Path::new(&document.file_path); if let Some(deleted_path) = safe_delete(&main_file, &mut serious_errors).await { deleted_files.push(deleted_path); } // Delete thumbnail if it exists let thumbnail_filename = format!("{}_thumb.jpg", document.id); let thumbnail_path = self.get_thumbnails_path().join(&thumbnail_filename); if let Some(deleted_path) = safe_delete(&thumbnail_path, &mut serious_errors).await { deleted_files.push(deleted_path); } // Delete processed image if it exists let processed_image_filename = format!("{}_processed.png", document.id); let processed_image_path = self.get_processed_images_path().join(&processed_image_filename); if let Some(deleted_path) = safe_delete(&processed_image_path, &mut serious_errors).await { deleted_files.push(deleted_path); } // Only fail if there were serious errors (not "file not found") if !serious_errors.is_empty() { error!("Serious errors occurred while deleting files for document {}: {}", document.id, serious_errors.join("; ")); return Err(anyhow::anyhow!("File deletion errors: {}", serious_errors.join("; "))); } if deleted_files.is_empty() { info!("No files needed deletion for document {} (all files already removed)", document.id); } else { info!("Successfully deleted {} files for document {} via legacy method", deleted_files.len(), document.id); } Ok(()) } }