use anyhow::Result; use chrono::Utc; use std::path::{Path, PathBuf}; use tokio::fs; use uuid::Uuid; use tracing::{info, warn, error}; use crate::models::Document; #[cfg(feature = "ocr")] use image::{DynamicImage, ImageFormat, imageops::FilterType, Rgb, RgbImage, Rgba, ImageBuffer}; #[derive(Clone)] pub struct FileService { upload_path: String, } impl FileService { pub fn new(upload_path: String) -> Self { Self { upload_path } } /// Initialize the upload directory structure pub async fn initialize_directory_structure(&self) -> Result<()> { let base_path = Path::new(&self.upload_path); // Create subdirectories for organized file storage let directories = [ "documents", // Final uploaded documents "thumbnails", // Document thumbnails "processed_images", // OCR processed images for review "temp", // Temporary files during processing "backups", // Document backups ]; for dir in directories.iter() { let dir_path = base_path.join(dir); if let Err(e) = fs::create_dir_all(&dir_path).await { error!("Failed to create directory {:?}: {}", dir_path, e); return Err(anyhow::anyhow!("Failed to create directory structure: {}", e)); } info!("Ensured directory exists: {:?}", dir_path); } Ok(()) } /// Get the path for a specific subdirectory pub fn get_subdirectory_path(&self, subdir: &str) -> PathBuf { Path::new(&self.upload_path).join(subdir) } /// Get the documents directory path pub fn get_documents_path(&self) -> PathBuf { self.get_subdirectory_path("documents") } /// Get the thumbnails directory path pub fn get_thumbnails_path(&self) -> PathBuf { self.get_subdirectory_path("thumbnails") } /// Get the processed images directory path pub fn get_processed_images_path(&self) -> PathBuf { self.get_subdirectory_path("processed_images") } /// Get the temp directory path pub fn get_temp_path(&self) -> PathBuf { self.get_subdirectory_path("temp") } /// Migrate existing files from the root upload directory to the structured format pub async fn migrate_existing_files(&self) -> Result<()> { let base_path = Path::new(&self.upload_path); let documents_dir = self.get_documents_path(); let thumbnails_dir = self.get_thumbnails_path(); info!("Starting migration of existing files to structured directories..."); let mut migrated_count = 0; let mut thumbnail_count = 0; // Read all files in the base upload directory let mut entries = fs::read_dir(base_path).await?; while let Some(entry) = entries.next_entry().await? { let file_path = entry.path(); // Skip directories and already structured subdirectories if file_path.is_dir() { continue; } if let Some(filename) = file_path.file_name().and_then(|n| n.to_str()) { // Handle thumbnail files if filename.ends_with("_thumb.jpg") { let new_path = thumbnails_dir.join(filename); if let Err(e) = fs::rename(&file_path, &new_path).await { warn!("Failed to migrate thumbnail {}: {}", filename, e); } else { thumbnail_count += 1; info!("Migrated thumbnail: {} -> {:?}", filename, new_path); } } // Handle regular document files else { let new_path = documents_dir.join(filename); if let Err(e) = fs::rename(&file_path, &new_path).await { warn!("Failed to migrate document {}: {}", filename, e); } else { migrated_count += 1; info!("Migrated document: {} -> {:?}", filename, new_path); } } } } info!("Migration completed: {} documents, {} thumbnails moved to structured directories", migrated_count, thumbnail_count); Ok(()) } pub async fn save_file(&self, filename: &str, data: &[u8]) -> Result { let file_id = Uuid::new_v4(); let extension = Path::new(filename) .extension() .and_then(|ext| ext.to_str()) .unwrap_or(""); let saved_filename = if extension.is_empty() { file_id.to_string() } else { format!("{}.{}", file_id, extension) }; // Save to documents subdirectory let documents_dir = self.get_documents_path(); let file_path = documents_dir.join(&saved_filename); // Ensure the documents directory exists if let Err(e) = fs::create_dir_all(&documents_dir).await { error!("Failed to create documents directory: {}", e); return Err(anyhow::anyhow!("Failed to create documents directory: {}", e)); } fs::write(&file_path, data).await?; Ok(file_path.to_string_lossy().to_string()) } pub fn create_document( &self, filename: &str, original_filename: &str, file_path: &str, file_size: i64, mime_type: &str, user_id: Uuid, file_hash: Option, ) -> Document { Document { id: Uuid::new_v4(), filename: filename.to_string(), original_filename: original_filename.to_string(), file_path: file_path.to_string(), file_size, mime_type: mime_type.to_string(), content: None, ocr_text: None, ocr_confidence: None, ocr_word_count: None, ocr_processing_time_ms: None, ocr_status: Some("pending".to_string()), ocr_error: None, ocr_completed_at: None, tags: Vec::new(), created_at: Utc::now(), updated_at: Utc::now(), user_id, file_hash, } } pub fn is_allowed_file_type(&self, filename: &str, allowed_types: &[String]) -> bool { if let Some(extension) = Path::new(filename) .extension() .and_then(|ext| ext.to_str()) { let ext_lower = extension.to_lowercase(); allowed_types.contains(&ext_lower) } else { false } } /// Resolve file path to actual location, handling both old and new directory structures pub async fn resolve_file_path(&self, file_path: &str) -> Result { // If the file exists at the given path, use it if Path::new(file_path).exists() { return Ok(file_path.to_string()); } // Try to find the file in the new structured directory if file_path.starts_with("./uploads/") && !file_path.contains("/documents/") { let new_path = file_path.replace("./uploads/", "./uploads/documents/"); if Path::new(&new_path).exists() { info!("Found file in new structured directory: {} -> {}", file_path, new_path); return Ok(new_path); } } // Try without the ./ prefix if file_path.starts_with("uploads/") && !file_path.contains("/documents/") { let new_path = file_path.replace("uploads/", "uploads/documents/"); if Path::new(&new_path).exists() { info!("Found file in new structured directory: {} -> {}", file_path, new_path); return Ok(new_path); } } // File not found in any expected location Err(anyhow::anyhow!("File not found: {} (checked original path and structured directory)", file_path)) } pub async fn read_file(&self, file_path: &str) -> Result> { let resolved_path = self.resolve_file_path(file_path).await?; let data = fs::read(&resolved_path).await?; Ok(data) } #[cfg(feature = "ocr")] pub async fn get_or_generate_thumbnail(&self, file_path: &str, filename: &str) -> Result> { // Use the structured thumbnails directory let thumbnails_dir = self.get_thumbnails_path(); if !thumbnails_dir.exists() { if let Err(e) = fs::create_dir_all(&thumbnails_dir).await { error!("Failed to create thumbnails directory: {}", e); return Err(anyhow::anyhow!("Failed to create thumbnails directory: {}", e)); } } // Generate thumbnail filename based on original file path let file_stem = Path::new(file_path) .file_stem() .and_then(|s| s.to_str()) .unwrap_or("unknown"); let thumbnail_path = thumbnails_dir.join(format!("{}_thumb.jpg", file_stem)); // Check if thumbnail already exists if thumbnail_path.exists() { return self.read_file(&thumbnail_path.to_string_lossy()).await; } // Resolve file path and generate thumbnail let resolved_path = self.resolve_file_path(file_path).await?; let thumbnail_data = self.generate_thumbnail(&resolved_path, filename).await?; // Save thumbnail to cache fs::write(&thumbnail_path, &thumbnail_data).await?; Ok(thumbnail_data) } #[cfg(feature = "ocr")] async fn generate_thumbnail(&self, file_path: &str, filename: &str) -> Result> { let file_data = self.read_file(file_path).await?; // Determine file type from extension let extension = Path::new(filename) .extension() .and_then(|ext| ext.to_str()) .unwrap_or("") .to_lowercase(); match extension.as_str() { "jpg" | "jpeg" | "png" | "bmp" | "tiff" | "gif" => { self.generate_image_thumbnail(&file_data).await } "pdf" => { self.generate_pdf_thumbnail(&file_data).await } "txt" => { self.generate_text_thumbnail(&file_data).await } "doc" | "docx" => { self.generate_placeholder_thumbnail("DOC").await } _ => { // For other file types, generate a placeholder self.generate_placeholder_thumbnail(&extension.to_uppercase()).await } } } #[cfg(feature = "ocr")] async fn generate_image_thumbnail(&self, file_data: &[u8]) -> Result> { let img = image::load_from_memory(file_data)?; let thumbnail = img.resize(200, 200, FilterType::Lanczos3); // Convert to RGB if the image has an alpha channel (RGBA) // JPEG doesn't support transparency, so we need to remove the alpha channel let rgb_thumbnail = match thumbnail { image::DynamicImage::ImageRgba8(_) => { // Convert RGBA to RGB by compositing against a white background let rgb_img = image::DynamicImage::ImageRgb8( thumbnail.to_rgb8() ); rgb_img }, _ => thumbnail, // Already RGB or other compatible format }; let mut buffer = Vec::new(); let mut cursor = std::io::Cursor::new(&mut buffer); rgb_thumbnail.write_to(&mut cursor, ImageFormat::Jpeg)?; Ok(buffer) } #[cfg(feature = "ocr")] async fn generate_pdf_thumbnail(&self, file_data: &[u8]) -> Result> { use image::Rgb; // Try to extract first page as image using pdf-extract match pdf_extract::extract_text_from_mem(file_data) { Ok(text) => { // If we can extract text, create a text-based thumbnail self.generate_text_based_thumbnail(&text, "PDF", Rgb([220, 38, 27])).await } Err(_) => { // Fall back to placeholder if PDF extraction fails self.generate_placeholder_thumbnail("PDF").await } } } #[cfg(feature = "ocr")] async fn generate_text_thumbnail(&self, file_data: &[u8]) -> Result> { use image::Rgb; // Convert bytes to text let text = String::from_utf8_lossy(file_data); self.generate_text_based_thumbnail(&text, "TXT", Rgb([34, 139, 34])).await } #[cfg(feature = "ocr")] async fn generate_text_based_thumbnail(&self, text: &str, file_type: &str, bg_color: image::Rgb) -> Result> { use image::{RgbImage, Rgb, DynamicImage, ImageFormat}; let width = 200; let height = 200; let mut img = RgbImage::new(width, height); // Fill background for pixel in img.pixels_mut() { *pixel = bg_color; } // Add file type indicator at the top let text_color = Rgb([255, 255, 255]); // White text let preview_text = if text.len() > 300 { format!("{}\n{}", file_type, &text[..300].trim()) } else { format!("{}\n{}", file_type, text.trim()) }; // Simple text rendering - just place some characters as visual indicators // For a more sophisticated approach, you'd use a text rendering library let lines: Vec<&str> = preview_text.lines().take(15).collect(); for (line_idx, line) in lines.iter().enumerate() { let y_offset = 20 + (line_idx * 12); if y_offset >= height as usize - 10 { break; } // Simple character placement (very basic text rendering) for (char_idx, _) in line.chars().take(25).enumerate() { let x_offset = 10 + (char_idx * 7); if x_offset >= width as usize - 10 { break; } // Draw a simple "character" representation as white pixels if x_offset < width as usize && y_offset < height as usize { if let Some(pixel) = img.get_pixel_mut_checked(x_offset as u32, y_offset as u32) { *pixel = text_color; } // Add some thickness if let Some(pixel) = img.get_pixel_mut_checked(x_offset as u32 + 1, y_offset as u32) { *pixel = text_color; } } } } let dynamic_img = DynamicImage::ImageRgb8(img); let mut buffer = Vec::new(); let mut cursor = std::io::Cursor::new(&mut buffer); dynamic_img.write_to(&mut cursor, ImageFormat::Jpeg)?; Ok(buffer) } #[cfg(feature = "ocr")] async fn generate_placeholder_thumbnail(&self, file_type: &str) -> Result> { // Create a simple colored rectangle as placeholder use image::{RgbImage, Rgb}; let mut img = RgbImage::new(200, 200); // Different colors for different file types let color = match file_type { "PDF" => Rgb([220, 38, 27]), // Red for PDF "TXT" => Rgb([34, 139, 34]), // Green for text "DOC" | "DOCX" => Rgb([41, 128, 185]), // Blue for Word docs _ => Rgb([108, 117, 125]), // Gray for unknown }; // Fill with solid color for pixel in img.pixels_mut() { *pixel = color; } let dynamic_img = DynamicImage::ImageRgb8(img); let mut buffer = Vec::new(); let mut cursor = std::io::Cursor::new(&mut buffer); dynamic_img.write_to(&mut cursor, ImageFormat::Jpeg)?; Ok(buffer) } #[cfg(not(feature = "ocr"))] pub async fn get_or_generate_thumbnail(&self, _file_path: &str, _filename: &str) -> Result> { anyhow::bail!("Thumbnail generation requires OCR feature") } pub async fn delete_document_files(&self, document: &Document) -> Result<()> { let mut deleted_files = Vec::new(); let mut serious_errors = Vec::new(); // Helper function to safely delete a file, handling concurrent deletion scenarios async fn safe_delete(path: &Path, serious_errors: &mut Vec) -> Option { match fs::remove_file(path).await { Ok(_) => { info!("Deleted file: {}", path.display()); Some(path.to_string_lossy().to_string()) } Err(e) => { match e.kind() { std::io::ErrorKind::NotFound => { // File already deleted (possibly by concurrent request) - this is fine info!("File already deleted: {}", path.display()); None } _ => { // Other errors (permissions, I/O errors, etc.) are serious warn!("Failed to delete file {}: {}", path.display(), e); serious_errors.push(format!("Failed to delete file {}: {}", path.display(), e)); None } } } } } // Delete main document file let main_file = Path::new(&document.file_path); if let Some(deleted_path) = safe_delete(&main_file, &mut serious_errors).await { deleted_files.push(deleted_path); } // Delete thumbnail if it exists let thumbnail_filename = format!("{}_thumb.jpg", document.id); let thumbnail_path = self.get_thumbnails_path().join(&thumbnail_filename); if let Some(deleted_path) = safe_delete(&thumbnail_path, &mut serious_errors).await { deleted_files.push(deleted_path); } // Delete processed image if it exists let processed_image_filename = format!("{}_processed.png", document.id); let processed_image_path = self.get_processed_images_path().join(&processed_image_filename); if let Some(deleted_path) = safe_delete(&processed_image_path, &mut serious_errors).await { deleted_files.push(deleted_path); } // Only fail if there were serious errors (not "file not found") if !serious_errors.is_empty() { error!("Serious errors occurred while deleting files for document {}: {}", document.id, serious_errors.join("; ")); return Err(anyhow::anyhow!("File deletion errors: {}", serious_errors.join("; "))); } if deleted_files.is_empty() { info!("No files needed deletion for document {} (all files already removed)", document.id); } else { info!("Successfully deleted {} files for document {}", deleted_files.len(), document.id); } Ok(()) } }