From ea43f79a903185e9873ec62236f1371d608e67e4 Mon Sep 17 00:00:00 2001 From: perf3ct Date: Thu, 10 Jul 2025 21:40:16 +0000 Subject: [PATCH] feat(server): show source metadata better, and implement tests --- .../src/components/FileIntegrityDisplay.tsx | 157 ++++++++++++++- frontend/src/pages/DocumentDetailsPage.tsx | 125 +----------- src/ingestion/batch_ingest.rs | 8 +- src/ingestion/document_ingestion.rs | 10 +- src/lib.rs | 1 + src/metadata_extraction.rs | 179 ++++++++++++++++++ src/models/document.rs | 2 +- src/routes/documents/crud.rs | 46 +++-- src/routes/webdav/webdav_sync.rs | 4 +- src/scheduling/source_sync.rs | 10 +- src/scheduling/watcher.rs | 8 +- src/services/local_folder_service.rs | 12 +- src/services/s3_service.rs | 6 +- src/services/s3_service_stub.rs | 4 +- src/services/webdav/discovery.rs | 18 +- src/services/webdav/service.rs | 16 +- .../webdav/subdirectory_edge_cases_tests.rs | 30 +-- src/webdav_xml_parser.rs | 10 +- ...ration_source_sync_hash_duplicate_tests.rs | 6 +- ...ntegration_webdav_first_time_scan_tests.rs | 26 +-- ...integration_webdav_hash_duplicate_tests.rs | 8 +- tests/unit_webdav_directory_tracking_tests.rs | 26 +-- tests/unit_webdav_edge_cases_tests.rs | 38 ++-- tests/unit_webdav_enhanced_unit_tests.rs | 4 +- tests/unit_webdav_smart_scan_logic_tests.rs | 12 +- tests/unit_webdav_targeted_rescan_tests.rs | 10 +- tests/unit_webdav_unit_tests.rs | 2 +- 27 files changed, 506 insertions(+), 272 deletions(-) create mode 100644 src/metadata_extraction.rs diff --git a/frontend/src/components/FileIntegrityDisplay.tsx b/frontend/src/components/FileIntegrityDisplay.tsx index 23315e0..79bbb59 100644 --- a/frontend/src/components/FileIntegrityDisplay.tsx +++ b/frontend/src/components/FileIntegrityDisplay.tsx @@ -31,6 +31,15 @@ interface FileIntegrityDisplayProps { updatedAt: string; userId?: string; username?: string; + // Additional metadata fields + sourceType?: string; + sourcePath?: string; + filePermissions?: number; + fileOwner?: string; + fileGroup?: string; + originalCreatedAt?: string; + originalModifiedAt?: string; + sourceMetadata?: any; compact?: boolean; } @@ -43,6 +52,14 @@ const FileIntegrityDisplay: React.FC = ({ updatedAt, userId, username, + sourceType, + sourcePath, + filePermissions, + fileOwner, + fileGroup, + originalCreatedAt, + originalModifiedAt, + sourceMetadata, compact = false, }) => { const [copied, setCopied] = useState(false); @@ -203,7 +220,7 @@ const FileIntegrityDisplay: React.FC = ({ }} /> - File Integrity & Verification + Document Details @@ -340,8 +357,146 @@ const FileIntegrityDisplay: React.FC = ({ }} /> + + {fileOwner && ( + + + Owner + + + {fileOwner} + + + )} + + {sourcePath && ( + + + Source Path + + + {sourcePath} + + + )} + + {/* Additional Source Information */} + {(sourceType || fileGroup || filePermissions) && ( + + + + Additional Source Details + + + + {sourceType && ( + + Source Type: + + + )} + + {fileGroup && ( + + File Group: + + {fileGroup} + + + )} + + {filePermissions && ( + + Permissions: + + {filePermissions.toString(8)} ({filePermissions}) + + + )} + + + )} + + {/* Timestamps */} + {(originalCreatedAt || originalModifiedAt) && ( + + + + Original Timestamps + + + + {originalCreatedAt && ( + + Original Created: + + {new Date(originalCreatedAt).toLocaleString()} + + + )} + + {originalModifiedAt && ( + + Original Modified: + + {new Date(originalModifiedAt).toLocaleString()} + + + )} + + + )} + + {/* Source Metadata - displayed as simple key-value pairs */} + {sourceMetadata && Object.keys(sourceMetadata).length > 0 
&& ( + + + + Source Metadata + + + + {Object.entries(sourceMetadata).map(([key, value]) => { + // Skip null/undefined values and complex objects + if (value === null || value === undefined || typeof value === 'object') return null; + + // Format the key to be more readable + const formattedKey = key + .replace(/_/g, ' ') + .replace(/([A-Z])/g, ' $1') + .replace(/^./, str => str.toUpperCase()) + .trim(); + + // Format the value + const formattedValue = typeof value === 'boolean' + ? (value ? 'Yes' : 'No') + : String(value); + + return ( + + + {formattedKey}: + + + {formattedValue} + + + ); + }).filter(Boolean)} + + + )} + ); }; diff --git a/frontend/src/pages/DocumentDetailsPage.tsx b/frontend/src/pages/DocumentDetailsPage.tsx index 9805315..611fea5 100644 --- a/frontend/src/pages/DocumentDetailsPage.tsx +++ b/frontend/src/pages/DocumentDetailsPage.tsx @@ -52,7 +52,6 @@ import DocumentViewer from '../components/DocumentViewer'; import LabelSelector from '../components/Labels/LabelSelector'; import { type LabelData } from '../components/Labels/Label'; import MetadataDisplay from '../components/MetadataDisplay'; -import MetadataParser from '../components/MetadataParser'; import FileIntegrityDisplay from '../components/FileIntegrityDisplay'; import ProcessingTimeline from '../components/ProcessingTimeline'; import { RetryHistoryModal } from '../components/RetryHistoryModal'; @@ -700,6 +699,14 @@ const DocumentDetailsPage: React.FC = () => { updatedAt={document.updated_at} userId={document.user_id} username={document.username} + sourceType={document.source_type} + sourcePath={document.source_path} + filePermissions={document.file_permissions} + fileOwner={document.file_owner} + fileGroup={document.file_group} + originalCreatedAt={document.original_created_at} + originalModifiedAt={document.original_modified_at} + sourceMetadata={document.source_metadata} /> @@ -891,122 +898,6 @@ const DocumentDetailsPage: React.FC = () => { ocrError={ocrData?.ocr_error} /> - {/* Source Information */} - {(document.source_type || document.file_permissions || document.file_owner || document.file_group) && ( - - - - - Source Information - - - - {document.source_type && ( - - - - Source Type - - - - - )} - - {document.file_permissions && ( - - - - File Permissions - - - {document.file_permissions.toString(8)} ({document.file_permissions}) - - - - )} - - {document.file_owner && ( - - - - File Owner - - - {document.file_owner} - - - - )} - - {document.file_group && ( - - - - File Group - - - {document.file_group} - - - - )} - - {document.source_path && ( - - - - Original Source Path - - - {document.source_path} - - - - )} - - - - )} - - {/* Enhanced Metadata Display */} - {document.source_metadata && Object.keys(document.source_metadata).length > 0 && ( - - - - 📊 Rich Metadata Analysis - - - - - )} - {/* Tags and Labels */} Result { +/// Extract FileIngestionInfo from filesystem path and metadata +async fn extract_file_info_from_path(path: &Path) -> Result { let metadata = fs::metadata(path).await?; let filename = path .file_name() @@ -208,7 +208,7 @@ async fn extract_file_info_from_path(path: &Path) -> Result { #[cfg(not(unix))] let (permissions, owner, group) = (None, None, None); - Ok(FileInfo { + Ok(FileIngestionInfo { path: path.to_string_lossy().to_string(), name: filename, size: file_size, diff --git a/src/ingestion/document_ingestion.rs b/src/ingestion/document_ingestion.rs index c0b4b1d..ee87d52 100644 --- a/src/ingestion/document_ingestion.rs +++ b/src/ingestion/document_ingestion.rs @@ -12,7 +12,7 @@ use 
tracing::{debug, info, warn};
 use serde_json;
 use chrono::Utc;
 
-use crate::models::{Document, FileInfo};
+use crate::models::{Document, FileIngestionInfo};
 use crate::db::Database;
 use crate::services::file_service::FileService;
@@ -76,8 +76,8 @@ impl DocumentIngestionService {
         Self { db, file_service }
     }
 
-    /// Extract metadata from FileInfo for storage in document
-    fn extract_metadata_from_file_info(file_info: &FileInfo) -> (Option<DateTime<Utc>>, Option<DateTime<Utc>>, Option<serde_json::Value>) {
+    /// Extract metadata from FileIngestionInfo for storage in document
+    fn extract_metadata_from_file_info(file_info: &FileIngestionInfo) -> (Option<DateTime<Utc>>, Option<DateTime<Utc>>, Option<serde_json::Value>) {
         let original_created_at = file_info.created_at;
         let original_modified_at = file_info.last_modified;
@@ -315,10 +315,10 @@ impl DocumentIngestionService {
         format!("{:x}", result)
     }
 
-    /// Ingest document from source with FileInfo metadata
+    /// Ingest document from source with FileIngestionInfo metadata
     pub async fn ingest_from_file_info(
         &self,
-        file_info: &FileInfo,
+        file_info: &FileIngestionInfo,
         file_data: Vec<u8>,
         user_id: Uuid,
         deduplication_policy: DeduplicationPolicy,
diff --git a/src/lib.rs b/src/lib.rs
index c85f9c8..9045b60 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,6 +3,7 @@ pub mod config;
 pub mod db;
 pub mod db_guardrails_simple;
 pub mod ingestion;
+pub mod metadata_extraction;
 pub mod models;
 pub mod monitoring;
 pub mod ocr;
diff --git a/src/metadata_extraction.rs b/src/metadata_extraction.rs
new file mode 100644
index 0000000..7008410
--- /dev/null
+++ b/src/metadata_extraction.rs
@@ -0,0 +1,179 @@
+use anyhow::Result;
+use serde_json::{Map, Value};
+use std::collections::HashMap;
+
+/// Extract metadata from file content based on file type
+pub async fn extract_content_metadata(file_data: &[u8], mime_type: &str, filename: &str) -> Result<Option<Value>> {
+    let mut metadata = Map::new();
+
+    match mime_type {
+        // Image files - extract basic image info
+        mime if mime.starts_with("image/") => {
+            if let Ok(img_metadata) = extract_image_metadata(file_data).await {
+                metadata.extend(img_metadata);
+            }
+        }
+
+        // PDF files - extract basic PDF info
+        "application/pdf" => {
+            if let Ok(pdf_metadata) = extract_pdf_metadata(file_data).await {
+                metadata.extend(pdf_metadata);
+            }
+        }
+
+        // Text files - extract basic text info
+        "text/plain" => {
+            if let Ok(text_metadata) = extract_text_metadata(file_data).await {
+                metadata.extend(text_metadata);
+            }
+        }
+
+        _ => {
+            // For other file types, add basic file information
+            metadata.insert("file_type".to_string(), Value::String(mime_type.to_string()));
+        }
+    }
+
+    // Add filename-based metadata
+    if let Some(extension) = std::path::Path::new(filename)
+        .extension()
+        .and_then(|ext| ext.to_str())
+    {
+        metadata.insert("file_extension".to_string(), Value::String(extension.to_lowercase()));
+    }
+
+    if metadata.is_empty() {
+        Ok(None)
+    } else {
+        Ok(Some(Value::Object(metadata)))
+    }
+}
+
+/// Extract metadata from image files
+async fn extract_image_metadata(file_data: &[u8]) -> Result<Map<String, Value>> {
+    let mut metadata = Map::new();
+
+    // Try to load image and get basic properties
+    if let Ok(img) = image::load_from_memory(file_data) {
+        metadata.insert("image_width".to_string(), Value::Number(img.width().into()));
+        metadata.insert("image_height".to_string(), Value::Number(img.height().into()));
+        // img.color() reports the color type (e.g. Rgb8), not the container format
+        metadata.insert("color_type".to_string(), Value::String(format!("{:?}", img.color())));
+
+        // Calculate aspect ratio
+        let aspect_ratio = img.width() as f64 / img.height() as f64;
+        metadata.insert("aspect_ratio".to_string(), Value::String(format!("{:.2}", aspect_ratio)));
+
+        // Determine orientation
+        let orientation = if img.width() > img.height() {
+            "landscape"
+        } else if img.height() > img.width() {
+            "portrait"
+        } else {
+            "square"
+        };
+        metadata.insert("orientation".to_string(), Value::String(orientation.to_string()));
+
+        // Calculate megapixels
+        let megapixels = (img.width() as f64 * img.height() as f64) / 1_000_000.0;
+        metadata.insert("megapixels".to_string(), Value::String(format!("{:.1} MP", megapixels)));
+    }
+
+    Ok(metadata)
+}
+
+/// Extract metadata from PDF files
+async fn extract_pdf_metadata(file_data: &[u8]) -> Result<Map<String, Value>> {
+    let mut metadata = Map::new();
+
+    // Basic PDF detection and info
+    if file_data.len() >= 5 && &file_data[0..4] == b"%PDF" {
+        // Extract PDF version from header (clamp the slice so short files cannot panic)
+        let header_len = file_data.len().min(20);
+        if let Some(version_end) = file_data[0..header_len].iter().position(|&b| b == b'\n' || b == b'\r') {
+            if let Ok(header) = std::str::from_utf8(&file_data[0..version_end]) {
+                if let Some(version) = header.strip_prefix("%PDF-") {
+                    metadata.insert("pdf_version".to_string(), Value::String(version.to_string()));
+                }
+            }
+        }
+
+        // Try to count pages by counting "/Type /Page" entries; "/Type /Pages"
+        // tree nodes also match that prefix, so subtract them to avoid overcounting
+        let content = String::from_utf8_lossy(file_data);
+        let page_count = content.matches("/Type /Page").count()
+            .saturating_sub(content.matches("/Type /Pages").count());
+        if page_count > 0 {
+            metadata.insert("page_count".to_string(), Value::Number(page_count.into()));
+        }
+
+        // Look for basic PDF info
+        if content.contains("/Linearized") {
+            metadata.insert("linearized".to_string(), Value::Bool(true));
+        }
+
+        // Check for encryption
+        if content.contains("/Encrypt") {
+            metadata.insert("encrypted".to_string(), Value::Bool(true));
+        }
+
+        // Try to find creation/modification dates in metadata
+        if let Some(creation_start) = content.find("/CreationDate") {
+            if let Some(date_start) = content[creation_start..].find('(') {
+                if let Some(date_end) = content[creation_start + date_start..].find(')') {
+                    let date_str = &content[creation_start + date_start + 1..creation_start + date_start + date_end];
+                    metadata.insert("pdf_creation_date".to_string(), Value::String(date_str.to_string()));
+                }
+            }
+        }
+
+        // Basic content analysis
+        if content.contains("/Font") {
+            metadata.insert("contains_fonts".to_string(), Value::Bool(true));
+        }
+
+        if content.contains("/Image") || content.contains("/XObject") {
+            metadata.insert("contains_images".to_string(), Value::Bool(true));
+        }
+    }
+
+    Ok(metadata)
+}
+
+/// Extract metadata from text files
+async fn extract_text_metadata(file_data: &[u8]) -> Result<Map<String, Value>> {
+    let mut metadata = Map::new();
+
+    if let Ok(text) = std::str::from_utf8(file_data) {
+        // Basic text statistics
+        let char_count = text.chars().count();
+        let word_count = text.split_whitespace().count();
+        let line_count = text.lines().count();
+
+        metadata.insert("character_count".to_string(), Value::Number(char_count.into()));
+        metadata.insert("word_count".to_string(), Value::Number(word_count.into()));
+        metadata.insert("line_count".to_string(), Value::Number(line_count.into()));
+
+        // Detect text encoding characteristics
+        if text.chars().any(|c| !c.is_ascii()) {
+            metadata.insert("contains_unicode".to_string(), Value::Bool(true));
+        }
+
+        // Check for common file formats within text
+        if text.trim_start().starts_with("<?xml") {
+            metadata.insert("likely_format".to_string(), Value::String("xml".to_string()));
+        }
+
+        // Rough language detection: count occurrences of common English words
+        let common_english_words = ["the", "and", "that", "have", "for", "with"];
+        let english_count = common_english_words
+            .iter()
+            .map(|word| text.matches(word).count())
+            .sum::<usize>();
+
+        if english_count > word_count / 20 { // If more than 5% are common English words
+            metadata.insert("likely_language".to_string(), Value::String("english".to_string()));
+        }
+    }
+
+    Ok(metadata)
+}
\ No newline at end of file
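The new module is self-contained: callers pass raw bytes, a MIME type, and a filename, and get back an optional `serde_json::Value` object. A minimal sketch of calling it from application code (the file name and runtime setup here are illustrative, not part of the patch):

```rust
use readur::metadata_extraction::extract_content_metadata;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Any local file works; a small UTF-8 text file keeps the example simple.
    let bytes = std::fs::read("notes.txt")?;

    // The extractor dispatches on MIME type and always appends
    // filename-derived metadata such as the lowercased extension.
    match extract_content_metadata(&bytes, "text/plain", "notes.txt").await? {
        Some(metadata) => println!("{}", serde_json::to_string_pretty(&metadata)?),
        None => println!("no metadata extracted"),
    }
    Ok(())
}
```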
diff --git a/src/models/document.rs b/src/models/document.rs
index 5705962..897bf13 100644
--- a/src/models/document.rs
+++ b/src/models/document.rs
@@ -253,7 +253,7 @@ pub struct CreateIgnoredFile {
 }
 
 #[derive(Debug, Clone)]
-pub struct FileInfo {
+pub struct FileIngestionInfo {
     pub path: String,
     pub name: String,
     pub size: i64,
diff --git a/src/routes/documents/crud.rs b/src/routes/documents/crud.rs
index d4b2774..a211e2f 100644
--- a/src/routes/documents/crud.rs
+++ b/src/routes/documents/crud.rs
@@ -74,6 +74,25 @@ pub async fn upload_document(
     info!("Uploading document: {} ({} bytes)", filename, data.len());
 
+    // Create FileIngestionInfo from uploaded data
+    use crate::models::FileIngestionInfo;
+    use chrono::Utc;
+
+    let file_info = FileIngestionInfo {
+        path: format!("upload/{}", filename), // Virtual path for web uploads
+        name: filename.clone(),
+        size: data.len() as i64,
+        mime_type: content_type.clone(),
+        last_modified: Some(Utc::now()), // Upload time as last modified
+        etag: format!("{}-{}", data.len(), Utc::now().timestamp()),
+        is_directory: false,
+        created_at: Some(Utc::now()), // Upload time as creation time
+        permissions: None, // Web uploads don't have filesystem permissions
+        owner: Some(auth_user.user.username.clone()), // Uploader as owner
+        group: None, // Web uploads don't have filesystem groups
+        metadata: None, // Could extract EXIF/PDF metadata in the future
+    };
+
     // Create ingestion service
     let file_service = FileService::new(state.config.upload_path.clone());
     let ingestion_service = DocumentIngestionService::new(
@@ -81,25 +100,14 @@ pub async fn upload_document(
         file_service,
     );
 
-    let request = crate::ingestion::document_ingestion::DocumentIngestionRequest {
-        file_data: data,
-        filename: filename.clone(),
-        original_filename: filename,
-        mime_type: content_type,
-        user_id: auth_user.user.id,
-        source_type: Some("web_upload".to_string()),
-        source_id: None,
-        deduplication_policy: crate::ingestion::document_ingestion::DeduplicationPolicy::Skip,
-        original_created_at: None,
-        original_modified_at: None,
-        source_path: None, // Web uploads don't have a source path
-        file_permissions: None, // Web uploads don't preserve permissions
-        file_owner: None, // Web uploads don't preserve owner
-        file_group: None, // Web uploads don't preserve group
-        source_metadata: None,
-    };
-
-    match ingestion_service.ingest_document(request).await {
+    match ingestion_service.ingest_from_file_info(
+        &file_info,
+        data,
+        auth_user.user.id,
+        crate::ingestion::document_ingestion::DeduplicationPolicy::Skip,
+        "web_upload",
+        None
+    ).await {
         Ok(IngestionResult::Created(document)) => {
             info!("Document uploaded successfully: {}", document.id);
 
diff --git a/src/routes/webdav/webdav_sync.rs b/src/routes/webdav/webdav_sync.rs
index 4c2e94c..06cb8fd 100644
--- a/src/routes/webdav/webdav_sync.rs
+++ b/src/routes/webdav/webdav_sync.rs
@@ -230,7 +230,7 @@ async fn process_single_file(
     state: Arc<AppState>,
     user_id: uuid::Uuid,
     webdav_service: &WebDAVService,
-    file_info: &crate::models::FileInfo,
+    file_info: &crate::models::FileIngestionInfo,
     enable_background_ocr: bool,
     semaphore: Arc<Semaphore>,
     webdav_source_id: Option<Uuid>,
@@ -384,7 +384,7 @@ pub async fn process_files_for_deep_scan(
     state: Arc<AppState>,
     user_id: uuid::Uuid,
     webdav_service: &WebDAVService,
-    files_to_process: &[crate::models::FileInfo],
+    files_to_process: &[crate::models::FileIngestionInfo],
     enable_background_ocr: bool,
     webdav_source_id: Option<Uuid>,
 ) -> Result<usize> {
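Other ingestion paths follow the same shape as the web-upload rewrite above: describe the source with a `FileIngestionInfo`, then hand it to `ingest_from_file_info` with a deduplication policy and a source-type label. A sketch for a hypothetical file read from disk (the MIME type, timestamps, and `"local_folder"` label are illustrative assumptions, not part of the patch):

```rust
use chrono::Utc;
use readur::models::FileIngestionInfo;

/// Build a FileIngestionInfo for a file read from disk (hypothetical helper).
fn describe_local_file(path: &str, bytes: &[u8]) -> FileIngestionInfo {
    FileIngestionInfo {
        path: path.to_string(),
        name: path.rsplit('/').next().unwrap_or(path).to_string(),
        size: bytes.len() as i64,
        mime_type: "application/octet-stream".to_string(), // detect properly in real code
        last_modified: Some(Utc::now()),
        etag: format!("{}-{}", bytes.len(), Utc::now().timestamp()),
        is_directory: false,
        created_at: Some(Utc::now()),
        permissions: None, // fill from fs::metadata on Unix when available
        owner: None,
        group: None,
        metadata: None,
    }
}

// Then, mirroring the upload handler:
// ingestion_service.ingest_from_file_info(
//     &info, bytes.to_vec(), user_id,
//     DeduplicationPolicy::Skip, "local_folder", None,
// ).await
```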
diff --git a/src/scheduling/source_sync.rs b/src/scheduling/source_sync.rs
index 3336fc2..5534c4b 100644
--- a/src/scheduling/source_sync.rs
+++ b/src/scheduling/source_sync.rs
@@ -9,7 +9,7 @@ use uuid::Uuid;
 
 use crate::{
     AppState,
-    models::{FileInfo, Source, SourceType, SourceStatus, LocalFolderSourceConfig, S3SourceConfig, WebDAVSourceConfig},
+    models::{FileIngestionInfo, Source, SourceType, SourceStatus, LocalFolderSourceConfig, S3SourceConfig, WebDAVSourceConfig},
     services::file_service::FileService,
     ingestion::document_ingestion::{DocumentIngestionService, IngestionResult},
     services::local_folder_service::LocalFolderService,
@@ -227,7 +227,7 @@ impl SourceSyncService {
     where
         F: Fn(String) -> Fut1,
         D: Fn(String) -> Fut2 + Clone,
-        Fut1: std::future::Future<Output = Result<Vec<FileInfo>>>,
+        Fut1: std::future::Future<Output = Result<Vec<FileIngestionInfo>>>,
         Fut2: std::future::Future<Output = Result<Vec<u8>>>,
     {
         let mut total_files_processed = 0;
@@ -328,7 +328,7 @@ impl SourceSyncService {
     where
         F: Fn(String) -> Fut1,
         D: Fn(String) -> Fut2 + Clone,
-        Fut1: std::future::Future<Output = Result<Vec<FileInfo>>>,
+        Fut1: std::future::Future<Output = Result<Vec<FileIngestionInfo>>>,
         Fut2: std::future::Future<Output = Result<Vec<u8>>>,
     {
         let mut total_files_processed = 0;
@@ -514,7 +514,7 @@ impl SourceSyncService {
         state: Arc<AppState>,
         user_id: Uuid,
         source_id: Uuid,
-        file_info: &FileInfo,
+        file_info: &FileIngestionInfo,
         enable_background_ocr: bool,
         semaphore: Arc<Semaphore>,
         download_file: D,
@@ -593,7 +593,7 @@ impl SourceSyncService {
         state: Arc<AppState>,
         user_id: Uuid,
         source_id: Uuid,
-        file_info: &FileInfo,
+        file_info: &FileIngestionInfo,
         enable_background_ocr: bool,
         semaphore: Arc<Semaphore>,
         download_file: D,
diff --git a/src/scheduling/watcher.rs b/src/scheduling/watcher.rs
index 9cb3434..61b6e40 100644
--- a/src/scheduling/watcher.rs
+++ b/src/scheduling/watcher.rs
@@ -15,7 +15,7 @@ use crate::{
     services::file_service::FileService,
     ingestion::document_ingestion::{DocumentIngestionService, IngestionResult, DeduplicationPolicy},
     ocr::queue::OcrQueueService,
-    models::FileInfo,
+    models::FileIngestionInfo,
 };
 
 pub async fn start_folder_watcher(config: Config, db: Database) -> Result<()> {
@@ -372,8 +372,8 @@ async fn process_file(
     Ok(())
 }
 
-/// Extract FileInfo from filesystem path and metadata (for watcher)
-async fn extract_file_info_from_path(path: &Path) -> Result<FileInfo> {
+/// Extract FileIngestionInfo from filesystem path and metadata (for watcher)
+async fn extract_file_info_from_path(path: &Path) -> Result<FileIngestionInfo> {
     let metadata = tokio::fs::metadata(path).await?;
     let filename = path
         .file_name()
@@ -411,7 +411,7 @@ async fn extract_file_info_from_path(path: &Path) -> Result<FileInfo> {
     #[cfg(not(unix))]
     let (permissions, owner, group) = (None, None, None);
 
-    Ok(FileInfo {
+    Ok(FileIngestionInfo {
         path: path.to_string_lossy().to_string(),
         name: filename,
         size: file_size,
diff --git a/src/services/local_folder_service.rs b/src/services/local_folder_service.rs
index 4ad2b23..e5ba226 100644
--- a/src/services/local_folder_service.rs
+++ b/src/services/local_folder_service.rs
@@ -7,7 +7,7 @@ use walkdir::WalkDir;
 use sha2::{Sha256, Digest};
 use serde_json;
 
-use crate::models::{FileInfo, LocalFolderSourceConfig};
+use crate::models::{FileIngestionInfo, LocalFolderSourceConfig};
 
 #[derive(Debug, Clone)]
 pub struct LocalFolderService {
@@ -31,13 +31,13 @@ impl LocalFolderService {
     }
 
     /// Discover files in a specific folder
-    pub async fn discover_files_in_folder(&self, folder_path: &str) -> Result<Vec<FileInfo>> {
+    pub async fn discover_files_in_folder(&self, folder_path: &str) -> Result<Vec<FileIngestionInfo>> {
         let path = Path::new(folder_path);
         if !path.exists() {
             return Err(anyhow!("Folder does not exist: {}", folder_path));
         }
 
-        let mut files: Vec<FileInfo> = Vec::new();
+        let mut files: Vec<FileIngestionInfo> = Vec::new();
 
         info!("Scanning local folder: {} (recursive: {})", folder_path,
self.config.recursive); @@ -45,8 +45,8 @@ impl LocalFolderService { let folder_path_clone = folder_path.to_string(); let config = self.config.clone(); - let discovered_files = tokio::task::spawn_blocking(move || -> Result> { - let mut files: Vec = Vec::new(); + let discovered_files = tokio::task::spawn_blocking(move || -> Result> { + let mut files: Vec = Vec::new(); let walker = if config.recursive { WalkDir::new(&folder_path_clone) @@ -137,7 +137,7 @@ impl LocalFolderService { // Add file attributes additional_metadata.insert("readonly".to_string(), serde_json::Value::Bool(metadata.permissions().readonly())); - let file_info = FileInfo { + let file_info = FileIngestionInfo { path: path.to_string_lossy().to_string(), name: file_name, size: metadata.len() as i64, diff --git a/src/services/s3_service.rs b/src/services/s3_service.rs index eec86a6..2b65511 100644 --- a/src/services/s3_service.rs +++ b/src/services/s3_service.rs @@ -12,7 +12,7 @@ use aws_credential_types::Credentials; #[cfg(feature = "s3")] use aws_types::region::Region as AwsRegion; -use crate::models::{FileInfo, S3SourceConfig}; +use crate::models::{FileIngestionInfo, S3SourceConfig}; #[derive(Debug, Clone)] pub struct S3Service { @@ -81,7 +81,7 @@ impl S3Service { } /// Discover files in a specific S3 prefix (folder) - pub async fn discover_files_in_folder(&self, folder_path: &str) -> Result> { + pub async fn discover_files_in_folder(&self, folder_path: &str) -> Result> { #[cfg(not(feature = "s3"))] { return Err(anyhow!("S3 support not compiled in")); @@ -176,7 +176,7 @@ impl S3Service { // If we have region info, add it metadata_map.insert("s3_region".to_string(), serde_json::Value::String(self.config.region.clone())); - let file_info = FileInfo { + let file_info = FileIngestionInfo { path: key.clone(), name: file_name, size, diff --git a/src/services/s3_service_stub.rs b/src/services/s3_service_stub.rs index e9a682e..21289d8 100644 --- a/src/services/s3_service_stub.rs +++ b/src/services/s3_service_stub.rs @@ -2,7 +2,7 @@ use anyhow::{anyhow, Result}; use tracing::warn; -use crate::models::{FileInfo, S3SourceConfig}; +use crate::models::{FileIngestionInfo, S3SourceConfig}; #[derive(Debug, Clone)] pub struct S3Service { @@ -14,7 +14,7 @@ impl S3Service { Err(anyhow!("S3 support not compiled in. 
Enable the 's3' feature to use S3 sources.")) } - pub async fn discover_files_in_folder(&self, _folder_path: &str) -> Result> { + pub async fn discover_files_in_folder(&self, _folder_path: &str) -> Result> { warn!("S3 support not compiled in"); Ok(Vec::new()) } diff --git a/src/services/webdav/discovery.rs b/src/services/webdav/discovery.rs index b3558a0..7730e06 100644 --- a/src/services/webdav/discovery.rs +++ b/src/services/webdav/discovery.rs @@ -5,7 +5,7 @@ use tokio::sync::Semaphore; use futures_util::stream::{self, StreamExt}; use tracing::{debug, info, warn}; -use crate::models::{FileInfo, WebDAVCrawlEstimate, WebDAVFolderInfo}; +use crate::models::{FileIngestionInfo, WebDAVCrawlEstimate, WebDAVFolderInfo}; use crate::webdav_xml_parser::{parse_propfind_response, parse_propfind_response_with_directories}; use super::config::{WebDAVConfig, ConcurrencyConfig}; use super::connection::WebDAVConnection; @@ -30,7 +30,7 @@ impl WebDAVDiscovery { } /// Discovers files in a directory with support for pagination and filtering - pub async fn discover_files(&self, directory_path: &str, recursive: bool) -> Result> { + pub async fn discover_files(&self, directory_path: &str, recursive: bool) -> Result> { info!("🔍 Discovering files in directory: {}", directory_path); if recursive { @@ -41,7 +41,7 @@ impl WebDAVDiscovery { } /// Discovers files in a single directory (non-recursive) - async fn discover_files_single_directory(&self, directory_path: &str) -> Result> { + async fn discover_files_single_directory(&self, directory_path: &str) -> Result> { let url = self.connection.get_url_for_path(directory_path); let propfind_body = r#" @@ -72,7 +72,7 @@ impl WebDAVDiscovery { let files = parse_propfind_response(&body)?; // Filter files based on supported extensions - let filtered_files: Vec = files + let filtered_files: Vec = files .into_iter() .filter(|file| { !file.is_directory && self.config.is_supported_extension(&file.name) @@ -84,7 +84,7 @@ impl WebDAVDiscovery { } /// Discovers files recursively in directory tree - async fn discover_files_recursive(&self, root_directory: &str) -> Result> { + async fn discover_files_recursive(&self, root_directory: &str) -> Result> { let mut all_files = Vec::new(); let mut directories_to_scan = vec![root_directory.to_string()]; let semaphore = Semaphore::new(self.concurrency_config.max_concurrent_scans); @@ -126,7 +126,7 @@ impl WebDAVDiscovery { } /// Scans a directory and returns both files and subdirectories - async fn scan_directory_with_subdirs(&self, directory_path: &str) -> Result<(Vec, Vec)> { + async fn scan_directory_with_subdirs(&self, directory_path: &str) -> Result<(Vec, Vec)> { let url = self.connection.get_url_for_path(directory_path); let propfind_body = r#" @@ -309,7 +309,7 @@ impl WebDAVDiscovery { } /// Calculates the ratio of supported files in a sample - fn calculate_support_ratio(&self, sample_files: &[FileInfo]) -> f64 { + fn calculate_support_ratio(&self, sample_files: &[FileIngestionInfo]) -> f64 { if sample_files.is_empty() { return 1.0; // Assume all files are supported if no sample } @@ -323,7 +323,7 @@ impl WebDAVDiscovery { } /// Filters files by last modified date (for incremental syncs) - pub fn filter_files_by_date(&self, files: Vec, since: chrono::DateTime) -> Vec { + pub fn filter_files_by_date(&self, files: Vec, since: chrono::DateTime) -> Vec { files .into_iter() .filter(|file| { @@ -335,7 +335,7 @@ impl WebDAVDiscovery { } /// Deduplicates files by ETag or path - pub fn deduplicate_files(&self, files: Vec) -> Vec { + pub fn 
deduplicate_files(&self, files: Vec) -> Vec { let mut seen_etags = HashSet::new(); let mut seen_paths = HashSet::new(); let mut deduplicated = Vec::new(); diff --git a/src/services/webdav/service.rs b/src/services/webdav/service.rs index c7f501b..24a3645 100644 --- a/src/services/webdav/service.rs +++ b/src/services/webdav/service.rs @@ -4,7 +4,7 @@ use tokio::sync::Semaphore; use tracing::{debug, error, info}; use crate::models::{ - FileInfo, WebDAVConnectionResult, WebDAVCrawlEstimate, WebDAVTestConnection, + FileIngestionInfo, WebDAVConnectionResult, WebDAVCrawlEstimate, WebDAVTestConnection, }; use super::config::{WebDAVConfig, RetryConfig, ConcurrencyConfig}; @@ -107,7 +107,7 @@ impl WebDAVService { } /// Discovers all files in watch folders - pub async fn discover_all_files(&self) -> Result> { + pub async fn discover_all_files(&self) -> Result> { info!("🔍 Discovering all files in watch folders"); let mut all_files = Vec::new(); @@ -134,7 +134,7 @@ impl WebDAVService { } /// Discovers files changed since a specific date (for incremental syncs) - pub async fn discover_changed_files(&self, since: chrono::DateTime) -> Result> { + pub async fn discover_changed_files(&self, since: chrono::DateTime) -> Result> { info!("🔍 Discovering files changed since: {}", since); let all_files = self.discover_all_files().await?; @@ -145,7 +145,7 @@ impl WebDAVService { } /// Discovers files in a specific directory - pub async fn discover_files_in_directory(&self, directory_path: &str, recursive: bool) -> Result> { + pub async fn discover_files_in_directory(&self, directory_path: &str, recursive: bool) -> Result> { info!("🔍 Discovering files in directory: {} (recursive: {})", directory_path, recursive); self.discovery.discover_files(directory_path, recursive).await } @@ -181,8 +181,8 @@ impl WebDAVService { Ok(content.to_vec()) } - /// Downloads a file from WebDAV server using FileInfo - pub async fn download_file_info(&self, file_info: &FileInfo) -> Result> { + /// Downloads a file from WebDAV server using FileIngestionInfo + pub async fn download_file_info(&self, file_info: &FileIngestionInfo) -> Result> { let _permit = self.download_semaphore.acquire().await?; debug!("⬇️ Downloading file: {}", file_info.path); @@ -213,7 +213,7 @@ impl WebDAVService { } /// Downloads multiple files concurrently - pub async fn download_files(&self, files: &[FileInfo]) -> Result>)>> { + pub async fn download_files(&self, files: &[FileIngestionInfo]) -> Result>)>> { info!("⬇️ Downloading {} files concurrently", files.len()); let tasks = files.iter().map(|file| { @@ -237,7 +237,7 @@ impl WebDAVService { } /// Gets file metadata without downloading content - pub async fn get_file_metadata(&self, file_path: &str) -> Result { + pub async fn get_file_metadata(&self, file_path: &str) -> Result { debug!("📋 Getting metadata for file: {}", file_path); let url = self.connection.get_url_for_path(file_path); diff --git a/src/services/webdav/subdirectory_edge_cases_tests.rs b/src/services/webdav/subdirectory_edge_cases_tests.rs index ce34c69..6a77e62 100644 --- a/src/services/webdav/subdirectory_edge_cases_tests.rs +++ b/src/services/webdav/subdirectory_edge_cases_tests.rs @@ -1,7 +1,7 @@ #[cfg(test)] mod tests { use super::super::{WebDAVService, WebDAVConfig}; - use crate::models::FileInfo; + use crate::models::FileIngestionInfo; use tokio; use chrono::Utc; use std::collections::BTreeSet; @@ -22,10 +22,10 @@ fn create_test_webdav_service() -> WebDAVService { } // Test scenario that matches the real-world bug: deep nested structure 
with various file types -fn create_complex_nested_structure() -> Vec { +fn create_complex_nested_structure() -> Vec { vec![ // Root directories at different levels - FileInfo { + FileIngestionInfo { path: "/FullerDocuments".to_string(), name: "FullerDocuments".to_string(), size: 0, @@ -39,7 +39,7 @@ fn create_complex_nested_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments".to_string(), name: "JonDocuments".to_string(), size: 0, @@ -54,7 +54,7 @@ fn create_complex_nested_structure() -> Vec { metadata: None, }, // Multiple levels of nesting - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Work".to_string(), name: "Work".to_string(), size: 0, @@ -68,7 +68,7 @@ fn create_complex_nested_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Personal".to_string(), name: "Personal".to_string(), size: 0, @@ -82,7 +82,7 @@ fn create_complex_nested_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Work/Projects".to_string(), name: "Projects".to_string(), size: 0, @@ -96,7 +96,7 @@ fn create_complex_nested_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Work/Reports".to_string(), name: "Reports".to_string(), size: 0, @@ -110,7 +110,7 @@ fn create_complex_nested_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Work/Projects/WebApp".to_string(), name: "WebApp".to_string(), size: 0, @@ -125,7 +125,7 @@ fn create_complex_nested_structure() -> Vec { metadata: None, }, // Files at various nesting levels - this is the key part that was failing - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/index.txt".to_string(), name: "index.txt".to_string(), size: 1500, @@ -139,7 +139,7 @@ fn create_complex_nested_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Work/schedule.pdf".to_string(), name: "schedule.pdf".to_string(), size: 2048000, @@ -153,7 +153,7 @@ fn create_complex_nested_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Work/Projects/proposal.docx".to_string(), name: "proposal.docx".to_string(), size: 1024000, @@ -167,7 +167,7 @@ fn create_complex_nested_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Work/Projects/WebApp/design.pdf".to_string(), name: "design.pdf".to_string(), size: 3072000, @@ -181,7 +181,7 @@ fn create_complex_nested_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Work/Reports/monthly.pdf".to_string(), name: "monthly.pdf".to_string(), size: 4096000, @@ -195,7 +195,7 @@ fn create_complex_nested_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Personal/diary.txt".to_string(), name: "diary.txt".to_string(), size: 5120, diff --git a/src/webdav_xml_parser.rs b/src/webdav_xml_parser.rs index 
3d1e734..00b2bb3 100644 --- a/src/webdav_xml_parser.rs +++ b/src/webdav_xml_parser.rs @@ -5,7 +5,7 @@ use quick_xml::reader::Reader; use std::str; use serde_json; -use crate::models::FileInfo; +use crate::models::FileIngestionInfo; #[derive(Debug, Default)] struct PropFindResponse { @@ -24,7 +24,7 @@ struct PropFindResponse { metadata: Option, } -pub fn parse_propfind_response(xml_text: &str) -> Result> { +pub fn parse_propfind_response(xml_text: &str) -> Result> { let mut reader = Reader::from_str(xml_text); reader.config_mut().trim_text(true); @@ -200,7 +200,7 @@ pub fn parse_propfind_response(xml_text: &str) -> Result> { // Use the metadata collected during parsing let metadata = resp.metadata; - let file_info = FileInfo { + let file_info = FileIngestionInfo { path: resp.href.clone(), name, size: resp.content_length.unwrap_or(0), @@ -248,7 +248,7 @@ pub fn parse_propfind_response(xml_text: &str) -> Result> { /// Parse PROPFIND response including both files and directories /// This is used for shallow directory scans where we need to track directory structure -pub fn parse_propfind_response_with_directories(xml_text: &str) -> Result> { +pub fn parse_propfind_response_with_directories(xml_text: &str) -> Result> { let mut reader = Reader::from_str(xml_text); reader.config_mut().trim_text(true); @@ -415,7 +415,7 @@ pub fn parse_propfind_response_with_directories(xml_text: &str) -> Result String { } // Helper function to create test file info -fn create_test_file_info(name: &str, path: &str, content: &[u8]) -> FileInfo { - FileInfo { +fn create_test_file_info(name: &str, path: &str, content: &[u8]) -> FileIngestionInfo { + FileIngestionInfo { name: name.to_string(), path: path.to_string(), size: content.len() as i64, diff --git a/tests/integration_webdav_first_time_scan_tests.rs b/tests/integration_webdav_first_time_scan_tests.rs index f49e6c6..6f4adbc 100644 --- a/tests/integration_webdav_first_time_scan_tests.rs +++ b/tests/integration_webdav_first_time_scan_tests.rs @@ -2,7 +2,7 @@ use tokio; use uuid::Uuid; use chrono::Utc; use anyhow::Result; -use readur::models::{FileInfo, CreateWebDAVDirectory, CreateUser, UserRole}; +use readur::models::{FileIngestionInfo, CreateWebDAVDirectory, CreateUser, UserRole}; use readur::services::webdav::{WebDAVService, WebDAVConfig}; use readur::db::Database; @@ -22,10 +22,10 @@ fn create_test_webdav_service() -> WebDAVService { } // Mock files structure that represents a real directory with subdirectories -fn mock_realistic_directory_structure() -> Vec { +fn mock_realistic_directory_structure() -> Vec { vec![ // Parent root directory - FileInfo { + FileIngestionInfo { path: "/FullerDocuments".to_string(), name: "FullerDocuments".to_string(), size: 0, @@ -40,7 +40,7 @@ fn mock_realistic_directory_structure() -> Vec { metadata: None, }, // Root directory - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments".to_string(), name: "JonDocuments".to_string(), size: 0, @@ -55,7 +55,7 @@ fn mock_realistic_directory_structure() -> Vec { metadata: None, }, // Subdirectory level 1 - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Projects".to_string(), name: "Projects".to_string(), size: 0, @@ -69,7 +69,7 @@ fn mock_realistic_directory_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Archive".to_string(), name: "Archive".to_string(), size: 0, @@ -84,7 +84,7 @@ fn mock_realistic_directory_structure() -> Vec { metadata: None, }, // 
Subdirectory level 2 - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Projects/WebDev".to_string(), name: "WebDev".to_string(), size: 0, @@ -98,7 +98,7 @@ fn mock_realistic_directory_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Projects/Mobile".to_string(), name: "Mobile".to_string(), size: 0, @@ -113,7 +113,7 @@ fn mock_realistic_directory_structure() -> Vec { metadata: None, }, // Files in various directories - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/readme.txt".to_string(), name: "readme.txt".to_string(), size: 1024, @@ -127,7 +127,7 @@ fn mock_realistic_directory_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Projects/project-overview.pdf".to_string(), name: "project-overview.pdf".to_string(), size: 2048000, @@ -141,7 +141,7 @@ fn mock_realistic_directory_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Projects/WebDev/website-specs.docx".to_string(), name: "website-specs.docx".to_string(), size: 512000, @@ -155,7 +155,7 @@ fn mock_realistic_directory_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Projects/Mobile/app-design.pdf".to_string(), name: "app-design.pdf".to_string(), size: 1536000, @@ -169,7 +169,7 @@ fn mock_realistic_directory_structure() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/FullerDocuments/JonDocuments/Archive/old-notes.txt".to_string(), name: "old-notes.txt".to_string(), size: 256, diff --git a/tests/integration_webdav_hash_duplicate_tests.rs b/tests/integration_webdav_hash_duplicate_tests.rs index 7b97e48..ffab86f 100644 --- a/tests/integration_webdav_hash_duplicate_tests.rs +++ b/tests/integration_webdav_hash_duplicate_tests.rs @@ -8,7 +8,7 @@ use readur::{ AppState, db::Database, config::Config, - models::{FileInfo, CreateWebDAVFile, Document}, + models::{FileIngestionInfo, CreateWebDAVFile, Document}, }; // Helper function to calculate file hash @@ -20,8 +20,8 @@ fn calculate_file_hash(data: &[u8]) -> String { } // Helper function to create test file info -fn create_test_file_info(name: &str, path: &str, size: i64) -> FileInfo { - FileInfo { +fn create_test_file_info(name: &str, path: &str, size: i64) -> FileIngestionInfo { + FileIngestionInfo { name: name.to_string(), path: path.to_string(), size, @@ -282,7 +282,7 @@ async fn test_webdav_sync_etag_change_detection() -> Result<()> { assert_eq!(existing_file.etag, old_etag); // Simulate file with new ETag (indicating change) - let file_info = FileInfo { + let file_info = FileIngestionInfo { name: "updated.pdf".to_string(), path: webdav_path.to_string(), size: 1024, diff --git a/tests/unit_webdav_directory_tracking_tests.rs b/tests/unit_webdav_directory_tracking_tests.rs index 18dc3b7..55a324f 100644 --- a/tests/unit_webdav_directory_tracking_tests.rs +++ b/tests/unit_webdav_directory_tracking_tests.rs @@ -1,5 +1,5 @@ use readur::services::webdav::{WebDAVService, WebDAVConfig}; -use readur::models::FileInfo; +use readur::models::FileIngestionInfo; use tokio; use chrono::Utc; @@ -38,10 +38,10 @@ fn mock_directory_etag_response(etag: &str) -> String { } // Mock complex nested directory structure -fn 
mock_nested_directory_files() -> Vec { +fn mock_nested_directory_files() -> Vec { vec![ // Root directory - FileInfo { + FileIngestionInfo { path: "/Documents".to_string(), name: "Documents".to_string(), size: 0, @@ -56,7 +56,7 @@ fn mock_nested_directory_files() -> Vec { metadata: None, }, // Level 1 directories - FileInfo { + FileIngestionInfo { path: "/Documents/2024".to_string(), name: "2024".to_string(), size: 0, @@ -70,7 +70,7 @@ fn mock_nested_directory_files() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/Documents/Archive".to_string(), name: "Archive".to_string(), size: 0, @@ -85,7 +85,7 @@ fn mock_nested_directory_files() -> Vec { metadata: None, }, // Level 2 directories - FileInfo { + FileIngestionInfo { path: "/Documents/2024/Q1".to_string(), name: "Q1".to_string(), size: 0, @@ -99,7 +99,7 @@ fn mock_nested_directory_files() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/Documents/2024/Q2".to_string(), name: "Q2".to_string(), size: 0, @@ -114,7 +114,7 @@ fn mock_nested_directory_files() -> Vec { metadata: None, }, // Level 3 directory - FileInfo { + FileIngestionInfo { path: "/Documents/2024/Q1/Reports".to_string(), name: "Reports".to_string(), size: 0, @@ -129,7 +129,7 @@ fn mock_nested_directory_files() -> Vec { metadata: None, }, // Files at various levels - FileInfo { + FileIngestionInfo { path: "/Documents/root-file.pdf".to_string(), name: "root-file.pdf".to_string(), size: 1024000, @@ -143,7 +143,7 @@ fn mock_nested_directory_files() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/Documents/2024/annual-report.pdf".to_string(), name: "annual-report.pdf".to_string(), size: 2048000, @@ -157,7 +157,7 @@ fn mock_nested_directory_files() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/Documents/2024/Q1/q1-summary.pdf".to_string(), name: "q1-summary.pdf".to_string(), size: 512000, @@ -171,7 +171,7 @@ fn mock_nested_directory_files() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/Documents/2024/Q1/Reports/detailed-report.pdf".to_string(), name: "detailed-report.pdf".to_string(), size: 4096000, @@ -185,7 +185,7 @@ fn mock_nested_directory_files() -> Vec { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/Documents/Archive/old-document.pdf".to_string(), name: "old-document.pdf".to_string(), size: 256000, diff --git a/tests/unit_webdav_edge_cases_tests.rs b/tests/unit_webdav_edge_cases_tests.rs index 8fd5db3..9340fd7 100644 --- a/tests/unit_webdav_edge_cases_tests.rs +++ b/tests/unit_webdav_edge_cases_tests.rs @@ -1,5 +1,5 @@ use readur::services::webdav::{WebDAVService, WebDAVConfig}; -use readur::models::FileInfo; +use readur::models::FileIngestionInfo; use tokio; use chrono::Utc; @@ -23,7 +23,7 @@ async fn test_empty_directory_tracking() { let service = create_test_webdav_service(); // Test completely empty directory - let empty_files: Vec = vec![]; + let empty_files: Vec = vec![]; // Test the directory extraction logic that happens in track_subdirectories_recursively let mut all_directories = std::collections::BTreeSet::new(); @@ -57,7 +57,7 @@ async fn test_directory_only_structure() { // Test structure with only directories, no files let directory_only_files = vec![ - FileInfo { + FileIngestionInfo { path: "/Documents".to_string(), name: 
"Documents".to_string(), size: 0, @@ -71,7 +71,7 @@ async fn test_directory_only_structure() { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/Documents/Empty1".to_string(), name: "Empty1".to_string(), size: 0, @@ -85,7 +85,7 @@ async fn test_directory_only_structure() { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/Documents/Empty2".to_string(), name: "Empty2".to_string(), size: 0, @@ -136,7 +136,7 @@ async fn test_very_deep_nesting() { let deep_files = vec![ // All directories in the path - FileInfo { + FileIngestionInfo { path: "/Documents".to_string(), name: "Documents".to_string(), size: 0, @@ -151,7 +151,7 @@ async fn test_very_deep_nesting() { metadata: None, }, // All intermediate directories from L1 to L10 - FileInfo { + FileIngestionInfo { path: "/Documents/L1".to_string(), name: "L1".to_string(), size: 0, @@ -165,7 +165,7 @@ async fn test_very_deep_nesting() { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/Documents/L1/L2".to_string(), name: "L2".to_string(), size: 0, @@ -179,7 +179,7 @@ async fn test_very_deep_nesting() { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/Documents/L1/L2/L3".to_string(), name: "L3".to_string(), size: 0, @@ -193,7 +193,7 @@ async fn test_very_deep_nesting() { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: deep_path.to_string(), name: "L10".to_string(), size: 0, @@ -208,7 +208,7 @@ async fn test_very_deep_nesting() { metadata: None, }, // File at the deepest level - FileInfo { + FileIngestionInfo { path: file_path.clone(), name: "deep-file.pdf".to_string(), size: 1024000, @@ -266,7 +266,7 @@ async fn test_special_characters_in_paths() { // Test paths with special characters, spaces, unicode let special_files = vec![ - FileInfo { + FileIngestionInfo { path: "/Documents/Folder with spaces".to_string(), name: "Folder with spaces".to_string(), size: 0, @@ -280,7 +280,7 @@ async fn test_special_characters_in_paths() { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/Documents/Folder-with-dashes".to_string(), name: "Folder-with-dashes".to_string(), size: 0, @@ -294,7 +294,7 @@ async fn test_special_characters_in_paths() { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/Documents/Документы".to_string(), // Cyrillic name: "Документы".to_string(), size: 0, @@ -308,7 +308,7 @@ async fn test_special_characters_in_paths() { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/Documents/Folder with spaces/file with spaces.pdf".to_string(), name: "file with spaces.pdf".to_string(), size: 1024000, @@ -686,7 +686,7 @@ async fn test_large_directory_structures() { let mut large_files = Vec::new(); // Add root directory - large_files.push(FileInfo { + large_files.push(FileIngestionInfo { path: "/Documents".to_string(), name: "Documents".to_string(), size: 0, @@ -706,7 +706,7 @@ async fn test_large_directory_structures() { let level1_path = format!("/Documents/Dir{:03}", i); // Add level-1 directory - large_files.push(FileInfo { + large_files.push(FileIngestionInfo { path: level1_path.clone(), name: format!("Dir{:03}", i), size: 0, @@ -724,7 +724,7 @@ async fn test_large_directory_structures() { // Add 10 subdirectories for j in 0..10 { let level2_path = format!("{}/SubDir{:02}", level1_path, j); 
- large_files.push(FileInfo { + large_files.push(FileIngestionInfo { path: level2_path.clone(), name: format!("SubDir{:02}", j), size: 0, @@ -741,7 +741,7 @@ async fn test_large_directory_structures() { // Add 5 files in each subdirectory for k in 0..5 { - large_files.push(FileInfo { + large_files.push(FileIngestionInfo { path: format!("{}/file{:02}.pdf", level2_path, k), name: format!("file{:02}.pdf", k), size: 1024 * (k + 1) as i64, diff --git a/tests/unit_webdav_enhanced_unit_tests.rs b/tests/unit_webdav_enhanced_unit_tests.rs index d179f41..91f445d 100644 --- a/tests/unit_webdav_enhanced_unit_tests.rs +++ b/tests/unit_webdav_enhanced_unit_tests.rs @@ -1,6 +1,6 @@ use readur::services::webdav::{WebDAVService, WebDAVConfig, RetryConfig}; use readur::webdav_xml_parser::parse_propfind_response; -use readur::models::FileInfo; +use readur::models::FileIngestionInfo; use readur::models::*; use chrono::Utc; use uuid::Uuid; @@ -607,7 +607,7 @@ fn test_special_characters_in_paths() { ]; for path in test_paths { - let file_info = FileInfo { + let file_info = FileIngestionInfo { path: path.to_string(), name: std::path::Path::new(path) .file_name() diff --git a/tests/unit_webdav_smart_scan_logic_tests.rs b/tests/unit_webdav_smart_scan_logic_tests.rs index ea8607d..cf8f9c6 100644 --- a/tests/unit_webdav_smart_scan_logic_tests.rs +++ b/tests/unit_webdav_smart_scan_logic_tests.rs @@ -2,7 +2,7 @@ use tokio; use uuid::Uuid; use chrono::Utc; use std::collections::HashMap; -use readur::models::FileInfo; +use readur::models::FileIngestionInfo; use readur::services::webdav::{WebDAVService, WebDAVConfig}; // Helper function to create test WebDAV service for smart scanning @@ -35,10 +35,10 @@ fn create_generic_webdav_service() -> WebDAVService { } // Mock directory structure with subdirectories for testing -fn create_mock_directory_structure() -> Vec { +fn create_mock_directory_structure() -> Vec { vec![ // Root directory - FileInfo { + FileIngestionInfo { path: "/Documents".to_string(), name: "Documents".to_string(), size: 0, @@ -53,7 +53,7 @@ fn create_mock_directory_structure() -> Vec { metadata: None, }, // Subdirectory 1 - Changed - FileInfo { + FileIngestionInfo { path: "/Documents/Projects".to_string(), name: "Projects".to_string(), size: 0, @@ -68,7 +68,7 @@ fn create_mock_directory_structure() -> Vec { metadata: None, }, // File in changed subdirectory - FileInfo { + FileIngestionInfo { path: "/Documents/Projects/report.pdf".to_string(), name: "report.pdf".to_string(), size: 1024000, @@ -83,7 +83,7 @@ fn create_mock_directory_structure() -> Vec { metadata: None, }, // Subdirectory 2 - Unchanged - FileInfo { + FileIngestionInfo { path: "/Documents/Archive".to_string(), name: "Archive".to_string(), size: 0, diff --git a/tests/unit_webdav_targeted_rescan_tests.rs b/tests/unit_webdav_targeted_rescan_tests.rs index 20d648e..330c07b 100644 --- a/tests/unit_webdav_targeted_rescan_tests.rs +++ b/tests/unit_webdav_targeted_rescan_tests.rs @@ -1,5 +1,5 @@ use readur::services::webdav::{WebDAVService, WebDAVConfig}; -use readur::models::FileInfo; +use readur::models::FileIngestionInfo; use tokio; use chrono::Utc; @@ -98,7 +98,7 @@ async fn test_update_single_directory_tracking() { // Create mock files representing a shallow directory scan let files = vec![ - FileInfo { + FileIngestionInfo { path: "/Documents".to_string(), name: "Documents".to_string(), size: 0, @@ -112,7 +112,7 @@ async fn test_update_single_directory_tracking() { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + 
FileIngestionInfo { path: "/Documents/file1.pdf".to_string(), name: "file1.pdf".to_string(), size: 1024000, @@ -126,7 +126,7 @@ async fn test_update_single_directory_tracking() { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/Documents/file2.pdf".to_string(), name: "file2.pdf".to_string(), size: 2048000, @@ -140,7 +140,7 @@ async fn test_update_single_directory_tracking() { group: Some("admin".to_string()), metadata: None, }, - FileInfo { + FileIngestionInfo { path: "/Documents/SubFolder".to_string(), name: "SubFolder".to_string(), size: 0, diff --git a/tests/unit_webdav_unit_tests.rs b/tests/unit_webdav_unit_tests.rs index 5ac7349..52d12e5 100644 --- a/tests/unit_webdav_unit_tests.rs +++ b/tests/unit_webdav_unit_tests.rs @@ -1,5 +1,5 @@ use readur::services::webdav::{WebDAVService, WebDAVConfig}; -use readur::models::FileInfo; +use readur::models::FileIngestionInfo; use readur::models::*; use tokio;
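Given the commit's promise of tests, a minimal unit test for the new metadata path, in the style of the test files above, could look like this (the sample string and asserted keys follow `extract_content_metadata`'s text branch; the exact values are illustrative):

```rust
use readur::metadata_extraction::extract_content_metadata;

#[tokio::test]
async fn extracts_basic_text_statistics() {
    let text = b"the quick brown fox jumps over the lazy dog";

    let metadata = extract_content_metadata(text, "text/plain", "sample.txt")
        .await
        .expect("extraction should not fail")
        .expect("plain text should produce metadata");

    // The text branch records simple statistics about the content.
    assert_eq!(metadata["word_count"], 9);
    assert_eq!(metadata["line_count"], 1);
    // Filename-derived metadata is appended for every file type.
    assert_eq!(metadata["file_extension"], "txt");
}
```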