use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use sqlx::FromRow; use uuid::Uuid; use utoipa::{ToSchema, IntoParams}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)] pub enum UserRole { #[serde(rename = "admin")] Admin, #[serde(rename = "user")] User, } impl std::fmt::Display for UserRole { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { UserRole::Admin => write!(f, "admin"), UserRole::User => write!(f, "user"), } } } impl TryFrom for UserRole { type Error = String; fn try_from(value: String) -> Result { match value.as_str() { "admin" => Ok(UserRole::Admin), "user" => Ok(UserRole::User), _ => Err(format!("Invalid user role: {}", value)), } } } #[derive(Debug, Clone, Serialize, Deserialize, FromRow, ToSchema)] pub struct User { pub id: Uuid, pub username: String, pub email: String, pub password_hash: String, #[sqlx(try_from = "String")] pub role: UserRole, pub created_at: DateTime, pub updated_at: DateTime, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct CreateUser { pub username: String, pub email: String, pub password: String, #[serde(default = "default_user_role")] pub role: Option, } fn default_user_role() -> Option { Some(UserRole::User) } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct LoginRequest { pub username: String, pub password: String, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct LoginResponse { pub token: String, pub user: UserResponse, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct UserResponse { pub id: Uuid, pub username: String, pub email: String, } #[derive(Debug, Clone, Serialize, Deserialize, FromRow)] pub struct Document { pub id: Uuid, pub filename: String, pub original_filename: String, pub file_path: String, pub file_size: i64, pub mime_type: String, pub content: Option, pub ocr_text: Option, pub ocr_confidence: Option, pub ocr_word_count: Option, pub ocr_processing_time_ms: Option, pub ocr_status: Option, pub ocr_error: Option, pub ocr_completed_at: Option>, pub tags: Vec, pub created_at: DateTime, pub updated_at: DateTime, pub user_id: Uuid, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct DocumentResponse { /// Unique identifier for the document pub id: Uuid, /// Current filename in the system pub filename: String, /// Original filename when uploaded pub original_filename: String, /// File size in bytes pub file_size: i64, /// MIME type of the file pub mime_type: String, /// Tags associated with the document pub tags: Vec, /// When the document was created pub created_at: DateTime, /// Whether OCR text has been extracted pub has_ocr_text: bool, /// OCR confidence score (0-100, higher is better) pub ocr_confidence: Option, /// Number of words detected by OCR pub ocr_word_count: Option, /// Time taken for OCR processing in milliseconds pub ocr_processing_time_ms: Option, /// Current status of OCR processing (pending, processing, completed, failed) pub ocr_status: Option, } #[derive(Debug, Serialize, Deserialize, ToSchema, IntoParams)] pub struct SearchRequest { /// Search query text (searches both document content and OCR-extracted text) pub query: String, /// Filter by specific tags pub tags: Option>, /// Filter by MIME types (e.g., "application/pdf", "image/png") pub mime_types: Option>, /// Maximum number of results to return (default: 25) pub limit: Option, /// Number of results to skip for pagination (default: 0) pub offset: Option, /// Whether to include text snippets with search matches (default: true) pub include_snippets: Option, /// Length of text snippets in characters (default: 200) pub snippet_length: Option, /// Search algorithm to use (default: simple) pub search_mode: Option, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub enum SearchMode { /// Simple text search with basic word matching #[serde(rename = "simple")] Simple, /// Exact phrase matching #[serde(rename = "phrase")] Phrase, /// Fuzzy search using similarity matching (good for typos and partial matches) #[serde(rename = "fuzzy")] Fuzzy, /// Boolean search with AND, OR, NOT operators #[serde(rename = "boolean")] Boolean, } impl Default for SearchMode { fn default() -> Self { SearchMode::Simple } } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct SearchSnippet { /// The snippet text content pub text: String, /// Starting character position in the original document pub start_offset: i32, /// Ending character position in the original document pub end_offset: i32, /// Ranges within the snippet that should be highlighted pub highlight_ranges: Vec, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct HighlightRange { /// Start position of highlight within the snippet pub start: i32, /// End position of highlight within the snippet pub end: i32, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct EnhancedDocumentResponse { /// Unique identifier for the document pub id: Uuid, /// Current filename in the system pub filename: String, /// Original filename when uploaded pub original_filename: String, /// File size in bytes pub file_size: i64, /// MIME type of the file pub mime_type: String, /// Tags associated with the document pub tags: Vec, /// When the document was created pub created_at: DateTime, /// Whether OCR text has been extracted pub has_ocr_text: bool, /// OCR confidence score (0-100, higher is better) pub ocr_confidence: Option, /// Number of words detected by OCR pub ocr_word_count: Option, /// Time taken for OCR processing in milliseconds pub ocr_processing_time_ms: Option, /// Current status of OCR processing (pending, processing, completed, failed) pub ocr_status: Option, /// Search relevance score (0-1, higher is more relevant) pub search_rank: Option, /// Text snippets showing search matches with highlights pub snippets: Vec, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct SearchResponse { /// List of matching documents with enhanced metadata and snippets pub documents: Vec, /// Total number of documents matching the search criteria pub total: i64, /// Time taken to execute the search in milliseconds pub query_time_ms: u64, /// Search suggestions for query improvement pub suggestions: Vec, } impl From for DocumentResponse { fn from(doc: Document) -> Self { Self { id: doc.id, filename: doc.filename, original_filename: doc.original_filename, file_size: doc.file_size, mime_type: doc.mime_type, tags: doc.tags, created_at: doc.created_at, has_ocr_text: doc.ocr_text.is_some(), ocr_confidence: doc.ocr_confidence, ocr_word_count: doc.ocr_word_count, ocr_processing_time_ms: doc.ocr_processing_time_ms, ocr_status: doc.ocr_status, } } } impl From for UserResponse { fn from(user: User) -> Self { Self { id: user.id, username: user.username, email: user.email, } } } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct UpdateUser { pub username: Option, pub email: Option, pub password: Option, } #[derive(Debug, Clone, Serialize, Deserialize, FromRow, ToSchema)] pub struct Settings { pub id: Uuid, pub user_id: Uuid, pub ocr_language: String, pub concurrent_ocr_jobs: i32, pub ocr_timeout_seconds: i32, pub max_file_size_mb: i32, pub allowed_file_types: Vec, pub auto_rotate_images: bool, pub enable_image_preprocessing: bool, pub search_results_per_page: i32, pub search_snippet_length: i32, pub fuzzy_search_threshold: f32, pub retention_days: Option, pub enable_auto_cleanup: bool, pub enable_compression: bool, pub memory_limit_mb: i32, pub cpu_priority: String, pub enable_background_ocr: bool, pub ocr_page_segmentation_mode: i32, pub ocr_engine_mode: i32, pub ocr_min_confidence: f32, pub ocr_dpi: i32, pub ocr_enhance_contrast: bool, pub ocr_remove_noise: bool, pub ocr_detect_orientation: bool, pub ocr_whitelist_chars: Option, pub ocr_blacklist_chars: Option, pub webdav_enabled: bool, pub webdav_server_url: Option, pub webdav_username: Option, pub webdav_password: Option, pub webdav_watch_folders: Vec, pub webdav_file_extensions: Vec, pub webdav_auto_sync: bool, pub webdav_sync_interval_minutes: i32, pub created_at: DateTime, pub updated_at: DateTime, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct SettingsResponse { pub ocr_language: String, pub concurrent_ocr_jobs: i32, pub ocr_timeout_seconds: i32, pub max_file_size_mb: i32, pub allowed_file_types: Vec, pub auto_rotate_images: bool, pub enable_image_preprocessing: bool, pub search_results_per_page: i32, pub search_snippet_length: i32, pub fuzzy_search_threshold: f32, pub retention_days: Option, pub enable_auto_cleanup: bool, pub enable_compression: bool, pub memory_limit_mb: i32, pub cpu_priority: String, pub enable_background_ocr: bool, pub ocr_page_segmentation_mode: i32, pub ocr_engine_mode: i32, pub ocr_min_confidence: f32, pub ocr_dpi: i32, pub ocr_enhance_contrast: bool, pub ocr_remove_noise: bool, pub ocr_detect_orientation: bool, pub ocr_whitelist_chars: Option, pub ocr_blacklist_chars: Option, pub webdav_enabled: bool, pub webdav_server_url: Option, pub webdav_username: Option, pub webdav_password: Option, pub webdav_watch_folders: Vec, pub webdav_file_extensions: Vec, pub webdav_auto_sync: bool, pub webdav_sync_interval_minutes: i32, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct UpdateSettings { pub ocr_language: Option, pub concurrent_ocr_jobs: Option, pub ocr_timeout_seconds: Option, pub max_file_size_mb: Option, pub allowed_file_types: Option>, pub auto_rotate_images: Option, pub enable_image_preprocessing: Option, pub search_results_per_page: Option, pub search_snippet_length: Option, pub fuzzy_search_threshold: Option, pub retention_days: Option>, pub enable_auto_cleanup: Option, pub enable_compression: Option, pub memory_limit_mb: Option, pub cpu_priority: Option, pub enable_background_ocr: Option, pub ocr_page_segmentation_mode: Option, pub ocr_engine_mode: Option, pub ocr_min_confidence: Option, pub ocr_dpi: Option, pub ocr_enhance_contrast: Option, pub ocr_remove_noise: Option, pub ocr_detect_orientation: Option, pub ocr_whitelist_chars: Option>, pub ocr_blacklist_chars: Option>, pub webdav_enabled: Option, pub webdav_server_url: Option>, pub webdav_username: Option>, pub webdav_password: Option>, pub webdav_watch_folders: Option>, pub webdav_file_extensions: Option>, pub webdav_auto_sync: Option, pub webdav_sync_interval_minutes: Option, } impl From for SettingsResponse { fn from(settings: Settings) -> Self { Self { ocr_language: settings.ocr_language, concurrent_ocr_jobs: settings.concurrent_ocr_jobs, ocr_timeout_seconds: settings.ocr_timeout_seconds, max_file_size_mb: settings.max_file_size_mb, allowed_file_types: settings.allowed_file_types, auto_rotate_images: settings.auto_rotate_images, enable_image_preprocessing: settings.enable_image_preprocessing, search_results_per_page: settings.search_results_per_page, search_snippet_length: settings.search_snippet_length, fuzzy_search_threshold: settings.fuzzy_search_threshold, retention_days: settings.retention_days, enable_auto_cleanup: settings.enable_auto_cleanup, enable_compression: settings.enable_compression, memory_limit_mb: settings.memory_limit_mb, cpu_priority: settings.cpu_priority, enable_background_ocr: settings.enable_background_ocr, ocr_page_segmentation_mode: settings.ocr_page_segmentation_mode, ocr_engine_mode: settings.ocr_engine_mode, ocr_min_confidence: settings.ocr_min_confidence, ocr_dpi: settings.ocr_dpi, ocr_enhance_contrast: settings.ocr_enhance_contrast, ocr_remove_noise: settings.ocr_remove_noise, ocr_detect_orientation: settings.ocr_detect_orientation, ocr_whitelist_chars: settings.ocr_whitelist_chars, ocr_blacklist_chars: settings.ocr_blacklist_chars, webdav_enabled: settings.webdav_enabled, webdav_server_url: settings.webdav_server_url, webdav_username: settings.webdav_username, webdav_password: settings.webdav_password, webdav_watch_folders: settings.webdav_watch_folders, webdav_file_extensions: settings.webdav_file_extensions, webdav_auto_sync: settings.webdav_auto_sync, webdav_sync_interval_minutes: settings.webdav_sync_interval_minutes, } } } impl Default for Settings { fn default() -> Self { Self { id: Uuid::new_v4(), user_id: Uuid::nil(), ocr_language: "eng".to_string(), concurrent_ocr_jobs: 4, ocr_timeout_seconds: 300, max_file_size_mb: 50, allowed_file_types: vec![ "pdf".to_string(), "png".to_string(), "jpg".to_string(), "jpeg".to_string(), "tiff".to_string(), "bmp".to_string(), "txt".to_string(), ], auto_rotate_images: true, enable_image_preprocessing: true, search_results_per_page: 25, search_snippet_length: 200, fuzzy_search_threshold: 0.8, retention_days: None, enable_auto_cleanup: false, enable_compression: false, memory_limit_mb: 512, cpu_priority: "normal".to_string(), enable_background_ocr: true, ocr_page_segmentation_mode: 3, // PSM_AUTO_OSD - Fully automatic page segmentation, but no OSD ocr_engine_mode: 3, // OEM_DEFAULT - Default, based on what is available ocr_min_confidence: 30.0, // Minimum confidence threshold (0-100) ocr_dpi: 300, // Optimal DPI for OCR ocr_enhance_contrast: true, // Enable contrast enhancement ocr_remove_noise: true, // Enable noise removal ocr_detect_orientation: true, // Enable orientation detection ocr_whitelist_chars: None, // No character whitelist by default ocr_blacklist_chars: None, // No character blacklist by default webdav_enabled: false, webdav_server_url: None, webdav_username: None, webdav_password: None, webdav_watch_folders: vec!["/Documents".to_string()], webdav_file_extensions: vec![ "pdf".to_string(), "png".to_string(), "jpg".to_string(), "jpeg".to_string(), "tiff".to_string(), "bmp".to_string(), "txt".to_string(), ], webdav_auto_sync: false, webdav_sync_interval_minutes: 60, created_at: Utc::now(), updated_at: Utc::now(), } } } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct WebDAVFolderInfo { pub path: String, pub total_files: i64, pub supported_files: i64, pub estimated_time_hours: f32, pub total_size_mb: f64, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct WebDAVCrawlEstimate { pub folders: Vec, pub total_files: i64, pub total_supported_files: i64, pub total_estimated_time_hours: f32, pub total_size_mb: f64, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct WebDAVTestConnection { pub server_url: String, pub username: String, pub password: String, pub server_type: Option, // "nextcloud", "owncloud", "generic" } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct WebDAVConnectionResult { pub success: bool, pub message: String, pub server_version: Option, pub server_type: Option, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct WebDAVSyncStatus { pub is_running: bool, pub last_sync: Option>, pub files_processed: i64, pub files_remaining: i64, pub current_folder: Option, pub errors: Vec, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct Notification { pub id: Uuid, pub user_id: Uuid, pub notification_type: String, pub title: String, pub message: String, pub read: bool, pub action_url: Option, pub metadata: Option, pub created_at: DateTime, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct CreateNotification { pub notification_type: String, pub title: String, pub message: String, pub action_url: Option, pub metadata: Option, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct NotificationSummary { pub unread_count: i64, pub recent_notifications: Vec, } #[derive(Debug, Serialize, Deserialize)] pub struct WebDAVSyncState { pub id: Uuid, pub user_id: Uuid, pub last_sync_at: Option>, pub sync_cursor: Option, pub is_running: bool, pub files_processed: i64, pub files_remaining: i64, pub current_folder: Option, pub errors: Vec, pub created_at: DateTime, pub updated_at: DateTime, } #[derive(Debug, Serialize, Deserialize)] pub struct UpdateWebDAVSyncState { pub last_sync_at: Option>, pub sync_cursor: Option, pub is_running: bool, pub files_processed: i64, pub files_remaining: i64, pub current_folder: Option, pub errors: Vec, } #[derive(Debug, Serialize, Deserialize)] pub struct WebDAVFile { pub id: Uuid, pub user_id: Uuid, pub webdav_path: String, pub etag: String, pub last_modified: Option>, pub file_size: i64, pub mime_type: String, pub document_id: Option, pub sync_status: String, pub sync_error: Option, pub created_at: DateTime, pub updated_at: DateTime, } #[derive(Debug, Serialize, Deserialize)] pub struct CreateWebDAVFile { pub user_id: Uuid, pub webdav_path: String, pub etag: String, pub last_modified: Option>, pub file_size: i64, pub mime_type: String, pub document_id: Option, pub sync_status: String, pub sync_error: Option, } #[derive(Debug, Clone)] pub struct FileInfo { pub path: String, pub name: String, pub size: i64, pub mime_type: String, pub last_modified: Option>, pub etag: String, pub is_directory: bool, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)] pub enum SourceType { #[serde(rename = "webdav")] WebDAV, #[serde(rename = "local_folder")] LocalFolder, #[serde(rename = "s3")] S3, } impl std::fmt::Display for SourceType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { SourceType::WebDAV => write!(f, "webdav"), SourceType::LocalFolder => write!(f, "local_folder"), SourceType::S3 => write!(f, "s3"), } } } impl TryFrom for SourceType { type Error = String; fn try_from(value: String) -> Result { match value.as_str() { "webdav" => Ok(SourceType::WebDAV), "local_folder" => Ok(SourceType::LocalFolder), "s3" => Ok(SourceType::S3), _ => Err(format!("Invalid source type: {}", value)), } } } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)] pub enum SourceStatus { #[serde(rename = "idle")] Idle, #[serde(rename = "syncing")] Syncing, #[serde(rename = "error")] Error, } impl std::fmt::Display for SourceStatus { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { SourceStatus::Idle => write!(f, "idle"), SourceStatus::Syncing => write!(f, "syncing"), SourceStatus::Error => write!(f, "error"), } } } impl TryFrom for SourceStatus { type Error = String; fn try_from(value: String) -> Result>::Error> { match value.as_str() { "idle" => Ok(SourceStatus::Idle), "syncing" => Ok(SourceStatus::Syncing), "error" => Ok(SourceStatus::Error), _ => Err(format!("Invalid source status: {}", value)), } } } #[derive(Debug, Clone, Serialize, Deserialize, FromRow, ToSchema)] pub struct Source { pub id: Uuid, pub user_id: Uuid, pub name: String, #[sqlx(try_from = "String")] pub source_type: SourceType, pub enabled: bool, pub config: serde_json::Value, #[sqlx(try_from = "String")] pub status: SourceStatus, pub last_sync_at: Option>, pub last_error: Option, pub last_error_at: Option>, pub total_files_synced: i64, pub total_files_pending: i64, pub total_size_bytes: i64, pub created_at: DateTime, pub updated_at: DateTime, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct SourceResponse { pub id: Uuid, pub name: String, pub source_type: SourceType, pub enabled: bool, pub config: serde_json::Value, pub status: SourceStatus, pub last_sync_at: Option>, pub last_error: Option, pub last_error_at: Option>, pub total_files_synced: i64, pub total_files_pending: i64, pub total_size_bytes: i64, pub created_at: DateTime, pub updated_at: DateTime, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct CreateSource { pub name: String, pub source_type: SourceType, pub enabled: Option, pub config: serde_json::Value, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct UpdateSource { pub name: Option, pub enabled: Option, pub config: Option, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct SourceWithStats { pub source: SourceResponse, pub recent_documents: Vec, pub sync_progress: Option, } impl From for SourceResponse { fn from(source: Source) -> Self { Self { id: source.id, name: source.name, source_type: source.source_type, enabled: source.enabled, config: source.config, status: source.status, last_sync_at: source.last_sync_at, last_error: source.last_error, last_error_at: source.last_error_at, total_files_synced: source.total_files_synced, total_files_pending: source.total_files_pending, total_size_bytes: source.total_size_bytes, created_at: source.created_at, updated_at: source.updated_at, } } } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct WebDAVSourceConfig { pub server_url: String, pub username: String, pub password: String, pub watch_folders: Vec, pub file_extensions: Vec, pub auto_sync: bool, pub sync_interval_minutes: i32, pub server_type: Option, }