diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 4d220e1..eced142 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -12,6 +12,7 @@ import DocumentsPage from './pages/DocumentsPage'; import SearchPage from './pages/SearchPage'; import DocumentDetailsPage from './pages/DocumentDetailsPage'; import SettingsPage from './pages/SettingsPage'; +import WatchFolderPage from './pages/WatchFolderPage'; function App() { const { user, loading } = useAuth(); @@ -63,7 +64,7 @@ function App() { } /> } /> } /> - Watch Folder Page - Coming Soon} /> + } /> } /> Profile Page - Coming Soon} /> diff --git a/frontend/src/pages/WatchFolderPage.jsx b/frontend/src/pages/WatchFolderPage.jsx new file mode 100644 index 0000000..de80ac5 --- /dev/null +++ b/frontend/src/pages/WatchFolderPage.jsx @@ -0,0 +1,341 @@ +import React, { useState, useEffect } from 'react'; +import { + Box, + Container, + Typography, + Paper, + Card, + CardContent, + Grid, + Chip, + LinearProgress, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Alert, + Button, + IconButton, +} from '@mui/material'; +import { + Refresh as RefreshIcon, + Folder as FolderIcon, + CheckCircleOutline as CheckCircleIcon, + Error as ErrorIcon, + Schedule as ScheduleIcon, + Visibility as VisibilityIcon, + CloudUpload as CloudUploadIcon, + Description as DescriptionIcon, +} from '@mui/icons-material'; +import { useTheme } from '@mui/material/styles'; +import { queueService } from '../services/api'; + +const WatchFolderPage = () => { + const theme = useTheme(); + const [queueStats, setQueueStats] = useState(null); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + const [lastRefresh, setLastRefresh] = useState(null); + + // Mock configuration data (would typically come from API) + const watchConfig = { + watchFolder: process.env.REACT_APP_WATCH_FOLDER || './watch', + watchInterval: 30, + maxFileAge: 24, + allowedTypes: ['pdf', 'png', 'jpg', 'jpeg', 'tiff', 'bmp', 'txt', 'doc', 'docx'], + isActive: true, + strategy: 'hybrid' + }; + + useEffect(() => { + fetchQueueStats(); + const interval = setInterval(fetchQueueStats, 30000); // Refresh every 30 seconds + return () => clearInterval(interval); + }, []); + + const fetchQueueStats = async () => { + try { + setLoading(true); + const response = await queueService.getStats(); + setQueueStats(response.data); + setLastRefresh(new Date()); + setError(null); + } catch (err) { + console.error('Error fetching queue stats:', err); + setError('Failed to fetch queue statistics'); + } finally { + setLoading(false); + } + }; + + const formatFileSize = (bytes) => { + if (bytes === 0) return '0 Bytes'; + const k = 1024; + const sizes = ['Bytes', 'KB', 'MB', 'GB']; + const i = Math.floor(Math.log(bytes) / Math.log(k)); + return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]; + }; + + const formatDuration = (minutes) => { + if (!minutes) return 'N/A'; + if (minutes < 60) return `${Math.round(minutes)}m`; + const hours = Math.floor(minutes / 60); + const mins = Math.round(minutes % 60); + return `${hours}h ${mins}m`; + }; + + const getStatusColor = (status) => { + switch (status) { + case 'active': return 'success'; + case 'error': return 'error'; + case 'pending': return 'warning'; + default: return 'default'; + } + }; + + const getStatusIcon = (status) => { + switch (status) { + case 'active': return ; + case 'error': return ; + case 'pending': return ; + default: return ; + } + }; + + return ( + + + + Watch Folder + + + + + {error && ( + + {error} + + )} + + {/* Watch Folder Configuration */} + + + + + Watch Folder Configuration + + + + + + Watched Directory + + + {watchConfig.watchFolder} + + + + + + + Status + + + + + + + + Watch Strategy + + + {watchConfig.strategy} + + + + + + + Scan Interval + + + {watchConfig.watchInterval} seconds + + + + + + + Max File Age + + + {watchConfig.maxFileAge} hours + + + + + + + Supported File Types + + + {watchConfig.allowedTypes.map((type) => ( + + ))} + + + + + + + + {/* Queue Statistics */} + {queueStats && ( + + + + + Processing Queue + + + + + + {queueStats.pending_count} + + + Pending + + + + + + + {queueStats.processing_count} + + + Processing + + + + + + + {queueStats.failed_count} + + + Failed + + + + + + + {queueStats.completed_today} + + + Completed Today + + + + + + + + + + Average Wait Time + + + {formatDuration(queueStats.avg_wait_time_minutes)} + + + + + + + Oldest Pending Item + + + {formatDuration(queueStats.oldest_pending_minutes)} + + + + + + {lastRefresh && ( + + Last updated: {lastRefresh.toLocaleTimeString()} + + )} + + + )} + + {/* Processing Information */} + + + + + How Watch Folder Works + + + The watch folder system automatically monitors the configured directory for new files and processes them for OCR. + + + + + Processing Pipeline: + + + + 1. File Detection: New files are detected using hybrid watching (inotify + polling) + + + 2. Validation: Files are checked for supported format and size limits + + + 3. Deduplication: System prevents processing of duplicate files + + + 4. Storage: Files are moved to the document storage system + + + 5. OCR Queue: Documents are queued for OCR processing with priority + + + + + + + The system uses a hybrid watching strategy that automatically detects filesystem type and chooses + the optimal monitoring approach (inotify for local filesystems, polling for network mounts). + + + + + + ); +}; + +export default WatchFolderPage; \ No newline at end of file diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts index 27f450b..344e4e5 100644 --- a/frontend/src/services/api.ts +++ b/frontend/src/services/api.ts @@ -72,6 +72,15 @@ export interface SearchResponse { suggestions: string[] } +export interface QueueStats { + pending_count: number + processing_count: number + failed_count: number + completed_today: number + avg_wait_time_minutes?: number + oldest_pending_minutes?: number +} + export const documentService = { upload: (file: File) => { const formData = new FormData() @@ -111,4 +120,14 @@ export const documentService = { }, }) }, +} + +export const queueService = { + getStats: () => { + return api.get('/queue/stats') + }, + + requeueFailed: () => { + return api.post('/queue/requeue-failed') + }, } \ No newline at end of file diff --git a/migrations/20240101000006_add_missing_ocr_columns.sql b/migrations/20240101000006_add_missing_ocr_columns.sql new file mode 100644 index 0000000..5e04565 --- /dev/null +++ b/migrations/20240101000006_add_missing_ocr_columns.sql @@ -0,0 +1,3 @@ +-- Add missing OCR columns to documents table for existing databases +ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_error TEXT; +ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_completed_at TIMESTAMPTZ; \ No newline at end of file diff --git a/src/db.rs b/src/db.rs index 15a290b..dc7789d 100644 --- a/src/db.rs +++ b/src/db.rs @@ -373,7 +373,7 @@ impl Database { pub async fn get_documents_by_user(&self, user_id: Uuid, limit: i64, offset: i64) -> Result> { let rows = sqlx::query( r#" - SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id + SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id FROM documents WHERE user_id = $1 ORDER BY created_at DESC @@ -416,7 +416,7 @@ impl Database { pub async fn find_documents_by_filename(&self, filename: &str) -> Result> { let rows = sqlx::query( r#" - SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id + SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id FROM documents WHERE filename = $1 OR original_filename = $1 ORDER BY created_at DESC @@ -456,7 +456,7 @@ impl Database { pub async fn search_documents(&self, user_id: Uuid, search: SearchRequest) -> Result<(Vec, i64)> { let mut query_builder = sqlx::QueryBuilder::new( r#" - SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id, + SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, ts_rank(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')), plainto_tsquery('english', "# ); @@ -550,7 +550,7 @@ impl Database { // Use trigram similarity for substring matching let mut builder = sqlx::QueryBuilder::new( r#" - SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id, + SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, GREATEST( similarity(filename, "# ); @@ -589,7 +589,7 @@ impl Database { let mut builder = sqlx::QueryBuilder::new(&format!( r#" - SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id, + SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, GREATEST( CASE WHEN filename ILIKE '%' || "# )); diff --git a/src/enhanced_ocr.rs b/src/enhanced_ocr.rs index 5ba6667..f9359bb 100644 --- a/src/enhanced_ocr.rs +++ b/src/enhanced_ocr.rs @@ -559,6 +559,7 @@ impl EnhancedOcrService { } /// Validate OCR result quality + #[cfg(feature = "ocr")] pub fn validate_ocr_quality(&self, result: &OcrResult, settings: &Settings) -> bool { // Check minimum confidence threshold if result.confidence < settings.ocr_min_confidence { diff --git a/src/main.rs b/src/main.rs index dc34396..475acf1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -73,6 +73,32 @@ async fn main() -> Result<(), Box> { } } + // Check if ocr_error column exists + let check_column = sqlx::query("SELECT column_name FROM information_schema.columns WHERE table_name = 'documents' AND column_name = 'ocr_error'") + .fetch_optional(&db.pool) + .await; + + match check_column { + Ok(Some(_)) => info!("✅ ocr_error column exists"), + Ok(None) => { + error!("❌ ocr_error column is missing! Migration 006 may not have been applied."); + // Try to add the column manually as a fallback + info!("Attempting to add missing columns..."); + if let Err(e) = sqlx::query("ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_error TEXT") + .execute(&db.pool) + .await { + error!("Failed to add ocr_error column: {}", e); + } + if let Err(e) = sqlx::query("ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_completed_at TIMESTAMPTZ") + .execute(&db.pool) + .await { + error!("Failed to add ocr_completed_at column: {}", e); + } + info!("Fallback column addition completed"); + } + Err(e) => error!("Failed to check for ocr_error column: {}", e), + } + let result = migrations.run(&db.pool).await; match result { Ok(_) => info!("SQLx migrations completed successfully"), @@ -113,6 +139,7 @@ async fn main() -> Result<(), Box> { .route("/api/health", get(readur::health_check)) .nest("/api/auth", routes::auth::router()) .nest("/api/documents", routes::documents::router()) + .nest("/api/metrics", routes::metrics::router()) .nest("/api/queue", routes::queue::router()) .nest("/api/search", routes::search::router()) .nest("/api/settings", routes::settings::router()) diff --git a/src/models.rs b/src/models.rs index 56961d1..ba171d3 100644 --- a/src/models.rs +++ b/src/models.rs @@ -64,40 +64,64 @@ pub struct Document { #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct DocumentResponse { + /// Unique identifier for the document pub id: Uuid, + /// Current filename in the system pub filename: String, + /// Original filename when uploaded pub original_filename: String, + /// File size in bytes pub file_size: i64, + /// MIME type of the file pub mime_type: String, + /// Tags associated with the document pub tags: Vec, + /// When the document was created pub created_at: DateTime, + /// Whether OCR text has been extracted pub has_ocr_text: bool, + /// OCR confidence score (0-100, higher is better) pub ocr_confidence: Option, + /// Number of words detected by OCR pub ocr_word_count: Option, + /// Time taken for OCR processing in milliseconds pub ocr_processing_time_ms: Option, + /// Current status of OCR processing (pending, processing, completed, failed) pub ocr_status: Option, } #[derive(Debug, Serialize, Deserialize, ToSchema, IntoParams)] pub struct SearchRequest { + /// Search query text (searches both document content and OCR-extracted text) pub query: String, + /// Filter by specific tags pub tags: Option>, + /// Filter by MIME types (e.g., "application/pdf", "image/png") pub mime_types: Option>, + /// Maximum number of results to return (default: 25) pub limit: Option, + /// Number of results to skip for pagination (default: 0) pub offset: Option, + /// Whether to include text snippets with search matches (default: true) pub include_snippets: Option, + /// Length of text snippets in characters (default: 200) pub snippet_length: Option, + /// Search algorithm to use (default: simple) pub search_mode: Option, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub enum SearchMode { + /// Simple text search with basic word matching #[serde(rename = "simple")] Simple, + /// Exact phrase matching #[serde(rename = "phrase")] Phrase, + /// Fuzzy search using similarity matching (good for typos and partial matches) #[serde(rename = "fuzzy")] Fuzzy, + /// Boolean search with AND, OR, NOT operators #[serde(rename = "boolean")] Boolean, } @@ -110,41 +134,65 @@ impl Default for SearchMode { #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct SearchSnippet { + /// The snippet text content pub text: String, + /// Starting character position in the original document pub start_offset: i32, + /// Ending character position in the original document pub end_offset: i32, + /// Ranges within the snippet that should be highlighted pub highlight_ranges: Vec, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct HighlightRange { + /// Start position of highlight within the snippet pub start: i32, + /// End position of highlight within the snippet pub end: i32, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct EnhancedDocumentResponse { + /// Unique identifier for the document pub id: Uuid, + /// Current filename in the system pub filename: String, + /// Original filename when uploaded pub original_filename: String, + /// File size in bytes pub file_size: i64, + /// MIME type of the file pub mime_type: String, + /// Tags associated with the document pub tags: Vec, + /// When the document was created pub created_at: DateTime, + /// Whether OCR text has been extracted pub has_ocr_text: bool, + /// OCR confidence score (0-100, higher is better) pub ocr_confidence: Option, + /// Number of words detected by OCR pub ocr_word_count: Option, + /// Time taken for OCR processing in milliseconds pub ocr_processing_time_ms: Option, + /// Current status of OCR processing (pending, processing, completed, failed) pub ocr_status: Option, + /// Search relevance score (0-1, higher is more relevant) pub search_rank: Option, + /// Text snippets showing search matches with highlights pub snippets: Vec, } #[derive(Debug, Serialize, Deserialize, ToSchema)] pub struct SearchResponse { + /// List of matching documents with enhanced metadata and snippets pub documents: Vec, + /// Total number of documents matching the search criteria pub total: i64, + /// Time taken to execute the search in milliseconds pub query_time_ms: u64, + /// Search suggestions for query improvement pub suggestions: Vec, } diff --git a/src/routes/documents.rs b/src/routes/documents.rs index dd0288e..c4376b2 100644 --- a/src/routes/documents.rs +++ b/src/routes/documents.rs @@ -37,12 +37,12 @@ pub fn router() -> Router> { security( ("bearer_auth" = []) ), - request_body(content = String, description = "Multipart form data with file", content_type = "multipart/form-data"), + request_body(content = String, description = "Multipart form data with file. Supported formats: PDF, PNG, JPG, JPEG, TIFF, BMP, TXT. OCR will be automatically performed on image and PDF files.", content_type = "multipart/form-data"), responses( - (status = 200, description = "Document uploaded successfully", body = DocumentResponse), - (status = 400, description = "Bad request - invalid file or data"), + (status = 200, description = "Document uploaded successfully. OCR processing will begin automatically if enabled in user settings.", body = DocumentResponse), + (status = 400, description = "Bad request - invalid file type or malformed data"), (status = 413, description = "Payload too large - file exceeds size limit"), - (status = 401, description = "Unauthorized") + (status = 401, description = "Unauthorized - valid authentication required") ) )] async fn upload_document( diff --git a/src/routes/metrics.rs b/src/routes/metrics.rs new file mode 100644 index 0000000..1ba6ede --- /dev/null +++ b/src/routes/metrics.rs @@ -0,0 +1,267 @@ +use axum::{ + extract::State, + http::StatusCode, + response::Json, + routing::get, + Router, +}; +use std::sync::Arc; +use serde::Serialize; +use utoipa::ToSchema; + +use crate::{auth::AuthUser, AppState}; + +#[derive(Serialize, ToSchema)] +pub struct SystemMetrics { + pub database: DatabaseMetrics, + pub ocr: OcrMetrics, + pub documents: DocumentMetrics, + pub users: UserMetrics, + pub system: GeneralSystemMetrics, + pub timestamp: i64, +} + +#[derive(Serialize, ToSchema)] +pub struct DatabaseMetrics { + pub active_connections: i32, + pub total_queries_today: i64, + pub avg_query_time_ms: f64, +} + +#[derive(Serialize, ToSchema)] +pub struct OcrMetrics { + pub pending_jobs: i64, + pub processing_jobs: i64, + pub failed_jobs: i64, + pub completed_today: i64, + pub avg_processing_time_minutes: Option, + pub queue_depth: i64, + pub oldest_pending_minutes: Option, +} + +#[derive(Serialize, ToSchema)] +pub struct DocumentMetrics { + pub total_documents: i64, + pub documents_uploaded_today: i64, + pub total_storage_bytes: i64, + pub avg_document_size_bytes: f64, + pub documents_with_ocr: i64, + pub documents_without_ocr: i64, +} + +#[derive(Serialize, ToSchema)] +pub struct UserMetrics { + pub total_users: i64, + pub active_users_today: i64, + pub new_registrations_today: i64, +} + +#[derive(Serialize, ToSchema)] +pub struct GeneralSystemMetrics { + pub uptime_seconds: u64, + pub app_version: String, + pub rust_version: String, +} + +pub fn router() -> Router> { + Router::new() + .route("/", get(get_system_metrics)) +} + +#[utoipa::path( + get, + path = "/api/metrics", + tag = "metrics", + security( + ("bearer_auth" = []) + ), + responses( + (status = 200, description = "System metrics and monitoring data", body = SystemMetrics), + (status = 401, description = "Unauthorized - valid authentication required"), + (status = 500, description = "Internal server error") + ) +)] +pub async fn get_system_metrics( + State(state): State>, + _auth_user: AuthUser, // Require authentication +) -> Result, StatusCode> { + let timestamp = chrono::Utc::now().timestamp(); + + // Collect all metrics concurrently for better performance + let (database_metrics, ocr_metrics, document_metrics, user_metrics, system_metrics) = tokio::try_join!( + collect_database_metrics(&state), + collect_ocr_metrics(&state), + collect_document_metrics(&state), + collect_user_metrics(&state), + collect_system_metrics() + )?; + + let metrics = SystemMetrics { + database: database_metrics, + ocr: ocr_metrics, + documents: document_metrics, + users: user_metrics, + system: system_metrics, + timestamp, + }; + + Ok(Json(metrics)) +} + +async fn collect_database_metrics(state: &Arc) -> Result { + // Get connection pool information + let _pool_info = state.db.pool.options(); + let active_connections = state.db.pool.size() as i32; + + // For now, use placeholder values for queries + // In production, you might want to implement query tracking + Ok(DatabaseMetrics { + active_connections, + total_queries_today: 0, // Placeholder - would need query tracking + avg_query_time_ms: 0.0, // Placeholder - would need query timing + }) +} + +async fn collect_ocr_metrics(state: &Arc) -> Result { + // Use existing OCR queue statistics + use crate::ocr_queue::OcrQueueService; + + let queue_service = OcrQueueService::new( + state.db.clone(), + state.db.pool.clone(), + state.config.concurrent_ocr_jobs + ); + + let stats = queue_service + .get_stats() + .await + .map_err(|e| { + tracing::error!("Failed to get OCR stats: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + Ok(OcrMetrics { + pending_jobs: stats.pending_count, + processing_jobs: stats.processing_count, + failed_jobs: stats.failed_count, + completed_today: stats.completed_today, + avg_processing_time_minutes: stats.avg_wait_time_minutes, + queue_depth: stats.pending_count + stats.processing_count, + oldest_pending_minutes: stats.oldest_pending_minutes, + }) +} + +async fn collect_document_metrics(state: &Arc) -> Result { + // Get total document count + let total_docs = sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM documents") + .fetch_one(&state.db.pool) + .await + .map_err(|e| { + tracing::error!("Failed to get total document count: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + // Get documents uploaded today + let docs_today = sqlx::query_scalar::<_, i64>( + "SELECT COUNT(*) FROM documents WHERE DATE(created_at) = CURRENT_DATE" + ) + .fetch_one(&state.db.pool) + .await + .map_err(|e| { + tracing::error!("Failed to get today's document count: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + // Get total storage size + let total_size = sqlx::query_scalar::<_, Option>("SELECT SUM(file_size) FROM documents") + .fetch_one(&state.db.pool) + .await + .map_err(|e| { + tracing::error!("Failed to get total storage size: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })? + .unwrap_or(0); + + // Get documents with and without OCR + let docs_with_ocr = sqlx::query_scalar::<_, i64>( + "SELECT COUNT(*) FROM documents WHERE has_ocr_text = true" + ) + .fetch_one(&state.db.pool) + .await + .map_err(|e| { + tracing::error!("Failed to get OCR document count: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + let docs_without_ocr = total_docs - docs_with_ocr; + + let avg_size = if total_docs > 0 { + total_size as f64 / total_docs as f64 + } else { + 0.0 + }; + + Ok(DocumentMetrics { + total_documents: total_docs, + documents_uploaded_today: docs_today, + total_storage_bytes: total_size, + avg_document_size_bytes: avg_size, + documents_with_ocr: docs_with_ocr, + documents_without_ocr: docs_without_ocr, + }) +} + +async fn collect_user_metrics(state: &Arc) -> Result { + // Get total user count + let total_users = sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM users") + .fetch_one(&state.db.pool) + .await + .map_err(|e| { + tracing::error!("Failed to get total user count: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + // Get new users today + let new_users_today = sqlx::query_scalar::<_, i64>( + "SELECT COUNT(*) FROM users WHERE DATE(created_at) = CURRENT_DATE" + ) + .fetch_one(&state.db.pool) + .await + .map_err(|e| { + tracing::error!("Failed to get new user count: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + // For active users, count users who uploaded documents today + let active_users_today = sqlx::query_scalar::<_, i64>( + "SELECT COUNT(DISTINCT user_id) FROM documents WHERE DATE(created_at) = CURRENT_DATE" + ) + .fetch_one(&state.db.pool) + .await + .map_err(|e| { + tracing::error!("Failed to get active user count: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + Ok(UserMetrics { + total_users, + active_users_today, + new_registrations_today: new_users_today, + }) +} + +async fn collect_system_metrics() -> Result { + // Get application uptime (this is a simplified version) + // In a real application, you'd track the start time + let uptime_seconds = 3600; // Placeholder + + // Get version information + let app_version = env!("CARGO_PKG_VERSION").to_string(); + let rust_version = std::env::var("RUST_VERSION").unwrap_or_else(|_| "unknown".to_string()); + + Ok(GeneralSystemMetrics { + uptime_seconds, + app_version, + rust_version, + }) +} \ No newline at end of file diff --git a/src/routes/mod.rs b/src/routes/mod.rs index 965f9ed..caba618 100644 --- a/src/routes/mod.rs +++ b/src/routes/mod.rs @@ -1,5 +1,6 @@ pub mod auth; pub mod documents; +pub mod metrics; pub mod queue; pub mod search; pub mod settings; diff --git a/src/routes/queue.rs b/src/routes/queue.rs index a757bf6..257bf66 100644 --- a/src/routes/queue.rs +++ b/src/routes/queue.rs @@ -23,8 +23,9 @@ pub fn router() -> Router> { ("bearer_auth" = []) ), responses( - (status = 200, description = "OCR queue statistics"), - (status = 401, description = "Unauthorized") + (status = 200, description = "OCR queue statistics including pending jobs, processing status, and performance metrics"), + (status = 401, description = "Unauthorized - valid authentication required"), + (status = 500, description = "Internal server error") ) )] async fn get_queue_stats( diff --git a/src/routes/search.rs b/src/routes/search.rs index 1b83354..63658ff 100644 --- a/src/routes/search.rs +++ b/src/routes/search.rs @@ -30,8 +30,9 @@ pub fn router() -> Router> { SearchRequest ), responses( - (status = 200, description = "Search results", body = SearchResponse), - (status = 401, description = "Unauthorized") + (status = 200, description = "Enhanced search results with relevance ranking, text snippets, and OCR-extracted content matching", body = SearchResponse), + (status = 401, description = "Unauthorized - valid authentication required"), + (status = 500, description = "Internal server error") ) )] async fn search_documents( diff --git a/src/swagger.rs b/src/swagger.rs index 396417e..dd74b3d 100644 --- a/src/swagger.rs +++ b/src/swagger.rs @@ -10,6 +10,9 @@ use crate::{ DocumentResponse, SearchRequest, SearchResponse, EnhancedDocumentResponse, SettingsResponse, UpdateSettings, SearchMode, SearchSnippet, HighlightRange }, + routes::metrics::{ + SystemMetrics, DatabaseMetrics, OcrMetrics, DocumentMetrics, UserMetrics, GeneralSystemMetrics + }, AppState, }; @@ -39,12 +42,15 @@ use crate::{ // Queue endpoints crate::routes::queue::get_queue_stats, crate::routes::queue::requeue_failed, + // Metrics endpoints + crate::routes::metrics::get_system_metrics, ), components( schemas( CreateUser, LoginRequest, LoginResponse, UserResponse, UpdateUser, DocumentResponse, SearchRequest, SearchResponse, EnhancedDocumentResponse, - SettingsResponse, UpdateSettings, SearchMode, SearchSnippet, HighlightRange + SettingsResponse, UpdateSettings, SearchMode, SearchSnippet, HighlightRange, + SystemMetrics, DatabaseMetrics, OcrMetrics, DocumentMetrics, UserMetrics, GeneralSystemMetrics ) ), tags( @@ -54,6 +60,7 @@ use crate::{ (name = "settings", description = "User settings endpoints"), (name = "users", description = "User management endpoints"), (name = "queue", description = "OCR queue management endpoints"), + (name = "metrics", description = "System metrics and monitoring endpoints"), ), modifiers(&SecurityAddon), info( diff --git a/src/tests/db_tests.rs b/src/tests/db_tests.rs index fb924f6..ac97c5c 100644 --- a/src/tests/db_tests.rs +++ b/src/tests/db_tests.rs @@ -37,6 +37,12 @@ mod tests { mime_type: "application/pdf".to_string(), content: Some("Test content".to_string()), ocr_text: Some("OCR extracted text".to_string()), + ocr_confidence: Some(95.0), + ocr_word_count: Some(10), + ocr_processing_time_ms: Some(800), + ocr_status: Some("completed".to_string()), + ocr_error: None, + ocr_completed_at: Some(Utc::now()), tags: vec!["test".to_string(), "document".to_string()], created_at: Utc::now(), updated_at: Utc::now(), diff --git a/src/tests/enhanced_search_tests.rs b/src/tests/enhanced_search_tests.rs index ebe1077..bf832f1 100644 --- a/src/tests/enhanced_search_tests.rs +++ b/src/tests/enhanced_search_tests.rs @@ -300,6 +300,10 @@ mod tests { tags: vec!["test".to_string()], created_at: now, has_ocr_text: true, + ocr_confidence: Some(85.5), + ocr_word_count: Some(50), + ocr_processing_time_ms: Some(1500), + ocr_status: Some("completed".to_string()), search_rank: Some(0.75), snippets, }; @@ -679,6 +683,10 @@ mod tests { tags: vec!["test".to_string(), "document".to_string()], created_at: now, has_ocr_text: true, + ocr_confidence: Some(92.3), + ocr_word_count: Some(75), + ocr_processing_time_ms: Some(2000), + ocr_status: Some("completed".to_string()), search_rank: Some(0.85), snippets: vec![ SearchSnippet { @@ -919,6 +927,12 @@ mod tests { mime_type: "application/pdf".to_string(), content: Some("This is a comprehensive test document for enhanced search functionality testing".to_string()), ocr_text: Some("OCR extracted content with additional test information for search validation".to_string()), + ocr_confidence: Some(88.7), + ocr_word_count: Some(25), + ocr_processing_time_ms: Some(1200), + ocr_status: Some("completed".to_string()), + ocr_error: None, + ocr_completed_at: Some(Utc::now()), tags: vec!["enhanced".to_string(), "search".to_string(), "test".to_string()], created_at: Utc::now(), updated_at: Utc::now(),