diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx
index 4d220e1..eced142 100644
--- a/frontend/src/App.jsx
+++ b/frontend/src/App.jsx
@@ -12,6 +12,7 @@ import DocumentsPage from './pages/DocumentsPage';
import SearchPage from './pages/SearchPage';
import DocumentDetailsPage from './pages/DocumentDetailsPage';
import SettingsPage from './pages/SettingsPage';
+import WatchFolderPage from './pages/WatchFolderPage';
function App() {
const { user, loading } = useAuth();
@@ -63,7 +64,7 @@ function App() {
} />
} />
} />
- Watch Folder Page - Coming Soon} />
+ } />
} />
Profile Page - Coming Soon} />
diff --git a/frontend/src/pages/WatchFolderPage.jsx b/frontend/src/pages/WatchFolderPage.jsx
new file mode 100644
index 0000000..de80ac5
--- /dev/null
+++ b/frontend/src/pages/WatchFolderPage.jsx
@@ -0,0 +1,341 @@
+import React, { useState, useEffect } from 'react';
+import {
+ Box,
+ Container,
+ Typography,
+ Paper,
+ Card,
+ CardContent,
+ Grid,
+ Chip,
+ LinearProgress,
+ Table,
+ TableBody,
+ TableCell,
+ TableContainer,
+ TableHead,
+ TableRow,
+ Alert,
+ Button,
+ IconButton,
+} from '@mui/material';
+import {
+ Refresh as RefreshIcon,
+ Folder as FolderIcon,
+ CheckCircleOutline as CheckCircleIcon,
+ Error as ErrorIcon,
+ Schedule as ScheduleIcon,
+ Visibility as VisibilityIcon,
+ CloudUpload as CloudUploadIcon,
+ Description as DescriptionIcon,
+} from '@mui/icons-material';
+import { useTheme } from '@mui/material/styles';
+import { queueService } from '../services/api';
+
+const WatchFolderPage = () => {
+ const theme = useTheme();
+ const [queueStats, setQueueStats] = useState(null);
+ const [loading, setLoading] = useState(true);
+ const [error, setError] = useState(null);
+ const [lastRefresh, setLastRefresh] = useState(null);
+
+ // Placeholder settings rendered in the configuration card. They are defined inline here
+ // rather than loaded from the backend; watchInterval is shown as seconds and maxFileAge as hours.
+ const fallbackWatchFolder = './watch';
+ const watchConfig = {
+   watchFolder: process.env.REACT_APP_WATCH_FOLDER || fallbackWatchFolder,
+   watchInterval: 30,
+   maxFileAge: 24,
+   allowedTypes: [
+     'pdf',
+     'png',
+     'jpg',
+     'jpeg',
+     'tiff',
+     'bmp',
+     'txt',
+     'doc',
+     'docx',
+   ],
+   isActive: true,
+   strategy: 'hybrid',
+ };
+
+ // Load the statistics once on mount, then keep polling until the component unmounts.
+ useEffect(() => {
+   const pollEveryMs = 30000; // Refresh every 30 seconds
+   fetchQueueStats();
+   const pollTimer = setInterval(fetchQueueStats, pollEveryMs);
+   return () => {
+     clearInterval(pollTimer);
+   };
+ }, []);
+
+ /**
+  * Request the queue statistics and mirror the outcome into component state.
+  *
+  * On success the stats and refresh time are stored and any earlier error is cleared.
+  * On failure the error is logged and a user-facing message is stored; previously
+  * loaded stats are left untouched. The loading flag is reset in both cases.
+  */
+ const fetchQueueStats = async () => {
+   try {
+     setLoading(true);
+     const { data } = await queueService.getStats();
+     setQueueStats(data);
+     setLastRefresh(new Date());
+     setError(null);
+   } catch (fetchError) {
+     console.error('Error fetching queue stats:', fetchError);
+     setError('Failed to fetch queue statistics');
+   } finally {
+     setLoading(false);
+   }
+ };
+
+ /**
+  * Format a byte count as a human-readable size using 1024-based units.
+  *
+  * @param {number} bytes - Size in bytes.
+  * @returns {string} For example '1.5 KB'. Zero, negative and non-finite inputs
+  *   yield '0 Bytes' instead of a 'NaN undefined' label.
+  */
+ const formatFileSize = (bytes) => {
+   if (!Number.isFinite(bytes) || bytes <= 0) return '0 Bytes';
+   const k = 1024;
+   const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB'];
+   const rawIndex = Math.floor(Math.log(bytes) / Math.log(k));
+   // Clamp so values below one byte stay in 'Bytes' and values past the table use its largest unit.
+   const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1);
+   return `${parseFloat((bytes / Math.pow(k, i)).toFixed(2))} ${sizes[i]}`;
+ };
+
+ /**
+  * Format a duration given in minutes as 'Xm' or 'Xh Ym'.
+  *
+  * @param {number|null|undefined} minutes - Duration in minutes; may be fractional.
+  * @returns {string} 'N/A' when no value is available, otherwise the rounded duration.
+  */
+ const formatDuration = (minutes) => {
+   // Only a missing value is "not available"; a genuine 0 is a valid duration.
+   if (minutes == null || Number.isNaN(minutes)) return 'N/A';
+   // Round the total once so the minute part can never be displayed as 60.
+   const totalMinutes = Math.round(minutes);
+   if (totalMinutes < 60) return `${totalMinutes}m`;
+   const hours = Math.floor(totalMinutes / 60);
+   const mins = totalMinutes % 60;
+   return `${hours}h ${mins}m`;
+ };
+
+ // Translate a status string into a color keyword; unrecognised statuses map to 'default'.
+ const getStatusColor = (status) => {
+   const colorByStatus = new Map([
+     ['active', 'success'],
+     ['error', 'error'],
+     ['pending', 'warning'],
+   ]);
+   return colorByStatus.has(status) ? colorByStatus.get(status) : 'default';
+ };
+
+ const getStatusIcon = (status) => {
+ switch (status) {
+ case 'active': return ;
+ case 'error': return ;
+ case 'pending': return ;
+ default: return ;
+ }
+ };
+
+ return (
+
+
+
+ Watch Folder
+
+ }
+ onClick={fetchQueueStats}
+ disabled={loading}
+ >
+ Refresh
+
+
+
+ {error && (
+
+ {error}
+
+ )}
+
+ {/* Watch Folder Configuration */}
+
+
+
+
+ Watch Folder Configuration
+
+
+
+
+
+ Watched Directory
+
+
+ {watchConfig.watchFolder}
+
+
+
+
+
+
+ Status
+
+
+
+
+
+
+
+ Watch Strategy
+
+
+ {watchConfig.strategy}
+
+
+
+
+
+
+ Scan Interval
+
+
+ {watchConfig.watchInterval} seconds
+
+
+
+
+
+
+ Max File Age
+
+
+ {watchConfig.maxFileAge} hours
+
+
+
+
+
+
+ Supported File Types
+
+
+ {watchConfig.allowedTypes.map((type) => (
+
+ ))}
+
+
+
+
+
+
+
+ {/* Queue Statistics */}
+ {queueStats && (
+
+
+
+
+ Processing Queue
+
+
+
+
+
+ {queueStats.pending_count}
+
+
+ Pending
+
+
+
+
+
+
+ {queueStats.processing_count}
+
+
+ Processing
+
+
+
+
+
+
+ {queueStats.failed_count}
+
+
+ Failed
+
+
+
+
+
+
+ {queueStats.completed_today}
+
+
+ Completed Today
+
+
+
+
+
+
+
+
+
+ Average Wait Time
+
+
+ {formatDuration(queueStats.avg_wait_time_minutes)}
+
+
+
+
+
+
+ Oldest Pending Item
+
+
+ {formatDuration(queueStats.oldest_pending_minutes)}
+
+
+
+
+
+ {lastRefresh && (
+
+ Last updated: {lastRefresh.toLocaleTimeString()}
+
+ )}
+
+
+ )}
+
+ {/* Processing Information */}
+
+
+
+
+ How Watch Folder Works
+
+
+ The watch folder system automatically monitors the configured directory for new files and processes them for OCR.
+
+
+
+
+ Processing Pipeline:
+
+
+
+ 1. File Detection: New files are detected using hybrid watching (inotify + polling)
+
+
+ 2. Validation: Files are checked for supported format and size limits
+
+
+ 3. Deduplication: System prevents processing of duplicate files
+
+
+ 4. Storage: Files are moved to the document storage system
+
+
+ 5. OCR Queue: Documents are queued for OCR processing with priority
+
+
+
+
+
+
+ The system uses a hybrid watching strategy that automatically detects filesystem type and chooses
+ the optimal monitoring approach (inotify for local filesystems, polling for network mounts).
+
+
+
+
+
+ );
+};
+
+export default WatchFolderPage;
\ No newline at end of file
diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts
index 27f450b..344e4e5 100644
--- a/frontend/src/services/api.ts
+++ b/frontend/src/services/api.ts
@@ -72,6 +72,15 @@ export interface SearchResponse {
suggestions: string[]
}
+/**
+ * Queue statistics as consumed by the Watch Folder page.
+ * NOTE(review): presumably mirrors the JSON body of `GET /queue/stats` — confirm against the backend type.
+ */
+export interface QueueStats {
+ /** Number of items shown as "Pending" */
+ pending_count: number
+ /** Number of items shown as "Processing" */
+ processing_count: number
+ /** Number of items shown as "Failed" */
+ failed_count: number
+ /** Number of items shown as "Completed Today" */
+ completed_today: number
+ /** Average wait time in minutes. NOTE(review): the backend may send `null` rather than omit it — confirm. */
+ avg_wait_time_minutes?: number
+ /** Age of the oldest pending item in minutes. NOTE(review): may likewise arrive as `null` — confirm. */
+ oldest_pending_minutes?: number
+}
+
export const documentService = {
upload: (file: File) => {
const formData = new FormData()
@@ -111,4 +120,14 @@ export const documentService = {
},
})
},
+}
+
+// Client helpers for the queue endpoints; each returns the pending request from `api`.
+export const queueService = {
+  // GET /queue/stats
+  getStats: () => api.get('/queue/stats'),
+
+  // POST /queue/requeue-failed
+  requeueFailed: () => api.post('/queue/requeue-failed'),
}
\ No newline at end of file
diff --git a/migrations/20240101000006_add_missing_ocr_columns.sql b/migrations/20240101000006_add_missing_ocr_columns.sql
new file mode 100644
index 0000000..5e04565
--- /dev/null
+++ b/migrations/20240101000006_add_missing_ocr_columns.sql
@@ -0,0 +1,3 @@
+-- Bring existing databases up to date with the OCR columns on documents.
+-- IF NOT EXISTS keeps the statement safe to run when a column is already present.
+ALTER TABLE documents
+    ADD COLUMN IF NOT EXISTS ocr_error TEXT,
+    ADD COLUMN IF NOT EXISTS ocr_completed_at TIMESTAMPTZ;
\ No newline at end of file
diff --git a/src/db.rs b/src/db.rs
index 15a290b..dc7789d 100644
--- a/src/db.rs
+++ b/src/db.rs
@@ -373,7 +373,7 @@ impl Database {
pub async fn get_documents_by_user(&self, user_id: Uuid, limit: i64, offset: i64) -> Result> {
let rows = sqlx::query(
r#"
- SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id
+ SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
FROM documents
WHERE user_id = $1
ORDER BY created_at DESC
@@ -416,7 +416,7 @@ impl Database {
pub async fn find_documents_by_filename(&self, filename: &str) -> Result> {
let rows = sqlx::query(
r#"
- SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id
+ SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
FROM documents
WHERE filename = $1 OR original_filename = $1
ORDER BY created_at DESC
@@ -456,7 +456,7 @@ impl Database {
pub async fn search_documents(&self, user_id: Uuid, search: SearchRequest) -> Result<(Vec, i64)> {
let mut query_builder = sqlx::QueryBuilder::new(
r#"
- SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id,
+ SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id,
ts_rank(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')), plainto_tsquery('english', "#
);
@@ -550,7 +550,7 @@ impl Database {
// Use trigram similarity for substring matching
let mut builder = sqlx::QueryBuilder::new(
r#"
- SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id,
+ SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id,
GREATEST(
similarity(filename, "#
);
@@ -589,7 +589,7 @@ impl Database {
let mut builder = sqlx::QueryBuilder::new(&format!(
r#"
- SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id,
+ SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id,
GREATEST(
CASE WHEN filename ILIKE '%' || "#
));
diff --git a/src/enhanced_ocr.rs b/src/enhanced_ocr.rs
index 5ba6667..f9359bb 100644
--- a/src/enhanced_ocr.rs
+++ b/src/enhanced_ocr.rs
@@ -559,6 +559,7 @@ impl EnhancedOcrService {
}
/// Validate OCR result quality
+ #[cfg(feature = "ocr")]
pub fn validate_ocr_quality(&self, result: &OcrResult, settings: &Settings) -> bool {
// Check minimum confidence threshold
if result.confidence < settings.ocr_min_confidence {
diff --git a/src/main.rs b/src/main.rs
index dc34396..475acf1 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -73,6 +73,32 @@ async fn main() -> Result<(), Box> {
}
}
+ // Check if ocr_error column exists
+ // This is a startup diagnostic that runs before `migrations.run` below, so on a database that
+ // has not yet received the OCR-column migration the `Ok(None)` branch is the expected path.
+ // NOTE(review): the lookup does not filter on table_schema, so a `documents` table in any
+ // schema visible to this connection satisfies it.
+ let check_column = sqlx::query("SELECT column_name FROM information_schema.columns WHERE table_name = 'documents' AND column_name = 'ocr_error'")
+ .fetch_optional(&db.pool)
+ .await;
+
+ match check_column {
+ Ok(Some(_)) => info!("✅ ocr_error column exists"),
+ Ok(None) => {
+ error!("❌ ocr_error column is missing! Migration 006 may not have been applied.");
+ // Try to add the column manually as a fallback
+ // Only ocr_error is probed; ocr_completed_at is added alongside it without its own check.
+ // Each ALTER failure is logged and startup continues.
+ info!("Attempting to add missing columns...");
+ if let Err(e) = sqlx::query("ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_error TEXT")
+ .execute(&db.pool)
+ .await {
+ error!("Failed to add ocr_error column: {}", e);
+ }
+ if let Err(e) = sqlx::query("ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_completed_at TIMESTAMPTZ")
+ .execute(&db.pool)
+ .await {
+ error!("Failed to add ocr_completed_at column: {}", e);
+ }
+ // Logged unconditionally, even when one of the ALTER statements above failed.
+ info!("Fallback column addition completed");
+ }
+ // A failed lookup is only logged; execution proceeds to the migrations.
+ Err(e) => error!("Failed to check for ocr_error column: {}", e),
+ }
+
let result = migrations.run(&db.pool).await;
match result {
Ok(_) => info!("SQLx migrations completed successfully"),
@@ -113,6 +139,7 @@ async fn main() -> Result<(), Box> {
.route("/api/health", get(readur::health_check))
.nest("/api/auth", routes::auth::router())
.nest("/api/documents", routes::documents::router())
+ .nest("/api/metrics", routes::metrics::router())
.nest("/api/queue", routes::queue::router())
.nest("/api/search", routes::search::router())
.nest("/api/settings", routes::settings::router())
diff --git a/src/models.rs b/src/models.rs
index 56961d1..ba171d3 100644
--- a/src/models.rs
+++ b/src/models.rs
@@ -64,40 +64,64 @@ pub struct Document {
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct DocumentResponse {
+ /// Unique identifier for the document
pub id: Uuid,
+ /// Current filename in the system
pub filename: String,
+ /// Original filename when uploaded
pub original_filename: String,
+ /// File size in bytes
pub file_size: i64,
+ /// MIME type of the file
pub mime_type: String,
+ /// Tags associated with the document
pub tags: Vec,
+ /// When the document was created
pub created_at: DateTime,
+ /// Whether OCR text has been extracted
pub has_ocr_text: bool,
+ /// OCR confidence score (0-100, higher is better)
pub ocr_confidence: Option,
+ /// Number of words detected by OCR
pub ocr_word_count: Option,
+ /// Time taken for OCR processing in milliseconds
pub ocr_processing_time_ms: Option,
+ /// Current status of OCR processing (pending, processing, completed, failed)
pub ocr_status: Option,
}
#[derive(Debug, Serialize, Deserialize, ToSchema, IntoParams)]
pub struct SearchRequest {
+ /// Search query text (searches both document content and OCR-extracted text)
pub query: String,
+ /// Filter by specific tags
pub tags: Option>,
+ /// Filter by MIME types (e.g., "application/pdf", "image/png")
pub mime_types: Option>,
+ /// Maximum number of results to return (default: 25)
pub limit: Option,
+ /// Number of results to skip for pagination (default: 0)
pub offset: Option,
+ /// Whether to include text snippets with search matches (default: true)
pub include_snippets: Option,
+ /// Length of text snippets in characters (default: 200)
pub snippet_length: Option,
+ /// Search algorithm to use (default: simple)
pub search_mode: Option,
}
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub enum SearchMode {
+ /// Simple text search with basic word matching
#[serde(rename = "simple")]
Simple,
+ /// Exact phrase matching
#[serde(rename = "phrase")]
Phrase,
+ /// Fuzzy search using similarity matching (good for typos and partial matches)
#[serde(rename = "fuzzy")]
Fuzzy,
+ /// Boolean search with AND, OR, NOT operators
#[serde(rename = "boolean")]
Boolean,
}
@@ -110,41 +134,65 @@ impl Default for SearchMode {
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct SearchSnippet {
+ /// The snippet text content
pub text: String,
+ /// Starting character position in the original document
pub start_offset: i32,
+ /// Ending character position in the original document
pub end_offset: i32,
+ /// Ranges within the snippet that should be highlighted
pub highlight_ranges: Vec,
}
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct HighlightRange {
+ /// Start position of highlight within the snippet
pub start: i32,
+ /// End position of highlight within the snippet
pub end: i32,
}
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct EnhancedDocumentResponse {
+ /// Unique identifier for the document
pub id: Uuid,
+ /// Current filename in the system
pub filename: String,
+ /// Original filename when uploaded
pub original_filename: String,
+ /// File size in bytes
pub file_size: i64,
+ /// MIME type of the file
pub mime_type: String,
+ /// Tags associated with the document
pub tags: Vec,
+ /// When the document was created
pub created_at: DateTime,
+ /// Whether OCR text has been extracted
pub has_ocr_text: bool,
+ /// OCR confidence score (0-100, higher is better)
pub ocr_confidence: Option,
+ /// Number of words detected by OCR
pub ocr_word_count: Option,
+ /// Time taken for OCR processing in milliseconds
pub ocr_processing_time_ms: Option,
+ /// Current status of OCR processing (pending, processing, completed, failed)
pub ocr_status: Option,
+ /// Search relevance score (0-1, higher is more relevant)
pub search_rank: Option,
+ /// Text snippets showing search matches with highlights
pub snippets: Vec,
}
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct SearchResponse {
+ /// List of matching documents with enhanced metadata and snippets
pub documents: Vec,
+ /// Total number of documents matching the search criteria
pub total: i64,
+ /// Time taken to execute the search in milliseconds
pub query_time_ms: u64,
+ /// Search suggestions for query improvement
pub suggestions: Vec,
}
diff --git a/src/routes/documents.rs b/src/routes/documents.rs
index dd0288e..c4376b2 100644
--- a/src/routes/documents.rs
+++ b/src/routes/documents.rs
@@ -37,12 +37,12 @@ pub fn router() -> Router> {
security(
("bearer_auth" = [])
),
- request_body(content = String, description = "Multipart form data with file", content_type = "multipart/form-data"),
+ request_body(content = String, description = "Multipart form data with file. Supported formats: PDF, PNG, JPG, JPEG, TIFF, BMP, TXT. OCR will be automatically performed on image and PDF files.", content_type = "multipart/form-data"),
responses(
- (status = 200, description = "Document uploaded successfully", body = DocumentResponse),
- (status = 400, description = "Bad request - invalid file or data"),
+ (status = 200, description = "Document uploaded successfully. OCR processing will begin automatically if enabled in user settings.", body = DocumentResponse),
+ (status = 400, description = "Bad request - invalid file type or malformed data"),
(status = 413, description = "Payload too large - file exceeds size limit"),
- (status = 401, description = "Unauthorized")
+ (status = 401, description = "Unauthorized - valid authentication required")
)
)]
async fn upload_document(
diff --git a/src/routes/metrics.rs b/src/routes/metrics.rs
new file mode 100644
index 0000000..1ba6ede
--- /dev/null
+++ b/src/routes/metrics.rs
@@ -0,0 +1,267 @@
+use axum::{
+ extract::State,
+ http::StatusCode,
+ response::Json,
+ routing::get,
+ Router,
+};
+use std::sync::Arc;
+use serde::Serialize;
+use utoipa::ToSchema;
+
+use crate::{auth::AuthUser, AppState};
+
+/// Response body of the metrics endpoint: one section per metric family plus a capture timestamp.
+#[derive(Serialize, ToSchema)]
+pub struct SystemMetrics {
+ /// Connection-pool figures
+ pub database: DatabaseMetrics,
+ /// OCR queue figures
+ pub ocr: OcrMetrics,
+ /// Document counts and storage totals
+ pub documents: DocumentMetrics,
+ /// User counts
+ pub users: UserMetrics,
+ /// Version and uptime information
+ pub system: GeneralSystemMetrics,
+ /// Unix timestamp in seconds (UTC), taken when the request handler starts
+ pub timestamp: i64,
+}
+
+/// Database section of the metrics payload.
+#[derive(Serialize, ToSchema)]
+pub struct DatabaseMetrics {
+ /// Connection count reported by the pool (`pool.size()`)
+ pub active_connections: i32,
+ /// Placeholder: currently always 0 because query tracking is not implemented
+ pub total_queries_today: i64,
+ /// Placeholder: currently always 0.0 because query timing is not implemented
+ pub avg_query_time_ms: f64,
+}
+
+/// OCR queue section of the metrics payload, filled from the OCR queue statistics.
+#[derive(Serialize, ToSchema)]
+pub struct OcrMetrics {
+ /// Jobs waiting in the queue
+ pub pending_jobs: i64,
+ /// Jobs currently being processed
+ pub processing_jobs: i64,
+ /// Jobs recorded as failed
+ pub failed_jobs: i64,
+ /// Jobs completed today
+ pub completed_today: i64,
+ /// Populated from the queue's `avg_wait_time_minutes`.
+ /// NOTE(review): that is a wait time, while the field name says processing time — confirm which is intended.
+ pub avg_processing_time_minutes: Option,
+ /// Sum of pending and processing jobs
+ pub queue_depth: i64,
+ /// Age in minutes of the oldest pending job, when the queue statistics provide one
+ pub oldest_pending_minutes: Option,
+}
+
+/// Document section of the metrics payload (counts span all users).
+#[derive(Serialize, ToSchema)]
+pub struct DocumentMetrics {
+ /// Total number of rows in `documents`
+ pub total_documents: i64,
+ /// Documents whose creation date equals the database's current date
+ pub documents_uploaded_today: i64,
+ /// Sum of `file_size` over all documents, 0 when there are none
+ pub total_storage_bytes: i64,
+ /// `total_storage_bytes / total_documents`, 0.0 when there are no documents
+ pub avg_document_size_bytes: f64,
+ /// Documents counted as having OCR text
+ pub documents_with_ocr: i64,
+ /// `total_documents - documents_with_ocr`
+ pub documents_without_ocr: i64,
+}
+
+/// User section of the metrics payload.
+#[derive(Serialize, ToSchema)]
+pub struct UserMetrics {
+ /// Total number of rows in `users`
+ pub total_users: i64,
+ /// Distinct users owning at least one document created today (upload activity is the only signal used)
+ pub active_users_today: i64,
+ /// Users whose creation date equals the database's current date
+ pub new_registrations_today: i64,
+}
+
+/// General application information in the metrics payload.
+#[derive(Serialize, ToSchema)]
+pub struct GeneralSystemMetrics {
+ /// Placeholder: currently hard-coded to 3600, not a measured uptime
+ pub uptime_seconds: u64,
+ /// Crate version captured at compile time from `CARGO_PKG_VERSION`
+ pub app_version: String,
+ /// Value of the `RUST_VERSION` environment variable at request time, or "unknown" when unset
+ pub rust_version: String,
+}
+
+/// Build the metrics router: `GET /` on this router is handled by `get_system_metrics`.
+pub fn router() -> Router> {
+ Router::new()
+ .route("/", get(get_system_metrics))
+}
+
+/// Return a snapshot of system metrics.
+///
+/// The `AuthUser` extractor makes the route authenticated; its value is otherwise unused.
+/// All five collectors are awaited through `tokio::try_join!`, so the first collector
+/// error is returned as the response status and no partial payload is produced.
+#[utoipa::path(
+ get,
+ path = "/api/metrics",
+ tag = "metrics",
+ security(
+ ("bearer_auth" = [])
+ ),
+ responses(
+ (status = 200, description = "System metrics and monitoring data", body = SystemMetrics),
+ (status = 401, description = "Unauthorized - valid authentication required"),
+ (status = 500, description = "Internal server error")
+ )
+)]
+pub async fn get_system_metrics(
+ State(state): State>,
+ _auth_user: AuthUser, // Require authentication
+) -> Result, StatusCode> {
+ // Captured before the collectors run, so it marks the start of collection
+ let timestamp = chrono::Utc::now().timestamp();
+
+ // Collect all metrics concurrently for better performance
+ let (database_metrics, ocr_metrics, document_metrics, user_metrics, system_metrics) = tokio::try_join!(
+ collect_database_metrics(&state),
+ collect_ocr_metrics(&state),
+ collect_document_metrics(&state),
+ collect_user_metrics(&state),
+ collect_system_metrics()
+ )?;
+
+ let metrics = SystemMetrics {
+ database: database_metrics,
+ ocr: ocr_metrics,
+ documents: document_metrics,
+ users: user_metrics,
+ system: system_metrics,
+ timestamp,
+ };
+
+ Ok(Json(metrics))
+}
+
+/// Report connection-pool statistics for the metrics endpoint.
+///
+/// Only the connection count is a live value; the query counters are placeholders
+/// until query tracking exists. This collector never returns an error.
+async fn collect_database_metrics(state: &Arc<AppState>) -> Result<DatabaseMetrics, StatusCode> {
+    // Connection count as reported by the pool
+    let active_connections = state.db.pool.size() as i32;
+
+    // For now, use placeholder values for queries
+    // In production, you might want to implement query tracking
+    Ok(DatabaseMetrics {
+        active_connections,
+        total_queries_today: 0, // Placeholder - would need query tracking
+        avg_query_time_ms: 0.0, // Placeholder - would need query timing
+    })
+}
+
+/// Translate the OCR queue statistics into the `OcrMetrics` section.
+///
+/// A failure to read the statistics is logged and reported as `INTERNAL_SERVER_ERROR`.
+async fn collect_ocr_metrics(state: &Arc) -> Result {
+ // Use existing OCR queue statistics
+ use crate::ocr_queue::OcrQueueService;
+
+ // A fresh service value is constructed on every call, used only to read the statistics
+ let queue_service = OcrQueueService::new(
+ state.db.clone(),
+ state.db.pool.clone(),
+ state.config.concurrent_ocr_jobs
+ );
+
+ let stats = queue_service
+ .get_stats()
+ .await
+ .map_err(|e| {
+ tracing::error!("Failed to get OCR stats: {}", e);
+ StatusCode::INTERNAL_SERVER_ERROR
+ })?;
+
+ Ok(OcrMetrics {
+ pending_jobs: stats.pending_count,
+ processing_jobs: stats.processing_count,
+ failed_jobs: stats.failed_count,
+ completed_today: stats.completed_today,
+ // NOTE(review): a wait-time statistic is published under a processing-time name — confirm intended
+ avg_processing_time_minutes: stats.avg_wait_time_minutes,
+ queue_depth: stats.pending_count + stats.processing_count,
+ oldest_pending_minutes: stats.oldest_pending_minutes,
+ })
+}
+
+/// Gather document counts and storage totals from the `documents` table.
+///
+/// Runs four aggregate queries (total count, created today, total size, with OCR text)
+/// and derives the remaining figures from them. Any query failure is logged and
+/// reported as `INTERNAL_SERVER_ERROR`.
+async fn collect_document_metrics(state: &Arc<AppState>) -> Result<DocumentMetrics, StatusCode> {
+    // Get total document count
+    let total_docs = sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM documents")
+        .fetch_one(&state.db.pool)
+        .await
+        .map_err(|e| {
+            tracing::error!("Failed to get total document count: {}", e);
+            StatusCode::INTERNAL_SERVER_ERROR
+        })?;
+
+    // Get documents uploaded today
+    let docs_today = sqlx::query_scalar::<_, i64>(
+        "SELECT COUNT(*) FROM documents WHERE DATE(created_at) = CURRENT_DATE"
+    )
+    .fetch_one(&state.db.pool)
+    .await
+    .map_err(|e| {
+        tracing::error!("Failed to get today's document count: {}", e);
+        StatusCode::INTERNAL_SERVER_ERROR
+    })?;
+
+    // Get total storage size. PostgreSQL's SUM over a BIGINT column yields NUMERIC, which
+    // does not decode into i64, so the sum is cast back to BIGINT. The result is NULL when
+    // the table is empty, hence the Option and the fallback to 0.
+    let total_size = sqlx::query_scalar::<_, Option<i64>>("SELECT SUM(file_size)::BIGINT FROM documents")
+        .fetch_one(&state.db.pool)
+        .await
+        .map_err(|e| {
+            tracing::error!("Failed to get total storage size: {}", e);
+            StatusCode::INTERNAL_SERVER_ERROR
+        })?
+        .unwrap_or(0);
+
+    // Get documents with and without OCR. `has_ocr_text` exists only as a derived response
+    // field, not as a column of `documents`; the stored OCR output lives in `ocr_text`.
+    let docs_with_ocr = sqlx::query_scalar::<_, i64>(
+        "SELECT COUNT(*) FROM documents WHERE ocr_text IS NOT NULL"
+    )
+    .fetch_one(&state.db.pool)
+    .await
+    .map_err(|e| {
+        tracing::error!("Failed to get OCR document count: {}", e);
+        StatusCode::INTERNAL_SERVER_ERROR
+    })?;
+
+    let docs_without_ocr = total_docs - docs_with_ocr;
+
+    // Guard the division so an empty table reports 0.0 instead of NaN
+    let avg_size = if total_docs > 0 {
+        total_size as f64 / total_docs as f64
+    } else {
+        0.0
+    };
+
+    Ok(DocumentMetrics {
+        total_documents: total_docs,
+        documents_uploaded_today: docs_today,
+        total_storage_bytes: total_size,
+        avg_document_size_bytes: avg_size,
+        documents_with_ocr: docs_with_ocr,
+        documents_without_ocr: docs_without_ocr,
+    })
+}
+
+/// Gather user counts for the metrics endpoint.
+///
+/// Runs three aggregate queries; any failure is logged and reported as `INTERNAL_SERVER_ERROR`.
+async fn collect_user_metrics(state: &Arc) -> Result {
+ // Get total user count
+ let total_users = sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM users")
+ .fetch_one(&state.db.pool)
+ .await
+ .map_err(|e| {
+ tracing::error!("Failed to get total user count: {}", e);
+ StatusCode::INTERNAL_SERVER_ERROR
+ })?;
+
+ // Get new users today
+ // "Today" is the database's CURRENT_DATE, compared against the date part of created_at
+ let new_users_today = sqlx::query_scalar::<_, i64>(
+ "SELECT COUNT(*) FROM users WHERE DATE(created_at) = CURRENT_DATE"
+ )
+ .fetch_one(&state.db.pool)
+ .await
+ .map_err(|e| {
+ tracing::error!("Failed to get new user count: {}", e);
+ StatusCode::INTERNAL_SERVER_ERROR
+ })?;
+
+ // For active users, count users who uploaded documents today
+ // Users who were active without creating a document are not counted by this query
+ let active_users_today = sqlx::query_scalar::<_, i64>(
+ "SELECT COUNT(DISTINCT user_id) FROM documents WHERE DATE(created_at) = CURRENT_DATE"
+ )
+ .fetch_one(&state.db.pool)
+ .await
+ .map_err(|e| {
+ tracing::error!("Failed to get active user count: {}", e);
+ StatusCode::INTERNAL_SERVER_ERROR
+ })?;
+
+ Ok(UserMetrics {
+ total_users,
+ active_users_today,
+ new_registrations_today: new_users_today,
+ })
+}
+
+/// Produce the general system section of the metrics payload.
+///
+/// Performs no I/O and always returns `Ok`. The uptime value is a fixed placeholder.
+async fn collect_system_metrics() -> Result {
+ // Get application uptime (this is a simplified version)
+ // In a real application, you'd track the start time
+ let uptime_seconds = 3600; // Placeholder
+
+ // Get version information
+ // app_version is baked in at compile time; rust_version is looked up in the process
+ // environment on every call and falls back to "unknown" when the variable is not set
+ let app_version = env!("CARGO_PKG_VERSION").to_string();
+ let rust_version = std::env::var("RUST_VERSION").unwrap_or_else(|_| "unknown".to_string());
+
+ Ok(GeneralSystemMetrics {
+ uptime_seconds,
+ app_version,
+ rust_version,
+ })
+}
\ No newline at end of file
diff --git a/src/routes/mod.rs b/src/routes/mod.rs
index 965f9ed..caba618 100644
--- a/src/routes/mod.rs
+++ b/src/routes/mod.rs
@@ -1,5 +1,6 @@
pub mod auth;
pub mod documents;
+pub mod metrics;
pub mod queue;
pub mod search;
pub mod settings;
diff --git a/src/routes/queue.rs b/src/routes/queue.rs
index a757bf6..257bf66 100644
--- a/src/routes/queue.rs
+++ b/src/routes/queue.rs
@@ -23,8 +23,9 @@ pub fn router() -> Router> {
("bearer_auth" = [])
),
responses(
- (status = 200, description = "OCR queue statistics"),
- (status = 401, description = "Unauthorized")
+ (status = 200, description = "OCR queue statistics including pending jobs, processing status, and performance metrics"),
+ (status = 401, description = "Unauthorized - valid authentication required"),
+ (status = 500, description = "Internal server error")
)
)]
async fn get_queue_stats(
diff --git a/src/routes/search.rs b/src/routes/search.rs
index 1b83354..63658ff 100644
--- a/src/routes/search.rs
+++ b/src/routes/search.rs
@@ -30,8 +30,9 @@ pub fn router() -> Router> {
SearchRequest
),
responses(
- (status = 200, description = "Search results", body = SearchResponse),
- (status = 401, description = "Unauthorized")
+ (status = 200, description = "Enhanced search results with relevance ranking, text snippets, and OCR-extracted content matching", body = SearchResponse),
+ (status = 401, description = "Unauthorized - valid authentication required"),
+ (status = 500, description = "Internal server error")
)
)]
async fn search_documents(
diff --git a/src/swagger.rs b/src/swagger.rs
index 396417e..dd74b3d 100644
--- a/src/swagger.rs
+++ b/src/swagger.rs
@@ -10,6 +10,9 @@ use crate::{
DocumentResponse, SearchRequest, SearchResponse, EnhancedDocumentResponse,
SettingsResponse, UpdateSettings, SearchMode, SearchSnippet, HighlightRange
},
+ routes::metrics::{
+ SystemMetrics, DatabaseMetrics, OcrMetrics, DocumentMetrics, UserMetrics, GeneralSystemMetrics
+ },
AppState,
};
@@ -39,12 +42,15 @@ use crate::{
// Queue endpoints
crate::routes::queue::get_queue_stats,
crate::routes::queue::requeue_failed,
+ // Metrics endpoints
+ crate::routes::metrics::get_system_metrics,
),
components(
schemas(
CreateUser, LoginRequest, LoginResponse, UserResponse, UpdateUser,
DocumentResponse, SearchRequest, SearchResponse, EnhancedDocumentResponse,
- SettingsResponse, UpdateSettings, SearchMode, SearchSnippet, HighlightRange
+ SettingsResponse, UpdateSettings, SearchMode, SearchSnippet, HighlightRange,
+ SystemMetrics, DatabaseMetrics, OcrMetrics, DocumentMetrics, UserMetrics, GeneralSystemMetrics
)
),
tags(
@@ -54,6 +60,7 @@ use crate::{
(name = "settings", description = "User settings endpoints"),
(name = "users", description = "User management endpoints"),
(name = "queue", description = "OCR queue management endpoints"),
+ (name = "metrics", description = "System metrics and monitoring endpoints"),
),
modifiers(&SecurityAddon),
info(
diff --git a/src/tests/db_tests.rs b/src/tests/db_tests.rs
index fb924f6..ac97c5c 100644
--- a/src/tests/db_tests.rs
+++ b/src/tests/db_tests.rs
@@ -37,6 +37,12 @@ mod tests {
mime_type: "application/pdf".to_string(),
content: Some("Test content".to_string()),
ocr_text: Some("OCR extracted text".to_string()),
+ ocr_confidence: Some(95.0),
+ ocr_word_count: Some(10),
+ ocr_processing_time_ms: Some(800),
+ ocr_status: Some("completed".to_string()),
+ ocr_error: None,
+ ocr_completed_at: Some(Utc::now()),
tags: vec!["test".to_string(), "document".to_string()],
created_at: Utc::now(),
updated_at: Utc::now(),
diff --git a/src/tests/enhanced_search_tests.rs b/src/tests/enhanced_search_tests.rs
index ebe1077..bf832f1 100644
--- a/src/tests/enhanced_search_tests.rs
+++ b/src/tests/enhanced_search_tests.rs
@@ -300,6 +300,10 @@ mod tests {
tags: vec!["test".to_string()],
created_at: now,
has_ocr_text: true,
+ ocr_confidence: Some(85.5),
+ ocr_word_count: Some(50),
+ ocr_processing_time_ms: Some(1500),
+ ocr_status: Some("completed".to_string()),
search_rank: Some(0.75),
snippets,
};
@@ -679,6 +683,10 @@ mod tests {
tags: vec!["test".to_string(), "document".to_string()],
created_at: now,
has_ocr_text: true,
+ ocr_confidence: Some(92.3),
+ ocr_word_count: Some(75),
+ ocr_processing_time_ms: Some(2000),
+ ocr_status: Some("completed".to_string()),
search_rank: Some(0.85),
snippets: vec![
SearchSnippet {
@@ -919,6 +927,12 @@ mod tests {
mime_type: "application/pdf".to_string(),
content: Some("This is a comprehensive test document for enhanced search functionality testing".to_string()),
ocr_text: Some("OCR extracted content with additional test information for search validation".to_string()),
+ ocr_confidence: Some(88.7),
+ ocr_word_count: Some(25),
+ ocr_processing_time_ms: Some(1200),
+ ocr_status: Some("completed".to_string()),
+ ocr_error: None,
+ ocr_completed_at: Some(Utc::now()),
tags: vec!["enhanced".to_string(), "search".to_string(), "test".to_string()],
created_at: Utc::now(),
updated_at: Utc::now(),