feat(server/client): the /documents endpoint works again, and so does the watch folder...kinda

This commit is contained in:
perf3ct 2025-06-13 15:53:19 +00:00
parent e6e2ba76f5
commit 00b2bfe22c
16 changed files with 752 additions and 15 deletions

View File

@ -12,6 +12,7 @@ import DocumentsPage from './pages/DocumentsPage';
import SearchPage from './pages/SearchPage';
import DocumentDetailsPage from './pages/DocumentDetailsPage';
import SettingsPage from './pages/SettingsPage';
import WatchFolderPage from './pages/WatchFolderPage';
function App() {
const { user, loading } = useAuth();
@ -63,7 +64,7 @@ function App() {
<Route path="/documents" element={<DocumentsPage />} />
<Route path="/documents/:id" element={<DocumentDetailsPage />} />
<Route path="/search" element={<SearchPage />} />
<Route path="/watch" element={<div>Watch Folder Page - Coming Soon</div>} />
<Route path="/watch" element={<WatchFolderPage />} />
<Route path="/settings" element={<SettingsPage />} />
<Route path="/profile" element={<div>Profile Page - Coming Soon</div>} />
</Routes>

View File

@ -0,0 +1,341 @@
import React, { useState, useEffect } from 'react';
import {
Box,
Container,
Typography,
Paper,
Card,
CardContent,
Grid,
Chip,
LinearProgress,
Table,
TableBody,
TableCell,
TableContainer,
TableHead,
TableRow,
Alert,
Button,
IconButton,
} from '@mui/material';
import {
Refresh as RefreshIcon,
Folder as FolderIcon,
CheckCircleOutline as CheckCircleIcon,
Error as ErrorIcon,
Schedule as ScheduleIcon,
Visibility as VisibilityIcon,
CloudUpload as CloudUploadIcon,
Description as DescriptionIcon,
} from '@mui/icons-material';
import { useTheme } from '@mui/material/styles';
import { queueService } from '../services/api';
// Watch Folder page component: renders the watch-folder configuration, the
// OCR queue statistics fetched from the API, and a static description of the
// processing pipeline.
const WatchFolderPage = () => {
// NOTE(review): `theme` is not referenced anywhere else in this component.
const theme = useTheme();
// Last payload returned by queueService.getStats(); null until a fetch succeeds.
const [queueStats, setQueueStats] = useState(null);
// True while a fetch is in flight; the Refresh button is disabled while true.
const [loading, setLoading] = useState(true);
// Message shown in the error alert after a failed fetch, or null.
const [error, setError] = useState(null);
// Date of the most recent successful fetch, or null.
const [lastRefresh, setLastRefresh] = useState(null);
// Mock configuration data (would typically come from API)
// Everything below is hard-coded in the client: `isActive` is always true, so
// the status chip always reads "Active" regardless of the server's state.
// NOTE(review): `process.env` only exists in the browser if the bundler
// injects it (e.g. Create React App) - confirm against the build tooling.
const watchConfig = {
watchFolder: process.env.REACT_APP_WATCH_FOLDER || './watch',
watchInterval: 30,
maxFileAge: 24,
allowedTypes: ['pdf', 'png', 'jpg', 'jpeg', 'tiff', 'bmp', 'txt', 'doc', 'docx'],
isActive: true,
strategy: 'hybrid'
};
// Runs once after the first render (empty dependency list): fetch immediately,
// then poll on a timer, and stop the timer when the component unmounts.
// `fetchQueueStats` is declared further down, but the effect body only runs
// after the whole component function has executed, so it is initialized by then.
useEffect(() => {
fetchQueueStats();
const interval = setInterval(fetchQueueStats, 30000); // Refresh every 30 seconds
return () => clearInterval(interval);
}, []);
// Fetches the queue statistics and mirrors the outcome into component state.
// On success the stats and refresh time are stored and any previous error is
// cleared; on failure the error is logged and a user-facing message is set.
// The loading flag is raised for the duration of the request either way.
const fetchQueueStats = async () => {
  try {
    setLoading(true);
    const { data: stats } = await queueService.getStats();
    setQueueStats(stats);
    setLastRefresh(new Date());
    setError(null);
  } catch (fetchError) {
    console.error('Error fetching queue stats:', fetchError);
    setError('Failed to fetch queue statistics');
  } finally {
    // Always release the loading flag so the Refresh button is re-enabled.
    setLoading(false);
  }
};
/**
 * Formats a byte count as a human-readable size using 1024-based units.
 *
 * @param {number} bytes - Size in bytes. Numeric strings are coerced.
 * @returns {string} Value with at most two decimals plus a unit, e.g.
 *   `'1.5 KB'`. Zero, negative and non-numeric input yield `'0 Bytes'`.
 */
const formatFileSize = (bytes) => {
  const value = Number(bytes);
  // Zero, negatives and NaN/Infinity have no usable logarithm; without this
  // guard they would render as "NaN undefined".
  if (!Number.isFinite(value) || value <= 0) return '0 Bytes';

  const k = 1024;
  const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB'];
  const exponent = Math.floor(Math.log(value) / Math.log(k));
  // Clamp so sub-byte values and sizes beyond the largest unit still map to a
  // real entry of `sizes` instead of `undefined`.
  const i = Math.min(Math.max(exponent, 0), sizes.length - 1);

  return `${parseFloat((value / Math.pow(k, i)).toFixed(2))} ${sizes[i]}`;
};
/**
 * Formats a duration given in minutes as `"<m>m"` or `"<h>h <m>m"`.
 *
 * @param {number|null|undefined} minutes - Duration in (possibly fractional)
 *   minutes.
 * @returns {string} Formatted duration, or `'N/A'` when the value is missing
 *   or not a finite number. A genuine zero is rendered as `'0m'`.
 */
const formatDuration = (minutes) => {
  const value = typeof minutes === 'number' ? minutes : Number.parseFloat(minutes);
  // Only a missing or non-numeric value means "not available"; 0 is a real
  // measurement and must not be swallowed by a truthiness check.
  if (!Number.isFinite(value)) return 'N/A';

  // Round to whole minutes before splitting into hours and minutes, so that
  // 59.6 becomes "1h 0m" rather than "60m" and 119.7 becomes "2h 0m" rather
  // than "1h 60m".
  const totalMinutes = Math.round(value);
  if (totalMinutes < 60) return `${totalMinutes}m`;

  const hours = Math.floor(totalMinutes / 60);
  const mins = totalMinutes % 60;
  return `${hours}h ${mins}m`;
};
// Maps a status string to the colour name passed to the status Chip.
// Any status other than 'active', 'error' or 'pending' maps to 'default'.
const getStatusColor = (status) => {
  // A Map (rather than a plain object) keeps the lookup strict: inherited
  // keys such as 'constructor' and non-string inputs never match.
  const colorByStatus = new Map([
    ['active', 'success'],
    ['error', 'error'],
    ['pending', 'warning'],
  ]);
  return colorByStatus.get(status) ?? 'default';
};
const getStatusIcon = (status) => {
switch (status) {
case 'active': return <CheckCircleIcon />;
case 'error': return <ErrorIcon />;
case 'pending': return <ScheduleIcon />;
default: return <VisibilityIcon />;
}
};
return (
<Container maxWidth="xl" sx={{ mt: 4, mb: 4 }}>
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', mb: 4 }}>
<Typography variant="h4" sx={{ fontWeight: 600 }}>
Watch Folder
</Typography>
<Button
variant="outlined"
startIcon={<RefreshIcon />}
onClick={fetchQueueStats}
disabled={loading}
>
Refresh
</Button>
</Box>
{error && (
<Alert severity="error" sx={{ mb: 3 }}>
{error}
</Alert>
)}
{/* Watch Folder Configuration */}
<Card sx={{ mb: 3 }}>
<CardContent>
<Typography variant="h6" sx={{ mb: 2, display: 'flex', alignItems: 'center', gap: 1 }}>
<FolderIcon color="primary" />
Watch Folder Configuration
</Typography>
<Grid container spacing={2}>
<Grid item xs={12} md={6}>
<Box sx={{ mb: 2 }}>
<Typography variant="body2" color="text.secondary">
Watched Directory
</Typography>
<Typography variant="body1" sx={{ fontFamily: 'monospace', bgcolor: 'grey.100', p: 1, borderRadius: 1 }}>
{watchConfig.watchFolder}
</Typography>
</Box>
</Grid>
<Grid item xs={12} md={6}>
<Box sx={{ mb: 2 }}>
<Typography variant="body2" color="text.secondary">
Status
</Typography>
<Chip
icon={getStatusIcon(watchConfig.isActive ? 'active' : 'error')}
label={watchConfig.isActive ? 'Active' : 'Inactive'}
color={getStatusColor(watchConfig.isActive ? 'active' : 'error')}
variant="filled"
/>
</Box>
</Grid>
<Grid item xs={12} md={4}>
<Box sx={{ mb: 2 }}>
<Typography variant="body2" color="text.secondary">
Watch Strategy
</Typography>
<Typography variant="body1" sx={{ textTransform: 'capitalize' }}>
{watchConfig.strategy}
</Typography>
</Box>
</Grid>
<Grid item xs={12} md={4}>
<Box sx={{ mb: 2 }}>
<Typography variant="body2" color="text.secondary">
Scan Interval
</Typography>
<Typography variant="body1">
{watchConfig.watchInterval} seconds
</Typography>
</Box>
</Grid>
<Grid item xs={12} md={4}>
<Box sx={{ mb: 2 }}>
<Typography variant="body2" color="text.secondary">
Max File Age
</Typography>
<Typography variant="body1">
{watchConfig.maxFileAge} hours
</Typography>
</Box>
</Grid>
<Grid item xs={12}>
<Box sx={{ mb: 2 }}>
<Typography variant="body2" color="text.secondary" sx={{ mb: 1 }}>
Supported File Types
</Typography>
<Box sx={{ display: 'flex', flexWrap: 'wrap', gap: 0.5 }}>
{watchConfig.allowedTypes.map((type) => (
<Chip
key={type}
label={`.${type}`}
size="small"
variant="outlined"
color="primary"
/>
))}
</Box>
</Box>
</Grid>
</Grid>
</CardContent>
</Card>
{/* Queue Statistics */}
{queueStats && (
<Card sx={{ mb: 3 }}>
<CardContent>
<Typography variant="h6" sx={{ mb: 2, display: 'flex', alignItems: 'center', gap: 1 }}>
<CloudUploadIcon color="primary" />
Processing Queue
</Typography>
<Grid container spacing={2}>
<Grid item xs={12} sm={6} md={3}>
<Box sx={{ textAlign: 'center', p: 2, bgcolor: 'info.light', borderRadius: 2 }}>
<Typography variant="h4" sx={{ fontWeight: 600, color: 'info.dark' }}>
{queueStats.pending_count}
</Typography>
<Typography variant="body2" color="text.secondary">
Pending
</Typography>
</Box>
</Grid>
<Grid item xs={12} sm={6} md={3}>
<Box sx={{ textAlign: 'center', p: 2, bgcolor: 'warning.light', borderRadius: 2 }}>
<Typography variant="h4" sx={{ fontWeight: 600, color: 'warning.dark' }}>
{queueStats.processing_count}
</Typography>
<Typography variant="body2" color="text.secondary">
Processing
</Typography>
</Box>
</Grid>
<Grid item xs={12} sm={6} md={3}>
<Box sx={{ textAlign: 'center', p: 2, bgcolor: 'error.light', borderRadius: 2 }}>
<Typography variant="h4" sx={{ fontWeight: 600, color: 'error.dark' }}>
{queueStats.failed_count}
</Typography>
<Typography variant="body2" color="text.secondary">
Failed
</Typography>
</Box>
</Grid>
<Grid item xs={12} sm={6} md={3}>
<Box sx={{ textAlign: 'center', p: 2, bgcolor: 'success.light', borderRadius: 2 }}>
<Typography variant="h4" sx={{ fontWeight: 600, color: 'success.dark' }}>
{queueStats.completed_today}
</Typography>
<Typography variant="body2" color="text.secondary">
Completed Today
</Typography>
</Box>
</Grid>
</Grid>
<Grid container spacing={2} sx={{ mt: 2 }}>
<Grid item xs={12} md={6}>
<Box sx={{ p: 2, bgcolor: 'grey.50', borderRadius: 2 }}>
<Typography variant="body2" color="text.secondary" sx={{ mb: 1 }}>
Average Wait Time
</Typography>
<Typography variant="h6">
{formatDuration(queueStats.avg_wait_time_minutes)}
</Typography>
</Box>
</Grid>
<Grid item xs={12} md={6}>
<Box sx={{ p: 2, bgcolor: 'grey.50', borderRadius: 2 }}>
<Typography variant="body2" color="text.secondary" sx={{ mb: 1 }}>
Oldest Pending Item
</Typography>
<Typography variant="h6">
{formatDuration(queueStats.oldest_pending_minutes)}
</Typography>
</Box>
</Grid>
</Grid>
{lastRefresh && (
<Typography variant="caption" color="text.secondary" sx={{ mt: 2, display: 'block' }}>
Last updated: {lastRefresh.toLocaleTimeString()}
</Typography>
)}
</CardContent>
</Card>
)}
{/* Processing Information */}
<Card>
<CardContent>
<Typography variant="h6" sx={{ mb: 2, display: 'flex', alignItems: 'center', gap: 1 }}>
<DescriptionIcon color="primary" />
How Watch Folder Works
</Typography>
<Typography variant="body1" sx={{ mb: 2 }}>
The watch folder system automatically monitors the configured directory for new files and processes them for OCR.
</Typography>
<Box sx={{ mb: 3 }}>
<Typography variant="subtitle2" sx={{ mb: 1, color: 'primary.main' }}>
Processing Pipeline:
</Typography>
<Box sx={{ pl: 2 }}>
<Typography variant="body2" sx={{ mb: 0.5 }}>
1. <strong>File Detection:</strong> New files are detected using hybrid watching (inotify + polling)
</Typography>
<Typography variant="body2" sx={{ mb: 0.5 }}>
2. <strong>Validation:</strong> Files are checked for supported format and size limits
</Typography>
<Typography variant="body2" sx={{ mb: 0.5 }}>
3. <strong>Deduplication:</strong> System prevents processing of duplicate files
</Typography>
<Typography variant="body2" sx={{ mb: 0.5 }}>
4. <strong>Storage:</strong> Files are moved to the document storage system
</Typography>
<Typography variant="body2" sx={{ mb: 0.5 }}>
5. <strong>OCR Queue:</strong> Documents are queued for OCR processing with priority
</Typography>
</Box>
</Box>
<Alert severity="info" sx={{ mt: 2 }}>
<Typography variant="body2">
The system uses a hybrid watching strategy that automatically detects filesystem type and chooses
the optimal monitoring approach (inotify for local filesystems, polling for network mounts).
</Typography>
</Alert>
</CardContent>
</Card>
</Container>
);
};
export default WatchFolderPage;

View File

@ -72,6 +72,15 @@ export interface SearchResponse {
suggestions: string[]
}
/**
 * Payload type requested by `queueService.getStats()` from `/queue/stats`.
 * The Watch Folder page renders the four counters as "Pending", "Processing",
 * "Failed" and "Completed Today", and the two durations as "Average Wait
 * Time" and "Oldest Pending Item".
 */
export interface QueueStats {
pending_count: number
processing_count: number
failed_count: number
completed_today: number
// NOTE(review): the server-side metrics code treats these two values as
// `Option<f64>`; if the queue endpoint serializes `None` as JSON `null`
// (serde's default) the type should be `number | null` - confirm.
avg_wait_time_minutes?: number
oldest_pending_minutes?: number
}
export const documentService = {
upload: (file: File) => {
const formData = new FormData()
@ -111,4 +120,14 @@ export const documentService = {
},
})
},
}
/** Thin wrappers around the queue endpoints of the shared `api` client. */
export const queueService = {
  /** Issues `GET /queue/stats`; the response body is typed as `QueueStats`. */
  getStats: () => api.get<QueueStats>('/queue/stats'),

  /** Issues `POST /queue/requeue-failed` with no request body. */
  requeueFailed: () => api.post('/queue/requeue-failed'),
}

View File

@ -0,0 +1,3 @@
-- Add missing OCR columns to documents table for existing databases.
-- IF NOT EXISTS turns each statement into a no-op when the column is already
-- present, so this is safe to run on databases that already have the columns.
-- Both columns are nullable and have no default.

-- ocr_error: free-form text column.
ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_error TEXT;
-- ocr_completed_at: timestamp with time zone.
ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_completed_at TIMESTAMPTZ;

View File

@ -373,7 +373,7 @@ impl Database {
pub async fn get_documents_by_user(&self, user_id: Uuid, limit: i64, offset: i64) -> Result<Vec<Document>> {
let rows = sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
FROM documents
WHERE user_id = $1
ORDER BY created_at DESC
@ -416,7 +416,7 @@ impl Database {
pub async fn find_documents_by_filename(&self, filename: &str) -> Result<Vec<Document>> {
let rows = sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
FROM documents
WHERE filename = $1 OR original_filename = $1
ORDER BY created_at DESC
@ -456,7 +456,7 @@ impl Database {
pub async fn search_documents(&self, user_id: Uuid, search: SearchRequest) -> Result<(Vec<Document>, i64)> {
let mut query_builder = sqlx::QueryBuilder::new(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id,
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id,
ts_rank(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')), plainto_tsquery('english', "#
);
@ -550,7 +550,7 @@ impl Database {
// Use trigram similarity for substring matching
let mut builder = sqlx::QueryBuilder::new(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id,
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id,
GREATEST(
similarity(filename, "#
);
@ -589,7 +589,7 @@ impl Database {
let mut builder = sqlx::QueryBuilder::new(&format!(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id,
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id,
GREATEST(
CASE WHEN filename ILIKE '%' || "#
));

View File

@ -559,6 +559,7 @@ impl EnhancedOcrService {
}
/// Validate OCR result quality
#[cfg(feature = "ocr")]
pub fn validate_ocr_quality(&self, result: &OcrResult, settings: &Settings) -> bool {
// Check minimum confidence threshold
if result.confidence < settings.ocr_min_confidence {

View File

@ -73,6 +73,32 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
}
}
// Check if ocr_error column exists
let check_column = sqlx::query("SELECT column_name FROM information_schema.columns WHERE table_name = 'documents' AND column_name = 'ocr_error'")
.fetch_optional(&db.pool)
.await;
match check_column {
Ok(Some(_)) => info!("✅ ocr_error column exists"),
Ok(None) => {
error!("❌ ocr_error column is missing! Migration 006 may not have been applied.");
// Try to add the column manually as a fallback
info!("Attempting to add missing columns...");
if let Err(e) = sqlx::query("ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_error TEXT")
.execute(&db.pool)
.await {
error!("Failed to add ocr_error column: {}", e);
}
if let Err(e) = sqlx::query("ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_completed_at TIMESTAMPTZ")
.execute(&db.pool)
.await {
error!("Failed to add ocr_completed_at column: {}", e);
}
info!("Fallback column addition completed");
}
Err(e) => error!("Failed to check for ocr_error column: {}", e),
}
let result = migrations.run(&db.pool).await;
match result {
Ok(_) => info!("SQLx migrations completed successfully"),
@ -113,6 +139,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.route("/api/health", get(readur::health_check))
.nest("/api/auth", routes::auth::router())
.nest("/api/documents", routes::documents::router())
.nest("/api/metrics", routes::metrics::router())
.nest("/api/queue", routes::queue::router())
.nest("/api/search", routes::search::router())
.nest("/api/settings", routes::settings::router())

View File

@ -64,40 +64,64 @@ pub struct Document {
/// Document metadata returned by the API, e.g. as the body of a successful upload.
/// Carries OCR summary fields but not the document content or the OCR text itself.
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct DocumentResponse {
/// Unique identifier for the document
pub id: Uuid,
/// Current filename in the system
pub filename: String,
/// Original filename when uploaded
pub original_filename: String,
/// File size in bytes
pub file_size: i64,
/// MIME type of the file
pub mime_type: String,
/// Tags associated with the document
pub tags: Vec<String>,
/// When the document was created
pub created_at: DateTime<Utc>,
/// Whether OCR text has been extracted
pub has_ocr_text: bool,
/// OCR confidence score (0-100, higher is better)
pub ocr_confidence: Option<f32>,
/// Number of words detected by OCR
pub ocr_word_count: Option<i32>,
/// Time taken for OCR processing in milliseconds
pub ocr_processing_time_ms: Option<i32>,
/// Current status of OCR processing (pending, processing, completed, failed)
pub ocr_status: Option<String>,
}
/// Parameters accepted by the document search endpoint.
// NOTE(review): the defaults quoted in the field docs below are applied by
// code that is not part of this struct - confirm they match the handler.
#[derive(Debug, Serialize, Deserialize, ToSchema, IntoParams)]
pub struct SearchRequest {
/// Search query text (searches both document content and OCR-extracted text)
pub query: String,
/// Filter by specific tags
pub tags: Option<Vec<String>>,
/// Filter by MIME types (e.g., "application/pdf", "image/png")
pub mime_types: Option<Vec<String>>,
/// Maximum number of results to return (default: 25)
pub limit: Option<i64>,
/// Number of results to skip for pagination (default: 0)
pub offset: Option<i64>,
/// Whether to include text snippets with search matches (default: true)
pub include_snippets: Option<bool>,
/// Length of text snippets in characters (default: 200)
pub snippet_length: Option<i32>,
/// Search algorithm to use (default: simple)
pub search_mode: Option<SearchMode>,
}
/// Search algorithm selector used by `SearchRequest::search_mode`.
// Each variant is (de)serialized under its lowercase name via `serde(rename)`.
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub enum SearchMode {
/// Simple text search with basic word matching
#[serde(rename = "simple")]
Simple,
/// Exact phrase matching
#[serde(rename = "phrase")]
Phrase,
/// Fuzzy search using similarity matching (good for typos and partial matches)
#[serde(rename = "fuzzy")]
Fuzzy,
/// Boolean search with AND, OR, NOT operators
#[serde(rename = "boolean")]
Boolean,
}
@ -110,41 +134,65 @@ impl Default for SearchMode {
/// An excerpt of a document's text around a search match, with highlight ranges.
// NOTE(review): whether `end_offset` is inclusive or exclusive is not
// established by this definition - confirm against the snippet generator.
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct SearchSnippet {
/// The snippet text content
pub text: String,
/// Starting character position in the original document
pub start_offset: i32,
/// Ending character position in the original document
pub end_offset: i32,
/// Ranges within the snippet that should be highlighted
pub highlight_ranges: Vec<HighlightRange>,
}
/// A span inside a `SearchSnippet`'s text that should be highlighted.
// NOTE(review): positions are relative to the snippet, not the document;
// whether `end` is inclusive or exclusive is not shown here - confirm.
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct HighlightRange {
/// Start position of highlight within the snippet
pub start: i32,
/// End position of highlight within the snippet
pub end: i32,
}
/// A search hit: the same metadata as `DocumentResponse` plus a relevance score
/// and the text snippets that matched the query.
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct EnhancedDocumentResponse {
/// Unique identifier for the document
pub id: Uuid,
/// Current filename in the system
pub filename: String,
/// Original filename when uploaded
pub original_filename: String,
/// File size in bytes
pub file_size: i64,
/// MIME type of the file
pub mime_type: String,
/// Tags associated with the document
pub tags: Vec<String>,
/// When the document was created
pub created_at: DateTime<Utc>,
/// Whether OCR text has been extracted
pub has_ocr_text: bool,
/// OCR confidence score (0-100, higher is better)
pub ocr_confidence: Option<f32>,
/// Number of words detected by OCR
pub ocr_word_count: Option<i32>,
/// Time taken for OCR processing in milliseconds
pub ocr_processing_time_ms: Option<i32>,
/// Current status of OCR processing (pending, processing, completed, failed)
pub ocr_status: Option<String>,
/// Search relevance score (0-1, higher is more relevant)
pub search_rank: Option<f32>,
/// Text snippets showing search matches with highlights
pub snippets: Vec<SearchSnippet>,
}
/// Response body of the document search endpoint.
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct SearchResponse {
/// List of matching documents with enhanced metadata and snippets
pub documents: Vec<EnhancedDocumentResponse>,
/// Total number of documents matching the search criteria
pub total: i64,
/// Time taken to execute the search in milliseconds
pub query_time_ms: u64,
/// Search suggestions for query improvement
pub suggestions: Vec<String>,
}

View File

@ -37,12 +37,12 @@ pub fn router() -> Router<Arc<AppState>> {
security(
("bearer_auth" = [])
),
request_body(content = String, description = "Multipart form data with file", content_type = "multipart/form-data"),
request_body(content = String, description = "Multipart form data with file. Supported formats: PDF, PNG, JPG, JPEG, TIFF, BMP, TXT. OCR will be automatically performed on image and PDF files.", content_type = "multipart/form-data"),
responses(
(status = 200, description = "Document uploaded successfully", body = DocumentResponse),
(status = 400, description = "Bad request - invalid file or data"),
(status = 200, description = "Document uploaded successfully. OCR processing will begin automatically if enabled in user settings.", body = DocumentResponse),
(status = 400, description = "Bad request - invalid file type or malformed data"),
(status = 413, description = "Payload too large - file exceeds size limit"),
(status = 401, description = "Unauthorized")
(status = 401, description = "Unauthorized - valid authentication required")
)
)]
async fn upload_document(

267
src/routes/metrics.rs Normal file
View File

@ -0,0 +1,267 @@
use axum::{
extract::State,
http::StatusCode,
response::Json,
routing::get,
Router,
};
use std::sync::Arc;
use serde::Serialize;
use utoipa::ToSchema;
use crate::{auth::AuthUser, AppState};
/// Snapshot of system metrics returned by `GET /api/metrics`.
#[derive(Serialize, ToSchema)]
pub struct SystemMetrics {
/// Database connection-pool metrics
pub database: DatabaseMetrics,
/// OCR queue metrics
pub ocr: OcrMetrics,
/// Document counts and storage totals
pub documents: DocumentMetrics,
/// User counts
pub users: UserMetrics,
/// Application-level information (uptime, versions)
pub system: GeneralSystemMetrics,
/// Unix timestamp in seconds (UTC) taken when the request started being handled
pub timestamp: i64,
}
/// Database connection-pool metrics.
#[derive(Serialize, ToSchema)]
pub struct DatabaseMetrics {
/// Size of the connection pool when the metrics were collected
pub active_connections: i32,
// The two fields below are filled with constant zeros by
// `collect_database_metrics`; no query tracking exists yet.
/// Number of queries executed today (not tracked yet; always 0)
pub total_queries_today: i64,
/// Average query time in milliseconds (not tracked yet; always 0)
pub avg_query_time_ms: f64,
}
/// OCR queue metrics, copied from the OCR queue service's statistics.
#[derive(Serialize, ToSchema)]
pub struct OcrMetrics {
/// Jobs waiting in the queue
pub pending_jobs: i64,
/// Jobs currently being processed
pub processing_jobs: i64,
/// Jobs that failed
pub failed_jobs: i64,
/// Jobs completed today
pub completed_today: i64,
// NOTE(review): `collect_ocr_metrics` fills this from the queue's
// `avg_wait_time_minutes`, i.e. a wait time rather than a processing time.
/// Average time in minutes, if available
pub avg_processing_time_minutes: Option<f64>,
/// Sum of pending and processing jobs
pub queue_depth: i64,
/// Age in minutes of the oldest pending job, if any
pub oldest_pending_minutes: Option<f64>,
}
/// Document counts and storage totals across all users.
#[derive(Serialize, ToSchema)]
pub struct DocumentMetrics {
/// Total number of documents
pub total_documents: i64,
/// Number of documents created on the current database date
pub documents_uploaded_today: i64,
/// Sum of all document file sizes in bytes (0 when there are no documents)
pub total_storage_bytes: i64,
/// Mean document file size in bytes (0 when there are no documents)
pub avg_document_size_bytes: f64,
/// Number of documents counted as having OCR text
pub documents_with_ocr: i64,
/// Total documents minus `documents_with_ocr`
pub documents_without_ocr: i64,
}
/// User counts.
#[derive(Serialize, ToSchema)]
pub struct UserMetrics {
/// Total number of registered users
pub total_users: i64,
/// Number of distinct users who own a document created on the current database date
pub active_users_today: i64,
/// Number of users created on the current database date
pub new_registrations_today: i64,
}
/// Application-level information.
#[derive(Serialize, ToSchema)]
pub struct GeneralSystemMetrics {
// NOTE(review): how this value is derived is decided by
// `collect_system_metrics`; confirm it reflects real process uptime.
/// Server uptime in seconds
pub uptime_seconds: u64,
/// Application version taken from the crate's package version at build time
pub app_version: String,
/// Value of the `RUST_VERSION` environment variable at runtime, or "unknown"
pub rust_version: String,
}
/// Builds the metrics router: `GET /` is served by `get_system_metrics`.
/// `main` nests this router under `/api/metrics`.
pub fn router() -> Router<Arc<AppState>> {
    let metrics_routes = Router::new();
    metrics_routes.route("/", get(get_system_metrics))
}
#[utoipa::path(
    get,
    path = "/api/metrics",
    tag = "metrics",
    security(
        ("bearer_auth" = [])
    ),
    responses(
        (status = 200, description = "System metrics and monitoring data", body = SystemMetrics),
        (status = 401, description = "Unauthorized - valid authentication required"),
        (status = 500, description = "Internal server error")
    )
)]
// Handler for `GET /api/metrics`.
//
// Runs all five collectors through `tokio::try_join!`, so the first collector
// error short-circuits the request with that collector's status code. The
// `AuthUser` extractor is unused beyond forcing the request to be authenticated.
pub async fn get_system_metrics(
    State(state): State<Arc<AppState>>,
    _auth_user: AuthUser, // Require authentication
) -> Result<Json<SystemMetrics>, StatusCode> {
    // Stamp the snapshot before any collector starts.
    let timestamp = chrono::Utc::now().timestamp();

    let (database, ocr, documents, users, system) = tokio::try_join!(
        collect_database_metrics(&state),
        collect_ocr_metrics(&state),
        collect_document_metrics(&state),
        collect_user_metrics(&state),
        collect_system_metrics()
    )?;

    Ok(Json(SystemMetrics {
        database,
        ocr,
        documents,
        users,
        system,
        timestamp,
    }))
}
/// Reports database connection-pool metrics.
///
/// Only the pool size is real; the query counters are placeholders until
/// query tracking exists. This function never returns an error today, but it
/// keeps the `Result` signature shared by the other collectors.
async fn collect_database_metrics(state: &Arc<AppState>) -> Result<DatabaseMetrics, StatusCode> {
    // Convert with saturation instead of `as`, which would silently wrap if
    // the pool size ever exceeded `i32::MAX`.
    let active_connections = i32::try_from(state.db.pool.size()).unwrap_or(i32::MAX);

    // For now, use placeholder values for queries
    // In production, you might want to implement query tracking
    Ok(DatabaseMetrics {
        active_connections,
        total_queries_today: 0, // Placeholder - would need query tracking
        avg_query_time_ms: 0.0, // Placeholder - would need query timing
    })
}
async fn collect_ocr_metrics(state: &Arc<AppState>) -> Result<OcrMetrics, StatusCode> {
// Use existing OCR queue statistics
use crate::ocr_queue::OcrQueueService;
let queue_service = OcrQueueService::new(
state.db.clone(),
state.db.pool.clone(),
state.config.concurrent_ocr_jobs
);
let stats = queue_service
.get_stats()
.await
.map_err(|e| {
tracing::error!("Failed to get OCR stats: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
Ok(OcrMetrics {
pending_jobs: stats.pending_count,
processing_jobs: stats.processing_count,
failed_jobs: stats.failed_count,
completed_today: stats.completed_today,
avg_processing_time_minutes: stats.avg_wait_time_minutes,
queue_depth: stats.pending_count + stats.processing_count,
oldest_pending_minutes: stats.oldest_pending_minutes,
})
}
/// Collects document counts and storage totals from the `documents` table.
///
/// Runs four scalar queries in sequence. Any query failure is logged and
/// mapped to `StatusCode::INTERNAL_SERVER_ERROR`.
async fn collect_document_metrics(state: &Arc<AppState>) -> Result<DocumentMetrics, StatusCode> {
    let pool = &state.db.pool;

    // Get total document count
    let total_docs = sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM documents")
        .fetch_one(pool)
        .await
        .map_err(|e| {
            tracing::error!("Failed to get total document count: {}", e);
            StatusCode::INTERNAL_SERVER_ERROR
        })?;

    // Get documents uploaded today
    let docs_today = sqlx::query_scalar::<_, i64>(
        "SELECT COUNT(*) FROM documents WHERE DATE(created_at) = CURRENT_DATE"
    )
    .fetch_one(pool)
    .await
    .map_err(|e| {
        tracing::error!("Failed to get today's document count: {}", e);
        StatusCode::INTERNAL_SERVER_ERROR
    })?;

    // Get total storage size.
    // PostgreSQL's SUM over a BIGINT column yields NUMERIC, which does not
    // decode into an i64; cast the aggregate back to BIGINT. (If the column is
    // INTEGER the sum is already BIGINT and the cast is a no-op.)
    // SUM over zero rows is NULL, hence the Option and the fallback to 0.
    let total_size = sqlx::query_scalar::<_, Option<i64>>(
        "SELECT SUM(file_size)::BIGINT FROM documents"
    )
    .fetch_one(pool)
    .await
    .map_err(|e| {
        tracing::error!("Failed to get total storage size: {}", e);
        StatusCode::INTERNAL_SERVER_ERROR
    })?
    .unwrap_or(0);

    // Get documents with and without OCR.
    // `has_ocr_text` is a field of the API response types, not one of the
    // `documents` columns selected by the database layer; filter on the
    // `ocr_text` column instead.
    // NOTE(review): confirm this matches how `has_ocr_text` is derived for
    // API responses (e.g. whether empty OCR text should count).
    let docs_with_ocr = sqlx::query_scalar::<_, i64>(
        "SELECT COUNT(*) FROM documents WHERE ocr_text IS NOT NULL"
    )
    .fetch_one(pool)
    .await
    .map_err(|e| {
        tracing::error!("Failed to get OCR document count: {}", e);
        StatusCode::INTERNAL_SERVER_ERROR
    })?;

    let docs_without_ocr = total_docs - docs_with_ocr;

    // Guard the division so an empty table reports 0.0 instead of NaN.
    let avg_size = if total_docs > 0 {
        total_size as f64 / total_docs as f64
    } else {
        0.0
    };

    Ok(DocumentMetrics {
        total_documents: total_docs,
        documents_uploaded_today: docs_today,
        total_storage_bytes: total_size,
        avg_document_size_bytes: avg_size,
        documents_with_ocr: docs_with_ocr,
        documents_without_ocr: docs_without_ocr,
    })
}
/// Collects user counts: total users, users created today, and users who own
/// at least one document created today.
///
/// The three queries run in sequence; any failure is logged and mapped to
/// `StatusCode::INTERNAL_SERVER_ERROR`.
async fn collect_user_metrics(state: &Arc<AppState>) -> Result<UserMetrics, StatusCode> {
    let pool = &state.db.pool;

    // Every registered user.
    let total_users = sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM users")
        .fetch_one(pool)
        .await
        .map_err(|e| {
            tracing::error!("Failed to get total user count: {}", e);
            StatusCode::INTERNAL_SERVER_ERROR
        })?;

    // Users whose account was created on the current database date.
    let new_registrations_today = sqlx::query_scalar::<_, i64>(
        "SELECT COUNT(*) FROM users WHERE DATE(created_at) = CURRENT_DATE"
    )
    .fetch_one(pool)
    .await
    .map_err(|e| {
        tracing::error!("Failed to get new user count: {}", e);
        StatusCode::INTERNAL_SERVER_ERROR
    })?;

    // "Active" is approximated as: owns a document created today.
    let active_users_today = sqlx::query_scalar::<_, i64>(
        "SELECT COUNT(DISTINCT user_id) FROM documents WHERE DATE(created_at) = CURRENT_DATE"
    )
    .fetch_one(pool)
    .await
    .map_err(|e| {
        tracing::error!("Failed to get active user count: {}", e);
        StatusCode::INTERNAL_SERVER_ERROR
    })?;

    Ok(UserMetrics {
        total_users,
        active_users_today,
        new_registrations_today,
    })
}
/// Collects application-level information: uptime and version strings.
///
/// This function never returns an error today, but it keeps the `Result`
/// signature shared by the other collectors.
async fn collect_system_metrics() -> Result<GeneralSystemMetrics, StatusCode> {
    // Reference instant captured the first time metrics are collected in this
    // process. This replaces a hard-coded constant: the reported value now
    // grows with real elapsed time, but it is measured from the first metrics
    // request, so it is a lower bound on true process uptime.
    // TODO: record the start instant during startup (e.g. in AppState) and
    // read it here instead.
    static FIRST_COLLECTED: std::sync::OnceLock<std::time::Instant> = std::sync::OnceLock::new();
    let uptime_seconds = FIRST_COLLECTED
        .get_or_init(std::time::Instant::now)
        .elapsed()
        .as_secs();

    // Get version information.
    // The package version is baked in at compile time; RUST_VERSION is read
    // from the process environment at runtime and falls back to "unknown".
    let app_version = env!("CARGO_PKG_VERSION").to_string();
    let rust_version = std::env::var("RUST_VERSION").unwrap_or_else(|_| "unknown".to_string());

    Ok(GeneralSystemMetrics {
        uptime_seconds,
        app_version,
        rust_version,
    })
}

View File

@ -1,5 +1,6 @@
pub mod auth;
pub mod documents;
pub mod metrics;
pub mod queue;
pub mod search;
pub mod settings;

View File

@ -23,8 +23,9 @@ pub fn router() -> Router<Arc<AppState>> {
("bearer_auth" = [])
),
responses(
(status = 200, description = "OCR queue statistics"),
(status = 401, description = "Unauthorized")
(status = 200, description = "OCR queue statistics including pending jobs, processing status, and performance metrics"),
(status = 401, description = "Unauthorized - valid authentication required"),
(status = 500, description = "Internal server error")
)
)]
async fn get_queue_stats(

View File

@ -30,8 +30,9 @@ pub fn router() -> Router<Arc<AppState>> {
SearchRequest
),
responses(
(status = 200, description = "Search results", body = SearchResponse),
(status = 401, description = "Unauthorized")
(status = 200, description = "Enhanced search results with relevance ranking, text snippets, and OCR-extracted content matching", body = SearchResponse),
(status = 401, description = "Unauthorized - valid authentication required"),
(status = 500, description = "Internal server error")
)
)]
async fn search_documents(

View File

@ -10,6 +10,9 @@ use crate::{
DocumentResponse, SearchRequest, SearchResponse, EnhancedDocumentResponse,
SettingsResponse, UpdateSettings, SearchMode, SearchSnippet, HighlightRange
},
routes::metrics::{
SystemMetrics, DatabaseMetrics, OcrMetrics, DocumentMetrics, UserMetrics, GeneralSystemMetrics
},
AppState,
};
@ -39,12 +42,15 @@ use crate::{
// Queue endpoints
crate::routes::queue::get_queue_stats,
crate::routes::queue::requeue_failed,
// Metrics endpoints
crate::routes::metrics::get_system_metrics,
),
components(
schemas(
CreateUser, LoginRequest, LoginResponse, UserResponse, UpdateUser,
DocumentResponse, SearchRequest, SearchResponse, EnhancedDocumentResponse,
SettingsResponse, UpdateSettings, SearchMode, SearchSnippet, HighlightRange
SettingsResponse, UpdateSettings, SearchMode, SearchSnippet, HighlightRange,
SystemMetrics, DatabaseMetrics, OcrMetrics, DocumentMetrics, UserMetrics, GeneralSystemMetrics
)
),
tags(
@ -54,6 +60,7 @@ use crate::{
(name = "settings", description = "User settings endpoints"),
(name = "users", description = "User management endpoints"),
(name = "queue", description = "OCR queue management endpoints"),
(name = "metrics", description = "System metrics and monitoring endpoints"),
),
modifiers(&SecurityAddon),
info(

View File

@ -37,6 +37,12 @@ mod tests {
mime_type: "application/pdf".to_string(),
content: Some("Test content".to_string()),
ocr_text: Some("OCR extracted text".to_string()),
ocr_confidence: Some(95.0),
ocr_word_count: Some(10),
ocr_processing_time_ms: Some(800),
ocr_status: Some("completed".to_string()),
ocr_error: None,
ocr_completed_at: Some(Utc::now()),
tags: vec!["test".to_string(), "document".to_string()],
created_at: Utc::now(),
updated_at: Utc::now(),

View File

@ -300,6 +300,10 @@ mod tests {
tags: vec!["test".to_string()],
created_at: now,
has_ocr_text: true,
ocr_confidence: Some(85.5),
ocr_word_count: Some(50),
ocr_processing_time_ms: Some(1500),
ocr_status: Some("completed".to_string()),
search_rank: Some(0.75),
snippets,
};
@ -679,6 +683,10 @@ mod tests {
tags: vec!["test".to_string(), "document".to_string()],
created_at: now,
has_ocr_text: true,
ocr_confidence: Some(92.3),
ocr_word_count: Some(75),
ocr_processing_time_ms: Some(2000),
ocr_status: Some("completed".to_string()),
search_rank: Some(0.85),
snippets: vec![
SearchSnippet {
@ -919,6 +927,12 @@ mod tests {
mime_type: "application/pdf".to_string(),
content: Some("This is a comprehensive test document for enhanced search functionality testing".to_string()),
ocr_text: Some("OCR extracted content with additional test information for search validation".to_string()),
ocr_confidence: Some(88.7),
ocr_word_count: Some(25),
ocr_processing_time_ms: Some(1200),
ocr_status: Some("completed".to_string()),
ocr_error: None,
ocr_completed_at: Some(Utc::now()),
tags: vec!["enhanced".to_string(), "search".to_string(), "test".to_string()],
created_at: Utc::now(),
updated_at: Utc::now(),