feat(server/client): the /documents endpoint works again, and so does the watch folder...kinda
This commit is contained in:
parent
e6e2ba76f5
commit
00b2bfe22c
|
|
@ -12,6 +12,7 @@ import DocumentsPage from './pages/DocumentsPage';
|
|||
import SearchPage from './pages/SearchPage';
|
||||
import DocumentDetailsPage from './pages/DocumentDetailsPage';
|
||||
import SettingsPage from './pages/SettingsPage';
|
||||
import WatchFolderPage from './pages/WatchFolderPage';
|
||||
|
||||
function App() {
|
||||
const { user, loading } = useAuth();
|
||||
|
|
@ -63,7 +64,7 @@ function App() {
|
|||
<Route path="/documents" element={<DocumentsPage />} />
|
||||
<Route path="/documents/:id" element={<DocumentDetailsPage />} />
|
||||
<Route path="/search" element={<SearchPage />} />
|
||||
<Route path="/watch" element={<div>Watch Folder Page - Coming Soon</div>} />
|
||||
<Route path="/watch" element={<WatchFolderPage />} />
|
||||
<Route path="/settings" element={<SettingsPage />} />
|
||||
<Route path="/profile" element={<div>Profile Page - Coming Soon</div>} />
|
||||
</Routes>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,341 @@
|
|||
import React, { useState, useEffect } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Container,
|
||||
Typography,
|
||||
Paper,
|
||||
Card,
|
||||
CardContent,
|
||||
Grid,
|
||||
Chip,
|
||||
LinearProgress,
|
||||
Table,
|
||||
TableBody,
|
||||
TableCell,
|
||||
TableContainer,
|
||||
TableHead,
|
||||
TableRow,
|
||||
Alert,
|
||||
Button,
|
||||
IconButton,
|
||||
} from '@mui/material';
|
||||
import {
|
||||
Refresh as RefreshIcon,
|
||||
Folder as FolderIcon,
|
||||
CheckCircleOutline as CheckCircleIcon,
|
||||
Error as ErrorIcon,
|
||||
Schedule as ScheduleIcon,
|
||||
Visibility as VisibilityIcon,
|
||||
CloudUpload as CloudUploadIcon,
|
||||
Description as DescriptionIcon,
|
||||
} from '@mui/icons-material';
|
||||
import { useTheme } from '@mui/material/styles';
|
||||
import { queueService } from '../services/api';
|
||||
|
||||
const WatchFolderPage = () => {
|
||||
const theme = useTheme();
|
||||
const [queueStats, setQueueStats] = useState(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState(null);
|
||||
const [lastRefresh, setLastRefresh] = useState(null);
|
||||
|
||||
// Mock configuration data (would typically come from API)
|
||||
const watchConfig = {
|
||||
watchFolder: process.env.REACT_APP_WATCH_FOLDER || './watch',
|
||||
watchInterval: 30,
|
||||
maxFileAge: 24,
|
||||
allowedTypes: ['pdf', 'png', 'jpg', 'jpeg', 'tiff', 'bmp', 'txt', 'doc', 'docx'],
|
||||
isActive: true,
|
||||
strategy: 'hybrid'
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
fetchQueueStats();
|
||||
const interval = setInterval(fetchQueueStats, 30000); // Refresh every 30 seconds
|
||||
return () => clearInterval(interval);
|
||||
}, []);
|
||||
|
||||
/**
 * Requests the current queue statistics and mirrors the outcome into
 * component state.
 *
 * On success the stats and the refresh timestamp are stored and any earlier
 * error is cleared; on failure the error is logged and a fixed message is
 * stored instead. The loading flag is raised for the duration of the call
 * and always lowered afterwards.
 */
const fetchQueueStats = async () => {
  try {
    setLoading(true);
    const { data: latestStats } = await queueService.getStats();
    setQueueStats(latestStats);
    setLastRefresh(new Date());
    setError(null);
  } catch (fetchError) {
    console.error('Error fetching queue stats:', fetchError);
    setError('Failed to fetch queue statistics');
  } finally {
    // Runs on both paths so the Refresh button is re-enabled either way.
    setLoading(false);
  }
};
|
||||
|
||||
/**
 * Formats a byte count as a human-readable size using 1024-based units,
 * e.g. 1536 -> "1.5 KB".
 *
 * The value is scaled by repeated division by 1024 (exact in floating point)
 * rather than by a logarithm, and the unit index is bounded by the unit
 * table, so very large and fractional inputs can no longer index outside it.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} The scaled value with at most two decimals followed by
 *   its unit; "0 Bytes" for zero, negative, or non-numeric input.
 */
const formatFileSize = (bytes) => {
  const units = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB'];
  const step = 1024;

  if (typeof bytes !== 'number' || !Number.isFinite(bytes) || bytes <= 0) {
    return '0 Bytes';
  }

  let scaled = bytes;
  let unitIndex = 0;
  while (scaled >= step && unitIndex < units.length - 1) {
    scaled /= step;
    unitIndex += 1;
  }

  // parseFloat drops trailing zeros left by toFixed ("1.50" -> 1.5).
  return `${parseFloat(scaled.toFixed(2))} ${units[unitIndex]}`;
};
|
||||
|
||||
/**
 * Formats a duration given in minutes as "Nm" or "Hh Mm".
 *
 * The value is rounded to whole minutes before it is split into hours and
 * minutes, so results such as "1h 60m" or "60m" cannot occur. A value of 0
 * is a real measurement and is shown as "0m"; only a missing or non-numeric
 * value is reported as "N/A".
 *
 * @param {number|null|undefined} minutes - Duration in minutes.
 * @returns {string} The formatted duration, or "N/A" when no usable value
 *   was supplied.
 */
const formatDuration = (minutes) => {
  if (typeof minutes !== 'number' || !Number.isFinite(minutes)) return 'N/A';

  const totalMinutes = Math.round(minutes);
  if (totalMinutes < 60) return `${totalMinutes}m`;

  const hours = Math.floor(totalMinutes / 60);
  const mins = totalMinutes % 60;
  return `${hours}h ${mins}m`;
};
|
||||
|
||||
/**
 * Maps a status keyword to the colour name handed to the status chip.
 *
 * @param {string} status - One of 'active', 'error' or 'pending'.
 * @returns {string} 'success', 'error' or 'warning' for the known statuses,
 *   and 'default' for anything else.
 */
const getStatusColor = (status) => {
  // A Map (not a plain object) so that keys such as 'constructor' cannot
  // match inherited properties.
  const colorByStatus = new Map([
    ['active', 'success'],
    ['error', 'error'],
    ['pending', 'warning'],
  ]);
  return colorByStatus.get(status) ?? 'default';
};
|
||||
|
||||
const getStatusIcon = (status) => {
|
||||
switch (status) {
|
||||
case 'active': return <CheckCircleIcon />;
|
||||
case 'error': return <ErrorIcon />;
|
||||
case 'pending': return <ScheduleIcon />;
|
||||
default: return <VisibilityIcon />;
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<Container maxWidth="xl" sx={{ mt: 4, mb: 4 }}>
|
||||
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', mb: 4 }}>
|
||||
<Typography variant="h4" sx={{ fontWeight: 600 }}>
|
||||
Watch Folder
|
||||
</Typography>
|
||||
<Button
|
||||
variant="outlined"
|
||||
startIcon={<RefreshIcon />}
|
||||
onClick={fetchQueueStats}
|
||||
disabled={loading}
|
||||
>
|
||||
Refresh
|
||||
</Button>
|
||||
</Box>
|
||||
|
||||
{error && (
|
||||
<Alert severity="error" sx={{ mb: 3 }}>
|
||||
{error}
|
||||
</Alert>
|
||||
)}
|
||||
|
||||
{/* Watch Folder Configuration */}
|
||||
<Card sx={{ mb: 3 }}>
|
||||
<CardContent>
|
||||
<Typography variant="h6" sx={{ mb: 2, display: 'flex', alignItems: 'center', gap: 1 }}>
|
||||
<FolderIcon color="primary" />
|
||||
Watch Folder Configuration
|
||||
</Typography>
|
||||
<Grid container spacing={2}>
|
||||
<Grid item xs={12} md={6}>
|
||||
<Box sx={{ mb: 2 }}>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Watched Directory
|
||||
</Typography>
|
||||
<Typography variant="body1" sx={{ fontFamily: 'monospace', bgcolor: 'grey.100', p: 1, borderRadius: 1 }}>
|
||||
{watchConfig.watchFolder}
|
||||
</Typography>
|
||||
</Box>
|
||||
</Grid>
|
||||
<Grid item xs={12} md={6}>
|
||||
<Box sx={{ mb: 2 }}>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Status
|
||||
</Typography>
|
||||
<Chip
|
||||
icon={getStatusIcon(watchConfig.isActive ? 'active' : 'error')}
|
||||
label={watchConfig.isActive ? 'Active' : 'Inactive'}
|
||||
color={getStatusColor(watchConfig.isActive ? 'active' : 'error')}
|
||||
variant="filled"
|
||||
/>
|
||||
</Box>
|
||||
</Grid>
|
||||
<Grid item xs={12} md={4}>
|
||||
<Box sx={{ mb: 2 }}>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Watch Strategy
|
||||
</Typography>
|
||||
<Typography variant="body1" sx={{ textTransform: 'capitalize' }}>
|
||||
{watchConfig.strategy}
|
||||
</Typography>
|
||||
</Box>
|
||||
</Grid>
|
||||
<Grid item xs={12} md={4}>
|
||||
<Box sx={{ mb: 2 }}>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Scan Interval
|
||||
</Typography>
|
||||
<Typography variant="body1">
|
||||
{watchConfig.watchInterval} seconds
|
||||
</Typography>
|
||||
</Box>
|
||||
</Grid>
|
||||
<Grid item xs={12} md={4}>
|
||||
<Box sx={{ mb: 2 }}>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Max File Age
|
||||
</Typography>
|
||||
<Typography variant="body1">
|
||||
{watchConfig.maxFileAge} hours
|
||||
</Typography>
|
||||
</Box>
|
||||
</Grid>
|
||||
<Grid item xs={12}>
|
||||
<Box sx={{ mb: 2 }}>
|
||||
<Typography variant="body2" color="text.secondary" sx={{ mb: 1 }}>
|
||||
Supported File Types
|
||||
</Typography>
|
||||
<Box sx={{ display: 'flex', flexWrap: 'wrap', gap: 0.5 }}>
|
||||
{watchConfig.allowedTypes.map((type) => (
|
||||
<Chip
|
||||
key={type}
|
||||
label={`.${type}`}
|
||||
size="small"
|
||||
variant="outlined"
|
||||
color="primary"
|
||||
/>
|
||||
))}
|
||||
</Box>
|
||||
</Box>
|
||||
</Grid>
|
||||
</Grid>
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
{/* Queue Statistics */}
|
||||
{queueStats && (
|
||||
<Card sx={{ mb: 3 }}>
|
||||
<CardContent>
|
||||
<Typography variant="h6" sx={{ mb: 2, display: 'flex', alignItems: 'center', gap: 1 }}>
|
||||
<CloudUploadIcon color="primary" />
|
||||
Processing Queue
|
||||
</Typography>
|
||||
<Grid container spacing={2}>
|
||||
<Grid item xs={12} sm={6} md={3}>
|
||||
<Box sx={{ textAlign: 'center', p: 2, bgcolor: 'info.light', borderRadius: 2 }}>
|
||||
<Typography variant="h4" sx={{ fontWeight: 600, color: 'info.dark' }}>
|
||||
{queueStats.pending_count}
|
||||
</Typography>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Pending
|
||||
</Typography>
|
||||
</Box>
|
||||
</Grid>
|
||||
<Grid item xs={12} sm={6} md={3}>
|
||||
<Box sx={{ textAlign: 'center', p: 2, bgcolor: 'warning.light', borderRadius: 2 }}>
|
||||
<Typography variant="h4" sx={{ fontWeight: 600, color: 'warning.dark' }}>
|
||||
{queueStats.processing_count}
|
||||
</Typography>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Processing
|
||||
</Typography>
|
||||
</Box>
|
||||
</Grid>
|
||||
<Grid item xs={12} sm={6} md={3}>
|
||||
<Box sx={{ textAlign: 'center', p: 2, bgcolor: 'error.light', borderRadius: 2 }}>
|
||||
<Typography variant="h4" sx={{ fontWeight: 600, color: 'error.dark' }}>
|
||||
{queueStats.failed_count}
|
||||
</Typography>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Failed
|
||||
</Typography>
|
||||
</Box>
|
||||
</Grid>
|
||||
<Grid item xs={12} sm={6} md={3}>
|
||||
<Box sx={{ textAlign: 'center', p: 2, bgcolor: 'success.light', borderRadius: 2 }}>
|
||||
<Typography variant="h4" sx={{ fontWeight: 600, color: 'success.dark' }}>
|
||||
{queueStats.completed_today}
|
||||
</Typography>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Completed Today
|
||||
</Typography>
|
||||
</Box>
|
||||
</Grid>
|
||||
</Grid>
|
||||
|
||||
<Grid container spacing={2} sx={{ mt: 2 }}>
|
||||
<Grid item xs={12} md={6}>
|
||||
<Box sx={{ p: 2, bgcolor: 'grey.50', borderRadius: 2 }}>
|
||||
<Typography variant="body2" color="text.secondary" sx={{ mb: 1 }}>
|
||||
Average Wait Time
|
||||
</Typography>
|
||||
<Typography variant="h6">
|
||||
{formatDuration(queueStats.avg_wait_time_minutes)}
|
||||
</Typography>
|
||||
</Box>
|
||||
</Grid>
|
||||
<Grid item xs={12} md={6}>
|
||||
<Box sx={{ p: 2, bgcolor: 'grey.50', borderRadius: 2 }}>
|
||||
<Typography variant="body2" color="text.secondary" sx={{ mb: 1 }}>
|
||||
Oldest Pending Item
|
||||
</Typography>
|
||||
<Typography variant="h6">
|
||||
{formatDuration(queueStats.oldest_pending_minutes)}
|
||||
</Typography>
|
||||
</Box>
|
||||
</Grid>
|
||||
</Grid>
|
||||
|
||||
{lastRefresh && (
|
||||
<Typography variant="caption" color="text.secondary" sx={{ mt: 2, display: 'block' }}>
|
||||
Last updated: {lastRefresh.toLocaleTimeString()}
|
||||
</Typography>
|
||||
)}
|
||||
</CardContent>
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{/* Processing Information */}
|
||||
<Card>
|
||||
<CardContent>
|
||||
<Typography variant="h6" sx={{ mb: 2, display: 'flex', alignItems: 'center', gap: 1 }}>
|
||||
<DescriptionIcon color="primary" />
|
||||
How Watch Folder Works
|
||||
</Typography>
|
||||
<Typography variant="body1" sx={{ mb: 2 }}>
|
||||
The watch folder system automatically monitors the configured directory for new files and processes them for OCR.
|
||||
</Typography>
|
||||
|
||||
<Box sx={{ mb: 3 }}>
|
||||
<Typography variant="subtitle2" sx={{ mb: 1, color: 'primary.main' }}>
|
||||
Processing Pipeline:
|
||||
</Typography>
|
||||
<Box sx={{ pl: 2 }}>
|
||||
<Typography variant="body2" sx={{ mb: 0.5 }}>
|
||||
1. <strong>File Detection:</strong> New files are detected using hybrid watching (inotify + polling)
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ mb: 0.5 }}>
|
||||
2. <strong>Validation:</strong> Files are checked for supported format and size limits
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ mb: 0.5 }}>
|
||||
3. <strong>Deduplication:</strong> System prevents processing of duplicate files
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ mb: 0.5 }}>
|
||||
4. <strong>Storage:</strong> Files are moved to the document storage system
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ mb: 0.5 }}>
|
||||
5. <strong>OCR Queue:</strong> Documents are queued for OCR processing with priority
|
||||
</Typography>
|
||||
</Box>
|
||||
</Box>
|
||||
|
||||
<Alert severity="info" sx={{ mt: 2 }}>
|
||||
<Typography variant="body2">
|
||||
The system uses a hybrid watching strategy that automatically detects filesystem type and chooses
|
||||
the optimal monitoring approach (inotify for local filesystems, polling for network mounts).
|
||||
</Typography>
|
||||
</Alert>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</Container>
|
||||
);
|
||||
};
|
||||
|
||||
export default WatchFolderPage;
|
||||
|
|
@ -72,6 +72,15 @@ export interface SearchResponse {
|
|||
suggestions: string[]
|
||||
}
|
||||
|
||||
/**
 * Processing-queue statistics as returned by `queueService.getStats()`
 * (`GET /queue/stats`).
 */
export interface QueueStats {
  /** Number of queue items reported as pending. */
  pending_count: number
  /** Number of queue items reported as currently processing. */
  processing_count: number
  /** Number of queue items reported as failed. */
  failed_count: number
  /** Number of queue items reported as completed today. */
  completed_today: number
  /**
   * Average wait time in minutes.
   * NOTE(review): the server-side value is an optional float, which is
   * presumably serialized as `null` when absent — confirm against the
   * queue stats response type.
   */
  avg_wait_time_minutes?: number | null
  /**
   * Age in minutes of the oldest pending item; absent or `null` under the
   * same assumption as `avg_wait_time_minutes`.
   */
  oldest_pending_minutes?: number | null
}
|
||||
|
||||
export const documentService = {
|
||||
upload: (file: File) => {
|
||||
const formData = new FormData()
|
||||
|
|
@ -111,4 +120,14 @@ export const documentService = {
|
|||
},
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
/** Client for the processing-queue endpoints. */
export const queueService = {
  /** Issues `GET /queue/stats` and resolves with a `QueueStats` payload. */
  getStats: () => api.get<QueueStats>('/queue/stats'),

  /** Issues `POST /queue/requeue-failed`. */
  requeueFailed: () => api.post('/queue/requeue-failed'),
}
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
-- Add missing OCR columns to documents table for existing databases
|
||||
ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_error TEXT;
|
||||
ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_completed_at TIMESTAMPTZ;
|
||||
10
src/db.rs
10
src/db.rs
|
|
@ -373,7 +373,7 @@ impl Database {
|
|||
pub async fn get_documents_by_user(&self, user_id: Uuid, limit: i64, offset: i64) -> Result<Vec<Document>> {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
|
||||
FROM documents
|
||||
WHERE user_id = $1
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -416,7 +416,7 @@ impl Database {
|
|||
pub async fn find_documents_by_filename(&self, filename: &str) -> Result<Vec<Document>> {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
|
||||
FROM documents
|
||||
WHERE filename = $1 OR original_filename = $1
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -456,7 +456,7 @@ impl Database {
|
|||
pub async fn search_documents(&self, user_id: Uuid, search: SearchRequest) -> Result<(Vec<Document>, i64)> {
|
||||
let mut query_builder = sqlx::QueryBuilder::new(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id,
|
||||
ts_rank(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')), plainto_tsquery('english', "#
|
||||
);
|
||||
|
||||
|
|
@ -550,7 +550,7 @@ impl Database {
|
|||
// Use trigram similarity for substring matching
|
||||
let mut builder = sqlx::QueryBuilder::new(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id,
|
||||
GREATEST(
|
||||
similarity(filename, "#
|
||||
);
|
||||
|
|
@ -589,7 +589,7 @@ impl Database {
|
|||
|
||||
let mut builder = sqlx::QueryBuilder::new(&format!(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, tags, created_at, updated_at, user_id,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id,
|
||||
GREATEST(
|
||||
CASE WHEN filename ILIKE '%' || "#
|
||||
));
|
||||
|
|
|
|||
|
|
@ -559,6 +559,7 @@ impl EnhancedOcrService {
|
|||
}
|
||||
|
||||
/// Validate OCR result quality
|
||||
#[cfg(feature = "ocr")]
|
||||
pub fn validate_ocr_quality(&self, result: &OcrResult, settings: &Settings) -> bool {
|
||||
// Check minimum confidence threshold
|
||||
if result.confidence < settings.ocr_min_confidence {
|
||||
|
|
|
|||
27
src/main.rs
27
src/main.rs
|
|
@ -73,6 +73,32 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
}
|
||||
}
|
||||
|
||||
// Check if ocr_error column exists
|
||||
let check_column = sqlx::query("SELECT column_name FROM information_schema.columns WHERE table_name = 'documents' AND column_name = 'ocr_error'")
|
||||
.fetch_optional(&db.pool)
|
||||
.await;
|
||||
|
||||
match check_column {
|
||||
Ok(Some(_)) => info!("✅ ocr_error column exists"),
|
||||
Ok(None) => {
|
||||
error!("❌ ocr_error column is missing! Migration 006 may not have been applied.");
|
||||
// Try to add the column manually as a fallback
|
||||
info!("Attempting to add missing columns...");
|
||||
if let Err(e) = sqlx::query("ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_error TEXT")
|
||||
.execute(&db.pool)
|
||||
.await {
|
||||
error!("Failed to add ocr_error column: {}", e);
|
||||
}
|
||||
if let Err(e) = sqlx::query("ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_completed_at TIMESTAMPTZ")
|
||||
.execute(&db.pool)
|
||||
.await {
|
||||
error!("Failed to add ocr_completed_at column: {}", e);
|
||||
}
|
||||
info!("Fallback column addition completed");
|
||||
}
|
||||
Err(e) => error!("Failed to check for ocr_error column: {}", e),
|
||||
}
|
||||
|
||||
let result = migrations.run(&db.pool).await;
|
||||
match result {
|
||||
Ok(_) => info!("SQLx migrations completed successfully"),
|
||||
|
|
@ -113,6 +139,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
.route("/api/health", get(readur::health_check))
|
||||
.nest("/api/auth", routes::auth::router())
|
||||
.nest("/api/documents", routes::documents::router())
|
||||
.nest("/api/metrics", routes::metrics::router())
|
||||
.nest("/api/queue", routes::queue::router())
|
||||
.nest("/api/search", routes::search::router())
|
||||
.nest("/api/settings", routes::settings::router())
|
||||
|
|
|
|||
|
|
@ -64,40 +64,64 @@ pub struct Document {
|
|||
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema)]
|
||||
pub struct DocumentResponse {
|
||||
/// Unique identifier for the document
|
||||
pub id: Uuid,
|
||||
/// Current filename in the system
|
||||
pub filename: String,
|
||||
/// Original filename when uploaded
|
||||
pub original_filename: String,
|
||||
/// File size in bytes
|
||||
pub file_size: i64,
|
||||
/// MIME type of the file
|
||||
pub mime_type: String,
|
||||
/// Tags associated with the document
|
||||
pub tags: Vec<String>,
|
||||
/// When the document was created
|
||||
pub created_at: DateTime<Utc>,
|
||||
/// Whether OCR text has been extracted
|
||||
pub has_ocr_text: bool,
|
||||
/// OCR confidence score (0-100, higher is better)
|
||||
pub ocr_confidence: Option<f32>,
|
||||
/// Number of words detected by OCR
|
||||
pub ocr_word_count: Option<i32>,
|
||||
/// Time taken for OCR processing in milliseconds
|
||||
pub ocr_processing_time_ms: Option<i32>,
|
||||
/// Current status of OCR processing (pending, processing, completed, failed)
|
||||
pub ocr_status: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema, IntoParams)]
|
||||
pub struct SearchRequest {
|
||||
/// Search query text (searches both document content and OCR-extracted text)
|
||||
pub query: String,
|
||||
/// Filter by specific tags
|
||||
pub tags: Option<Vec<String>>,
|
||||
/// Filter by MIME types (e.g., "application/pdf", "image/png")
|
||||
pub mime_types: Option<Vec<String>>,
|
||||
/// Maximum number of results to return (default: 25)
|
||||
pub limit: Option<i64>,
|
||||
/// Number of results to skip for pagination (default: 0)
|
||||
pub offset: Option<i64>,
|
||||
/// Whether to include text snippets with search matches (default: true)
|
||||
pub include_snippets: Option<bool>,
|
||||
/// Length of text snippets in characters (default: 200)
|
||||
pub snippet_length: Option<i32>,
|
||||
/// Search algorithm to use (default: simple)
|
||||
pub search_mode: Option<SearchMode>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema)]
|
||||
pub enum SearchMode {
|
||||
/// Simple text search with basic word matching
|
||||
#[serde(rename = "simple")]
|
||||
Simple,
|
||||
/// Exact phrase matching
|
||||
#[serde(rename = "phrase")]
|
||||
Phrase,
|
||||
/// Fuzzy search using similarity matching (good for typos and partial matches)
|
||||
#[serde(rename = "fuzzy")]
|
||||
Fuzzy,
|
||||
/// Boolean search with AND, OR, NOT operators
|
||||
#[serde(rename = "boolean")]
|
||||
Boolean,
|
||||
}
|
||||
|
|
@ -110,41 +134,65 @@ impl Default for SearchMode {
|
|||
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema)]
|
||||
pub struct SearchSnippet {
|
||||
/// The snippet text content
|
||||
pub text: String,
|
||||
/// Starting character position in the original document
|
||||
pub start_offset: i32,
|
||||
/// Ending character position in the original document
|
||||
pub end_offset: i32,
|
||||
/// Ranges within the snippet that should be highlighted
|
||||
pub highlight_ranges: Vec<HighlightRange>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema)]
|
||||
pub struct HighlightRange {
|
||||
/// Start position of highlight within the snippet
|
||||
pub start: i32,
|
||||
/// End position of highlight within the snippet
|
||||
pub end: i32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema)]
|
||||
pub struct EnhancedDocumentResponse {
|
||||
/// Unique identifier for the document
|
||||
pub id: Uuid,
|
||||
/// Current filename in the system
|
||||
pub filename: String,
|
||||
/// Original filename when uploaded
|
||||
pub original_filename: String,
|
||||
/// File size in bytes
|
||||
pub file_size: i64,
|
||||
/// MIME type of the file
|
||||
pub mime_type: String,
|
||||
/// Tags associated with the document
|
||||
pub tags: Vec<String>,
|
||||
/// When the document was created
|
||||
pub created_at: DateTime<Utc>,
|
||||
/// Whether OCR text has been extracted
|
||||
pub has_ocr_text: bool,
|
||||
/// OCR confidence score (0-100, higher is better)
|
||||
pub ocr_confidence: Option<f32>,
|
||||
/// Number of words detected by OCR
|
||||
pub ocr_word_count: Option<i32>,
|
||||
/// Time taken for OCR processing in milliseconds
|
||||
pub ocr_processing_time_ms: Option<i32>,
|
||||
/// Current status of OCR processing (pending, processing, completed, failed)
|
||||
pub ocr_status: Option<String>,
|
||||
/// Search relevance score (0-1, higher is more relevant)
|
||||
pub search_rank: Option<f32>,
|
||||
/// Text snippets showing search matches with highlights
|
||||
pub snippets: Vec<SearchSnippet>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema)]
|
||||
pub struct SearchResponse {
|
||||
/// List of matching documents with enhanced metadata and snippets
|
||||
pub documents: Vec<EnhancedDocumentResponse>,
|
||||
/// Total number of documents matching the search criteria
|
||||
pub total: i64,
|
||||
/// Time taken to execute the search in milliseconds
|
||||
pub query_time_ms: u64,
|
||||
/// Search suggestions for query improvement
|
||||
pub suggestions: Vec<String>,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -37,12 +37,12 @@ pub fn router() -> Router<Arc<AppState>> {
|
|||
security(
|
||||
("bearer_auth" = [])
|
||||
),
|
||||
request_body(content = String, description = "Multipart form data with file", content_type = "multipart/form-data"),
|
||||
request_body(content = String, description = "Multipart form data with file. Supported formats: PDF, PNG, JPG, JPEG, TIFF, BMP, TXT. OCR will be automatically performed on image and PDF files.", content_type = "multipart/form-data"),
|
||||
responses(
|
||||
(status = 200, description = "Document uploaded successfully", body = DocumentResponse),
|
||||
(status = 400, description = "Bad request - invalid file or data"),
|
||||
(status = 200, description = "Document uploaded successfully. OCR processing will begin automatically if enabled in user settings.", body = DocumentResponse),
|
||||
(status = 400, description = "Bad request - invalid file type or malformed data"),
|
||||
(status = 413, description = "Payload too large - file exceeds size limit"),
|
||||
(status = 401, description = "Unauthorized")
|
||||
(status = 401, description = "Unauthorized - valid authentication required")
|
||||
)
|
||||
)]
|
||||
async fn upload_document(
|
||||
|
|
|
|||
|
|
@ -0,0 +1,267 @@
|
|||
use axum::{
|
||||
extract::State,
|
||||
http::StatusCode,
|
||||
response::Json,
|
||||
routing::get,
|
||||
Router,
|
||||
};
|
||||
use std::sync::Arc;
|
||||
use serde::Serialize;
|
||||
use utoipa::ToSchema;
|
||||
|
||||
use crate::{auth::AuthUser, AppState};
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
pub struct SystemMetrics {
|
||||
pub database: DatabaseMetrics,
|
||||
pub ocr: OcrMetrics,
|
||||
pub documents: DocumentMetrics,
|
||||
pub users: UserMetrics,
|
||||
pub system: GeneralSystemMetrics,
|
||||
pub timestamp: i64,
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
pub struct DatabaseMetrics {
|
||||
pub active_connections: i32,
|
||||
pub total_queries_today: i64,
|
||||
pub avg_query_time_ms: f64,
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
pub struct OcrMetrics {
|
||||
pub pending_jobs: i64,
|
||||
pub processing_jobs: i64,
|
||||
pub failed_jobs: i64,
|
||||
pub completed_today: i64,
|
||||
pub avg_processing_time_minutes: Option<f64>,
|
||||
pub queue_depth: i64,
|
||||
pub oldest_pending_minutes: Option<f64>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
pub struct DocumentMetrics {
|
||||
pub total_documents: i64,
|
||||
pub documents_uploaded_today: i64,
|
||||
pub total_storage_bytes: i64,
|
||||
pub avg_document_size_bytes: f64,
|
||||
pub documents_with_ocr: i64,
|
||||
pub documents_without_ocr: i64,
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
pub struct UserMetrics {
|
||||
pub total_users: i64,
|
||||
pub active_users_today: i64,
|
||||
pub new_registrations_today: i64,
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
pub struct GeneralSystemMetrics {
|
||||
pub uptime_seconds: u64,
|
||||
pub app_version: String,
|
||||
pub rust_version: String,
|
||||
}
|
||||
|
||||
pub fn router() -> Router<Arc<AppState>> {
|
||||
Router::new()
|
||||
.route("/", get(get_system_metrics))
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/api/metrics",
|
||||
tag = "metrics",
|
||||
security(
|
||||
("bearer_auth" = [])
|
||||
),
|
||||
responses(
|
||||
(status = 200, description = "System metrics and monitoring data", body = SystemMetrics),
|
||||
(status = 401, description = "Unauthorized - valid authentication required"),
|
||||
(status = 500, description = "Internal server error")
|
||||
)
|
||||
)]
|
||||
pub async fn get_system_metrics(
|
||||
State(state): State<Arc<AppState>>,
|
||||
_auth_user: AuthUser, // Require authentication
|
||||
) -> Result<Json<SystemMetrics>, StatusCode> {
|
||||
let timestamp = chrono::Utc::now().timestamp();
|
||||
|
||||
// Collect all metrics concurrently for better performance
|
||||
let (database_metrics, ocr_metrics, document_metrics, user_metrics, system_metrics) = tokio::try_join!(
|
||||
collect_database_metrics(&state),
|
||||
collect_ocr_metrics(&state),
|
||||
collect_document_metrics(&state),
|
||||
collect_user_metrics(&state),
|
||||
collect_system_metrics()
|
||||
)?;
|
||||
|
||||
let metrics = SystemMetrics {
|
||||
database: database_metrics,
|
||||
ocr: ocr_metrics,
|
||||
documents: document_metrics,
|
||||
users: user_metrics,
|
||||
system: system_metrics,
|
||||
timestamp,
|
||||
};
|
||||
|
||||
Ok(Json(metrics))
|
||||
}
|
||||
|
||||
async fn collect_database_metrics(state: &Arc<AppState>) -> Result<DatabaseMetrics, StatusCode> {
|
||||
// Get connection pool information
|
||||
let _pool_info = state.db.pool.options();
|
||||
let active_connections = state.db.pool.size() as i32;
|
||||
|
||||
// For now, use placeholder values for queries
|
||||
// In production, you might want to implement query tracking
|
||||
Ok(DatabaseMetrics {
|
||||
active_connections,
|
||||
total_queries_today: 0, // Placeholder - would need query tracking
|
||||
avg_query_time_ms: 0.0, // Placeholder - would need query timing
|
||||
})
|
||||
}
|
||||
|
||||
async fn collect_ocr_metrics(state: &Arc<AppState>) -> Result<OcrMetrics, StatusCode> {
|
||||
// Use existing OCR queue statistics
|
||||
use crate::ocr_queue::OcrQueueService;
|
||||
|
||||
let queue_service = OcrQueueService::new(
|
||||
state.db.clone(),
|
||||
state.db.pool.clone(),
|
||||
state.config.concurrent_ocr_jobs
|
||||
);
|
||||
|
||||
let stats = queue_service
|
||||
.get_stats()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("Failed to get OCR stats: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
Ok(OcrMetrics {
|
||||
pending_jobs: stats.pending_count,
|
||||
processing_jobs: stats.processing_count,
|
||||
failed_jobs: stats.failed_count,
|
||||
completed_today: stats.completed_today,
|
||||
avg_processing_time_minutes: stats.avg_wait_time_minutes,
|
||||
queue_depth: stats.pending_count + stats.processing_count,
|
||||
oldest_pending_minutes: stats.oldest_pending_minutes,
|
||||
})
|
||||
}
|
||||
|
||||
async fn collect_document_metrics(state: &Arc<AppState>) -> Result<DocumentMetrics, StatusCode> {
|
||||
// Get total document count
|
||||
let total_docs = sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM documents")
|
||||
.fetch_one(&state.db.pool)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("Failed to get total document count: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
// Get documents uploaded today
|
||||
let docs_today = sqlx::query_scalar::<_, i64>(
|
||||
"SELECT COUNT(*) FROM documents WHERE DATE(created_at) = CURRENT_DATE"
|
||||
)
|
||||
.fetch_one(&state.db.pool)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("Failed to get today's document count: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
// Get total storage size
|
||||
let total_size = sqlx::query_scalar::<_, Option<i64>>("SELECT SUM(file_size) FROM documents")
|
||||
.fetch_one(&state.db.pool)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("Failed to get total storage size: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?
|
||||
.unwrap_or(0);
|
||||
|
||||
// Get documents with and without OCR
|
||||
let docs_with_ocr = sqlx::query_scalar::<_, i64>(
|
||||
"SELECT COUNT(*) FROM documents WHERE has_ocr_text = true"
|
||||
)
|
||||
.fetch_one(&state.db.pool)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("Failed to get OCR document count: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
let docs_without_ocr = total_docs - docs_with_ocr;
|
||||
|
||||
let avg_size = if total_docs > 0 {
|
||||
total_size as f64 / total_docs as f64
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
Ok(DocumentMetrics {
|
||||
total_documents: total_docs,
|
||||
documents_uploaded_today: docs_today,
|
||||
total_storage_bytes: total_size,
|
||||
avg_document_size_bytes: avg_size,
|
||||
documents_with_ocr: docs_with_ocr,
|
||||
documents_without_ocr: docs_without_ocr,
|
||||
})
|
||||
}
|
||||
|
||||
async fn collect_user_metrics(state: &Arc<AppState>) -> Result<UserMetrics, StatusCode> {
|
||||
// Get total user count
|
||||
let total_users = sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM users")
|
||||
.fetch_one(&state.db.pool)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("Failed to get total user count: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
// Get new users today
|
||||
let new_users_today = sqlx::query_scalar::<_, i64>(
|
||||
"SELECT COUNT(*) FROM users WHERE DATE(created_at) = CURRENT_DATE"
|
||||
)
|
||||
.fetch_one(&state.db.pool)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("Failed to get new user count: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
// For active users, count users who uploaded documents today
|
||||
let active_users_today = sqlx::query_scalar::<_, i64>(
|
||||
"SELECT COUNT(DISTINCT user_id) FROM documents WHERE DATE(created_at) = CURRENT_DATE"
|
||||
)
|
||||
.fetch_one(&state.db.pool)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("Failed to get active user count: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
Ok(UserMetrics {
|
||||
total_users,
|
||||
active_users_today,
|
||||
new_registrations_today: new_users_today,
|
||||
})
|
||||
}
|
||||
|
||||
async fn collect_system_metrics() -> Result<GeneralSystemMetrics, StatusCode> {
|
||||
// Get application uptime (this is a simplified version)
|
||||
// In a real application, you'd track the start time
|
||||
let uptime_seconds = 3600; // Placeholder
|
||||
|
||||
// Get version information
|
||||
let app_version = env!("CARGO_PKG_VERSION").to_string();
|
||||
let rust_version = std::env::var("RUST_VERSION").unwrap_or_else(|_| "unknown".to_string());
|
||||
|
||||
Ok(GeneralSystemMetrics {
|
||||
uptime_seconds,
|
||||
app_version,
|
||||
rust_version,
|
||||
})
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
pub mod auth;
|
||||
pub mod documents;
|
||||
pub mod metrics;
|
||||
pub mod queue;
|
||||
pub mod search;
|
||||
pub mod settings;
|
||||
|
|
|
|||
|
|
@ -23,8 +23,9 @@ pub fn router() -> Router<Arc<AppState>> {
|
|||
("bearer_auth" = [])
|
||||
),
|
||||
responses(
|
||||
(status = 200, description = "OCR queue statistics"),
|
||||
(status = 401, description = "Unauthorized")
|
||||
(status = 200, description = "OCR queue statistics including pending jobs, processing status, and performance metrics"),
|
||||
(status = 401, description = "Unauthorized - valid authentication required"),
|
||||
(status = 500, description = "Internal server error")
|
||||
)
|
||||
)]
|
||||
async fn get_queue_stats(
|
||||
|
|
|
|||
|
|
@ -30,8 +30,9 @@ pub fn router() -> Router<Arc<AppState>> {
|
|||
SearchRequest
|
||||
),
|
||||
responses(
|
||||
(status = 200, description = "Search results", body = SearchResponse),
|
||||
(status = 401, description = "Unauthorized")
|
||||
(status = 200, description = "Enhanced search results with relevance ranking, text snippets, and OCR-extracted content matching", body = SearchResponse),
|
||||
(status = 401, description = "Unauthorized - valid authentication required"),
|
||||
(status = 500, description = "Internal server error")
|
||||
)
|
||||
)]
|
||||
async fn search_documents(
|
||||
|
|
|
|||
|
|
@ -10,6 +10,9 @@ use crate::{
|
|||
DocumentResponse, SearchRequest, SearchResponse, EnhancedDocumentResponse,
|
||||
SettingsResponse, UpdateSettings, SearchMode, SearchSnippet, HighlightRange
|
||||
},
|
||||
routes::metrics::{
|
||||
SystemMetrics, DatabaseMetrics, OcrMetrics, DocumentMetrics, UserMetrics, GeneralSystemMetrics
|
||||
},
|
||||
AppState,
|
||||
};
|
||||
|
||||
|
|
@ -39,12 +42,15 @@ use crate::{
|
|||
// Queue endpoints
|
||||
crate::routes::queue::get_queue_stats,
|
||||
crate::routes::queue::requeue_failed,
|
||||
// Metrics endpoints
|
||||
crate::routes::metrics::get_system_metrics,
|
||||
),
|
||||
components(
|
||||
schemas(
|
||||
CreateUser, LoginRequest, LoginResponse, UserResponse, UpdateUser,
|
||||
DocumentResponse, SearchRequest, SearchResponse, EnhancedDocumentResponse,
|
||||
SettingsResponse, UpdateSettings, SearchMode, SearchSnippet, HighlightRange
|
||||
SettingsResponse, UpdateSettings, SearchMode, SearchSnippet, HighlightRange,
|
||||
SystemMetrics, DatabaseMetrics, OcrMetrics, DocumentMetrics, UserMetrics, GeneralSystemMetrics
|
||||
)
|
||||
),
|
||||
tags(
|
||||
|
|
@ -54,6 +60,7 @@ use crate::{
|
|||
(name = "settings", description = "User settings endpoints"),
|
||||
(name = "users", description = "User management endpoints"),
|
||||
(name = "queue", description = "OCR queue management endpoints"),
|
||||
(name = "metrics", description = "System metrics and monitoring endpoints"),
|
||||
),
|
||||
modifiers(&SecurityAddon),
|
||||
info(
|
||||
|
|
|
|||
|
|
@ -37,6 +37,12 @@ mod tests {
|
|||
mime_type: "application/pdf".to_string(),
|
||||
content: Some("Test content".to_string()),
|
||||
ocr_text: Some("OCR extracted text".to_string()),
|
||||
ocr_confidence: Some(95.0),
|
||||
ocr_word_count: Some(10),
|
||||
ocr_processing_time_ms: Some(800),
|
||||
ocr_status: Some("completed".to_string()),
|
||||
ocr_error: None,
|
||||
ocr_completed_at: Some(Utc::now()),
|
||||
tags: vec!["test".to_string(), "document".to_string()],
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
|
|
|
|||
|
|
@ -300,6 +300,10 @@ mod tests {
|
|||
tags: vec!["test".to_string()],
|
||||
created_at: now,
|
||||
has_ocr_text: true,
|
||||
ocr_confidence: Some(85.5),
|
||||
ocr_word_count: Some(50),
|
||||
ocr_processing_time_ms: Some(1500),
|
||||
ocr_status: Some("completed".to_string()),
|
||||
search_rank: Some(0.75),
|
||||
snippets,
|
||||
};
|
||||
|
|
@ -679,6 +683,10 @@ mod tests {
|
|||
tags: vec!["test".to_string(), "document".to_string()],
|
||||
created_at: now,
|
||||
has_ocr_text: true,
|
||||
ocr_confidence: Some(92.3),
|
||||
ocr_word_count: Some(75),
|
||||
ocr_processing_time_ms: Some(2000),
|
||||
ocr_status: Some("completed".to_string()),
|
||||
search_rank: Some(0.85),
|
||||
snippets: vec![
|
||||
SearchSnippet {
|
||||
|
|
@ -919,6 +927,12 @@ mod tests {
|
|||
mime_type: "application/pdf".to_string(),
|
||||
content: Some("This is a comprehensive test document for enhanced search functionality testing".to_string()),
|
||||
ocr_text: Some("OCR extracted content with additional test information for search validation".to_string()),
|
||||
ocr_confidence: Some(88.7),
|
||||
ocr_word_count: Some(25),
|
||||
ocr_processing_time_ms: Some(1200),
|
||||
ocr_status: Some("completed".to_string()),
|
||||
ocr_error: None,
|
||||
ocr_completed_at: Some(Utc::now()),
|
||||
tags: vec!["enhanced".to_string(), "search".to_string(), "test".to_string()],
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
|
|
|
|||
Loading…
Reference in New Issue