feat(client/server): advanced search, along with fixing build errors
This commit is contained in:
parent
4dda4d143d
commit
479c62a4f1
|
|
@ -14,6 +14,7 @@ import DocumentDetailsPage from './pages/DocumentDetailsPage';
|
||||||
import SettingsPage from './pages/SettingsPage';
|
import SettingsPage from './pages/SettingsPage';
|
||||||
import SourcesPage from './pages/SourcesPage';
|
import SourcesPage from './pages/SourcesPage';
|
||||||
import WatchFolderPage from './pages/WatchFolderPage';
|
import WatchFolderPage from './pages/WatchFolderPage';
|
||||||
|
import FailedOcrPage from './pages/FailedOcrPage';
|
||||||
|
|
||||||
function App(): JSX.Element {
|
function App(): JSX.Element {
|
||||||
const { user, loading } = useAuth();
|
const { user, loading } = useAuth();
|
||||||
|
|
@ -69,6 +70,7 @@ function App(): JSX.Element {
|
||||||
<Route path="/sources" element={<SourcesPage />} />
|
<Route path="/sources" element={<SourcesPage />} />
|
||||||
<Route path="/watch" element={<WatchFolderPage />} />
|
<Route path="/watch" element={<WatchFolderPage />} />
|
||||||
<Route path="/settings" element={<SettingsPage />} />
|
<Route path="/settings" element={<SettingsPage />} />
|
||||||
|
<Route path="/failed-ocr" element={<FailedOcrPage />} />
|
||||||
<Route path="/profile" element={<div>Profile Page - Coming Soon</div>} />
|
<Route path="/profile" element={<div>Profile Page - Coming Soon</div>} />
|
||||||
</Routes>
|
</Routes>
|
||||||
</AppLayout>
|
</AppLayout>
|
||||||
|
|
|
||||||
|
|
@ -347,8 +347,8 @@ const AdvancedSearchPanel: React.FC<AdvancedSearchPanelProps> = ({
|
||||||
label="Boost Recent Documents"
|
label="Boost Recent Documents"
|
||||||
/>
|
/>
|
||||||
</Box>
|
</Box>
|
||||||
</Grid>
|
</Box>
|
||||||
</Grid>
|
</Box>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Results Display Section */}
|
{/* Results Display Section */}
|
||||||
|
|
|
||||||
|
|
@ -181,6 +181,7 @@ const EnhancedSearchGuide: React.FC<EnhancedSearchGuideProps> = ({ onExampleClic
|
||||||
sx={{
|
sx={{
|
||||||
mb: 1.5,
|
mb: 1.5,
|
||||||
transition: 'all 0.2s',
|
transition: 'all 0.2s',
|
||||||
|
backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'grey.800' : 'background.paper',
|
||||||
'&:hover': {
|
'&:hover': {
|
||||||
boxShadow: 2,
|
boxShadow: 2,
|
||||||
transform: 'translateY(-2px)',
|
transform: 'translateY(-2px)',
|
||||||
|
|
@ -200,7 +201,7 @@ const EnhancedSearchGuide: React.FC<EnhancedSearchGuideProps> = ({ onExampleClic
|
||||||
variant="body2"
|
variant="body2"
|
||||||
fontFamily="monospace"
|
fontFamily="monospace"
|
||||||
sx={{
|
sx={{
|
||||||
backgroundColor: 'grey.100',
|
backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'grey.800' : 'grey.100',
|
||||||
px: 1,
|
px: 1,
|
||||||
py: 0.5,
|
py: 0.5,
|
||||||
borderRadius: 1,
|
borderRadius: 1,
|
||||||
|
|
@ -273,7 +274,11 @@ const EnhancedSearchGuide: React.FC<EnhancedSearchGuideProps> = ({ onExampleClic
|
||||||
|
|
||||||
if (compact && !expanded) {
|
if (compact && !expanded) {
|
||||||
return (
|
return (
|
||||||
<Paper variant="outlined" sx={{ p: 2, mb: 2 }}>
|
<Paper variant="outlined" sx={{
|
||||||
|
p: 2,
|
||||||
|
mb: 2,
|
||||||
|
backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'grey.900' : 'background.paper'
|
||||||
|
}}>
|
||||||
<Box display="flex" alignItems="center" justifyContent="space-between">
|
<Box display="flex" alignItems="center" justifyContent="space-between">
|
||||||
<Box display="flex" alignItems="center" gap={1}>
|
<Box display="flex" alignItems="center" gap={1}>
|
||||||
<TipIcon color="primary" />
|
<TipIcon color="primary" />
|
||||||
|
|
@ -294,7 +299,7 @@ const EnhancedSearchGuide: React.FC<EnhancedSearchGuideProps> = ({ onExampleClic
|
||||||
}
|
}
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Paper elevation={0} sx={{ p: 3, mb: 3, backgroundColor: 'grey.50' }}>
|
<Paper elevation={0} sx={{ p: 3, mb: 3, backgroundColor: (theme) => theme.palette.mode === 'dark' ? 'grey.900' : 'grey.50' }}>
|
||||||
<Box display="flex" alignItems="center" justifyContent="space-between" mb={2}>
|
<Box display="flex" alignItems="center" justifyContent="space-between" mb={2}>
|
||||||
<Typography variant="h6" display="flex" alignItems="center" gap={1}>
|
<Typography variant="h6" display="flex" alignItems="center" gap={1}>
|
||||||
<TipIcon color="primary" />
|
<TipIcon color="primary" />
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ import {
|
||||||
Logout as LogoutIcon,
|
Logout as LogoutIcon,
|
||||||
Description as DocumentIcon,
|
Description as DocumentIcon,
|
||||||
Storage as StorageIcon,
|
Storage as StorageIcon,
|
||||||
|
Error as ErrorIcon,
|
||||||
} from '@mui/icons-material';
|
} from '@mui/icons-material';
|
||||||
import { useNavigate, useLocation } from 'react-router-dom';
|
import { useNavigate, useLocation } from 'react-router-dom';
|
||||||
import { useAuth } from '../../contexts/AuthContext';
|
import { useAuth } from '../../contexts/AuthContext';
|
||||||
|
|
@ -64,6 +65,7 @@ const navigationItems: NavigationItem[] = [
|
||||||
{ text: 'Search', icon: SearchIcon, path: '/search' },
|
{ text: 'Search', icon: SearchIcon, path: '/search' },
|
||||||
{ text: 'Sources', icon: StorageIcon, path: '/sources' },
|
{ text: 'Sources', icon: StorageIcon, path: '/sources' },
|
||||||
{ text: 'Watch Folder', icon: FolderIcon, path: '/watch' },
|
{ text: 'Watch Folder', icon: FolderIcon, path: '/watch' },
|
||||||
|
{ text: 'Failed OCR', icon: ErrorIcon, path: '/failed-ocr' },
|
||||||
];
|
];
|
||||||
|
|
||||||
const AppLayout: React.FC<AppLayoutProps> = ({ children }) => {
|
const AppLayout: React.FC<AppLayoutProps> = ({ children }) => {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,576 @@
|
||||||
|
import React, { useState, useEffect } from 'react';
|
||||||
|
import {
|
||||||
|
Box,
|
||||||
|
Typography,
|
||||||
|
Card,
|
||||||
|
CardContent,
|
||||||
|
Grid,
|
||||||
|
Button,
|
||||||
|
Chip,
|
||||||
|
Alert,
|
||||||
|
AlertTitle,
|
||||||
|
Table,
|
||||||
|
TableBody,
|
||||||
|
TableCell,
|
||||||
|
TableContainer,
|
||||||
|
TableHead,
|
||||||
|
TableRow,
|
||||||
|
Paper,
|
||||||
|
Dialog,
|
||||||
|
DialogTitle,
|
||||||
|
DialogContent,
|
||||||
|
DialogActions,
|
||||||
|
Pagination,
|
||||||
|
CircularProgress,
|
||||||
|
Tooltip,
|
||||||
|
IconButton,
|
||||||
|
Collapse,
|
||||||
|
LinearProgress,
|
||||||
|
Snackbar,
|
||||||
|
} from '@mui/material';
|
||||||
|
import {
|
||||||
|
Refresh as RefreshIcon,
|
||||||
|
Error as ErrorIcon,
|
||||||
|
Info as InfoIcon,
|
||||||
|
ExpandMore as ExpandMoreIcon,
|
||||||
|
ExpandLess as ExpandLessIcon,
|
||||||
|
Schedule as ScheduleIcon,
|
||||||
|
Visibility as VisibilityIcon,
|
||||||
|
Download as DownloadIcon,
|
||||||
|
} from '@mui/icons-material';
|
||||||
|
import { format } from 'date-fns';
|
||||||
|
import { api, documentService } from '../services/api';
|
||||||
|
|
||||||
|
// One row of the failed-OCR listing, as consumed by the table, the expandable
// error panel and the details dialog on this page.
interface FailedDocument {
  id: string;
  filename: string;
  original_filename: string;
  // Passed to formatFileSize(), which treats it as a byte count.
  file_size: number;
  mime_type: string;
  // Timestamps are fed to `new Date(...)` before formatting.
  created_at: string;
  updated_at: string;
  tags: string[];
  ocr_status: string;
  // Nullable: the server's retry handler resets both columns to NULL, and every
  // read on this page already falls back with `||` when the value is missing.
  // NOTE(review): confirm the listing endpoint serializes missing values as null.
  ocr_error: string | null;
  ocr_failure_reason: string | null;
  ocr_completed_at?: string;
  retry_count: number;
  // Absent when no attempt has been recorded; the UI then shows "No previous attempts".
  last_attempt_at?: string;
  // Gates the "Retry OCR" buttons in the table and in the details dialog.
  can_retry: boolean;
  // Shown as a chip and mapped to a chip color by getFailureCategoryColor().
  failure_category: string;
}
|
||||||
|
|
||||||
|
// Per-category failure tally rendered as a chip in the statistics card.
interface FailureCategory {
  // Used as the React key for the chip.
  reason: string;
  // Human-readable label; also the value handed to getFailureCategoryColor().
  display_name: string;
  // Number shown next to the label.
  count: number;
}
|
||||||
|
|
||||||
|
// Paging metadata returned next to the document list; `total` drives the page count.
interface FailedOcrPagination {
  total: number;
  limit: number;
  offset: number;
  has_more: boolean;
}

// Summary numbers rendered in the statistics cards above the table.
interface FailedOcrStatistics {
  total_failed: number;
  failure_categories: FailureCategory[];
}

// Payload of the failed-OCR listing request made by fetchFailedDocuments().
interface FailedOcrResponse {
  documents: FailedDocument[];
  pagination: FailedOcrPagination;
  statistics: FailedOcrStatistics;
}
|
||||||
|
|
||||||
|
// Shape of the reply to an OCR retry request.
interface RetryResponse {
  // False when the server declined the retry; `message` then carries the reason.
  success: boolean;
  message: string;
  // Only meaningful when the retry was actually queued.
  queue_id?: string;
  estimated_wait_minutes?: number;
}
|
||||||
|
|
||||||
|
const FailedOcrPage: React.FC = () => {
|
||||||
|
const [documents, setDocuments] = useState<FailedDocument[]>([]);
|
||||||
|
const [loading, setLoading] = useState(true);
|
||||||
|
const [retrying, setRetrying] = useState<string | null>(null);
|
||||||
|
const [statistics, setStatistics] = useState<FailedOcrResponse['statistics'] | null>(null);
|
||||||
|
const [pagination, setPagination] = useState({ page: 1, limit: 25 });
|
||||||
|
const [totalPages, setTotalPages] = useState(0);
|
||||||
|
const [selectedDocument, setSelectedDocument] = useState<FailedDocument | null>(null);
|
||||||
|
const [detailsOpen, setDetailsOpen] = useState(false);
|
||||||
|
const [expandedRows, setExpandedRows] = useState<Set<string>>(new Set());
|
||||||
|
const [snackbar, setSnackbar] = useState<{ open: boolean; message: string; severity: 'success' | 'error' }>({
|
||||||
|
open: false,
|
||||||
|
message: '',
|
||||||
|
severity: 'success'
|
||||||
|
});
|
||||||
|
|
||||||
|
// Loads the current page of failed-OCR documents together with the summary
// statistics, and surfaces any request failure through the snackbar.
const fetchFailedDocuments = async () => {
  try {
    setLoading(true);

    const { page, limit } = pagination;
    // The API is offset-based while the UI tracks 1-based page numbers.
    const { data } = await documentService.getFailedOcrDocuments(limit, (page - 1) * limit);

    setDocuments(data.documents);
    setStatistics(data.statistics);
    setTotalPages(Math.ceil(data.pagination.total / limit));
  } catch (error) {
    console.error('Failed to fetch failed OCR documents:', error);
    setSnackbar({ open: true, message: 'Failed to load failed OCR documents', severity: 'error' });
  } finally {
    // Always clear the spinner, whether the request succeeded or not.
    setLoading(false);
  }
};
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
fetchFailedDocuments();
|
||||||
|
}, [pagination.page]);
|
||||||
|
|
||||||
|
// Requests a new OCR run for one failed document and reports the outcome in
// the snackbar. While the request is in flight the document id is kept in
// `retrying` so its retry buttons are disabled and show a spinner.
const handleRetryOcr = async (document: FailedDocument) => {
  try {
    setRetrying(document.id);
    const response = await documentService.retryOcr(document.id);

    if (response.data.success) {
      // `??` instead of `||`: an estimate of 0 minutes is a real value and must
      // not be reported as "Unknown"; only a missing estimate falls back.
      const estimatedWait = response.data.estimated_wait_minutes ?? 'Unknown';
      setSnackbar({
        open: true,
        message: `OCR retry queued for "${document.filename}". Estimated wait time: ${estimatedWait} minutes.`,
        severity: 'success'
      });

      // Refresh the list to update retry counts and status
      await fetchFailedDocuments();
    } else {
      // The server answered but declined the retry; prefer its explanation.
      setSnackbar({
        open: true,
        message: response.data.message || 'Failed to retry OCR',
        severity: 'error'
      });
    }
  } catch (error) {
    console.error('Failed to retry OCR:', error);
    setSnackbar({
      open: true,
      message: 'Failed to retry OCR processing',
      severity: 'error'
    });
  } finally {
    setRetrying(null);
  }
};
|
||||||
|
|
||||||
|
// Formats a byte count as a short human-readable string, e.g. 1536 -> "1.5 KB".
// Values are scaled by 1024 and rounded to at most two decimals. Non-positive
// or non-finite input is reported as "0 B" rather than "NaN undefined", and
// anything beyond the largest unit stays expressed in that unit instead of
// indexing past the end of the unit table.
const formatFileSize = (bytes: number): string => {
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';

  const k = 1024;
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'];

  // Repeated division instead of a log ratio: it cannot land on a negative or
  // out-of-range index and avoids floating-point error at exact unit boundaries.
  let value = bytes;
  let unit = 0;
  while (value >= k && unit < sizes.length - 1) {
    value /= k;
    unit += 1;
  }

  // parseFloat strips trailing zeros left by toFixed ("1.50" -> 1.5).
  return parseFloat(value.toFixed(2)) + ' ' + sizes[unit];
};
|
||||||
|
|
||||||
|
// Maps a failure category label to the MUI Chip color used to display it.
// PDF-related problems are warnings, resource exhaustion is an error, the
// catch-all "Unknown Error" is informational, and anything else is neutral.
const getFailureCategoryColor = (category: string): "error" | "warning" | "info" | "default" => {
  const warningCategories = ['PDF Font Issues', 'PDF Corruption', 'PDF Parsing Error'];
  const errorCategories = ['Timeout', 'Memory Limit'];

  if (warningCategories.includes(category)) {
    return 'warning';
  }
  if (errorCategories.includes(category)) {
    return 'error';
  }
  return category === 'Unknown Error' ? 'info' : 'default';
};
|
||||||
|
|
||||||
|
// Flips the expanded/collapsed state of one table row. A fresh Set is built
// each time so React sees a new reference and re-renders.
const toggleRowExpansion = (documentId: string) => {
  const nextExpanded = new Set(expandedRows);
  // Set.delete() returns false when the id was not present, i.e. the row was
  // collapsed and should now be expanded.
  const wasExpanded = nextExpanded.delete(documentId);
  if (!wasExpanded) {
    nextExpanded.add(documentId);
  }
  setExpandedRows(nextExpanded);
};
|
||||||
|
|
||||||
|
// Opens the details dialog for the given row. The selection is stored first
// so the dialog has its content available when it becomes visible.
const showDocumentDetails = (failedDocument: FailedDocument) => {
  setSelectedDocument(failedDocument);
  setDetailsOpen(true);
};
|
||||||
|
|
||||||
|
if (loading && documents.length === 0) {
|
||||||
|
return (
|
||||||
|
<Box display="flex" justifyContent="center" alignItems="center" minHeight="400px">
|
||||||
|
<CircularProgress />
|
||||||
|
</Box>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
<Box sx={{ p: 3 }}>
|
||||||
|
<Box display="flex" justifyContent="space-between" alignItems="center" mb={3}>
|
||||||
|
<Typography variant="h4" component="h1">
|
||||||
|
Failed OCR Documents
|
||||||
|
</Typography>
|
||||||
|
<Button
|
||||||
|
variant="outlined"
|
||||||
|
startIcon={<RefreshIcon />}
|
||||||
|
onClick={fetchFailedDocuments}
|
||||||
|
disabled={loading}
|
||||||
|
>
|
||||||
|
Refresh
|
||||||
|
</Button>
|
||||||
|
</Box>
|
||||||
|
|
||||||
|
{/* Statistics Overview */}
|
||||||
|
{statistics && (
|
||||||
|
<Grid container spacing={3} mb={3}>
|
||||||
|
<Grid item xs={12} md={4}>
|
||||||
|
<Card>
|
||||||
|
<CardContent>
|
||||||
|
<Typography variant="h6" color="error">
|
||||||
|
<ErrorIcon sx={{ mr: 1, verticalAlign: 'middle' }} />
|
||||||
|
Total Failed
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="h3" color="error.main">
|
||||||
|
{statistics.total_failed}
|
||||||
|
</Typography>
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
</Grid>
|
||||||
|
<Grid item xs={12} md={8}>
|
||||||
|
<Card>
|
||||||
|
<CardContent>
|
||||||
|
<Typography variant="h6" mb={2}>
|
||||||
|
Failure Categories
|
||||||
|
</Typography>
|
||||||
|
<Box display="flex" flexWrap="wrap" gap={1}>
|
||||||
|
{statistics.failure_categories.map((category) => (
|
||||||
|
<Chip
|
||||||
|
key={category.reason}
|
||||||
|
label={`${category.display_name}: ${category.count}`}
|
||||||
|
color={getFailureCategoryColor(category.display_name)}
|
||||||
|
variant="outlined"
|
||||||
|
size="small"
|
||||||
|
/>
|
||||||
|
))}
|
||||||
|
</Box>
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
</Grid>
|
||||||
|
</Grid>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{documents.length === 0 ? (
|
||||||
|
<Alert severity="success" sx={{ mt: 2 }}>
|
||||||
|
<AlertTitle>Great news!</AlertTitle>
|
||||||
|
No documents have failed OCR processing. All your documents are processing successfully.
|
||||||
|
</Alert>
|
||||||
|
) : (
|
||||||
|
<>
|
||||||
|
<Alert severity="info" sx={{ mb: 2 }}>
|
||||||
|
<AlertTitle>OCR Failures</AlertTitle>
|
||||||
|
These documents failed OCR processing. You can retry OCR with detailed output to understand why failures occurred.
|
||||||
|
Common causes include corrupted PDFs, unsupported fonts, or memory limitations.
|
||||||
|
</Alert>
|
||||||
|
|
||||||
|
<TableContainer component={Paper}>
|
||||||
|
<Table>
|
||||||
|
<TableHead>
|
||||||
|
<TableRow>
|
||||||
|
<TableCell />
|
||||||
|
<TableCell>Document</TableCell>
|
||||||
|
<TableCell>Failure Type</TableCell>
|
||||||
|
<TableCell>Retry Count</TableCell>
|
||||||
|
<TableCell>Last Failed</TableCell>
|
||||||
|
<TableCell>Actions</TableCell>
|
||||||
|
</TableRow>
|
||||||
|
</TableHead>
|
||||||
|
<TableBody>
|
||||||
|
{documents.map((document) => (
|
||||||
|
<React.Fragment key={document.id}>
|
||||||
|
<TableRow>
|
||||||
|
<TableCell>
|
||||||
|
<IconButton
|
||||||
|
size="small"
|
||||||
|
onClick={() => toggleRowExpansion(document.id)}
|
||||||
|
>
|
||||||
|
{expandedRows.has(document.id) ? <ExpandLessIcon /> : <ExpandMoreIcon />}
|
||||||
|
</IconButton>
|
||||||
|
</TableCell>
|
||||||
|
<TableCell>
|
||||||
|
<Box>
|
||||||
|
<Typography variant="body2" fontWeight="bold">
|
||||||
|
{document.filename}
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="caption" color="text.secondary">
|
||||||
|
{formatFileSize(document.file_size)} • {document.mime_type}
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</TableCell>
|
||||||
|
<TableCell>
|
||||||
|
<Chip
|
||||||
|
label={document.failure_category}
|
||||||
|
color={getFailureCategoryColor(document.failure_category)}
|
||||||
|
size="small"
|
||||||
|
/>
|
||||||
|
</TableCell>
|
||||||
|
<TableCell>
|
||||||
|
<Typography variant="body2">
|
||||||
|
{document.retry_count} attempts
|
||||||
|
</Typography>
|
||||||
|
</TableCell>
|
||||||
|
<TableCell>
|
||||||
|
<Typography variant="body2">
|
||||||
|
{document.updated_at ? format(new Date(document.updated_at), 'MMM dd, yyyy HH:mm') : 'Unknown'}
|
||||||
|
</Typography>
|
||||||
|
</TableCell>
|
||||||
|
<TableCell>
|
||||||
|
<Box display="flex" gap={1}>
|
||||||
|
<Tooltip title="Retry OCR">
|
||||||
|
<IconButton
|
||||||
|
size="small"
|
||||||
|
onClick={() => handleRetryOcr(document)}
|
||||||
|
disabled={retrying === document.id || !document.can_retry}
|
||||||
|
>
|
||||||
|
{retrying === document.id ? (
|
||||||
|
<CircularProgress size={16} />
|
||||||
|
) : (
|
||||||
|
<RefreshIcon />
|
||||||
|
)}
|
||||||
|
</IconButton>
|
||||||
|
</Tooltip>
|
||||||
|
<Tooltip title="View Details">
|
||||||
|
<IconButton
|
||||||
|
size="small"
|
||||||
|
onClick={() => showDocumentDetails(document)}
|
||||||
|
>
|
||||||
|
<VisibilityIcon />
|
||||||
|
</IconButton>
|
||||||
|
</Tooltip>
|
||||||
|
<Tooltip title="Download Document">
|
||||||
|
<IconButton
|
||||||
|
size="small"
|
||||||
|
onClick={() => window.open(`/api/documents/${document.id}/download`, '_blank')}
|
||||||
|
>
|
||||||
|
<DownloadIcon />
|
||||||
|
</IconButton>
|
||||||
|
</Tooltip>
|
||||||
|
</Box>
|
||||||
|
</TableCell>
|
||||||
|
</TableRow>
|
||||||
|
<TableRow>
|
||||||
|
<TableCell sx={{ paddingBottom: 0, paddingTop: 0 }} colSpan={6}>
|
||||||
|
<Collapse in={expandedRows.has(document.id)} timeout="auto" unmountOnExit>
|
||||||
|
<Box sx={{ margin: 1, p: 2, bgcolor: 'grey.50' }}>
|
||||||
|
<Typography variant="h6" gutterBottom>
|
||||||
|
Error Details
|
||||||
|
</Typography>
|
||||||
|
<Grid container spacing={2}>
|
||||||
|
<Grid item xs={12} md={6}>
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
<strong>Failure Reason:</strong>
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body2" sx={{ mb: 1 }}>
|
||||||
|
{document.ocr_failure_reason || 'Not specified'}
|
||||||
|
</Typography>
|
||||||
|
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
<strong>Error Message:</strong>
|
||||||
|
</Typography>
|
||||||
|
<Typography
|
||||||
|
variant="body2"
|
||||||
|
sx={{
|
||||||
|
fontFamily: 'monospace',
|
||||||
|
bgcolor: 'grey.100',
|
||||||
|
p: 1,
|
||||||
|
borderRadius: 1,
|
||||||
|
fontSize: '0.75rem',
|
||||||
|
wordBreak: 'break-word'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{document.ocr_error || 'No error message available'}
|
||||||
|
</Typography>
|
||||||
|
</Grid>
|
||||||
|
<Grid item xs={12} md={6}>
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
<strong>Last Attempt:</strong>
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body2" sx={{ mb: 1 }}>
|
||||||
|
{document.last_attempt_at
|
||||||
|
? format(new Date(document.last_attempt_at), 'PPpp')
|
||||||
|
: 'No previous attempts'}
|
||||||
|
</Typography>
|
||||||
|
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
<strong>File Created:</strong>
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body2">
|
||||||
|
{format(new Date(document.created_at), 'PPpp')}
|
||||||
|
</Typography>
|
||||||
|
</Grid>
|
||||||
|
</Grid>
|
||||||
|
</Box>
|
||||||
|
</Collapse>
|
||||||
|
</TableCell>
|
||||||
|
</TableRow>
|
||||||
|
</React.Fragment>
|
||||||
|
))}
|
||||||
|
</TableBody>
|
||||||
|
</Table>
|
||||||
|
</TableContainer>
|
||||||
|
|
||||||
|
{/* Pagination */}
|
||||||
|
{totalPages > 1 && (
|
||||||
|
<Box display="flex" justifyContent="center" mt={3}>
|
||||||
|
<Pagination
|
||||||
|
count={totalPages}
|
||||||
|
page={pagination.page}
|
||||||
|
onChange={(_, page) => setPagination(prev => ({ ...prev, page }))}
|
||||||
|
color="primary"
|
||||||
|
/>
|
||||||
|
</Box>
|
||||||
|
)}
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Document Details Dialog */}
|
||||||
|
<Dialog
|
||||||
|
open={detailsOpen}
|
||||||
|
onClose={() => setDetailsOpen(false)}
|
||||||
|
maxWidth="md"
|
||||||
|
fullWidth
|
||||||
|
>
|
||||||
|
<DialogTitle>
|
||||||
|
Document Details: {selectedDocument?.filename}
|
||||||
|
</DialogTitle>
|
||||||
|
<DialogContent>
|
||||||
|
{selectedDocument && (
|
||||||
|
<Grid container spacing={2}>
|
||||||
|
<Grid item xs={12} md={6}>
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
<strong>Original Filename:</strong>
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body2" sx={{ mb: 2 }}>
|
||||||
|
{selectedDocument.original_filename}
|
||||||
|
</Typography>
|
||||||
|
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
<strong>File Size:</strong>
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body2" sx={{ mb: 2 }}>
|
||||||
|
{formatFileSize(selectedDocument.file_size)}
|
||||||
|
</Typography>
|
||||||
|
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
<strong>MIME Type:</strong>
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body2" sx={{ mb: 2 }}>
|
||||||
|
{selectedDocument.mime_type}
|
||||||
|
</Typography>
|
||||||
|
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
<strong>Tags:</strong>
|
||||||
|
</Typography>
|
||||||
|
<Box sx={{ mb: 2 }}>
|
||||||
|
{selectedDocument.tags.length > 0 ? (
|
||||||
|
selectedDocument.tags.map((tag) => (
|
||||||
|
<Chip key={tag} label={tag} size="small" sx={{ mr: 1, mb: 1 }} />
|
||||||
|
))
|
||||||
|
) : (
|
||||||
|
<Typography variant="body2" color="text.secondary">No tags</Typography>
|
||||||
|
)}
|
||||||
|
</Box>
|
||||||
|
</Grid>
|
||||||
|
<Grid item xs={12} md={6}>
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
<strong>Failure Category:</strong>
|
||||||
|
</Typography>
|
||||||
|
<Chip
|
||||||
|
label={selectedDocument.failure_category}
|
||||||
|
color={getFailureCategoryColor(selectedDocument.failure_category)}
|
||||||
|
sx={{ mb: 2 }}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
<strong>Retry Count:</strong>
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body2" sx={{ mb: 2 }}>
|
||||||
|
{selectedDocument.retry_count} attempts
|
||||||
|
</Typography>
|
||||||
|
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
<strong>Created:</strong>
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body2" sx={{ mb: 2 }}>
|
||||||
|
{format(new Date(selectedDocument.created_at), 'PPpp')}
|
||||||
|
</Typography>
|
||||||
|
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
<strong>Last Updated:</strong>
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body2">
|
||||||
|
{format(new Date(selectedDocument.updated_at), 'PPpp')}
|
||||||
|
</Typography>
|
||||||
|
</Grid>
|
||||||
|
<Grid item xs={12}>
|
||||||
|
<Typography variant="body2" color="text.secondary" sx={{ mb: 1 }}>
|
||||||
|
<strong>Full Error Message:</strong>
|
||||||
|
</Typography>
|
||||||
|
<Paper sx={{ p: 2, bgcolor: 'grey.50' }}>
|
||||||
|
<Typography
|
||||||
|
variant="body2"
|
||||||
|
sx={{
|
||||||
|
fontFamily: 'monospace',
|
||||||
|
fontSize: '0.875rem',
|
||||||
|
wordBreak: 'break-word',
|
||||||
|
whiteSpace: 'pre-wrap'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{selectedDocument.ocr_error || 'No error message available'}
|
||||||
|
</Typography>
|
||||||
|
</Paper>
|
||||||
|
</Grid>
|
||||||
|
</Grid>
|
||||||
|
)}
|
||||||
|
</DialogContent>
|
||||||
|
<DialogActions>
|
||||||
|
{selectedDocument?.can_retry && (
|
||||||
|
<Button
|
||||||
|
onClick={() => {
|
||||||
|
setDetailsOpen(false);
|
||||||
|
if (selectedDocument) {
|
||||||
|
handleRetryOcr(selectedDocument);
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
startIcon={<RefreshIcon />}
|
||||||
|
disabled={retrying === selectedDocument?.id}
|
||||||
|
>
|
||||||
|
Retry OCR
|
||||||
|
</Button>
|
||||||
|
)}
|
||||||
|
<Button onClick={() => setDetailsOpen(false)}>Close</Button>
|
||||||
|
</DialogActions>
|
||||||
|
</Dialog>
|
||||||
|
|
||||||
|
{/* Success/Error Snackbar */}
|
||||||
|
<Snackbar
|
||||||
|
open={snackbar.open}
|
||||||
|
autoHideDuration={6000}
|
||||||
|
onClose={() => setSnackbar(prev => ({ ...prev, open: false }))}
|
||||||
|
>
|
||||||
|
<Alert
|
||||||
|
onClose={() => setSnackbar(prev => ({ ...prev, open: false }))}
|
||||||
|
severity={snackbar.severity}
|
||||||
|
sx={{ width: '100%' }}
|
||||||
|
>
|
||||||
|
{snackbar.message}
|
||||||
|
</Alert>
|
||||||
|
</Snackbar>
|
||||||
|
</Box>
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
|
export default FailedOcrPage;
|
||||||
|
|
@ -186,6 +186,16 @@ export const documentService = {
|
||||||
})
|
})
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// POSTs to the retry endpoint of a single document; resolves with the raw client response.
retryOcr: (id: string) => api.post(`/documents/${id}/retry-ocr`),
|
||||||
|
|
||||||
|
// Fetches one page of documents whose OCR failed; `limit` and `offset` are
// forwarded as query parameters.
getFailedOcrDocuments: (limit = 50, offset = 0) => {
  const params = { limit, offset };
  return api.get(`/documents/failed-ocr`, { params });
},
|
||||||
|
|
||||||
search: (searchRequest: SearchRequest) => {
|
search: (searchRequest: SearchRequest) => {
|
||||||
return api.get<SearchResponse>('/search', {
|
return api.get<SearchResponse>('/search', {
|
||||||
params: searchRequest,
|
params: searchRequest,
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ use serde::Deserialize;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use utoipa::ToSchema;
|
use utoipa::ToSchema;
|
||||||
use sha2::{Sha256, Digest};
|
use sha2::{Sha256, Digest};
|
||||||
|
use sqlx::Row;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
auth::AuthUser,
|
auth::AuthUser,
|
||||||
|
|
@ -33,6 +34,8 @@ pub fn router() -> Router<Arc<AppState>> {
|
||||||
.route("/{id}/thumbnail", get(get_document_thumbnail))
|
.route("/{id}/thumbnail", get(get_document_thumbnail))
|
||||||
.route("/{id}/ocr", get(get_document_ocr))
|
.route("/{id}/ocr", get(get_document_ocr))
|
||||||
.route("/{id}/processed-image", get(get_processed_image))
|
.route("/{id}/processed-image", get(get_processed_image))
|
||||||
|
.route("/{id}/retry-ocr", post(retry_ocr))
|
||||||
|
.route("/failed-ocr", get(get_failed_ocr_documents))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[utoipa::path(
|
#[utoipa::path(
|
||||||
|
|
@ -471,4 +474,317 @@ async fn get_processed_image(
|
||||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
Ok(response)
|
Ok(response)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[utoipa::path(
|
||||||
|
post,
|
||||||
|
path = "/api/documents/{id}/retry-ocr",
|
||||||
|
tag = "documents",
|
||||||
|
security(
|
||||||
|
("bearer_auth" = [])
|
||||||
|
),
|
||||||
|
params(
|
||||||
|
("id" = uuid::Uuid, Path, description = "Document ID")
|
||||||
|
),
|
||||||
|
responses(
|
||||||
|
(status = 200, description = "OCR retry queued successfully", body = String),
|
||||||
|
(status = 404, description = "Document not found"),
|
||||||
|
(status = 400, description = "Document is not eligible for OCR retry"),
|
||||||
|
(status = 401, description = "Unauthorized")
|
||||||
|
)
|
||||||
|
)]
|
||||||
|
async fn retry_ocr(
|
||||||
|
State(state): State<Arc<AppState>>,
|
||||||
|
auth_user: AuthUser,
|
||||||
|
Path(document_id): Path<uuid::Uuid>,
|
||||||
|
) -> Result<Json<serde_json::Value>, StatusCode> {
|
||||||
|
// Check if document exists and belongs to user
|
||||||
|
let documents = state
|
||||||
|
.db
|
||||||
|
.get_documents_by_user_with_role(auth_user.user.id, auth_user.user.role, 1000, 0)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
let document = documents
|
||||||
|
.into_iter()
|
||||||
|
.find(|doc| doc.id == document_id)
|
||||||
|
.ok_or(StatusCode::NOT_FOUND)?;
|
||||||
|
|
||||||
|
// Check if document is eligible for OCR retry (failed or not processed)
|
||||||
|
let eligible = document.ocr_status.as_ref().map_or(true, |status| {
|
||||||
|
status == "failed" || status == "pending"
|
||||||
|
});
|
||||||
|
|
||||||
|
if !eligible {
|
||||||
|
return Ok(Json(serde_json::json!({
|
||||||
|
"success": false,
|
||||||
|
"message": "Document is not eligible for OCR retry. Current status: {}",
|
||||||
|
"current_status": document.ocr_status
|
||||||
|
})));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset document OCR fields
|
||||||
|
let reset_result = sqlx::query(
|
||||||
|
r#"
|
||||||
|
UPDATE documents
|
||||||
|
SET ocr_status = 'pending',
|
||||||
|
ocr_text = NULL,
|
||||||
|
ocr_error = NULL,
|
||||||
|
ocr_failure_reason = NULL,
|
||||||
|
ocr_confidence = NULL,
|
||||||
|
ocr_word_count = NULL,
|
||||||
|
ocr_processing_time_ms = NULL,
|
||||||
|
ocr_completed_at = NULL,
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE id = $1
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.bind(document_id)
|
||||||
|
.execute(state.db.get_pool())
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
if reset_result.rows_affected() == 0 {
|
||||||
|
return Err(StatusCode::NOT_FOUND);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate priority based on file size (higher priority for retries)
|
||||||
|
let priority = match document.file_size {
|
||||||
|
0..=1048576 => 15, // <= 1MB: highest priority (boosted for retry)
|
||||||
|
..=5242880 => 12, // 1-5MB: high priority
|
||||||
|
..=10485760 => 10, // 5-10MB: medium priority
|
||||||
|
..=52428800 => 8, // 10-50MB: low priority
|
||||||
|
_ => 6, // > 50MB: lowest priority
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add to OCR queue with detailed logging
|
||||||
|
match state.queue_service.enqueue_document(document_id, priority, document.file_size).await {
|
||||||
|
Ok(queue_id) => {
|
||||||
|
tracing::info!(
|
||||||
|
"OCR retry queued for document {} ({}): queue_id={}, priority={}, size={}",
|
||||||
|
document_id, document.filename, queue_id, priority, document.file_size
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(Json(serde_json::json!({
|
||||||
|
"success": true,
|
||||||
|
"message": "OCR retry queued successfully",
|
||||||
|
"queue_id": queue_id,
|
||||||
|
"document_id": document_id,
|
||||||
|
"priority": priority,
|
||||||
|
"estimated_wait_minutes": calculate_estimated_wait_time(priority).await
|
||||||
|
})))
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::error!("Failed to queue OCR retry for document {}: {}", document_id, e);
|
||||||
|
Err(StatusCode::INTERNAL_SERVER_ERROR)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[utoipa::path(
|
||||||
|
get,
|
||||||
|
path = "/api/documents/failed-ocr",
|
||||||
|
tag = "documents",
|
||||||
|
security(
|
||||||
|
("bearer_auth" = [])
|
||||||
|
),
|
||||||
|
params(
|
||||||
|
("limit" = Option<i64>, Query, description = "Number of documents to return (default: 50)"),
|
||||||
|
("offset" = Option<i64>, Query, description = "Number of documents to skip (default: 0)")
|
||||||
|
),
|
||||||
|
responses(
|
||||||
|
(status = 200, description = "List of documents with failed OCR", body = String),
|
||||||
|
(status = 401, description = "Unauthorized")
|
||||||
|
)
|
||||||
|
)]
|
||||||
|
async fn get_failed_ocr_documents(
|
||||||
|
State(state): State<Arc<AppState>>,
|
||||||
|
auth_user: AuthUser,
|
||||||
|
Query(pagination): Query<PaginationQuery>,
|
||||||
|
) -> Result<Json<serde_json::Value>, StatusCode> {
|
||||||
|
let limit = pagination.limit.unwrap_or(50);
|
||||||
|
let offset = pagination.offset.unwrap_or(0);
|
||||||
|
|
||||||
|
// Get failed OCR documents with additional failure details
|
||||||
|
let failed_docs = sqlx::query(
|
||||||
|
r#"
|
||||||
|
SELECT d.id, d.filename, d.original_filename, d.file_path, d.file_size,
|
||||||
|
d.mime_type, d.created_at, d.updated_at, d.user_id,
|
||||||
|
d.ocr_status, d.ocr_error, d.ocr_failure_reason,
|
||||||
|
d.ocr_completed_at, d.tags,
|
||||||
|
-- Count retry attempts from OCR queue
|
||||||
|
COALESCE(q.retry_count, 0) as retry_count,
|
||||||
|
q.last_attempt_at
|
||||||
|
FROM documents d
|
||||||
|
LEFT JOIN (
|
||||||
|
SELECT document_id,
|
||||||
|
COUNT(*) as retry_count,
|
||||||
|
MAX(created_at) as last_attempt_at
|
||||||
|
FROM ocr_queue
|
||||||
|
WHERE status IN ('failed', 'completed')
|
||||||
|
GROUP BY document_id
|
||||||
|
) q ON d.id = q.document_id
|
||||||
|
WHERE d.ocr_status = 'failed'
|
||||||
|
AND ($1 = $1 OR d.user_id = $1) -- Admin can see all, users see only their own
|
||||||
|
ORDER BY d.updated_at DESC
|
||||||
|
LIMIT $2 OFFSET $3
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.bind(if auth_user.user.role == crate::models::UserRole::Admin {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(auth_user.user.id)
|
||||||
|
})
|
||||||
|
.bind(limit)
|
||||||
|
.bind(offset)
|
||||||
|
.fetch_all(state.db.get_pool())
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
// Count total failed documents
|
||||||
|
let total_count: i64 = sqlx::query_scalar(
|
||||||
|
r#"
|
||||||
|
SELECT COUNT(*)
|
||||||
|
FROM documents
|
||||||
|
WHERE ocr_status = 'failed'
|
||||||
|
AND ($1 = $1 OR user_id = $1)
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.bind(if auth_user.user.role == crate::models::UserRole::Admin {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(auth_user.user.id)
|
||||||
|
})
|
||||||
|
.fetch_one(state.db.get_pool())
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
let failed_documents: Vec<serde_json::Value> = failed_docs
|
||||||
|
.into_iter()
|
||||||
|
.map(|row| {
|
||||||
|
let tags: Vec<String> = row.get::<Option<Vec<String>>, _>("tags").unwrap_or_default();
|
||||||
|
|
||||||
|
serde_json::json!({
|
||||||
|
"id": row.get::<uuid::Uuid, _>("id"),
|
||||||
|
"filename": row.get::<String, _>("filename"),
|
||||||
|
"original_filename": row.get::<String, _>("original_filename"),
|
||||||
|
"file_size": row.get::<i64, _>("file_size"),
|
||||||
|
"mime_type": row.get::<String, _>("mime_type"),
|
||||||
|
"created_at": row.get::<chrono::DateTime<chrono::Utc>, _>("created_at"),
|
||||||
|
"updated_at": row.get::<chrono::DateTime<chrono::Utc>, _>("updated_at"),
|
||||||
|
"tags": tags,
|
||||||
|
"ocr_status": row.get::<Option<String>, _>("ocr_status"),
|
||||||
|
"ocr_error": row.get::<Option<String>, _>("ocr_error"),
|
||||||
|
"ocr_failure_reason": row.get::<Option<String>, _>("ocr_failure_reason"),
|
||||||
|
"ocr_completed_at": row.get::<Option<chrono::DateTime<chrono::Utc>>, _>("ocr_completed_at"),
|
||||||
|
"retry_count": row.get::<Option<i64>, _>("retry_count").unwrap_or(0),
|
||||||
|
"last_attempt_at": row.get::<Option<chrono::DateTime<chrono::Utc>>, _>("last_attempt_at"),
|
||||||
|
"can_retry": true,
|
||||||
|
"failure_category": categorize_failure_reason(
|
||||||
|
row.get::<Option<String>, _>("ocr_failure_reason").as_deref(),
|
||||||
|
row.get::<Option<String>, _>("ocr_error").as_deref()
|
||||||
|
)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let response = serde_json::json!({
|
||||||
|
"documents": failed_documents,
|
||||||
|
"pagination": {
|
||||||
|
"total": total_count,
|
||||||
|
"limit": limit,
|
||||||
|
"offset": offset,
|
||||||
|
"has_more": offset + limit < total_count
|
||||||
|
},
|
||||||
|
"statistics": {
|
||||||
|
"total_failed": total_count,
|
||||||
|
"failure_categories": get_failure_statistics(&state, auth_user.user.id, auth_user.user.role.clone()).await?
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(Json(response))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a rough estimate, in minutes, of how long a queued OCR job with the
/// given `priority` will wait before being processed.
///
/// This is a simple estimation based on priority only - in a real
/// implementation, this would check actual queue depth and processing times.
///
/// Every priority belongs to exactly one tier:
/// `>= 15` -> 1, `10..=14` -> 3, `5..=9` -> 10, anything lower -> 30.
async fn calculate_estimated_wait_time(priority: i32) -> i64 {
    match priority {
        15.. => 1,      // High priority retry: ~1 minute
        // Inclusive upper bounds: with exclusive ranges, 14 and 9 matched no
        // tier and fell through to the 30 minute catch-all.
        10..=14 => 3,   // Medium priority: ~3 minutes
        5..=9 => 10,    // Low priority: ~10 minutes
        _ => 30,        // Very low priority: ~30 minutes
    }
}
|
||||||
|
|
||||||
|
/// Maps a stored OCR failure reason (and, as a fallback, the raw error text)
/// to a short display label.
///
/// * A recognised `failure_reason` code maps directly to its label.
/// * `Some("unknown")` or `None` falls back to a case-insensitive keyword
///   search of `error_message`; with no message or no keyword hit the result
///   is `"Unknown Error"`.
/// * Any other unrecognised code yields `"Other"`, regardless of the message.
fn categorize_failure_reason(failure_reason: Option<&str>, error_message: Option<&str>) -> &'static str {
    // Reason codes with a fixed label.
    const REASON_LABELS: [(&str, &str); 5] = [
        ("pdf_font_encoding", "PDF Font Issues"),
        ("pdf_corruption", "PDF Corruption"),
        ("processing_timeout", "Timeout"),
        ("memory_limit", "Memory Limit"),
        ("pdf_parsing_panic", "PDF Parsing Error"),
    ];
    // Keywords searched in the lower-cased error text; the first hit in this
    // order wins.
    const MESSAGE_KEYWORDS: [(&str, &str); 5] = [
        ("timeout", "Timeout"),
        ("memory", "Memory Limit"),
        ("font", "PDF Font Issues"),
        ("encoding", "PDF Font Issues"),
        ("corrupt", "PDF Corruption"),
    ];

    if let Some(code) = failure_reason {
        if let Some(&(_, label)) = REASON_LABELS.iter().find(|entry| entry.0 == code) {
            return label;
        }
        if code != "unknown" {
            return "Other";
        }
    }

    // Reason is absent or "unknown": try to categorize based on error message.
    let haystack = match error_message {
        Some(text) => text.to_lowercase(),
        None => return "Unknown Error",
    };

    MESSAGE_KEYWORDS
        .iter()
        .find(|entry| haystack.contains(entry.0))
        .map_or("Unknown Error", |&(_, label)| label)
}
|
||||||
|
|
||||||
|
async fn get_failure_statistics(
|
||||||
|
state: &Arc<AppState>,
|
||||||
|
user_id: uuid::Uuid,
|
||||||
|
user_role: crate::models::UserRole
|
||||||
|
) -> Result<serde_json::Value, StatusCode> {
|
||||||
|
let stats = sqlx::query(
|
||||||
|
r#"
|
||||||
|
SELECT
|
||||||
|
ocr_failure_reason,
|
||||||
|
COUNT(*) as count
|
||||||
|
FROM documents
|
||||||
|
WHERE ocr_status = 'failed'
|
||||||
|
AND ($1 = $1 OR user_id = $1)
|
||||||
|
GROUP BY ocr_failure_reason
|
||||||
|
ORDER BY count DESC
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.bind(if user_role == crate::models::UserRole::Admin {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(user_id)
|
||||||
|
})
|
||||||
|
.fetch_all(state.db.get_pool())
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
let categories: Vec<serde_json::Value> = stats
|
||||||
|
.into_iter()
|
||||||
|
.map(|row| {
|
||||||
|
let reason = row.get::<Option<String>, _>("ocr_failure_reason");
|
||||||
|
let count = row.get::<i64, _>("count");
|
||||||
|
|
||||||
|
serde_json::json!({
|
||||||
|
"reason": reason.clone().unwrap_or_else(|| "unknown".to_string()),
|
||||||
|
"display_name": categorize_failure_reason(reason.as_deref(), None),
|
||||||
|
"count": count
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Ok(serde_json::json!(categories))
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue