// Readur/frontend/src/pages/FailedOcrPage.tsx
//
// 1164 lines
// 45 KiB
// TypeScript

import React, { useState, useEffect } from 'react';
import {
Box,
Typography,
Card,
CardContent,
Button,
Chip,
Alert,
AlertTitle,
Table,
TableBody,
TableCell,
TableContainer,
TableHead,
TableRow,
Paper,
Dialog,
DialogTitle,
DialogContent,
DialogActions,
Pagination,
CircularProgress,
Tooltip,
IconButton,
Collapse,
LinearProgress,
Snackbar,
Tabs,
Tab,
TextField,
useTheme,
} from '@mui/material';
import Grid from '@mui/material/GridLegacy';
import {
Refresh as RefreshIcon,
Error as ErrorIcon,
Info as InfoIcon,
ExpandMore as ExpandMoreIcon,
ExpandLess as ExpandLessIcon,
Schedule as ScheduleIcon,
Visibility as VisibilityIcon,
Download as DownloadIcon,
FileCopy as FileCopyIcon,
Delete as DeleteIcon,
FindInPage as FindInPageIcon,
} from '@mui/icons-material';
import { format } from 'date-fns';
import { api, documentService } from '../services/api';
/**
 * A document whose OCR processing failed, as listed on the "Failed OCR" tab.
 *
 * Field names are snake_case; presumably they mirror the backend payload
 * returned by `documentService.getFailedOcrDocuments` (confirm against the service).
 */
interface FailedDocument {
  id: string;
  /** Name shown in the table row and in the details dialog title. */
  filename: string;
  /** Preferred over `filename` when downloading; shown in the details dialog. */
  original_filename: string;
  /** Passed to `formatFileSize`, which treats the value as a byte count. */
  file_size: number;
  mime_type: string;
  /** Timestamp string; parsed with `new Date(...)` before display. */
  created_at: string;
  /** Timestamp string; rendered in the "Last Failed" column. */
  updated_at: string;
  tags: string[];
  ocr_status: string;
  /** Raw error text, rendered in monospace as the error message. */
  ocr_error: string;
  /** Rendered under "Failure Reason"; the UI falls back to "Not specified" when empty. */
  ocr_failure_reason: string;
  ocr_completed_at?: string;
  /** Rendered as "<n> attempts". */
  retry_count: number;
  /** When absent the UI shows "No previous attempts". */
  last_attempt_at?: string;
  /** When false, the Retry OCR controls are disabled or hidden. */
  can_retry: boolean;
  /** Label shown on the failure chip and used to choose its color. */
  failure_category: string;
}
/** One entry of the per-category failure breakdown shown as chips above the table. */
interface FailureCategory {
  /** Used as the React key for the category chip. */
  reason: string;
  /** Human-readable label; also used to choose the chip color. */
  display_name: string;
  /** Rendered after the label as "<display_name>: <count>". */
  count: number;
}
/**
 * Shape of the failed-OCR listing payload consumed by the page.
 * `statistics` is also referenced by type (`FailedOcrResponse['statistics']`) for component state.
 */
interface FailedOcrResponse {
  /** Documents for the requested page. */
  documents: FailedDocument[];
  pagination: {
    /** Total matching items; divided by the page size to compute the page count. */
    total: number;
    limit: number;
    offset: number;
    has_more: boolean;
  };
  statistics: {
    /** Shown in the tab label and in the "Total Failed" card. */
    total_failed: number;
    failure_categories: FailureCategory[];
  };
}
/** Result of asking the backend to retry OCR for a single document. */
interface RetryResponse {
  /** Whether the retry was accepted; drives the success/error snackbar. */
  success: boolean;
  /** Shown to the user when `success` is false. */
  message: string;
  queue_id?: string;
  /** Included in the success message when present. */
  estimated_wait_minutes?: number;
}
/** A single file inside a duplicate group, rendered as a card on the "Duplicates" tab. */
interface DuplicateDocument {
  /** Used as the React key and to build the view/download requests. */
  id: string;
  filename: string;
  /** Shown as "Original: ..." only when it differs from `filename`. */
  original_filename: string;
  /** Passed to `formatFileSize`, which treats the value as a byte count. */
  file_size: number;
  mime_type: string;
  /** Timestamp string; rendered as the upload time. */
  created_at: string;
  user_id: string;
}
/** A set of documents reported as sharing the same content hash. */
interface DuplicateGroup {
  /** Used as the React key and expansion id; only the first 16 characters are displayed. */
  file_hash: string;
  /** Rendered as "<n> files". */
  duplicate_count: number;
  /** Timestamp string; rendered in the "First Uploaded" column. */
  first_uploaded: string;
  /** Timestamp string; rendered in the "Last Uploaded" column. */
  last_uploaded: string;
  /** Files in the group; the first entry is badged "First" in the UI. */
  documents: DuplicateDocument[];
}
/**
 * Shape of the duplicates listing payload consumed by the page.
 * `statistics` is also referenced by type (`DuplicatesResponse['statistics']`) for component state.
 */
interface DuplicatesResponse {
  /** Duplicate groups for the requested page. */
  duplicates: DuplicateGroup[];
  pagination: {
    /** Total matching groups; divided by the page size to compute the page count. */
    total: number;
    limit: number;
    offset: number;
    has_more: boolean;
  };
  statistics: {
    /** Shown in the tab label and in the statistics card. */
    total_duplicate_groups: number;
  };
}
const FailedOcrPage: React.FC = () => {
const theme = useTheme();
const [currentTab, setCurrentTab] = useState(0);
const [documents, setDocuments] = useState<FailedDocument[]>([]);
const [duplicates, setDuplicates] = useState<DuplicateGroup[]>([]);
const [loading, setLoading] = useState(true);
const [duplicatesLoading, setDuplicatesLoading] = useState(false);
const [retrying, setRetrying] = useState<string | null>(null);
const [statistics, setStatistics] = useState<FailedOcrResponse['statistics'] | null>(null);
const [duplicateStatistics, setDuplicateStatistics] = useState<DuplicatesResponse['statistics'] | null>(null);
const [pagination, setPagination] = useState({ page: 1, limit: 25 });
const [duplicatesPagination, setDuplicatesPagination] = useState({ page: 1, limit: 25 });
const [totalPages, setTotalPages] = useState(0);
const [duplicatesTotalPages, setDuplicatesTotalPages] = useState(0);
const [selectedDocument, setSelectedDocument] = useState<FailedDocument | null>(null);
const [detailsOpen, setDetailsOpen] = useState(false);
const [expandedRows, setExpandedRows] = useState<Set<string>>(new Set());
const [expandedDuplicateGroups, setExpandedDuplicateGroups] = useState<Set<string>>(new Set());
const [snackbar, setSnackbar] = useState<{ open: boolean; message: string; severity: 'success' | 'error' | 'info' | 'warning' }>({
open: false,
message: '',
severity: 'success'
});
// Low confidence documents state
const [confidenceThreshold, setConfidenceThreshold] = useState<number>(30);
const [lowConfidenceLoading, setLowConfidenceLoading] = useState(false);
const [previewData, setPreviewData] = useState<any>(null);
const [confirmDeleteOpen, setConfirmDeleteOpen] = useState(false);
/**
 * Loads the current page of failed-OCR documents together with the summary
 * statistics and page count, and reports failures through the snackbar.
 *
 * If the requested page no longer exists (for example the last document on the
 * last page was retried successfully), the page index is moved back to the last
 * valid page instead of rendering an empty list. The page change re-triggers
 * this fetch through the effect that depends on `pagination.page`.
 */
const fetchFailedDocuments = async () => {
  try {
    setLoading(true);
    const offset = (pagination.page - 1) * pagination.limit;
    const response = await documentService.getFailedOcrDocuments(pagination.limit, offset);
    if (response?.data) {
      setStatistics(response.data.statistics || null);
      if (response.data.pagination) {
        const pageCount = Math.ceil(response.data.pagination.total / pagination.limit);
        setTotalPages(pageCount);
        // Only steps back when the current page is strictly beyond the last one,
        // so this cannot loop: the next fetch is for page <= pageCount (or page 1).
        // A NaN pageCount (missing total) never satisfies the comparison.
        if (pagination.page > 1 && pagination.page > pageCount) {
          setPagination(prev => ({ ...prev, page: Math.max(1, pageCount) }));
          return;
        }
      }
      setDocuments(response.data.documents || []);
    }
  } catch (error) {
    console.error('Failed to fetch failed OCR documents:', error);
    setSnackbar({
      open: true,
      message: 'Failed to load failed OCR documents',
      severity: 'error'
    });
  } finally {
    // Runs on the early return above as well, so the spinner never gets stuck.
    setLoading(false);
  }
};
/**
 * Loads the current page of duplicate groups together with the duplicate
 * statistics and page count, and reports failures through the snackbar.
 *
 * If the requested page no longer exists (the number of groups shrank since the
 * page was selected), the page index is moved back to the last valid page; the
 * page change re-triggers this fetch through the effect that depends on
 * `duplicatesPagination.page`.
 */
const fetchDuplicates = async () => {
  try {
    setDuplicatesLoading(true);
    const offset = (duplicatesPagination.page - 1) * duplicatesPagination.limit;
    const response = await documentService.getDuplicates(duplicatesPagination.limit, offset);
    if (response?.data) {
      setDuplicateStatistics(response.data.statistics || null);
      if (response.data.pagination) {
        const pageCount = Math.ceil(response.data.pagination.total / duplicatesPagination.limit);
        setDuplicatesTotalPages(pageCount);
        // Strict "beyond the last page" check keeps this from looping; a NaN
        // pageCount (missing total) never satisfies the comparison.
        if (duplicatesPagination.page > 1 && duplicatesPagination.page > pageCount) {
          setDuplicatesPagination(prev => ({ ...prev, page: Math.max(1, pageCount) }));
          return;
        }
      }
      setDuplicates(response.data.duplicates || []);
    }
  } catch (error) {
    console.error('Failed to fetch duplicates:', error);
    setSnackbar({
      open: true,
      message: 'Failed to load duplicate documents',
      severity: 'error'
    });
  } finally {
    // Runs on the early return above as well, so the spinner never gets stuck.
    setDuplicatesLoading(false);
  }
};
// Fetch failed-OCR documents on mount and whenever the selected page changes.
// NOTE(review): `fetchFailedDocuments` is intentionally not listed as a dependency;
// it is recreated on every render, so adding it would refetch on each render.
useEffect(() => {
fetchFailedDocuments();
}, [pagination.page]);
// Fetch duplicates only while the Duplicates tab (index 1) is active: when the
// tab is opened and whenever its page changes.
useEffect(() => {
if (currentTab === 1) {
fetchDuplicates();
}
}, [currentTab, duplicatesPagination.page]);
/**
 * Requests an OCR retry for one document and reports the outcome in the snackbar.
 *
 * While the request is in flight the document id is stored in `retrying`, which
 * the UI uses to show a spinner and disable the retry button. On success the
 * failed-documents list is reloaded so retry counts and status stay current.
 *
 * @param document - The failed document to retry.
 */
const handleRetryOcr = async (document: FailedDocument) => {
  try {
    setRetrying(document.id);
    const response = await documentService.retryOcr(document.id);
    if (response.data.success) {
      // `??` rather than `||`: an estimate of 0 minutes is a valid answer and
      // must not be reported as "Unknown".
      const waitEstimate = response.data.estimated_wait_minutes ?? 'Unknown';
      setSnackbar({
        open: true,
        message: `OCR retry queued for "${document.filename}". Estimated wait time: ${waitEstimate} minutes.`,
        severity: 'success'
      });
      // Refresh the list to update retry counts and status
      await fetchFailedDocuments();
    } else {
      setSnackbar({
        open: true,
        message: response.data.message || 'Failed to retry OCR',
        severity: 'error'
      });
    }
  } catch (error) {
    console.error('Failed to retry OCR:', error);
    setSnackbar({
      open: true,
      message: 'Failed to retry OCR processing',
      severity: 'error'
    });
  } finally {
    setRetrying(null);
  }
};
/**
 * Formats a byte count as a short human-readable string using 1024-based units,
 * e.g. `1536` -> `"1.5 KB"`. Values are rounded to at most two decimals.
 *
 * The unit is found by repeated division rather than logarithms, so exact powers
 * of 1024 cannot land on the wrong unit through floating-point rounding, and the
 * unit index is always within range (values beyond the largest unit stay in it).
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size; `"0 B"` for zero, negative or non-finite input.
 */
const formatFileSize = (bytes: number): string => {
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';
  const k = 1024;
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'];
  let value = bytes;
  let unitIndex = 0;
  while (value >= k && unitIndex < sizes.length - 1) {
    value /= k;
    unitIndex += 1;
  }
  // parseFloat drops trailing zeros produced by toFixed ("1.50" -> 1.5).
  return parseFloat(value.toFixed(2)) + ' ' + sizes[unitIndex];
};
/**
 * Maps a failure category label to the MUI chip color used to display it.
 *
 * PDF-related categories are shown as warnings, resource-limit categories as
 * errors, and the generic "Unknown Error" as info. Any other label (including
 * the empty string) falls back to the default chip color.
 *
 * @param category - Category label to look up (exact, case-sensitive match).
 * @returns The chip color for that category.
 */
const getFailureCategoryColor = (category: string): "error" | "warning" | "info" | "default" => {
  // A Map is used instead of a plain object so labels such as "constructor"
  // cannot accidentally match inherited properties.
  const colorByCategory = new Map<string, "error" | "warning" | "info" | "default">([
    ['PDF Font Issues', 'warning'],
    ['PDF Corruption', 'warning'],
    ['PDF Parsing Error', 'warning'],
    ['Timeout', 'error'],
    ['Memory Limit', 'error'],
    ['Unknown Error', 'info'],
  ]);
  const matched = colorByCategory.get(category);
  return matched === undefined ? 'default' : matched;
};
/**
 * Expands the error-details row of a failed document, or collapses it if it is
 * already expanded.
 *
 * @param documentId - Id of the document whose details row is toggled.
 */
const toggleRowExpansion = (documentId: string) => {
  // Copy first: state must be replaced with a new Set, not mutated in place.
  const nextExpanded = new Set(expandedRows);
  // `delete` returns false when the id was not present, i.e. the row was collapsed.
  const wasExpanded = nextExpanded.delete(documentId);
  if (!wasExpanded) {
    nextExpanded.add(documentId);
  }
  setExpandedRows(nextExpanded);
};
/**
 * Opens the details dialog for the given failed document.
 *
 * @param document - Document to show in the dialog.
 */
const showDocumentDetails = (document: FailedDocument) => {
  // Select the document before opening so the dialog has content on first paint.
  setSelectedDocument(document);
  setDetailsOpen(true);
};
/**
 * Expands the file list of a duplicate group, or collapses it if it is already
 * expanded.
 *
 * @param groupHash - `file_hash` of the duplicate group to toggle.
 */
const toggleDuplicateGroupExpansion = (groupHash: string) => {
  // Copy first: state must be replaced with a new Set, not mutated in place.
  const nextExpanded = new Set(expandedDuplicateGroups);
  // `delete` returns false when the hash was not present, i.e. the group was collapsed.
  const wasExpanded = nextExpanded.delete(groupHash);
  if (!wasExpanded) {
    nextExpanded.add(groupHash);
  }
  setExpandedDuplicateGroups(nextExpanded);
};
/**
 * `Tabs` change handler: stores the index of the newly selected tab.
 *
 * @param event - The originating event (unused).
 * @param newValue - Index of the selected tab.
 */
const handleTabChange = (event: React.SyntheticEvent, newValue: number) => {
  setCurrentTab(newValue);
};
/**
 * Reloads the data behind whichever tab is currently selected:
 * tab 0 reloads failed OCR documents, tab 1 reloads duplicates and
 * tab 2 re-runs the low-confidence preview. Other values do nothing.
 */
const refreshCurrentTab = () => {
  switch (currentTab) {
    case 0:
      fetchFailedDocuments();
      break;
    case 1:
      fetchDuplicates();
      break;
    case 2:
      handlePreviewLowConfidence();
      break;
    default:
      break;
  }
};
// Low confidence document handlers
/**
 * Asks the backend which documents fall under the current confidence threshold
 * without deleting them, stores the result as the preview and shows the
 * returned message in the snackbar.
 *
 * The second argument `true` to `deleteLowConfidence` is used here for previews
 * and `false` for the real deletion; presumably it is a preview/dry-run flag
 * (confirm against `documentService`).
 */
const handlePreviewLowConfidence = async () => {
  try {
    setLowConfidenceLoading(true);
    const response = await documentService.deleteLowConfidence(confidenceThreshold, true);
    setPreviewData(response.data);
    setSnackbar({
      open: true,
      message: response.data.message,
      severity: 'info'
    });
  } catch (error) {
    // Log the underlying error like the other handlers on this page do;
    // the snackbar text alone is not enough to diagnose a failure.
    console.error('Failed to preview low confidence documents:', error);
    setSnackbar({
      open: true,
      message: 'Failed to preview low confidence documents',
      severity: 'error'
    });
  } finally {
    setLowConfidenceLoading(false);
  }
};
/**
 * Deletes the documents under the current confidence threshold.
 *
 * Requires a preview with at least one match; otherwise it only shows a warning.
 * On success the preview is cleared and the confirmation dialog is closed. On
 * failure the dialog stays open so the user can retry or cancel.
 */
const handleDeleteLowConfidence = async () => {
  if (!previewData || previewData.matched_count === 0) {
    setSnackbar({
      open: true,
      message: 'No documents to delete',
      severity: 'warning'
    });
    return;
  }
  try {
    setLowConfidenceLoading(true);
    const response = await documentService.deleteLowConfidence(confidenceThreshold, false);
    setSnackbar({
      open: true,
      message: response.data.message,
      severity: 'success'
    });
    setPreviewData(null);
    setConfirmDeleteOpen(false);
    // Refresh other tabs if they have data affected
    // NOTE(review): deletion is started from the Low Confidence tab (index 2),
    // so this branch looks unreachable from the current UI — confirm whether the
    // failed-documents list should be refreshed unconditionally instead.
    if (currentTab === 0) {
      fetchFailedDocuments();
    }
  } catch (error) {
    // Log the underlying error like the other handlers on this page do.
    console.error('Failed to delete low confidence documents:', error);
    setSnackbar({
      open: true,
      message: 'Failed to delete low confidence documents',
      severity: 'error'
    });
  } finally {
    setLowConfidenceLoading(false);
  }
};
// Initial-load state: while the failed-documents request is in flight and there
// is nothing to show yet, render only a centered spinner instead of the page.
// Once documents exist, later reloads keep the existing page visible.
if (loading && (!documents || documents.length === 0)) {
return (
<Box display="flex" justifyContent="center" alignItems="center" minHeight="400px">
<CircularProgress />
</Box>
);
}
return (
<Box sx={{ p: 3 }}>
<Box display="flex" justifyContent="space-between" alignItems="center" mb={3}>
  <Typography variant="h4" component="h1">
    Document Management
  </Typography>
  {/*
    Refresh acts on the active tab. It is disabled while any of the three tabs
    has a request in flight; on the Low Confidence tab it re-runs the preview,
    so it must also wait for a running preview or deletion to finish.
  */}
  <Button
    variant="outlined"
    startIcon={<RefreshIcon />}
    onClick={refreshCurrentTab}
    disabled={loading || duplicatesLoading || lowConfidenceLoading}
  >
    Refresh
  </Button>
</Box>
<Paper sx={{ mb: 3 }}>
  {/* Tab order defines the indices used throughout: 0 = Failed OCR, 1 = Duplicates, 2 = Low Confidence. */}
  <Tabs value={currentTab} onChange={handleTabChange} aria-label="document management tabs">
    <Tab
      icon={<ErrorIcon />}
      label={`Failed OCR${statistics ? ` (${statistics.total_failed})` : ''}`}
      iconPosition="start"
    />
    <Tab
      icon={<FileCopyIcon />}
      label={`Duplicates${duplicateStatistics ? ` (${duplicateStatistics.total_duplicate_groups})` : ''}`}
      iconPosition="start"
    />
    <Tab
      icon={<FindInPageIcon />}
      label={`Low Confidence${previewData ? ` (${previewData.matched_count})` : ''}`}
      iconPosition="start"
    />
  </Tabs>
</Paper>
{/* Failed OCR Tab Content */}
{currentTab === 0 && (
  <>
    {/* Statistics Overview */}
    {statistics && (
      <Grid container spacing={3} mb={3}>
        <Grid item xs={12} md={4}>
          <Card>
            <CardContent>
              <Typography variant="h6" color="error">
                <ErrorIcon sx={{ mr: 1, verticalAlign: 'middle' }} />
                Total Failed
              </Typography>
              <Typography variant="h3" color="error.main">
                {statistics.total_failed}
              </Typography>
            </CardContent>
          </Card>
        </Grid>
        <Grid item xs={12} md={8}>
          <Card>
            <CardContent>
              <Typography variant="h6" mb={2}>
                Failure Categories
              </Typography>
              <Box display="flex" flexWrap="wrap" gap={1}>
                {/* Tolerate a statistics object without a category list. */}
                {(statistics.failure_categories || []).map((category) => (
                  <Chip
                    key={category.reason}
                    label={`${category.display_name}: ${category.count}`}
                    color={getFailureCategoryColor(category.display_name)}
                    variant="outlined"
                    size="small"
                  />
                ))}
              </Box>
            </CardContent>
          </Card>
        </Grid>
      </Grid>
    )}
    {(!documents || documents.length === 0) ? (
      <Alert severity="success" sx={{ mt: 2 }}>
        <AlertTitle>Great news!</AlertTitle>
        No documents have failed OCR processing. All your documents are processing successfully.
      </Alert>
    ) : (
      <>
        <Alert severity="info" sx={{ mb: 2 }}>
          <AlertTitle>OCR Failures</AlertTitle>
          These documents failed OCR processing. You can retry OCR with detailed output to understand why failures occurred.
          Common causes include corrupted PDFs, unsupported fonts, or memory limitations.
        </Alert>
        <TableContainer component={Paper}>
          <Table>
            <TableHead>
              <TableRow>
                <TableCell />
                <TableCell>Document</TableCell>
                <TableCell>Failure Type</TableCell>
                <TableCell>Retry Count</TableCell>
                <TableCell>Last Failed</TableCell>
                <TableCell>Actions</TableCell>
              </TableRow>
            </TableHead>
            <TableBody>
              {(documents || []).map((document) => (
                <React.Fragment key={document.id}>
                  {/* Summary row */}
                  <TableRow>
                    <TableCell>
                      <IconButton
                        size="small"
                        onClick={() => toggleRowExpansion(document.id)}
                      >
                        {expandedRows.has(document.id) ? <ExpandLessIcon /> : <ExpandMoreIcon />}
                      </IconButton>
                    </TableCell>
                    <TableCell>
                      <Box>
                        <Typography variant="body2" fontWeight="bold">
                          {document.filename}
                        </Typography>
                        {/* Same "size • type" separator as the duplicate cards. */}
                        <Typography variant="caption" color="text.secondary">
                          {formatFileSize(document.file_size)} • {document.mime_type}
                        </Typography>
                      </Box>
                    </TableCell>
                    <TableCell>
                      <Chip
                        label={document.failure_category}
                        color={getFailureCategoryColor(document.failure_category)}
                        size="small"
                      />
                    </TableCell>
                    <TableCell>
                      <Typography variant="body2">
                        {document.retry_count} attempts
                      </Typography>
                    </TableCell>
                    <TableCell>
                      <Typography variant="body2">
                        {document.updated_at ? format(new Date(document.updated_at), 'MMM dd, yyyy HH:mm') : 'Unknown'}
                      </Typography>
                    </TableCell>
                    <TableCell>
                      <Box display="flex" gap={1}>
                        <Tooltip title="Retry OCR">
                          <IconButton
                            size="small"
                            onClick={() => handleRetryOcr(document)}
                            disabled={retrying === document.id || !document.can_retry}
                          >
                            {retrying === document.id ? (
                              <CircularProgress size={16} />
                            ) : (
                              <RefreshIcon />
                            )}
                          </IconButton>
                        </Tooltip>
                        <Tooltip title="View Details">
                          <IconButton
                            size="small"
                            onClick={() => showDocumentDetails(document)}
                          >
                            <VisibilityIcon />
                          </IconButton>
                        </Tooltip>
                        <Tooltip title="Download Document">
                          <IconButton
                            size="small"
                            onClick={async () => {
                              try {
                                await documentService.downloadFile(document.id, document.original_filename || document.filename);
                              } catch (error) {
                                console.error('Download failed:', error);
                                // Tell the user too; a console entry alone looks like a dead button.
                                setSnackbar({
                                  open: true,
                                  message: `Failed to download "${document.original_filename || document.filename}"`,
                                  severity: 'error'
                                });
                              }
                            }}
                          >
                            <DownloadIcon />
                          </IconButton>
                        </Tooltip>
                      </Box>
                    </TableCell>
                  </TableRow>
                  {/* Collapsible error-details row */}
                  <TableRow>
                    <TableCell sx={{ paddingBottom: 0, paddingTop: 0 }} colSpan={6}>
                      <Collapse in={expandedRows.has(document.id)} timeout="auto" unmountOnExit>
                        <Box sx={{
                          margin: 1,
                          p: 2,
                          bgcolor: (theme) => theme.palette.mode === 'dark' ? 'grey.900' : 'grey.50',
                          borderRadius: 1
                        }}>
                          <Typography variant="h6" gutterBottom>
                            Error Details
                          </Typography>
                          <Grid container spacing={2}>
                            <Grid item xs={12} md={6}>
                              <Typography variant="body2" color="text.secondary">
                                <strong>Failure Reason:</strong>
                              </Typography>
                              <Typography variant="body2" sx={{ mb: 1 }}>
                                {document.ocr_failure_reason || 'Not specified'}
                              </Typography>
                              <Typography variant="body2" color="text.secondary">
                                <strong>Error Message:</strong>
                              </Typography>
                              <Typography
                                variant="body2"
                                sx={{
                                  fontFamily: 'monospace',
                                  bgcolor: (theme) => theme.palette.mode === 'dark' ? 'grey.800' : 'grey.100',
                                  p: 1,
                                  borderRadius: 1,
                                  fontSize: '0.75rem',
                                  wordBreak: 'break-word'
                                }}
                              >
                                {document.ocr_error || 'No error message available'}
                              </Typography>
                            </Grid>
                            <Grid item xs={12} md={6}>
                              <Typography variant="body2" color="text.secondary">
                                <strong>Last Attempt:</strong>
                              </Typography>
                              <Typography variant="body2" sx={{ mb: 1 }}>
                                {document.last_attempt_at
                                  ? format(new Date(document.last_attempt_at), 'PPpp')
                                  : 'No previous attempts'}
                              </Typography>
                              <Typography variant="body2" color="text.secondary">
                                <strong>File Created:</strong>
                              </Typography>
                              {/* Guarded like the other timestamps: format() throws on an invalid date, which would break the whole render. */}
                              <Typography variant="body2">
                                {document.created_at
                                  ? format(new Date(document.created_at), 'PPpp')
                                  : 'Unknown'}
                              </Typography>
                            </Grid>
                          </Grid>
                        </Box>
                      </Collapse>
                    </TableCell>
                  </TableRow>
                </React.Fragment>
              ))}
            </TableBody>
          </Table>
        </TableContainer>
        {/* Pagination */}
        {totalPages > 1 && (
          <Box display="flex" justifyContent="center" mt={3}>
            <Pagination
              count={totalPages}
              page={pagination.page}
              onChange={(_, page) => setPagination(prev => ({ ...prev, page }))}
              color="primary"
            />
          </Box>
        )}
      </>
    )}
  </>
)}
{/* Duplicates Tab Content */}
{currentTab === 1 && (
  <>
    {/* Duplicate Statistics Overview */}
    {duplicateStatistics && (
      <Grid container spacing={3} mb={3}>
        <Grid item xs={12} md={6}>
          <Card>
            <CardContent>
              <Typography variant="h6" color="warning.main">
                <FileCopyIcon sx={{ mr: 1, verticalAlign: 'middle' }} />
                Total Duplicate Groups
              </Typography>
              <Typography variant="h3" color="warning.main">
                {duplicateStatistics.total_duplicate_groups}
              </Typography>
            </CardContent>
          </Card>
        </Grid>
      </Grid>
    )}
    {duplicatesLoading ? (
      <Box display="flex" justifyContent="center" alignItems="center" minHeight="400px">
        <CircularProgress />
      </Box>
    ) : duplicates.length === 0 ? (
      <Alert severity="success" sx={{ mt: 2 }}>
        <AlertTitle>No duplicates found!</AlertTitle>
        You don't have any duplicate documents. All your files have unique content.
      </Alert>
    ) : (
      <>
        <Alert severity="info" sx={{ mb: 2 }}>
          <AlertTitle>Duplicate Documents Found</AlertTitle>
          These documents have identical content but may have different filenames.
          You can expand each group to see all files with the same content and choose which ones to keep.
        </Alert>
        <Alert severity="warning" sx={{ mb: 2 }}>
          <AlertTitle>What should you do?</AlertTitle>
          <Box component="ul" sx={{ mt: 1, mb: 0, pl: 2 }}>
            <li><strong>Review each group:</strong> Click to expand and see all duplicate files</li>
            <li><strong>Keep the best version:</strong> Choose the file with the most descriptive name</li>
            <li><strong>Check content:</strong> Use View/Download to verify files are truly identical</li>
            <li><strong>Note for admin:</strong> Consider implementing bulk delete functionality for duplicates</li>
          </Box>
        </Alert>
        <TableContainer component={Paper}>
          <Table>
            <TableHead>
              <TableRow>
                <TableCell />
                <TableCell>Content Hash</TableCell>
                <TableCell>Duplicate Count</TableCell>
                <TableCell>First Uploaded</TableCell>
                <TableCell>Last Uploaded</TableCell>
                <TableCell>Actions</TableCell>
              </TableRow>
            </TableHead>
            <TableBody>
              {duplicates.map((group) => (
                <React.Fragment key={group.file_hash}>
                  {/* Group summary row */}
                  <TableRow>
                    <TableCell>
                      <IconButton
                        size="small"
                        onClick={() => toggleDuplicateGroupExpansion(group.file_hash)}
                      >
                        {expandedDuplicateGroups.has(group.file_hash) ? <ExpandLessIcon /> : <ExpandMoreIcon />}
                      </IconButton>
                    </TableCell>
                    <TableCell>
                      <Typography variant="body2" fontFamily="monospace">
                        {group.file_hash.substring(0, 16)}...
                      </Typography>
                    </TableCell>
                    <TableCell>
                      <Chip
                        label={`${group.duplicate_count} files`}
                        color="warning"
                        size="small"
                      />
                    </TableCell>
                    <TableCell>
                      <Typography variant="body2">
                        {format(new Date(group.first_uploaded), 'MMM dd, yyyy')}
                      </Typography>
                    </TableCell>
                    <TableCell>
                      <Typography variant="body2">
                        {format(new Date(group.last_uploaded), 'MMM dd, yyyy')}
                      </Typography>
                    </TableCell>
                    <TableCell>
                      <Typography variant="body2" color="text.secondary">
                        View files below
                      </Typography>
                    </TableCell>
                  </TableRow>
                  {/* Collapsible list of the files in this group */}
                  <TableRow>
                    <TableCell sx={{ paddingBottom: 0, paddingTop: 0 }} colSpan={6}>
                      <Collapse in={expandedDuplicateGroups.has(group.file_hash)} timeout="auto" unmountOnExit>
                        <Box
                          sx={{
                            margin: 1,
                            p: 3,
                            background: theme.palette.mode === 'light'
                              ? 'rgba(248, 250, 252, 0.8)'
                              : 'rgba(30, 30, 30, 0.8)',
                            backdropFilter: 'blur(10px)',
                            borderRadius: 2,
                            border: `1px solid ${theme.palette.divider}`,
                          }}
                        >
                          <Typography variant="h6" gutterBottom sx={{
                            color: theme.palette.primary.main,
                            display: 'flex',
                            alignItems: 'center',
                            gap: 1
                          }}>
                            <FileCopyIcon />
                            Duplicate Files ({group.duplicate_count} total)
                          </Typography>
                          <Alert severity="info" sx={{ mb: 2, fontSize: '0.875rem' }}>
                            <strong>Storage Impact:</strong> These {group.duplicate_count} files contain identical content.
                            Consider keeping only the best-named version to save space.
                          </Alert>
                          <Grid container spacing={2}>
                            {/* Tolerate a group that arrives without its file list. */}
                            {(group.documents || []).map((doc, index) => (
                              <Grid item xs={12} md={6} lg={4} key={doc.id}>
                                <Card
                                  variant="outlined"
                                  sx={{
                                    background: theme.palette.mode === 'light'
                                      ? 'rgba(255, 255, 255, 0.9)'
                                      : 'rgba(40, 40, 40, 0.9)',
                                    backdropFilter: 'blur(5px)',
                                    border: `1px solid ${theme.palette.divider}`,
                                    transition: 'all 0.2s ease',
                                    '&:hover': {
                                      transform: 'translateY(-2px)',
                                      boxShadow: theme.shadows[4],
                                    }
                                  }}
                                >
                                  <CardContent>
                                    <Box display="flex" justifyContent="space-between" alignItems="flex-start" mb={1}>
                                      <Typography variant="body2" fontWeight="bold" sx={{
                                        color: theme.palette.text.primary,
                                        wordBreak: 'break-word',
                                        flex: 1,
                                        mr: 1
                                      }}>
                                        {doc.filename}
                                      </Typography>
                                      {/* Badge the first entry of the group as returned by the API. */}
                                      {index === 0 && (
                                        <Chip
                                          label="First"
                                          size="small"
                                          color="primary"
                                          variant="outlined"
                                        />
                                      )}
                                    </Box>
                                    {doc.original_filename !== doc.filename && (
                                      <Typography variant="caption" color="text.secondary" display="block">
                                        Original: {doc.original_filename}
                                      </Typography>
                                    )}
                                    <Typography variant="caption" display="block" color="text.secondary" sx={{ mb: 1 }}>
                                      {formatFileSize(doc.file_size)} • {doc.mime_type}
                                    </Typography>
                                    <Typography variant="caption" display="block" color="text.secondary" sx={{ mb: 2 }}>
                                      Uploaded: {format(new Date(doc.created_at), 'MMM dd, yyyy HH:mm')}
                                    </Typography>
                                    <Box display="flex" justifyContent="space-between" alignItems="center">
                                      <Box>
                                        <Tooltip title="View Document">
                                          <IconButton
                                            size="small"
                                            onClick={() => window.open(`/api/documents/${doc.id}/view`, '_blank')}
                                            sx={{ color: theme.palette.primary.main }}
                                          >
                                            <VisibilityIcon />
                                          </IconButton>
                                        </Tooltip>
                                        <Tooltip title="Download Document">
                                          <IconButton
                                            size="small"
                                            onClick={async () => {
                                              try {
                                                await documentService.downloadFile(doc.id, doc.original_filename || doc.filename);
                                              } catch (error) {
                                                console.error('Download failed:', error);
                                                // Tell the user too; a console entry alone looks like a dead button.
                                                setSnackbar({
                                                  open: true,
                                                  message: `Failed to download "${doc.original_filename || doc.filename}"`,
                                                  severity: 'error'
                                                });
                                              }
                                            }}
                                            sx={{ color: theme.palette.secondary.main }}
                                          >
                                            <DownloadIcon />
                                          </IconButton>
                                        </Tooltip>
                                      </Box>
                                      <Tooltip title="Get document details and duplicate information">
                                        <Button
                                          size="small"
                                          variant="outlined"
                                          color="info"
                                          startIcon={<FindInPageIcon />}
                                          sx={{ fontSize: '0.75rem' }}
                                          onClick={() => {
                                            setSnackbar({
                                              open: true,
                                              message: `Document "${doc.filename}" has ${group.duplicate_count - 1} duplicate(s). Content hash: ${group.file_hash.substring(0, 16)}...`,
                                              severity: 'info'
                                            });
                                          }}
                                        >
                                          Info
                                        </Button>
                                      </Tooltip>
                                    </Box>
                                  </CardContent>
                                </Card>
                              </Grid>
                            ))}
                          </Grid>
                        </Box>
                      </Collapse>
                    </TableCell>
                  </TableRow>
                </React.Fragment>
              ))}
            </TableBody>
          </Table>
        </TableContainer>
        {/* Duplicates Pagination */}
        {duplicatesTotalPages > 1 && (
          <Box display="flex" justifyContent="center" mt={3}>
            <Pagination
              count={duplicatesTotalPages}
              page={duplicatesPagination.page}
              onChange={(_, page) => setDuplicatesPagination(prev => ({ ...prev, page }))}
              color="primary"
            />
          </Box>
        )}
      </>
    )}
  </>
)}
{/* Low Confidence Documents Tab Content */}
{currentTab === 2 && (
  <>
    <Alert severity="info" sx={{ mb: 3 }}>
      <AlertTitle>Low Confidence Document Deletion</AlertTitle>
      <Typography>
        This tool allows you to delete documents with OCR confidence below a specified threshold.
        Use the preview feature first to see what documents would be affected before deleting.
      </Typography>
    </Alert>
    <Card sx={{ mb: 3 }}>
      <CardContent>
        <Grid container spacing={3} alignItems="center">
          <Grid item xs={12} md={4}>
            <TextField
              label="Maximum Confidence Threshold (%)"
              type="number"
              value={confidenceThreshold}
              onChange={(e) => {
                // Clamp the typed value into the 0-100 range.
                const nextThreshold = Math.max(0, Math.min(100, Number(e.target.value)));
                if (nextThreshold !== confidenceThreshold) {
                  // A preview is only valid for the threshold it was computed with.
                  // Dropping it disables Delete until the user previews again, so the
                  // confirmation dialog can never show a count from an older threshold
                  // while the deletion runs with the new one.
                  setPreviewData(null);
                }
                setConfidenceThreshold(nextThreshold);
              }}
              fullWidth
              inputProps={{ min: 0, max: 100, step: 1 }}
              helperText="Documents with confidence below this value will be deleted"
            />
          </Grid>
          <Grid item xs={12} md={4}>
            <Button
              variant="outlined"
              onClick={handlePreviewLowConfidence}
              disabled={lowConfidenceLoading}
              startIcon={lowConfidenceLoading ? <CircularProgress size={20} /> : <FindInPageIcon />}
              fullWidth
            >
              Preview Documents
            </Button>
          </Grid>
          <Grid item xs={12} md={4}>
            {/* Deletion requires a current preview with at least one match. */}
            <Button
              variant="contained"
              color="warning"
              onClick={() => setConfirmDeleteOpen(true)}
              disabled={!previewData || previewData.matched_count === 0 || lowConfidenceLoading}
              startIcon={<DeleteIcon />}
              fullWidth
            >
              Delete Low Confidence Documents
            </Button>
          </Grid>
        </Grid>
      </CardContent>
    </Card>
    {/* Preview Results */}
    {previewData && (
      <Card sx={{ mb: 3 }}>
        <CardContent>
          <Typography variant="h6" gutterBottom>
            Preview Results
          </Typography>
          <Typography color={previewData.matched_count > 0 ? 'warning.main' : 'success.main'}>
            {previewData.message}
          </Typography>
          {previewData.matched_count > 0 && (
            <Box sx={{ mt: 2 }}>
              <Typography variant="body2" color="text.secondary">
                Document IDs that would be deleted:
              </Typography>
              {/* Show at most the first 10 ids and summarize the rest. */}
              <Typography variant="body2" sx={{ fontFamily: 'monospace', wordBreak: 'break-all' }}>
                {previewData.document_ids.slice(0, 10).join(', ')}
                {previewData.document_ids.length > 10 && ` ... and ${previewData.document_ids.length - 10} more`}
              </Typography>
            </Box>
          )}
        </CardContent>
      </Card>
    )}
    {/* Loading State */}
    {lowConfidenceLoading && !previewData && (
      <Box display="flex" justifyContent="center" alignItems="center" minHeight="200px">
        <CircularProgress />
        <Typography sx={{ ml: 2 }}>Processing request...</Typography>
      </Box>
    )}
  </>
)}
{/* Confirmation Dialog */}
<Dialog
  open={confirmDeleteOpen}
  onClose={() => setConfirmDeleteOpen(false)}
  maxWidth="sm"
  fullWidth
>
  <DialogTitle color="warning.main">
    <DeleteIcon sx={{ mr: 1, verticalAlign: 'middle' }} />
    Confirm Low Confidence Document Deletion
  </DialogTitle>
  <DialogContent>
    <Typography>
      Are you sure you want to delete {previewData?.matched_count || 0} documents with OCR confidence below {confidenceThreshold}%?
    </Typography>
    <Alert severity="warning" sx={{ mt: 2 }}>
      This action cannot be undone. The documents and their files will be permanently deleted.
    </Alert>
  </DialogContent>
  <DialogActions>
    <Button onClick={() => setConfirmDeleteOpen(false)}>
      Cancel
    </Button>
    <Button
      onClick={handleDeleteLowConfidence}
      color="warning"
      variant="contained"
      disabled={lowConfidenceLoading}
      startIcon={lowConfidenceLoading ? <CircularProgress size={20} /> : <DeleteIcon />}
    >
      {lowConfidenceLoading ? 'Deleting...' : 'Delete Documents'}
    </Button>
  </DialogActions>
</Dialog>
{/* Document Details Dialog */}
<Dialog
  open={detailsOpen}
  onClose={() => setDetailsOpen(false)}
  maxWidth="md"
  fullWidth
>
  <DialogTitle>
    Document Details: {selectedDocument?.filename}
  </DialogTitle>
  <DialogContent>
    {selectedDocument && (
      <Grid container spacing={2}>
        {/* Left column: file facts */}
        <Grid item xs={12} md={6}>
          <Typography variant="body2" color="text.secondary">
            <strong>Original Filename:</strong>
          </Typography>
          <Typography variant="body2" sx={{ mb: 2 }}>
            {selectedDocument.original_filename}
          </Typography>
          <Typography variant="body2" color="text.secondary">
            <strong>File Size:</strong>
          </Typography>
          <Typography variant="body2" sx={{ mb: 2 }}>
            {formatFileSize(selectedDocument.file_size)}
          </Typography>
          <Typography variant="body2" color="text.secondary">
            <strong>MIME Type:</strong>
          </Typography>
          <Typography variant="body2" sx={{ mb: 2 }}>
            {selectedDocument.mime_type}
          </Typography>
          <Typography variant="body2" color="text.secondary">
            <strong>Tags:</strong>
          </Typography>
          <Box sx={{ mb: 2 }}>
            {/* Treat a missing tag list the same as an empty one. */}
            {(selectedDocument.tags || []).length > 0 ? (
              (selectedDocument.tags || []).map((tag) => (
                <Chip key={tag} label={tag} size="small" sx={{ mr: 1, mb: 1 }} />
              ))
            ) : (
              <Typography variant="body2" color="text.secondary">No tags</Typography>
            )}
          </Box>
        </Grid>
        {/* Right column: failure facts */}
        <Grid item xs={12} md={6}>
          <Typography variant="body2" color="text.secondary">
            <strong>Failure Category:</strong>
          </Typography>
          <Chip
            label={selectedDocument.failure_category}
            color={getFailureCategoryColor(selectedDocument.failure_category)}
            sx={{ mb: 2 }}
          />
          <Typography variant="body2" color="text.secondary">
            <strong>Retry Count:</strong>
          </Typography>
          <Typography variant="body2" sx={{ mb: 2 }}>
            {selectedDocument.retry_count} attempts
          </Typography>
          <Typography variant="body2" color="text.secondary">
            <strong>Created:</strong>
          </Typography>
          {/* Timestamps are guarded: format() throws on an invalid date, which would break the dialog render. */}
          <Typography variant="body2" sx={{ mb: 2 }}>
            {selectedDocument.created_at
              ? format(new Date(selectedDocument.created_at), 'PPpp')
              : 'Unknown'}
          </Typography>
          <Typography variant="body2" color="text.secondary">
            <strong>Last Updated:</strong>
          </Typography>
          <Typography variant="body2">
            {selectedDocument.updated_at
              ? format(new Date(selectedDocument.updated_at), 'PPpp')
              : 'Unknown'}
          </Typography>
        </Grid>
        <Grid item xs={12}>
          <Typography variant="body2" color="text.secondary" sx={{ mb: 1 }}>
            <strong>Full Error Message:</strong>
          </Typography>
          {/* Mode-aware background, matching the expanded table row; a fixed light grey is unreadable in dark mode. */}
          <Paper sx={{ p: 2, bgcolor: (theme) => theme.palette.mode === 'dark' ? 'grey.800' : 'grey.50' }}>
            <Typography
              variant="body2"
              sx={{
                fontFamily: 'monospace',
                fontSize: '0.875rem',
                wordBreak: 'break-word',
                whiteSpace: 'pre-wrap'
              }}
            >
              {selectedDocument.ocr_error || 'No error message available'}
            </Typography>
          </Paper>
        </Grid>
      </Grid>
    )}
  </DialogContent>
  <DialogActions>
    {selectedDocument?.can_retry && (
      <Button
        onClick={() => {
          // Close first so the snackbar result is visible behind the dialog.
          setDetailsOpen(false);
          if (selectedDocument) {
            handleRetryOcr(selectedDocument);
          }
        }}
        startIcon={<RefreshIcon />}
        disabled={retrying === selectedDocument?.id}
      >
        Retry OCR
      </Button>
    )}
    <Button onClick={() => setDetailsOpen(false)}>Close</Button>
  </DialogActions>
</Dialog>
{/* Success/Error Snackbar */}
<Snackbar
  open={snackbar.open}
  autoHideDuration={6000}
  onClose={() => setSnackbar(prev => ({ ...prev, open: false }))}
>
  {/* Only `open` is flipped on close so message and severity stay stable during the exit animation. */}
  <Alert
    onClose={() => setSnackbar(prev => ({ ...prev, open: false }))}
    severity={snackbar.severity}
    sx={{ width: '100%' }}
  >
    {snackbar.message}
  </Alert>
</Snackbar>
</Box>
);
};
export default FailedOcrPage;