1746 lines
69 KiB
TypeScript
1746 lines
69 KiB
TypeScript
import React, { useState, useEffect } from 'react';
|
|
import { useNavigate } from 'react-router-dom';
|
|
import {
|
|
Box,
|
|
Typography,
|
|
Card,
|
|
CardContent,
|
|
Button,
|
|
Chip,
|
|
Alert,
|
|
AlertTitle,
|
|
Table,
|
|
TableBody,
|
|
TableCell,
|
|
TableContainer,
|
|
TableHead,
|
|
TableRow,
|
|
Paper,
|
|
Dialog,
|
|
DialogTitle,
|
|
DialogContent,
|
|
DialogActions,
|
|
Pagination,
|
|
CircularProgress,
|
|
Tooltip,
|
|
IconButton,
|
|
Collapse,
|
|
LinearProgress,
|
|
Snackbar,
|
|
Tabs,
|
|
Tab,
|
|
TextField,
|
|
useTheme,
|
|
Divider,
|
|
} from '@mui/material';
|
|
import Grid from '@mui/material/GridLegacy';
|
|
import {
|
|
Refresh as RefreshIcon,
|
|
Error as ErrorIcon,
|
|
Info as InfoIcon,
|
|
ExpandMore as ExpandMoreIcon,
|
|
ExpandLess as ExpandLessIcon,
|
|
Schedule as ScheduleIcon,
|
|
Visibility as VisibilityIcon,
|
|
Download as DownloadIcon,
|
|
FileCopy as FileCopyIcon,
|
|
Delete as DeleteIcon,
|
|
FindInPage as FindInPageIcon,
|
|
OpenInNew as OpenInNewIcon,
|
|
Warning as WarningIcon,
|
|
} from '@mui/icons-material';
|
|
import { format } from 'date-fns';
|
|
import { api, documentService, queueService } from '../services/api';
|
|
import DocumentViewer from '../components/DocumentViewer';
|
|
|
|
/**
 * A document whose OCR processing failed, as rendered in the failed-OCR table.
 * Timestamp fields are strings that this file parses with `new Date(...)`.
 */
interface FailedDocument {
  id: string;
  /** Name displayed in the table. */
  filename: string;
  /** Preferred name when downloading; the download handlers fall back to `filename` if it is empty. */
  original_filename: string;
  /** Size passed to `formatFileSize`, i.e. treated as a byte count. */
  file_size: number;
  mime_type: string;
  created_at: string;
  /** Shown in the "Last Failed" column. */
  updated_at: string;
  tags: string[];
  ocr_status: string;
  /** Raw error message shown in the expanded "Error Details" panel. */
  ocr_error: string;
  ocr_failure_reason: string;
  ocr_completed_at?: string;
  retry_count: number;
  last_attempt_at?: string;
  /** When false, the per-row "Retry OCR" button is disabled. */
  can_retry: boolean;
  /** Label of the failure chip; also the input to `getFailureCategoryColor`. */
  failure_category: string;
}
|
|
|
|
/**
 * A failure reason together with a display label and an occurrence count.
 * NOTE(review): not referenced in the visible part of this file — confirm it is still used.
 */
interface FailureCategory {
  reason: string;
  display_name: string;
  count: number;
}
|
|
|
|
/**
 * Shape of a failed-OCR listing response: one page of documents plus
 * pagination metadata and aggregate failure statistics.
 */
interface FailedOcrResponse {
  documents: FailedDocument[];
  pagination: {
    /** Total matching documents; divided by the page size to derive the page count. */
    total: number;
    limit: number;
    offset: number;
    total_pages: number;
  };
  statistics: {
    total_failed: number;
    /** Failure count keyed by reason; rendered as one chip per entry. */
    by_reason: Record<string, number>;
    by_stage: Record<string, number>;
  };
}
|
|
|
|
/** Result of requesting an OCR retry for a single document. */
interface RetryResponse {
  success: boolean;
  /** Shown to the user when `success` is false. */
  message: string;
  queue_id?: string;
  /** Estimated wait in minutes, included in the success notification when present. */
  estimated_wait_minutes?: number;
}
|
|
|
|
/** One file inside a duplicate group, rendered as a card in the expanded group row. */
interface DuplicateDocument {
  id: string;
  filename: string;
  /** Shown as "Original: ..." only when it differs from `filename`. */
  original_filename: string;
  /** Size passed to `formatFileSize`, i.e. treated as a byte count. */
  file_size: number;
  mime_type: string;
  /** Upload timestamp string, parsed with `new Date(...)` for display. */
  created_at: string;
  user_id: string;
}
|
|
|
|
/** A set of documents sharing the same `file_hash`. */
interface DuplicateGroup {
  /** Used as the React key and expansion id; only its first 16 characters are displayed. */
  file_hash: string;
  duplicate_count: number;
  /** Timestamp strings, parsed with `new Date(...)` for display. */
  first_uploaded: string;
  last_uploaded: string;
  /** The member files; the first entry is tagged "First" in the UI. */
  documents: DuplicateDocument[];
}
|
|
|
|
/**
 * Shape of a duplicates listing response: one page of duplicate groups plus
 * pagination metadata and aggregate statistics.
 */
interface DuplicatesResponse {
  duplicates: DuplicateGroup[];
  pagination: {
    /** Total groups; divided by the page size to derive the page count. */
    total: number;
    limit: number;
    offset: number;
    has_more: boolean;
  };
  statistics: {
    /** Shown in the "Duplicate Files" tab label and the statistics card. */
    total_duplicate_groups: number;
  };
}
|
|
|
|
const FailedOcrPage: React.FC = () => {
|
|
const theme = useTheme();
const navigate = useNavigate();

// Index of the active tab (0-4); see the <Tabs> definition in the render output.
const [currentTab, setCurrentTab] = useState(0);

// Data sets, one per list-style tab.
const [documents, setDocuments] = useState<FailedDocument[]>([]);
const [duplicates, setDuplicates] = useState<DuplicateGroup[]>([]);
const [failedDocuments, setFailedDocuments] = useState<any[]>([]);

// Loading flags. `loading` starts true so the first render shows the spinner.
const [loading, setLoading] = useState(true);
const [duplicatesLoading, setDuplicatesLoading] = useState(false);
const [failedDocumentsLoading, setFailedDocumentsLoading] = useState(false);

// Optional filters forwarded to documentService.getFailedDocuments.
const [failedDocumentsFilters, setFailedDocumentsFilters] = useState<{ stage?: string; reason?: string }>({});
const [selectedFailedDocument, setSelectedFailedDocument] = useState<any>(null);

// Retry progress: id of the single document being retried, and the bulk-retry flag.
const [retrying, setRetrying] = useState<string | null>(null);
const [retryingAll, setRetryingAll] = useState(false);

// Aggregate statistics shown in tab labels and summary cards.
const [statistics, setStatistics] = useState<FailedOcrResponse['statistics'] | null>(null);
const [duplicateStatistics, setDuplicateStatistics] = useState<DuplicatesResponse['statistics'] | null>(null);

// Pagination (1-based page numbers) and derived page counts for each list.
const [pagination, setPagination] = useState({ page: 1, limit: 25 });
const [duplicatesPagination, setDuplicatesPagination] = useState({ page: 1, limit: 25 });
const [failedDocumentsPagination, setFailedDocumentsPagination] = useState({ page: 1, limit: 25 });
const [totalPages, setTotalPages] = useState(0);
const [duplicatesTotalPages, setDuplicatesTotalPages] = useState(0);
const [failedDocumentsTotalPages, setFailedDocumentsTotalPages] = useState(0);

// Details dialog and row/group expansion state.
const [selectedDocument, setSelectedDocument] = useState<FailedDocument | null>(null);
const [detailsOpen, setDetailsOpen] = useState(false);
const [expandedRows, setExpandedRows] = useState<Set<string>>(new Set());
const [expandedDuplicateGroups, setExpandedDuplicateGroups] = useState<Set<string>>(new Set());

// Single shared notification used by every handler in this component.
const [snackbar, setSnackbar] = useState<{ open: boolean; message: string; severity: 'success' | 'error' | 'info' | 'warning' }>({
  open: false,
  message: '',
  severity: 'success'
});

// Low confidence documents state
const [confidenceThreshold, setConfidenceThreshold] = useState<number>(30);
const [lowConfidenceLoading, setLowConfidenceLoading] = useState(false);
const [previewData, setPreviewData] = useState<any>(null);
const [confirmDeleteOpen, setConfirmDeleteOpen] = useState(false);

// Failed documents deletion state
// NOTE(review): `failedDocsLoading` (bulk cleanup) and `failedDocumentsLoading` (list tab)
// are easy to confuse — consider renaming one of them.
const [failedDocsLoading, setFailedDocsLoading] = useState(false);
const [failedPreviewData, setFailedPreviewData] = useState<any>(null);
const [confirmDeleteFailedOpen, setConfirmDeleteFailedOpen] = useState(false);
|
|
|
|
/**
 * Loads the current page of failed-OCR documents.
 *
 * Converts the 1-based `pagination.page` into an offset, calls
 * `documentService.getFailedOcrDocuments`, and stores the documents, the
 * statistics and the derived page count. Any thrown error is logged and
 * reported through the snackbar; `loading` is always reset afterwards.
 */
const fetchFailedDocuments = async () => {
  try {
    setLoading(true);
    const offset = (pagination.page - 1) * pagination.limit;
    const response = await documentService.getFailedOcrDocuments(pagination.limit, offset);

    if (response?.data) {
      setDocuments(response.data.documents || []);
      setStatistics(response.data.statistics || null);
      if (response.data.pagination) {
        // Page count is derived from the requested limit, not from the response's own limit.
        setTotalPages(Math.ceil(response.data.pagination.total / pagination.limit));
      }
    }
  } catch (error) {
    console.error('Failed to fetch failed OCR documents:', error);
    setSnackbar({
      open: true,
      message: 'Failed to load failed OCR documents',
      severity: 'error'
    });
  } finally {
    setLoading(false);
  }
};
|
|
|
|
/**
 * Loads the current page of duplicate groups.
 *
 * Converts the 1-based `duplicatesPagination.page` into an offset, calls
 * `documentService.getDuplicates`, and stores the groups, the statistics and
 * the derived page count. Any thrown error is logged and reported through the
 * snackbar; `duplicatesLoading` is always reset afterwards.
 */
const fetchDuplicates = async () => {
  try {
    setDuplicatesLoading(true);
    const offset = (duplicatesPagination.page - 1) * duplicatesPagination.limit;
    const response = await documentService.getDuplicates(duplicatesPagination.limit, offset);

    if (response?.data) {
      setDuplicates(response.data.duplicates || []);
      setDuplicateStatistics(response.data.statistics || null);
      if (response.data.pagination) {
        setDuplicatesTotalPages(Math.ceil(response.data.pagination.total / duplicatesPagination.limit));
      }
    }
  } catch (error) {
    console.error('Failed to fetch duplicates:', error);
    setSnackbar({
      open: true,
      message: 'Failed to load duplicate documents',
      severity: 'error'
    });
  } finally {
    setDuplicatesLoading(false);
  }
};
|
|
|
|
// Fetch failed-OCR documents on mount and whenever the failed-OCR page number changes.
// This runs regardless of which tab is active.
useEffect(() => {
  fetchFailedDocuments();
}, [pagination.page]);
|
|
|
|
// Load data for the tabs that fetch lazily: duplicates (tab 1) and the
// failed-documents list (tab 4). Re-runs when the tab changes or when either
// list's page number or the failed-documents filters change.
useEffect(() => {
  if (currentTab === 1) {
    fetchDuplicates();
  } else if (currentTab === 4) {
    // Declared below this effect; safe because the callback only runs after render.
    fetchFailedDocumentsList();
  }
}, [currentTab, duplicatesPagination.page, failedDocumentsPagination.page, failedDocumentsFilters]);
|
|
|
|
/**
 * Loads the current page of the failed-documents list (tab 4).
 *
 * Calls `documentService.getFailedDocuments` with the page size, the offset
 * derived from the 1-based page number, and the optional stage/reason filters,
 * then stores the documents and the derived page count. Any thrown error is
 * logged and reported through the snackbar; `failedDocumentsLoading` is always
 * reset afterwards.
 */
const fetchFailedDocumentsList = async () => {
  try {
    setFailedDocumentsLoading(true);
    const offset = (failedDocumentsPagination.page - 1) * failedDocumentsPagination.limit;
    const response = await documentService.getFailedDocuments(
      failedDocumentsPagination.limit,
      offset,
      failedDocumentsFilters.stage,
      failedDocumentsFilters.reason
    );

    if (response?.data) {
      setFailedDocuments(response.data.documents || []);
      if (response.data.pagination) {
        setFailedDocumentsTotalPages(Math.ceil(response.data.pagination.total / failedDocumentsPagination.limit));
      }
    }
  } catch (error) {
    console.error('Failed to fetch failed documents:', error);
    setSnackbar({
      open: true,
      message: 'Failed to load failed documents',
      severity: 'error'
    });
  } finally {
    setFailedDocumentsLoading(false);
  }
};
|
|
|
|
/**
 * Maps a machine-readable failure reason to a chip color.
 *
 * @param reason Failure reason key such as `'ocr_timeout'`.
 * @returns `'error'` or `'warning'` for the known reasons listed below,
 *          `'default'` for anything else. `'info'` is part of the return type
 *          but is never produced by this mapping.
 */
const getFailureReasonColor = (reason: string): "error" | "warning" | "info" | "default" => {
  switch (reason) {
    case 'low_ocr_confidence':
    case 'ocr_timeout':
    case 'ocr_memory_limit':
    case 'pdf_parsing_error':
    case 'file_corrupted':
    case 'access_denied':
    case 'permission_denied':
      return 'error';
    case 'duplicate_content':
    case 'unsupported_format':
    case 'file_too_large':
      return 'warning';
    default:
      return 'default';
  }
};
|
|
|
|
/**
 * Queues a single document for another OCR attempt.
 *
 * Marks the document as retrying (which disables its row button), calls
 * `documentService.retryOcr`, and shows a success or error notification. On
 * success the failed-OCR list is re-fetched. The retrying marker is always
 * cleared afterwards.
 *
 * @param document The failed document to retry.
 */
const handleRetryOcr = async (document: FailedDocument) => {
  try {
    setRetrying(document.id);
    const response = await documentService.retryOcr(document.id);

    if (response.data.success) {
      // `??` rather than `||` so a legitimate estimate of 0 minutes is shown instead of 'Unknown'.
      const estimatedWait = response.data.estimated_wait_minutes ?? 'Unknown';
      setSnackbar({
        open: true,
        message: `OCR retry queued for "${document.filename}". Estimated wait time: ${estimatedWait} minutes.`,
        severity: 'success'
      });

      // Refresh the list to update retry counts and status
      await fetchFailedDocuments();
    } else {
      setSnackbar({
        open: true,
        message: response.data.message || 'Failed to retry OCR',
        severity: 'error'
      });
    }
  } catch (error) {
    console.error('Failed to retry OCR:', error);
    setSnackbar({
      open: true,
      message: 'Failed to retry OCR processing',
      severity: 'error'
    });
  } finally {
    setRetrying(null);
  }
};
|
|
|
|
/**
 * Requeues every failed document for OCR via `queueService.requeueFailed`.
 *
 * Shows a success notification and re-fetches the failed-OCR list when at
 * least one document was requeued, or an informational notification when none
 * were. Any thrown error is logged and reported through the snackbar;
 * `retryingAll` is always reset afterwards.
 */
const handleRetryAllFailed = async () => {
  try {
    setRetryingAll(true);
    const response = await queueService.requeueFailed();

    if (response.data.requeued_count > 0) {
      setSnackbar({
        open: true,
        message: `Successfully queued ${response.data.requeued_count} failed documents for OCR retry. Check the queue stats for progress.`,
        severity: 'success'
      });

      // Refresh the list to update status
      await fetchFailedDocuments();
    } else {
      setSnackbar({
        open: true,
        message: 'No failed documents found to retry',
        severity: 'info'
      });
    }
  } catch (error) {
    console.error('Failed to retry all failed OCR:', error);
    setSnackbar({
      open: true,
      message: 'Failed to retry all failed OCR documents',
      severity: 'error'
    });
  } finally {
    setRetryingAll(false);
  }
};
|
|
|
|
/**
 * Formats a byte count as a human-readable size using 1024-based units.
 *
 * The value is rounded to at most two decimals and trailing zeros are dropped
 * (e.g. `1536` -> `'1.5 KB'`).
 *
 * @param bytes Size in bytes.
 * @returns The formatted size; `'0 B'` for zero, negative or non-finite input.
 */
const formatFileSize = (bytes: number): string => {
  // Math.log is -Infinity/NaN for these inputs, which previously produced an invalid unit index.
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';
  const k = 1024;
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'];
  const exponent = Math.floor(Math.log(bytes) / Math.log(k));
  // Clamp so fractional byte counts (< 1) and values beyond the largest unit still map to a label.
  const i = Math.min(Math.max(exponent, 0), sizes.length - 1);
  return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
};
|
|
|
|
/**
 * Maps a human-readable failure category label to a chip color.
 *
 * @param category Category label such as `'Timeout'` (matching is case-sensitive).
 * @returns `'warning'`, `'error'` or `'info'` for the known labels listed
 *          below, `'default'` for anything else.
 */
const getFailureCategoryColor = (category: string): "error" | "warning" | "info" | "default" => {
  switch (category) {
    case 'PDF Font Issues':
    case 'PDF Corruption':
    case 'PDF Parsing Error':
    case 'Low OCR Confidence':
      return 'warning';
    case 'Timeout':
    case 'Memory Limit':
      return 'error';
    case 'Unknown Error':
      return 'info';
    default:
      return 'default';
  }
};
|
|
|
|
/**
 * Expands or collapses the error-details row of a failed document.
 *
 * Uses a functional state update so the toggle is applied to the latest set
 * rather than the one captured when this render's closure was created.
 *
 * @param documentId Id of the document whose row should be toggled.
 */
const toggleRowExpansion = (documentId: string) => {
  setExpandedRows((previous) => {
    // Copy so React sees a new Set instance and re-renders.
    const next = new Set(previous);
    // Set.delete returns false when the id was absent, i.e. the row was collapsed.
    if (!next.delete(documentId)) {
      next.add(documentId);
    }
    return next;
  });
};
|
|
|
|
/**
 * Selects a failed document and opens the details dialog for it.
 *
 * @param document The document to show.
 */
const showDocumentDetails = (document: FailedDocument) => {
  setSelectedDocument(document);
  setDetailsOpen(true);
};
|
|
|
|
/**
 * Expands or collapses the file list of a duplicate group.
 *
 * Uses a functional state update so the toggle is applied to the latest set
 * rather than the one captured when this render's closure was created.
 *
 * @param groupHash `file_hash` of the group to toggle.
 */
const toggleDuplicateGroupExpansion = (groupHash: string) => {
  setExpandedDuplicateGroups((previous) => {
    // Copy so React sees a new Set instance and re-renders.
    const next = new Set(previous);
    // Set.delete returns false when the hash was absent, i.e. the group was collapsed.
    if (!next.delete(groupHash)) {
      next.add(groupHash);
    }
    return next;
  });
};
|
|
|
|
/**
 * `onChange` handler for the tab bar: stores the newly selected tab index.
 *
 * @param event    The originating event (unused).
 * @param newValue Index of the selected tab.
 */
const handleTabChange = (event: React.SyntheticEvent, newValue: number) => {
  setCurrentTab(newValue);
};
|
|
|
|
/**
 * Reloads the data behind whichever tab is currently active.
 * Tab indices outside 0-4 are ignored.
 */
const refreshCurrentTab = () => {
  switch (currentTab) {
    case 0:
      fetchFailedDocuments();
      break;
    case 1:
      fetchDuplicates();
      break;
    case 2:
      // The low-confidence tab has no list of its own; refreshing re-runs the preview.
      handlePreviewLowConfidence();
      break;
    case 3:
      handlePreviewFailedDocuments();
      break;
    case 4:
      fetchFailedDocumentsList();
      break;
    default:
      break;
  }
};
|
|
|
|
// Low confidence document handlers
|
|
/**
 * Previews which documents fall below the current confidence threshold.
 *
 * Calls `documentService.deleteLowConfidence` with its second argument set to
 * `true` (the delete handler passes `false` for the real deletion), stores the
 * response as `previewData`, and shows the response message. Any thrown error
 * is logged and reported through the snackbar; `lowConfidenceLoading` is
 * always reset afterwards.
 */
const handlePreviewLowConfidence = async () => {
  try {
    setLowConfidenceLoading(true);
    const response = await documentService.deleteLowConfidence(confidenceThreshold, true);
    setPreviewData(response.data);
    setSnackbar({
      open: true,
      message: response.data.message,
      severity: 'info'
    });
  } catch (error) {
    // Log like the other handlers do, so the failure is diagnosable.
    console.error('Failed to preview low confidence documents:', error);
    setSnackbar({
      open: true,
      message: 'Failed to preview low confidence documents',
      severity: 'error'
    });
  } finally {
    setLowConfidenceLoading(false);
  }
};
|
|
|
|
/**
 * Deletes the documents below the current confidence threshold.
 *
 * Refuses to run (with a warning notification) unless a preview exists and
 * matched at least one document. Otherwise calls
 * `documentService.deleteLowConfidence` with its second argument set to
 * `false`, shows the response message, clears the preview and closes the
 * confirmation dialog. Any thrown error is logged and reported through the
 * snackbar; `lowConfidenceLoading` is always reset afterwards.
 */
const handleDeleteLowConfidence = async () => {
  if (!previewData || previewData.matched_count === 0) {
    setSnackbar({
      open: true,
      message: 'No documents to delete',
      severity: 'warning'
    });
    return;
  }

  try {
    setLowConfidenceLoading(true);
    const response = await documentService.deleteLowConfidence(confidenceThreshold, false);
    setSnackbar({
      open: true,
      message: response.data.message,
      severity: 'success'
    });
    setPreviewData(null);
    setConfirmDeleteOpen(false);

    // Refresh other tabs if they have data affected
    // NOTE(review): this only refreshes while tab 0 is active; if the delete is always
    // triggered from the low-confidence tab, the failed-OCR list stays stale — confirm intent.
    if (currentTab === 0) {
      fetchFailedDocuments();
    }
  } catch (error) {
    // Log like the other handlers do, so the failure is diagnosable.
    console.error('Failed to delete low confidence documents:', error);
    setSnackbar({
      open: true,
      message: 'Failed to delete low confidence documents',
      severity: 'error'
    });
  } finally {
    setLowConfidenceLoading(false);
  }
};
|
|
|
|
// Failed documents handlers
|
|
/**
 * Previews the bulk deletion of failed documents.
 *
 * Calls `documentService.deleteFailedOcr(true)` (the delete handler passes
 * `false` for the real deletion) and stores the response as
 * `failedPreviewData`. Any thrown error is logged and reported through the
 * snackbar; `failedDocsLoading` is always reset afterwards.
 */
const handlePreviewFailedDocuments = async () => {
  try {
    setFailedDocsLoading(true);
    const response = await documentService.deleteFailedOcr(true);
    setFailedPreviewData(response.data);
  } catch (error) {
    // Log like the other handlers do, so the failure is diagnosable.
    console.error('Failed to preview failed documents:', error);
    setSnackbar({
      open: true,
      message: 'Failed to preview failed documents',
      severity: 'error'
    });
  } finally {
    setFailedDocsLoading(false);
  }
};
|
|
|
|
/**
 * Bulk-deletes failed documents.
 *
 * Calls `documentService.deleteFailedOcr(false)`, shows the response message,
 * clears the preview and closes the confirmation dialog. Any thrown error is
 * logged and reported through the snackbar; `failedDocsLoading` is always
 * reset afterwards.
 */
const handleDeleteFailedDocuments = async () => {
  try {
    setFailedDocsLoading(true);
    const response = await documentService.deleteFailedOcr(false);

    setSnackbar({
      open: true,
      message: response.data.message,
      severity: 'success'
    });
    setFailedPreviewData(null);
    setConfirmDeleteFailedOpen(false);

    // Refresh failed OCR tab if currently viewing it
    if (currentTab === 0) {
      fetchFailedDocuments();
    }
  } catch (error) {
    // Log like the other handlers do, so the failure is diagnosable.
    console.error('Failed to delete failed documents:', error);
    setSnackbar({
      open: true,
      message: 'Failed to delete failed documents',
      severity: 'error'
    });
  } finally {
    setFailedDocsLoading(false);
  }
};
|
|
|
|
// While the failed-OCR request is in flight and no documents are held yet,
// render only a centered spinner. The check does not depend on the active tab.
if (loading && (!documents || documents.length === 0)) {
  return (
    <Box display="flex" justifyContent="center" alignItems="center" minHeight="400px">
      <CircularProgress />
    </Box>
  );
}
|
|
|
|
return (
|
|
<Box sx={{ p: 3 }}>
|
|
<Box display="flex" justifyContent="space-between" alignItems="center" mb={3}>
|
|
<Typography variant="h4" component="h1">
|
|
Document Management
|
|
</Typography>
|
|
<Button
|
|
variant="outlined"
|
|
startIcon={<RefreshIcon />}
|
|
onClick={refreshCurrentTab}
|
|
disabled={loading || duplicatesLoading || retryingAll}
|
|
>
|
|
Refresh
|
|
</Button>
|
|
</Box>
|
|
|
|
<Paper sx={{ mb: 3 }}>
|
|
<Tabs value={currentTab} onChange={handleTabChange} aria-label="document management tabs">
|
|
<Tab
|
|
icon={<ErrorIcon />}
|
|
label={`Failed Documents${statistics ? ` (${statistics.total_failed})` : ''}`}
|
|
iconPosition="start"
|
|
/>
|
|
<Tab
|
|
icon={<FileCopyIcon />}
|
|
label={`Duplicate Files${duplicateStatistics ? ` (${duplicateStatistics.total_duplicate_groups})` : ''}`}
|
|
iconPosition="start"
|
|
/>
|
|
<Tab
|
|
icon={<FindInPageIcon />}
|
|
label={`Low Quality Manager${previewData ? ` (${previewData.matched_count})` : ''}`}
|
|
iconPosition="start"
|
|
/>
|
|
<Tab
|
|
icon={<DeleteIcon />}
|
|
label="Bulk Cleanup"
|
|
iconPosition="start"
|
|
/>
|
|
<Tab
|
|
icon={<WarningIcon />}
|
|
label="Failed Documents"
|
|
iconPosition="start"
|
|
/>
|
|
</Tabs>
|
|
</Paper>
|
|
|
|
{/* Failed OCR Tab Content */}
|
|
{currentTab === 0 && (
|
|
<>
|
|
{/* Statistics Overview */}
|
|
{statistics && (
|
|
<Grid container spacing={3} mb={3}>
|
|
<Grid item xs={12} md={4}>
|
|
<Card>
|
|
<CardContent>
|
|
<Typography variant="h6" color="error">
|
|
<ErrorIcon sx={{ mr: 1, verticalAlign: 'middle' }} />
|
|
Total Failed
|
|
</Typography>
|
|
<Typography variant="h3" color="error.main">
|
|
{statistics.total_failed}
|
|
</Typography>
|
|
<Box sx={{ mt: 2 }}>
|
|
<Button
|
|
variant="contained"
|
|
color="warning"
|
|
startIcon={retryingAll ? <CircularProgress size={20} /> : <RefreshIcon />}
|
|
onClick={handleRetryAllFailed}
|
|
disabled={retryingAll || statistics.total_failed === 0}
|
|
size="small"
|
|
fullWidth
|
|
>
|
|
{retryingAll ? 'Retrying All...' : 'Retry All Failed OCR'}
|
|
</Button>
|
|
</Box>
|
|
</CardContent>
|
|
</Card>
|
|
</Grid>
|
|
<Grid item xs={12} md={8}>
|
|
<Card>
|
|
<CardContent>
|
|
<Typography variant="h6" mb={2}>
|
|
Failure Categories
|
|
</Typography>
|
|
<Box display="flex" flexWrap="wrap" gap={1}>
|
|
{statistics?.by_reason ? Object.entries(statistics.by_reason).map(([reason, count]) => (
|
|
<Chip
|
|
key={reason}
|
|
label={`${reason}: ${count}`}
|
|
color={getFailureCategoryColor(reason)}
|
|
variant="outlined"
|
|
size="small"
|
|
/>
|
|
)) : (
|
|
<Typography variant="body2" color="text.secondary">
|
|
No failure data available
|
|
</Typography>
|
|
)}
|
|
</Box>
|
|
</CardContent>
|
|
</Card>
|
|
</Grid>
|
|
</Grid>
|
|
)}
|
|
|
|
{(!documents || documents.length === 0) ? (
|
|
<Alert severity="success" sx={{ mt: 2 }}>
|
|
<AlertTitle>Great news!</AlertTitle>
|
|
No documents have failed OCR processing. All your documents are processing successfully.
|
|
</Alert>
|
|
) : (
|
|
<>
|
|
<Alert severity="info" sx={{ mb: 2 }}>
|
|
<AlertTitle>OCR Failures</AlertTitle>
|
|
These documents failed OCR processing. You can retry OCR with detailed output to understand why failures occurred.
|
|
Common causes include corrupted PDFs, unsupported fonts, or memory limitations.
|
|
</Alert>
|
|
|
|
<TableContainer component={Paper}>
|
|
<Table>
|
|
<TableHead>
|
|
<TableRow>
|
|
<TableCell />
|
|
<TableCell>Document</TableCell>
|
|
<TableCell>Failure Type</TableCell>
|
|
<TableCell>Retry Count</TableCell>
|
|
<TableCell>Last Failed</TableCell>
|
|
<TableCell>Actions</TableCell>
|
|
</TableRow>
|
|
</TableHead>
|
|
<TableBody>
|
|
{(documents || []).map((document) => (
|
|
<React.Fragment key={document.id}>
|
|
<TableRow>
|
|
<TableCell>
|
|
<IconButton
|
|
size="small"
|
|
onClick={() => toggleRowExpansion(document.id)}
|
|
>
|
|
{expandedRows.has(document.id) ? <ExpandLessIcon /> : <ExpandMoreIcon />}
|
|
</IconButton>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Box>
|
|
<Typography variant="body2" fontWeight="bold">
|
|
{document.filename}
|
|
</Typography>
|
|
<Typography variant="caption" color="text.secondary">
|
|
{formatFileSize(document.file_size)} • {document.mime_type}
|
|
</Typography>
|
|
</Box>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Chip
|
|
label={document.failure_category}
|
|
color={getFailureCategoryColor(document.failure_category)}
|
|
size="small"
|
|
/>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Typography variant="body2">
|
|
{document.retry_count} attempts
|
|
</Typography>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Typography variant="body2">
|
|
{document.updated_at ? format(new Date(document.updated_at), 'MMM dd, yyyy HH:mm') : 'Unknown'}
|
|
</Typography>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Box display="flex" gap={1}>
|
|
<Tooltip title="Retry OCR">
|
|
<IconButton
|
|
size="small"
|
|
onClick={() => handleRetryOcr(document)}
|
|
disabled={retrying === document.id || !document.can_retry}
|
|
>
|
|
{retrying === document.id ? (
|
|
<CircularProgress size={16} />
|
|
) : (
|
|
<RefreshIcon />
|
|
)}
|
|
</IconButton>
|
|
</Tooltip>
|
|
<Tooltip title="View Details">
|
|
<IconButton
|
|
size="small"
|
|
onClick={() => showDocumentDetails(document)}
|
|
>
|
|
<VisibilityIcon />
|
|
</IconButton>
|
|
</Tooltip>
|
|
<Tooltip title="Download Document">
|
|
<IconButton
|
|
size="small"
|
|
onClick={async () => {
|
|
try {
|
|
await documentService.downloadFile(document.id, document.original_filename || document.filename);
|
|
} catch (error) {
|
|
console.error('Download failed:', error);
|
|
}
|
|
}}
|
|
>
|
|
<DownloadIcon />
|
|
</IconButton>
|
|
</Tooltip>
|
|
</Box>
|
|
</TableCell>
|
|
</TableRow>
|
|
<TableRow>
|
|
<TableCell sx={{ paddingBottom: 0, paddingTop: 0 }} colSpan={6}>
|
|
<Collapse in={expandedRows.has(document.id)} timeout="auto" unmountOnExit>
|
|
<Box sx={{
|
|
margin: 1,
|
|
p: 2,
|
|
bgcolor: (theme) => theme.palette.mode === 'dark' ? 'grey.900' : 'grey.50',
|
|
borderRadius: 1
|
|
}}>
|
|
<Typography variant="h6" gutterBottom>
|
|
Error Details
|
|
</Typography>
|
|
<Grid container spacing={2}>
|
|
<Grid item xs={12} md={6}>
|
|
<Typography variant="body2" color="text.secondary">
|
|
<strong>Failure Reason:</strong>
|
|
</Typography>
|
|
<Typography variant="body2" sx={{ mb: 1 }}>
|
|
{document.ocr_failure_reason || 'Not specified'}
|
|
</Typography>
|
|
|
|
<Typography variant="body2" color="text.secondary">
|
|
<strong>Error Message:</strong>
|
|
</Typography>
|
|
<Typography
|
|
variant="body2"
|
|
sx={{
|
|
fontFamily: 'monospace',
|
|
bgcolor: (theme) => theme.palette.mode === 'dark' ? 'grey.800' : 'grey.100',
|
|
p: 1,
|
|
borderRadius: 1,
|
|
fontSize: '0.75rem',
|
|
wordBreak: 'break-word'
|
|
}}
|
|
>
|
|
{document.ocr_error || 'No error message available'}
|
|
</Typography>
|
|
</Grid>
|
|
<Grid item xs={12} md={6}>
|
|
<Typography variant="body2" color="text.secondary">
|
|
<strong>Last Attempt:</strong>
|
|
</Typography>
|
|
<Typography variant="body2" sx={{ mb: 1 }}>
|
|
{document.last_attempt_at
|
|
? format(new Date(document.last_attempt_at), 'PPpp')
|
|
: 'No previous attempts'}
|
|
</Typography>
|
|
|
|
<Typography variant="body2" color="text.secondary">
|
|
<strong>File Created:</strong>
|
|
</Typography>
|
|
<Typography variant="body2">
|
|
{format(new Date(document.created_at), 'PPpp')}
|
|
</Typography>
|
|
</Grid>
|
|
</Grid>
|
|
</Box>
|
|
</Collapse>
|
|
</TableCell>
|
|
</TableRow>
|
|
</React.Fragment>
|
|
))}
|
|
</TableBody>
|
|
</Table>
|
|
</TableContainer>
|
|
|
|
{/* Pagination */}
|
|
{totalPages > 1 && (
|
|
<Box display="flex" justifyContent="center" mt={3}>
|
|
<Pagination
|
|
count={totalPages}
|
|
page={pagination.page}
|
|
onChange={(_, page) => setPagination(prev => ({ ...prev, page }))}
|
|
color="primary"
|
|
/>
|
|
</Box>
|
|
)}
|
|
</>
|
|
)}
|
|
</>
|
|
)}
|
|
|
|
{/* Duplicates Tab Content */}
|
|
{currentTab === 1 && (
|
|
<>
|
|
{/* Duplicate Statistics Overview */}
|
|
{duplicateStatistics && (
|
|
<Grid container spacing={3} mb={3}>
|
|
<Grid item xs={12} md={6}>
|
|
<Card>
|
|
<CardContent>
|
|
<Typography variant="h6" color="warning.main">
|
|
<FileCopyIcon sx={{ mr: 1, verticalAlign: 'middle' }} />
|
|
Total Duplicate Groups
|
|
</Typography>
|
|
<Typography variant="h3" color="warning.main">
|
|
{duplicateStatistics.total_duplicate_groups}
|
|
</Typography>
|
|
</CardContent>
|
|
</Card>
|
|
</Grid>
|
|
</Grid>
|
|
)}
|
|
|
|
{duplicatesLoading ? (
|
|
<Box display="flex" justifyContent="center" alignItems="center" minHeight="400px">
|
|
<CircularProgress />
|
|
</Box>
|
|
) : duplicates.length === 0 ? (
|
|
<Alert severity="success" sx={{ mt: 2 }}>
|
|
<AlertTitle>No duplicates found!</AlertTitle>
|
|
You don't have any duplicate documents. All your files have unique content.
|
|
</Alert>
|
|
) : (
|
|
<>
|
|
<Alert severity="info" sx={{ mb: 2 }}>
|
|
<AlertTitle>Duplicate Documents Found</AlertTitle>
|
|
These documents have identical content but may have different filenames.
|
|
You can expand each group to see all files with the same content and choose which ones to keep.
|
|
</Alert>
|
|
|
|
<Alert severity="warning" sx={{ mb: 2 }}>
|
|
<AlertTitle>What should you do?</AlertTitle>
|
|
<Typography variant="body2" component="div" sx={{ mt: 1, mb: 0 }}>
|
|
<Box component="ul" sx={{ pl: 2, mt: 0, mb: 0 }}>
|
|
<li><strong>Review each group:</strong> Click to expand and see all duplicate files</li>
|
|
<li><strong>Keep the best version:</strong> Choose the file with the most descriptive name</li>
|
|
<li><strong>Check content:</strong> Use View/Download to verify files are truly identical</li>
|
|
<li><strong>Note for admin:</strong> Consider implementing bulk delete functionality for duplicates</li>
|
|
</Box>
|
|
</Typography>
|
|
</Alert>
|
|
|
|
<TableContainer component={Paper}>
|
|
<Table>
|
|
<TableHead>
|
|
<TableRow>
|
|
<TableCell />
|
|
<TableCell>Content Hash</TableCell>
|
|
<TableCell>Duplicate Count</TableCell>
|
|
<TableCell>First Uploaded</TableCell>
|
|
<TableCell>Last Uploaded</TableCell>
|
|
<TableCell>Actions</TableCell>
|
|
</TableRow>
|
|
</TableHead>
|
|
<TableBody>
|
|
{duplicates.map((group) => (
|
|
<React.Fragment key={group.file_hash}>
|
|
<TableRow>
|
|
<TableCell>
|
|
<IconButton
|
|
size="small"
|
|
onClick={() => toggleDuplicateGroupExpansion(group.file_hash)}
|
|
>
|
|
{expandedDuplicateGroups.has(group.file_hash) ? <ExpandLessIcon /> : <ExpandMoreIcon />}
|
|
</IconButton>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Typography variant="body2" fontFamily="monospace">
|
|
{group.file_hash.substring(0, 16)}...
|
|
</Typography>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Chip
|
|
label={`${group.duplicate_count} files`}
|
|
color="warning"
|
|
size="small"
|
|
/>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Typography variant="body2">
|
|
{format(new Date(group.first_uploaded), 'MMM dd, yyyy')}
|
|
</Typography>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Typography variant="body2">
|
|
{format(new Date(group.last_uploaded), 'MMM dd, yyyy')}
|
|
</Typography>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Typography variant="body2" color="text.secondary">
|
|
View files below
|
|
</Typography>
|
|
</TableCell>
|
|
</TableRow>
|
|
<TableRow>
|
|
<TableCell sx={{ paddingBottom: 0, paddingTop: 0 }} colSpan={6}>
|
|
<Collapse in={expandedDuplicateGroups.has(group.file_hash)} timeout="auto" unmountOnExit>
|
|
<Box
|
|
sx={{
|
|
margin: 1,
|
|
p: 3,
|
|
background: theme.palette.mode === 'light'
|
|
? 'rgba(248, 250, 252, 0.8)'
|
|
: 'rgba(30, 30, 30, 0.8)',
|
|
backdropFilter: 'blur(10px)',
|
|
borderRadius: 2,
|
|
border: `1px solid ${theme.palette.divider}`,
|
|
}}
|
|
>
|
|
<Typography variant="h6" gutterBottom sx={{
|
|
color: theme.palette.primary.main,
|
|
display: 'flex',
|
|
alignItems: 'center',
|
|
gap: 1
|
|
}}>
|
|
<FileCopyIcon />
|
|
Duplicate Files ({group.duplicate_count} total)
|
|
</Typography>
|
|
|
|
<Alert severity="info" sx={{ mb: 2, fontSize: '0.875rem' }}>
|
|
<strong>Storage Impact:</strong> These {group.duplicate_count} files contain identical content.
|
|
Consider keeping only the best-named version to save space.
|
|
</Alert>
|
|
|
|
<Grid container spacing={2}>
|
|
{group.documents.map((doc, index) => (
|
|
<Grid item xs={12} md={6} lg={4} key={doc.id}>
|
|
<Card
|
|
variant="outlined"
|
|
sx={{
|
|
background: theme.palette.mode === 'light'
|
|
? 'rgba(255, 255, 255, 0.9)'
|
|
: 'rgba(40, 40, 40, 0.9)',
|
|
backdropFilter: 'blur(5px)',
|
|
border: `1px solid ${theme.palette.divider}`,
|
|
transition: 'all 0.2s ease',
|
|
'&:hover': {
|
|
transform: 'translateY(-2px)',
|
|
boxShadow: theme.shadows[4],
|
|
}
|
|
}}
|
|
>
|
|
<CardContent>
|
|
<Box display="flex" justifyContent="space-between" alignItems="flex-start" mb={1}>
|
|
<Typography variant="body2" fontWeight="bold" sx={{
|
|
color: theme.palette.text.primary,
|
|
wordBreak: 'break-word',
|
|
flex: 1,
|
|
mr: 1
|
|
}}>
|
|
{doc.filename}
|
|
</Typography>
|
|
{index === 0 && (
|
|
<Chip
|
|
label="First"
|
|
size="small"
|
|
color="primary"
|
|
variant="outlined"
|
|
/>
|
|
)}
|
|
</Box>
|
|
|
|
{doc.original_filename !== doc.filename && (
|
|
<Typography variant="caption" color="text.secondary" display="block">
|
|
Original: {doc.original_filename}
|
|
</Typography>
|
|
)}
|
|
|
|
<Typography variant="caption" display="block" color="text.secondary" sx={{ mb: 1 }}>
|
|
{formatFileSize(doc.file_size)} • {doc.mime_type}
|
|
</Typography>
|
|
|
|
<Typography variant="caption" display="block" color="text.secondary" sx={{ mb: 2 }}>
|
|
Uploaded: {format(new Date(doc.created_at), 'MMM dd, yyyy HH:mm')}
|
|
</Typography>
|
|
|
|
<Box display="flex" justifyContent="space-between" alignItems="center">
|
|
<Box>
|
|
<Tooltip title="View Document">
|
|
<IconButton
|
|
size="small"
|
|
onClick={() => window.open(`/api/documents/${doc.id}/view`, '_blank')}
|
|
sx={{ color: theme.palette.primary.main }}
|
|
>
|
|
<VisibilityIcon />
|
|
</IconButton>
|
|
</Tooltip>
|
|
<Tooltip title="Download Document">
|
|
<IconButton
|
|
size="small"
|
|
onClick={async () => {
|
|
try {
|
|
await documentService.downloadFile(doc.id, doc.original_filename || doc.filename);
|
|
} catch (error) {
|
|
console.error('Download failed:', error);
|
|
}
|
|
}}
|
|
sx={{ color: theme.palette.secondary.main }}
|
|
>
|
|
<DownloadIcon />
|
|
</IconButton>
|
|
</Tooltip>
|
|
</Box>
|
|
|
|
<Tooltip title="Get document details and duplicate information">
|
|
<Button
|
|
size="small"
|
|
variant="outlined"
|
|
color="info"
|
|
startIcon={<FindInPageIcon />}
|
|
sx={{ fontSize: '0.75rem' }}
|
|
onClick={() => {
|
|
setSnackbar({
|
|
open: true,
|
|
message: `Document "${doc.filename}" has ${group.duplicate_count - 1} duplicate(s). Content hash: ${group.file_hash.substring(0, 16)}...`,
|
|
severity: 'info'
|
|
});
|
|
}}
|
|
>
|
|
Info
|
|
</Button>
|
|
</Tooltip>
|
|
</Box>
|
|
</CardContent>
|
|
</Card>
|
|
</Grid>
|
|
))}
|
|
</Grid>
|
|
</Box>
|
|
</Collapse>
|
|
</TableCell>
|
|
</TableRow>
|
|
</React.Fragment>
|
|
))}
|
|
</TableBody>
|
|
</Table>
|
|
</TableContainer>
|
|
|
|
{/* Duplicates Pagination */}
|
|
{duplicatesTotalPages > 1 && (
|
|
<Box display="flex" justifyContent="center" mt={3}>
|
|
<Pagination
|
|
count={duplicatesTotalPages}
|
|
page={duplicatesPagination.page}
|
|
onChange={(_, page) => setDuplicatesPagination(prev => ({ ...prev, page }))}
|
|
color="primary"
|
|
/>
|
|
</Box>
|
|
)}
|
|
</>
|
|
)}
|
|
</>
|
|
)}
|
|
|
|
{/* Low Confidence Documents Tab Content */}
|
|
{currentTab === 2 && (
|
|
<>
|
|
<Alert severity="info" sx={{ mb: 3 }}>
|
|
<AlertTitle>Low Confidence Document Deletion</AlertTitle>
|
|
<Typography>
|
|
This tool allows you to delete documents with OCR confidence below a specified threshold.
|
|
Use the preview feature first to see what documents would be affected before deleting.
|
|
</Typography>
|
|
</Alert>
|
|
|
|
<Card sx={{ mb: 3 }}>
|
|
<CardContent>
|
|
<Grid container spacing={3} alignItems="center">
|
|
<Grid item xs={12} md={4}>
|
|
<TextField
|
|
label="Maximum Confidence Threshold (%)"
|
|
type="number"
|
|
value={confidenceThreshold}
|
|
onChange={(e) => setConfidenceThreshold(Math.max(0, Math.min(100, Number(e.target.value))))}
|
|
fullWidth
|
|
inputProps={{ min: 0, max: 100, step: 1 }}
|
|
helperText="Documents with confidence below this value will be deleted"
|
|
/>
|
|
</Grid>
|
|
<Grid item xs={12} md={4}>
|
|
<Button
|
|
variant="outlined"
|
|
onClick={handlePreviewLowConfidence}
|
|
disabled={lowConfidenceLoading}
|
|
startIcon={lowConfidenceLoading ? <CircularProgress size={20} /> : <FindInPageIcon />}
|
|
fullWidth
|
|
>
|
|
Preview Documents
|
|
</Button>
|
|
</Grid>
|
|
<Grid item xs={12} md={4}>
|
|
<Button
|
|
variant="contained"
|
|
color="warning"
|
|
onClick={() => setConfirmDeleteOpen(true)}
|
|
disabled={!previewData || previewData.matched_count === 0 || lowConfidenceLoading}
|
|
startIcon={<DeleteIcon />}
|
|
fullWidth
|
|
>
|
|
Delete Low Confidence Documents
|
|
</Button>
|
|
</Grid>
|
|
</Grid>
|
|
</CardContent>
|
|
</Card>
|
|
|
|
{/* Preview Results */}
|
|
{previewData && (
|
|
<Card sx={{ mb: 3 }}>
|
|
<CardContent>
|
|
<Typography variant="h6" gutterBottom>
|
|
Preview Results
|
|
</Typography>
|
|
<Typography color={previewData.matched_count > 0 ? 'warning.main' : 'success.main'}>
|
|
{previewData.message}
|
|
</Typography>
|
|
{previewData.matched_count > 0 && previewData.documents && (
|
|
<Box sx={{ mt: 2 }}>
|
|
<Typography variant="body2" color="text.secondary" gutterBottom>
|
|
Documents that would be deleted:
|
|
</Typography>
|
|
<TableContainer component={Paper} variant="outlined" sx={{ mt: 2 }}>
|
|
<Table size="small">
|
|
<TableHead>
|
|
<TableRow>
|
|
<TableCell>Filename</TableCell>
|
|
<TableCell>Size</TableCell>
|
|
<TableCell>OCR Confidence</TableCell>
|
|
<TableCell>Status</TableCell>
|
|
<TableCell>Date</TableCell>
|
|
</TableRow>
|
|
</TableHead>
|
|
<TableBody>
|
|
{previewData.documents.slice(0, 20).map((doc: any) => (
|
|
<TableRow key={doc.id}>
|
|
<TableCell>
|
|
<Typography variant="body2" noWrap>
|
|
{doc.original_filename || doc.filename}
|
|
</Typography>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Typography variant="body2">
|
|
{formatFileSize(doc.file_size)}
|
|
</Typography>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Typography variant="body2" color={doc.ocr_confidence ? 'warning.main' : 'error.main'}>
|
|
{doc.ocr_confidence ? `${doc.ocr_confidence.toFixed(1)}%` : 'N/A'}
|
|
</Typography>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Chip
|
|
size="small"
|
|
label={doc.ocr_status || 'Unknown'}
|
|
color={doc.ocr_status === 'failed' ? 'error' : 'default'}
|
|
/>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Typography variant="body2">
|
|
{new Date(doc.created_at).toLocaleDateString()}
|
|
</Typography>
|
|
</TableCell>
|
|
</TableRow>
|
|
))}
|
|
</TableBody>
|
|
</Table>
|
|
</TableContainer>
|
|
{previewData.documents.length > 20 && (
|
|
<Typography variant="body2" color="text.secondary" sx={{ mt: 1 }}>
|
|
... and {previewData.documents.length - 20} more documents
|
|
</Typography>
|
|
)}
|
|
</Box>
|
|
)}
|
|
</CardContent>
|
|
</Card>
|
|
)}
|
|
|
|
{/* Loading State */}
|
|
{lowConfidenceLoading && !previewData && (
|
|
<Box display="flex" justifyContent="center" alignItems="center" minHeight="200px">
|
|
<CircularProgress />
|
|
<Typography sx={{ ml: 2 }}>Processing request...</Typography>
|
|
</Box>
|
|
)}
|
|
</>
|
|
)}
|
|
|
|
{/* Delete Failed Documents Tab Content */}
|
|
{currentTab === 3 && (
|
|
<>
|
|
<Alert severity="warning" sx={{ mb: 3 }}>
|
|
<AlertTitle>Delete Failed OCR Documents</AlertTitle>
|
|
<Typography>
|
|
This tool allows you to delete all documents where OCR processing failed completely.
|
|
This includes documents with NULL confidence values or explicit failure status.
|
|
Use the preview feature first to see what documents would be affected before deleting.
|
|
</Typography>
|
|
</Alert>
|
|
|
|
<Card sx={{ mb: 3 }}>
|
|
<CardContent>
|
|
<Grid container spacing={3} alignItems="center">
|
|
<Grid item xs={12} md={6}>
|
|
<Button
|
|
variant="outlined"
|
|
onClick={handlePreviewFailedDocuments}
|
|
disabled={failedDocsLoading}
|
|
startIcon={failedDocsLoading ? <CircularProgress size={20} /> : <FindInPageIcon />}
|
|
fullWidth
|
|
>
|
|
Preview Failed Documents
|
|
</Button>
|
|
</Grid>
|
|
<Grid item xs={12} md={6}>
|
|
<Button
|
|
variant="contained"
|
|
color="error"
|
|
onClick={() => setConfirmDeleteFailedOpen(true)}
|
|
disabled={!failedPreviewData || failedPreviewData.matched_count === 0 || failedDocsLoading}
|
|
startIcon={<DeleteIcon />}
|
|
fullWidth
|
|
>
|
|
Delete Failed Documents
|
|
</Button>
|
|
</Grid>
|
|
</Grid>
|
|
</CardContent>
|
|
</Card>
|
|
|
|
{/* Preview Results */}
|
|
{failedPreviewData && (
|
|
<Card sx={{ mb: 3 }}>
|
|
<CardContent>
|
|
<Typography variant="h6" gutterBottom>
|
|
Preview Results
|
|
</Typography>
|
|
<Typography color={failedPreviewData.matched_count > 0 ? 'error.main' : 'success.main'}>
|
|
{failedPreviewData.message}
|
|
</Typography>
|
|
{failedPreviewData.matched_count > 0 && (
|
|
<Box sx={{ mt: 2 }}>
|
|
<Typography variant="body2" color="text.secondary">
|
|
Document IDs that would be deleted:
|
|
</Typography>
|
|
<Typography variant="body2" sx={{ fontFamily: 'monospace', wordBreak: 'break-all' }}>
|
|
{failedPreviewData.document_ids.slice(0, 10).join(', ')}
|
|
{failedPreviewData.document_ids.length > 10 && ` ... and ${failedPreviewData.document_ids.length - 10} more`}
|
|
</Typography>
|
|
</Box>
|
|
)}
|
|
</CardContent>
|
|
</Card>
|
|
)}
|
|
|
|
{/* Loading State */}
|
|
{failedDocsLoading && !failedPreviewData && (
|
|
<Box display="flex" justifyContent="center" alignItems="center" minHeight="200px">
|
|
<CircularProgress />
|
|
<Typography sx={{ ml: 2 }}>Processing request...</Typography>
|
|
</Box>
|
|
)}
|
|
</>
|
|
)}
|
|
|
|
{/* Failed Documents Overview Tab Content (failures from any processing stage) */}
|
|
{currentTab === 4 && (
|
|
<>
|
|
<Alert severity="info" sx={{ mb: 3 }}>
|
|
<AlertTitle>Failed Documents Overview</AlertTitle>
|
|
<Typography>
|
|
This shows all documents that failed at any stage of processing: ingestion, validation, OCR, storage, etc.
|
|
Use the filters to narrow down by failure stage or specific reason.
|
|
</Typography>
|
|
</Alert>
|
|
|
|
{/* Filter Controls */}
|
|
<Card sx={{ mb: 3 }}>
|
|
<CardContent>
|
|
<Grid container spacing={3} alignItems="center">
|
|
<Grid item xs={12} md={3}>
|
|
<TextField
|
|
label="Filter by Stage"
|
|
select
|
|
value={failedDocumentsFilters.stage || ''}
|
|
onChange={(e) => setFailedDocumentsFilters(prev => ({ ...prev, stage: e.target.value || undefined }))}
|
|
fullWidth
|
|
SelectProps={{ native: true }}
|
|
>
|
|
<option value="">All Stages</option>
|
|
<option value="ocr">OCR Processing</option>
|
|
<option value="ingestion">Document Ingestion</option>
|
|
<option value="validation">Validation</option>
|
|
<option value="storage">File Storage</option>
|
|
<option value="processing">Processing</option>
|
|
<option value="sync">Synchronization</option>
|
|
</TextField>
|
|
</Grid>
|
|
<Grid item xs={12} md={3}>
|
|
<TextField
|
|
label="Filter by Reason"
|
|
select
|
|
value={failedDocumentsFilters.reason || ''}
|
|
onChange={(e) => setFailedDocumentsFilters(prev => ({ ...prev, reason: e.target.value || undefined }))}
|
|
fullWidth
|
|
SelectProps={{ native: true }}
|
|
>
|
|
<option value="">All Reasons</option>
|
|
<option value="duplicate_content">Duplicate Content</option>
|
|
<option value="low_ocr_confidence">Low OCR Confidence</option>
|
|
<option value="unsupported_format">Unsupported Format</option>
|
|
<option value="file_too_large">File Too Large</option>
|
|
<option value="file_corrupted">File Corrupted</option>
|
|
<option value="ocr_timeout">OCR Timeout</option>
|
|
<option value="pdf_parsing_error">PDF Parsing Error</option>
|
|
<option value="other">Other</option>
|
|
</TextField>
|
|
</Grid>
|
|
<Grid item xs={12} md={2}>
|
|
<Button
|
|
variant="outlined"
|
|
onClick={fetchFailedDocumentsList}
|
|
disabled={failedDocumentsLoading}
|
|
startIcon={failedDocumentsLoading ? <CircularProgress size={20} /> : <RefreshIcon />}
|
|
fullWidth
|
|
>
|
|
Apply Filters
|
|
</Button>
|
|
</Grid>
|
|
</Grid>
|
|
</CardContent>
|
|
</Card>
|
|
|
|
{/* Failed Documents List */}
|
|
{failedDocuments.length > 0 && (
|
|
<Card sx={{ mb: 3 }}>
|
|
<CardContent>
|
|
<Typography variant="h6" gutterBottom>
|
|
Failed Documents ({failedDocuments.length})
|
|
</Typography>
|
|
<TableContainer component={Paper} variant="outlined">
|
|
<Table>
|
|
<TableHead>
|
|
<TableRow>
|
|
<TableCell>Filename</TableCell>
|
|
<TableCell>Stage</TableCell>
|
|
<TableCell>Reason</TableCell>
|
|
<TableCell>Size</TableCell>
|
|
<TableCell>Date</TableCell>
|
|
<TableCell>Actions</TableCell>
|
|
</TableRow>
|
|
</TableHead>
|
|
<TableBody>
|
|
{failedDocuments.slice((failedDocumentsPagination.page - 1) * failedDocumentsPagination.limit, failedDocumentsPagination.page * failedDocumentsPagination.limit).map((doc: any) => (
|
|
<TableRow key={doc.id}>
|
|
<TableCell>
|
|
<Typography variant="body2" noWrap>
|
|
{doc.original_filename || doc.filename}
|
|
</Typography>
|
|
{doc.ingestion_source && (
|
|
<Chip size="small" label={doc.ingestion_source} variant="outlined" sx={{ mt: 0.5 }} />
|
|
)}
|
|
</TableCell>
|
|
<TableCell>
|
|
<Chip
|
|
size="small"
|
|
label={doc.source || doc.failure_stage}
|
|
color={doc.failure_stage === 'ocr' ? 'error' : 'warning'}
|
|
/>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Chip
|
|
size="small"
|
|
label={doc.failure_category || doc.failure_reason}
|
|
color={getFailureReasonColor(doc.failure_reason)}
|
|
/>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Typography variant="body2">
|
|
{doc.file_size ? formatFileSize(doc.file_size) : 'N/A'}
|
|
</Typography>
|
|
</TableCell>
|
|
<TableCell>
|
|
<Typography variant="body2">
|
|
{new Date(doc.created_at).toLocaleDateString()}
|
|
</Typography>
|
|
</TableCell>
|
|
<TableCell>
|
|
<IconButton
|
|
size="small"
|
|
onClick={() => setSelectedFailedDocument(doc)}
|
|
title="View Details"
|
|
>
|
|
<InfoIcon />
|
|
</IconButton>
|
|
{doc.existing_document_id && (
|
|
<IconButton
|
|
size="small"
|
|
onClick={() => navigate(`/documents/${doc.existing_document_id}`)}
|
|
title="View Existing Document"
|
|
>
|
|
<OpenInNewIcon />
|
|
</IconButton>
|
|
)}
|
|
</TableCell>
|
|
</TableRow>
|
|
))}
|
|
</TableBody>
|
|
</Table>
|
|
</TableContainer>
|
|
|
|
{/* Pagination */}
|
|
{failedDocumentsTotalPages > 1 && (
|
|
<Box display="flex" justifyContent="center" mt={2}>
|
|
<Pagination
|
|
count={failedDocumentsTotalPages}
|
|
page={failedDocumentsPagination.page}
|
|
onChange={(_, page) => setFailedDocumentsPagination(prev => ({ ...prev, page }))}
|
|
color="primary"
|
|
/>
|
|
</Box>
|
|
)}
|
|
</CardContent>
|
|
</Card>
|
|
)}
|
|
|
|
{/* Loading State */}
|
|
{failedDocumentsLoading && (
|
|
<Box display="flex" justifyContent="center" alignItems="center" minHeight="200px">
|
|
<CircularProgress />
|
|
<Typography sx={{ ml: 2 }}>Loading failed documents...</Typography>
|
|
</Box>
|
|
)}
|
|
|
|
{/* Empty State */}
|
|
{!failedDocumentsLoading && failedDocuments.length === 0 && (
|
|
<Alert severity="success">
|
|
<AlertTitle>No Failed Documents Found</AlertTitle>
|
|
<Typography>
|
|
No documents have failed processing with the current filters. This is good!
|
|
</Typography>
|
|
</Alert>
|
|
)}
|
|
</>
|
|
)}
|
|
|
|
{/* Confirmation Dialog for Low Confidence Document Deletion */}
|
|
<Dialog
|
|
open={confirmDeleteOpen}
|
|
onClose={() => setConfirmDeleteOpen(false)}
|
|
maxWidth="sm"
|
|
fullWidth
|
|
>
|
|
<DialogTitle color="warning.main">
|
|
<DeleteIcon sx={{ mr: 1, verticalAlign: 'middle' }} />
|
|
Confirm Low Confidence Document Deletion
|
|
</DialogTitle>
|
|
<DialogContent>
|
|
<Typography>
|
|
Are you sure you want to delete {previewData?.matched_count || 0} documents with OCR confidence below {confidenceThreshold}%?
|
|
</Typography>
|
|
<Alert severity="warning" sx={{ mt: 2 }}>
|
|
This action cannot be undone. The documents and their files will be permanently deleted.
|
|
</Alert>
|
|
</DialogContent>
|
|
<DialogActions>
|
|
<Button onClick={() => setConfirmDeleteOpen(false)}>
|
|
Cancel
|
|
</Button>
|
|
<Button
|
|
onClick={handleDeleteLowConfidence}
|
|
color="warning"
|
|
variant="contained"
|
|
disabled={lowConfidenceLoading}
|
|
startIcon={lowConfidenceLoading ? <CircularProgress size={20} /> : <DeleteIcon />}
|
|
>
|
|
{lowConfidenceLoading ? 'Deleting...' : 'Delete Documents'}
|
|
</Button>
|
|
</DialogActions>
|
|
</Dialog>
|
|
|
|
{/* Confirmation Dialog for Failed Documents */}
|
|
<Dialog
|
|
open={confirmDeleteFailedOpen}
|
|
onClose={() => setConfirmDeleteFailedOpen(false)}
|
|
maxWidth="sm"
|
|
fullWidth
|
|
>
|
|
<DialogTitle color="error.main">
|
|
<DeleteIcon sx={{ mr: 1, verticalAlign: 'middle' }} />
|
|
Confirm Failed Document Deletion
|
|
</DialogTitle>
|
|
<DialogContent>
|
|
<Typography>
|
|
Are you sure you want to delete {failedPreviewData?.matched_count || 0} documents with failed OCR processing?
|
|
</Typography>
|
|
<Alert severity="error" sx={{ mt: 2 }}>
|
|
This action cannot be undone. The documents and their files will be permanently deleted.
|
|
</Alert>
|
|
</DialogContent>
|
|
<DialogActions>
|
|
<Button onClick={() => setConfirmDeleteFailedOpen(false)}>
|
|
Cancel
|
|
</Button>
|
|
<Button
|
|
onClick={handleDeleteFailedDocuments}
|
|
color="error"
|
|
variant="contained"
|
|
disabled={failedDocsLoading}
|
|
startIcon={failedDocsLoading ? <CircularProgress size={20} /> : <DeleteIcon />}
|
|
>
|
|
{failedDocsLoading ? 'Deleting...' : 'Delete Failed Documents'}
|
|
</Button>
|
|
</DialogActions>
|
|
</Dialog>
|
|
|
|
{/* Document Details Dialog */}
|
|
<Dialog
|
|
open={detailsOpen}
|
|
onClose={() => setDetailsOpen(false)}
|
|
maxWidth="lg"
|
|
fullWidth
|
|
>
|
|
<DialogTitle>
|
|
Document Details: {selectedDocument?.filename}
|
|
</DialogTitle>
|
|
<DialogContent>
|
|
{selectedDocument && (
|
|
<Grid container spacing={3}>
|
|
{/* File Preview Section */}
|
|
<Grid item xs={12} md={6}>
|
|
<Typography variant="h6" sx={{ mb: 2 }}>
|
|
File Preview
|
|
</Typography>
|
|
<Box
|
|
onClick={() => {
|
|
if (selectedDocument) {
|
|
navigate(`/documents/${selectedDocument.id}`);
|
|
}
|
|
}}
|
|
sx={{
|
|
cursor: 'pointer',
|
|
border: '2px dashed',
|
|
borderColor: 'primary.main',
|
|
borderRadius: 2,
|
|
p: 1,
|
|
transition: 'all 0.2s ease-in-out',
|
|
'&:hover': {
|
|
borderColor: 'primary.dark',
|
|
boxShadow: 2,
|
|
},
|
|
}}
|
|
>
|
|
<DocumentViewer
|
|
documentId={selectedDocument.id}
|
|
filename={selectedDocument.original_filename}
|
|
mimeType={selectedDocument.mime_type}
|
|
/>
|
|
<Box sx={{ mt: 1, textAlign: 'center' }}>
|
|
<Typography variant="caption" color="primary.main">
|
|
Click to open full document details page
|
|
</Typography>
|
|
</Box>
|
|
</Box>
|
|
</Grid>
|
|
|
|
{/* Document Information Section */}
|
|
<Grid item xs={12} md={6}>
|
|
<Typography variant="h6" sx={{ mb: 2 }}>
|
|
Document Information
|
|
</Typography>
|
|
<Box>
|
|
<Typography variant="body2" color="text.secondary">
|
|
<strong>Original Filename:</strong>
|
|
</Typography>
|
|
<Typography variant="body2" sx={{ mb: 2 }}>
|
|
{selectedDocument.original_filename}
|
|
</Typography>
|
|
|
|
<Typography variant="body2" color="text.secondary">
|
|
<strong>File Size:</strong>
|
|
</Typography>
|
|
<Typography variant="body2" sx={{ mb: 2 }}>
|
|
{formatFileSize(selectedDocument.file_size)}
|
|
</Typography>
|
|
|
|
<Typography variant="body2" color="text.secondary">
|
|
<strong>MIME Type:</strong>
|
|
</Typography>
|
|
<Typography variant="body2" sx={{ mb: 2 }}>
|
|
{selectedDocument.mime_type}
|
|
</Typography>
|
|
|
|
<Typography variant="body2" color="text.secondary">
|
|
<strong>Failure Category:</strong>
|
|
</Typography>
|
|
<Chip
|
|
label={selectedDocument.failure_category}
|
|
color={getFailureCategoryColor(selectedDocument.failure_category)}
|
|
sx={{ mb: 2 }}
|
|
/>
|
|
|
|
<Typography variant="body2" color="text.secondary" sx={{ mt: 2 }}>
|
|
<strong>Retry Count:</strong>
|
|
</Typography>
|
|
<Typography variant="body2" sx={{ mb: 2 }}>
|
|
{selectedDocument.retry_count} attempts
|
|
</Typography>
|
|
|
|
<Typography variant="body2" color="text.secondary">
|
|
<strong>Created:</strong>
|
|
</Typography>
|
|
<Typography variant="body2" sx={{ mb: 2 }}>
|
|
{format(new Date(selectedDocument.created_at), 'PPpp')}
|
|
</Typography>
|
|
|
|
<Typography variant="body2" color="text.secondary">
|
|
<strong>Last Updated:</strong>
|
|
</Typography>
|
|
<Typography variant="body2">
|
|
{format(new Date(selectedDocument.updated_at), 'PPpp')}
|
|
</Typography>
|
|
|
|
<Typography variant="body2" color="text.secondary" sx={{ mt: 2 }}>
|
|
<strong>Tags:</strong>
|
|
</Typography>
|
|
<Box sx={{ mb: 2 }}>
|
|
{selectedDocument.tags.length > 0 ? (
|
|
selectedDocument.tags.map((tag) => (
|
|
<Chip key={tag} label={tag} size="small" sx={{ mr: 1, mb: 1 }} />
|
|
))
|
|
) : (
|
|
<Typography variant="body2" color="text.secondary">No tags</Typography>
|
|
)}
|
|
</Box>
|
|
</Box>
|
|
</Grid>
|
|
|
|
{/* Error Details Section */}
|
|
<Grid item xs={12}>
|
|
<Divider sx={{ my: 2 }} />
|
|
<Typography variant="h6" sx={{ mb: 2 }}>
|
|
Error Details
|
|
</Typography>
|
|
<Typography variant="body2" color="text.secondary" sx={{ mb: 1 }}>
|
|
<strong>Full Error Message:</strong>
|
|
</Typography>
|
|
<Paper sx={{ p: 2, bgcolor: 'grey.50' }}>
|
|
<Typography
|
|
variant="body2"
|
|
sx={{
|
|
fontFamily: 'monospace',
|
|
fontSize: '0.875rem',
|
|
wordBreak: 'break-word',
|
|
whiteSpace: 'pre-wrap'
|
|
}}
|
|
>
|
|
{selectedDocument.ocr_error || 'No error message available'}
|
|
</Typography>
|
|
</Paper>
|
|
</Grid>
|
|
</Grid>
|
|
)}
|
|
</DialogContent>
|
|
<DialogActions>
|
|
<Button
|
|
onClick={() => {
|
|
if (selectedDocument) {
|
|
navigate(`/documents/${selectedDocument.id}`);
|
|
}
|
|
}}
|
|
startIcon={<OpenInNewIcon />}
|
|
color="primary"
|
|
>
|
|
Open Document Details
|
|
</Button>
|
|
{selectedDocument?.can_retry && (
|
|
<Button
|
|
onClick={() => {
|
|
setDetailsOpen(false);
|
|
if (selectedDocument) {
|
|
handleRetryOcr(selectedDocument);
|
|
}
|
|
}}
|
|
startIcon={<RefreshIcon />}
|
|
disabled={retrying === selectedDocument?.id}
|
|
>
|
|
Retry OCR
|
|
</Button>
|
|
)}
|
|
<Button onClick={() => setDetailsOpen(false)}>Close</Button>
|
|
</DialogActions>
|
|
</Dialog>
|
|
|
|
{/* Notification Snackbar (severity is taken from snackbar state: success, error, info, ...) */}
|
|
<Snackbar
|
|
open={snackbar.open}
|
|
autoHideDuration={6000}
|
|
onClose={() => setSnackbar(prev => ({ ...prev, open: false }))}
|
|
>
|
|
<Alert
|
|
onClose={() => setSnackbar(prev => ({ ...prev, open: false }))}
|
|
severity={snackbar.severity}
|
|
sx={{ width: '100%' }}
|
|
>
|
|
{snackbar.message}
|
|
</Alert>
|
|
</Snackbar>
|
|
</Box>
|
|
);
|
|
};
|
|
|
|
export default FailedOcrPage; |