import React, { useState, useEffect } from 'react'; import { useNavigate } from 'react-router-dom'; import { Box, Typography, Card, CardContent, Button, Chip, Alert, AlertTitle, Table, TableBody, TableCell, TableContainer, TableHead, TableRow, Paper, Dialog, DialogTitle, DialogContent, DialogContentText, DialogActions, Pagination, CircularProgress, Tooltip, IconButton, Collapse, LinearProgress, Snackbar, Tabs, Tab, TextField, MenuItem, useTheme, Divider, InputAdornment, Checkbox, } from '@mui/material'; import Grid from '@mui/material/GridLegacy'; import { Refresh as RefreshIcon, Error as ErrorIcon, Info as InfoIcon, ExpandMore as ExpandMoreIcon, ExpandLess as ExpandLessIcon, Schedule as ScheduleIcon, Visibility as VisibilityIcon, Download as DownloadIcon, FileCopy as FileCopyIcon, Delete as DeleteIcon, FindInPage as FindInPageIcon, OpenInNew as OpenInNewIcon, Warning as WarningIcon, Block as BlockIcon, History as HistoryIcon, } from '@mui/icons-material'; import { format } from 'date-fns'; import { api, documentService, queueService, BulkOcrRetryResponse } from '../services/api'; import DocumentViewer from '../components/DocumentViewer'; import FailedDocumentViewer from '../components/FailedDocumentViewer'; import MetadataDisplay from '../components/MetadataDisplay'; import { BulkRetryModal } from '../components/BulkRetryModal'; import { RetryRecommendations } from '../components/RetryRecommendations'; import { RetryHistoryModal } from '../components/RetryHistoryModal'; interface FailedDocument { id: string; filename: string; original_filename: string; file_size: number; mime_type: string; created_at: string; updated_at: string; tags: string[]; ocr_status: string; ocr_error: string; ocr_failure_reason: string; ocr_completed_at?: string; retry_count: number; last_attempt_at?: string; can_retry: boolean; failure_category: string; ocr_confidence?: number; ocr_word_count?: number; failure_reason: string; error_message?: string; original_created_at?: string; 
original_modified_at?: string; source_metadata?: any; } interface FailureCategory { reason: string; display_name: string; count: number; } interface FailedOcrResponse { documents: FailedDocument[]; pagination: { total: number; limit: number; offset: number; total_pages: number; }; statistics: { total_failed: number; by_reason: Record; by_stage: Record; }; } interface RetryResponse { success: boolean; message: string; queue_id?: string; estimated_wait_minutes?: number; } interface DuplicateDocument { id: string; filename: string; original_filename: string; file_size: number; mime_type: string; created_at: string; user_id: string; } interface DuplicateGroup { file_hash: string; duplicate_count: number; first_uploaded: string; last_uploaded: string; documents: DuplicateDocument[]; } interface DuplicatesResponse { duplicates: DuplicateGroup[]; pagination: { total: number; limit: number; offset: number; has_more: boolean; }; statistics: { total_duplicate_groups: number; }; } interface IgnoredFile { id: string; file_hash: string; filename: string; original_filename: string; file_path: string; file_size: number; mime_type: string; source_type?: string; source_path?: string; source_identifier?: string; ignored_at: string; ignored_by: string; ignored_by_username?: string; reason?: string; created_at: string; } interface IgnoredFilesStats { total_ignored_files: number; by_source_type: Array<{ source_type?: string; count: number; total_size_bytes: number; }>; total_size_bytes: number; most_recent_ignored_at?: string; } const DocumentManagementPage: React.FC = () => { const theme = useTheme(); const navigate = useNavigate(); const [currentTab, setCurrentTab] = useState(0); const [documents, setDocuments] = useState([]); const [duplicates, setDuplicates] = useState([]); const [loading, setLoading] = useState(true); const [duplicatesLoading, setDuplicatesLoading] = useState(false); const [failedDocumentsFilters, setFailedDocumentsFilters] = useState<{ stage?: string; reason?: 
string }>({}); const [selectedFailedDocument, setSelectedFailedDocument] = useState(null); const [retrying, setRetrying] = useState(null); const [retryingAll, setRetryingAll] = useState(false); const [statistics, setStatistics] = useState(null); const [duplicateStatistics, setDuplicateStatistics] = useState(null); const [pagination, setPagination] = useState({ page: 1, limit: 25 }); const [duplicatesPagination, setDuplicatesPagination] = useState({ page: 1, limit: 25 }); const [totalPages, setTotalPages] = useState(0); const [duplicatesTotalPages, setDuplicatesTotalPages] = useState(0); const [selectedDocument, setSelectedDocument] = useState(null); const [detailsOpen, setDetailsOpen] = useState(false); const [expandedRows, setExpandedRows] = useState>(new Set()); const [expandedDuplicateGroups, setExpandedDuplicateGroups] = useState>(new Set()); const [snackbar, setSnackbar] = useState<{ open: boolean; message: string; severity: 'success' | 'error' | 'info' | 'warning' }>({ open: false, message: '', severity: 'success' }); // Low confidence documents state const [confidenceThreshold, setConfidenceThreshold] = useState(30); const [lowConfidenceLoading, setLowConfidenceLoading] = useState(false); const [previewData, setPreviewData] = useState(null); const [confirmDeleteOpen, setConfirmDeleteOpen] = useState(false); // Failed documents deletion state const [failedDocsLoading, setFailedDocsLoading] = useState(false); const [failedPreviewData, setFailedPreviewData] = useState(null); const [confirmDeleteFailedOpen, setConfirmDeleteFailedOpen] = useState(false); // Ignored files state const [ignoredFiles, setIgnoredFiles] = useState([]); const [ignoredFilesStats, setIgnoredFilesStats] = useState(null); const [ignoredFilesLoading, setIgnoredFilesLoading] = useState(false); const [ignoredFilesPagination, setIgnoredFilesPagination] = useState({ page: 1, limit: 25 }); const [ignoredFilesTotalPages, setIgnoredFilesTotalPages] = useState(0); const [ignoredFilesSearchTerm, 
setIgnoredFilesSearchTerm] = useState(''); const [ignoredFilesSourceTypeFilter, setIgnoredFilesSourceTypeFilter] = useState(''); const [selectedIgnoredFiles, setSelectedIgnoredFiles] = useState>(new Set()); const [bulkDeleteIgnoredDialog, setBulkDeleteIgnoredDialog] = useState(false); const [deletingIgnoredFiles, setDeletingIgnoredFiles] = useState(false); // Advanced retry functionality state const [bulkRetryModalOpen, setBulkRetryModalOpen] = useState(false); const [retryHistoryModalOpen, setRetryHistoryModalOpen] = useState(false); const [selectedDocumentForHistory, setSelectedDocumentForHistory] = useState(null); const [selectedDocumentIds, setSelectedDocumentIds] = useState([]); const [confirmRetryAllOpen, setConfirmRetryAllOpen] = useState(false); const fetchFailedDocuments = async () => { try { setLoading(true); const offset = (pagination.page - 1) * pagination.limit; // Use the comprehensive API that supports filtering const response = await documentService.getFailedDocuments( pagination.limit, offset, failedDocumentsFilters.stage, failedDocumentsFilters.reason ); if (response?.data) { setDocuments(response.data.documents || []); setStatistics(response.data.statistics || null); if (response.data.pagination) { setTotalPages(Math.ceil(response.data.pagination.total / pagination.limit)); } } } catch (error) { console.error('Failed to fetch failed documents:', error); setSnackbar({ open: true, message: 'Failed to load failed documents', severity: 'error' }); } finally { setLoading(false); } }; const fetchDuplicates = async () => { try { setDuplicatesLoading(true); const offset = (duplicatesPagination.page - 1) * duplicatesPagination.limit; const response = await documentService.getDuplicates(duplicatesPagination.limit, offset); if (response?.data) { setDuplicates(response.data.duplicates || []); setDuplicateStatistics(response.data.statistics || null); if (response.data.pagination) { setDuplicatesTotalPages(Math.ceil(response.data.pagination.total / 
duplicatesPagination.limit));
        }
      }
    } catch (error) {
      console.error('Failed to fetch duplicates:', error);
      setSnackbar({ open: true, message: 'Failed to load duplicate documents', severity: 'error' });
    } finally {
      setDuplicatesLoading(false);
    }
  };

  // Reload the failed-documents list whenever its page or filters change.
  useEffect(() => {
    fetchFailedDocuments();
    // Also fetch ignored files stats for the tab label
    fetchIgnoredFilesStats();
  }, [pagination.page, failedDocumentsFilters]);

  // Load the data for the active tab, and reload it when that tab's paging,
  // search term or source filter changes.
  // Tab indices: 0 = failed documents, 1 = cleanup, 2 = duplicates, 3 = ignored files.
  // The ignored-files branch previously tested for index 4, which no tab uses,
  // so the ignored-files list was never fetched from this effect.
  useEffect(() => {
    if (currentTab === 2) {
      fetchDuplicates();
    } else if (currentTab === 3) {
      fetchIgnoredFiles();
    }
  }, [currentTab, duplicatesPagination.page, ignoredFilesPagination.page, ignoredFilesSearchTerm, ignoredFilesSourceTypeFilter]);

  /**
   * Maps a machine-readable failure reason to a colour keyword.
   *
   * @param reason failure reason code, e.g. `'ocr_timeout'`
   * @returns `'error'` for OCR/parsing/access failures, `'warning'` for
   *          duplicate/unsupported/oversized files, `'default'` otherwise
   */
  const getFailureReasonColor = (reason: string): "error" | "warning" | "info" | "default" => {
    switch (reason) {
      case 'low_ocr_confidence':
      case 'ocr_timeout':
      case 'ocr_memory_limit':
      case 'pdf_parsing_error':
      case 'file_corrupted':
      case 'access_denied':
      case 'permission_denied':
        return 'error';
      case 'duplicate_content':
      case 'unsupported_format':
      case 'file_too_large':
        return 'warning';
      default:
        return 'default';
    }
  };

  const handleRetryOcr = async (document: FailedDocument) => {
    try {
      setRetrying(document.id);
      const response = await documentService.retryOcr(document.id);
      if (response.data.success) {
        setSnackbar({
          open: true,
          message: `OCR retry queued for "${document.filename}". 
Estimated wait time: ${response.data.estimated_wait_minutes ?? 'Unknown'} minutes.`,
          severity: 'success'
        });
        // Refresh the list to update retry counts and status
        await fetchFailedDocuments();
      } else {
        setSnackbar({ open: true, message: response.data.message || 'Failed to retry OCR', severity: 'error' });
      }
    } catch (error) {
      console.error('Failed to retry OCR:', error);
      setSnackbar({ open: true, message: 'Failed to retry OCR processing', severity: 'error' });
    } finally {
      setRetrying(null);
    }
  };

  /**
   * Requests an OCR retry for every document (`mode: 'all'`) and reports the
   * number of queued documents in the snackbar. The `retryingAll` flag is set
   * for the duration of the request and always cleared afterwards.
   */
  const handleRetryAllDocuments = async () => {
    try {
      setRetryingAll(true);
      const response = await documentService.bulkRetryOcr({ mode: 'all', preview_only: false });
      if (response.data.queued_count > 0) {
        setSnackbar({
          open: true,
          message: `Successfully queued ${response.data.queued_count} documents for OCR retry. Estimated processing time: ${Math.ceil(response.data.estimated_total_time_minutes)} minutes.`,
          severity: 'success'
        });
        // Refresh the tab that is currently shown. refreshCurrentTab only kicks
        // off the fetches and returns nothing, so there is nothing to await.
        refreshCurrentTab();
      } else {
        setSnackbar({ open: true, message: 'No documents found to retry', severity: 'info' });
      }
    } catch (error) {
      console.error('Error retrying all documents:', error);
      setSnackbar({ open: true, message: 'Failed to retry documents. Please try again.', severity: 'error' });
    } finally {
      setRetryingAll(false);
    }
  };

  const handleRetryAllFailed = async () => {
    try {
      setRetryingAll(true);
      const response = await queueService.requeueFailed();
      if (response.data.requeued_count > 0) {
        setSnackbar({
          open: true,
          message: `Successfully queued ${response.data.requeued_count} failed documents for OCR retry. 
Check the queue stats for progress.`,
          severity: 'success'
        });
        // Refresh the list to update status
        await fetchFailedDocuments();
      } else {
        setSnackbar({ open: true, message: 'No failed documents found to retry', severity: 'info' });
      }
    } catch (error) {
      console.error('Failed to retry all failed OCR:', error);
      setSnackbar({ open: true, message: 'Failed to retry all failed OCR documents', severity: 'error' });
    } finally {
      setRetryingAll(false);
    }
  };

  // Advanced retry functionality handlers

  /** Reports the outcome of a bulk retry in the snackbar and reloads the failed-documents list. */
  const handleBulkRetrySuccess = (result: BulkOcrRetryResponse) => {
    setSnackbar({
      open: true,
      message: `Successfully queued ${result.queued_count} of ${result.matched_count} documents for retry. Estimated processing time: ${Math.round(result.estimated_total_time_minutes)} minutes.`,
      severity: 'success'
    });
    fetchFailedDocuments(); // Refresh the list
  };

  /** Remembers which document's history was requested and opens the retry-history modal. */
  const handleShowRetryHistory = (documentId: string) => {
    setSelectedDocumentForHistory(documentId);
    setRetryHistoryModalOpen(true);
  };

  /**
   * Formats a byte count as a human-readable size using 1024-based units.
   *
   * @param bytes size in bytes
   * @returns e.g. `'1.5 KB'`; `'0 B'` for zero, negative or non-finite input
   */
  const formatFileSize = (bytes: number): string => {
    // Math.log is undefined for values <= 0 and NaN; treat all of them as empty.
    if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';
    const k = 1024;
    const sizes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'];
    const rawIndex = Math.floor(Math.log(bytes) / Math.log(k));
    // Clamp so fractional byte counts (< 1) and values beyond the largest unit
    // still select a valid entry instead of indexing outside the table.
    const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1);
    return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
  };

  /**
   * Maps a display-name failure category to a colour keyword.
   *
   * @param category category label, e.g. `'Timeout'`
   * @returns `'warning'`, `'error'`, `'info'`, or `'default'` for unknown labels
   */
  const getFailureCategoryColor = (category: string): "error" | "warning" | "info" | "default" => {
    switch (category) {
      case 'PDF Font Issues':
      case 'PDF Corruption':
      case 'PDF Parsing Error':
      case 'Low OCR Confidence':
        return 'warning';
      case 'Timeout':
      case 'Memory Limit':
        return 'error';
      case 'Unknown Error':
        return 'info';
      default:
        return 'default';
    }
  };

  /** Toggles whether the table row for `documentId` is expanded. */
  const toggleRowExpansion = (documentId: string) => {
    // Copy first: state must be replaced with a new Set, not mutated in place.
    const newExpanded = new Set(expandedRows);
    if (newExpanded.has(documentId)) {
      newExpanded.delete(documentId);
    } else {
      newExpanded.add(documentId);
    }
    setExpandedRows(newExpanded);
  };

  /** Selects `document` and opens the details dialog for it. */
  const showDocumentDetails = (document: FailedDocument) => {
    setSelectedDocument(document);
    setDetailsOpen(true);
  };

  // Ignored Files 
functions

  /**
   * Loads one page of ignored files, applying the current filename search and
   * source-type filter, and updates the list and page count. Failures are
   * logged and surfaced through the snackbar.
   */
  const fetchIgnoredFiles = async () => {
    try {
      setIgnoredFilesLoading(true);
      const { page, limit } = ignoredFilesPagination;
      const offset = (page - 1) * limit;
      const query = new URLSearchParams({
        limit: limit.toString(),
        offset: offset.toString(),
      });
      // Optional filters are only sent when they are non-empty.
      if (ignoredFilesSearchTerm) {
        query.append('filename', ignoredFilesSearchTerm);
      }
      if (ignoredFilesSourceTypeFilter) {
        query.append('source_type', ignoredFilesSourceTypeFilter);
      }

      const response = await api.get(`/ignored-files?${query}`);
      if (response?.data) {
        const payload = response.data;
        setIgnoredFiles(payload.ignored_files || []);
        setIgnoredFilesTotalPages(Math.ceil(payload.total / limit));
      }
    } catch (error) {
      console.error('Failed to fetch ignored files:', error);
      setSnackbar({ open: true, message: 'Failed to load ignored files', severity: 'error' });
    } finally {
      setIgnoredFilesLoading(false);
    }
  };

  /** Loads the ignored-files statistics; failures are logged only. */
  const fetchIgnoredFilesStats = async () => {
    try {
      const response = await api.get('/ignored-files/stats');
      const stats = response?.data;
      if (stats) {
        setIgnoredFilesStats(stats);
      }
    } catch (error) {
      console.error('Failed to fetch ignored files stats:', error);
    }
  };

  /** Adds `fileId` to the selection, or removes it if it is already selected. */
  const handleIgnoredFileSelect = (fileId: string) => {
    const updated = new Set(selectedIgnoredFiles);
    // Set.delete reports whether the id was present; add it only when it was not.
    if (!updated.delete(fileId)) {
      updated.add(fileId);
    }
    setSelectedIgnoredFiles(updated);
  };

  /** Selects every listed file, or clears the selection when the counts already match. */
  const handleIgnoredFilesSelectAll = () => {
    if (selectedIgnoredFiles.size !== ignoredFiles.length) {
      setSelectedIgnoredFiles(new Set(ignoredFiles.map(file => file.id)));
      return;
    }
    setSelectedIgnoredFiles(new Set());
  };

  const handleDeleteSelectedIgnoredFiles = async () => {
    if (selectedIgnoredFiles.size === 0) return;
    setDeletingIgnoredFiles(true);
    try {
      const response = await api.delete('/ignored-files/bulk-delete', {
        data: { ignored_file_ids: Array.from(selectedIgnoredFiles) }
      });
      setSnackbar({
        open: true,
        message: response.data.message || 'Files removed from ignored list',
        severity: 'success' 
}); setSelectedIgnoredFiles(new Set()); setBulkDeleteIgnoredDialog(false); fetchIgnoredFiles(); fetchIgnoredFilesStats(); } catch (error: any) { setSnackbar({ open: true, message: error.response?.data?.message || 'Failed to delete ignored files', severity: 'error' }); } finally { setDeletingIgnoredFiles(false); } }; const handleDeleteSingleIgnoredFile = async (fileId: string) => { try { const response = await api.delete(`/ignored-files/${fileId}`); setSnackbar({ open: true, message: response.data.message || 'File removed from ignored list', severity: 'success' }); fetchIgnoredFiles(); fetchIgnoredFilesStats(); } catch (error: any) { setSnackbar({ open: true, message: error.response?.data?.message || 'Failed to delete ignored file', severity: 'error' }); } }; const getSourceIcon = (sourceType?: string) => { switch (sourceType) { case 'webdav': return ; case 'local_folder': return ; case 's3': return ; default: return ; } }; const getSourceTypeDisplay = (sourceType?: string) => { switch (sourceType) { case 'webdav': return 'WebDAV'; case 'local_folder': return 'Local Folder'; case 's3': return 'S3'; default: return sourceType || 'Unknown'; } }; const toggleDuplicateGroupExpansion = (groupHash: string) => { const newExpanded = new Set(expandedDuplicateGroups); if (newExpanded.has(groupHash)) { newExpanded.delete(groupHash); } else { newExpanded.add(groupHash); } setExpandedDuplicateGroups(newExpanded); }; const handleTabChange = (event: React.SyntheticEvent, newValue: number) => { setCurrentTab(newValue); }; const refreshCurrentTab = () => { if (currentTab === 0) { fetchFailedDocuments(); } else if (currentTab === 1) { // Refresh both low confidence and failed documents for the merged cleanup tab handlePreviewLowConfidence(); handlePreviewFailedDocuments(); } else if (currentTab === 2) { fetchDuplicates(); } else if (currentTab === 3) { fetchIgnoredFiles(); fetchIgnoredFilesStats(); } }; // Low confidence document handlers const handlePreviewLowConfidence = async () 
=> {
    try {
      setLowConfidenceLoading(true);
      // Second argument `true` requests a preview instead of a deletion.
      const response = await documentService.deleteLowConfidence(confidenceThreshold, true);
      setPreviewData(response.data);
      setSnackbar({ open: true, message: response.data.message, severity: 'info' });
    } catch (error) {
      console.error('Failed to preview low confidence documents:', error);
      setSnackbar({ open: true, message: 'Failed to preview low confidence documents', severity: 'error' });
    } finally {
      setLowConfidenceLoading(false);
    }
  };

  /**
   * Deletes the documents whose OCR confidence is below `confidenceThreshold`.
   * Requires a preview with at least one match; otherwise only a warning is shown.
   * On success the preview is cleared, the confirmation dialog is closed and the
   * failed-documents list is reloaded.
   */
  const handleDeleteLowConfidence = async () => {
    if (!previewData || previewData.matched_count === 0) {
      setSnackbar({ open: true, message: 'No documents to delete', severity: 'warning' });
      return;
    }

    try {
      setLowConfidenceLoading(true);
      const response = await documentService.deleteLowConfidence(confidenceThreshold, false);
      setSnackbar({ open: true, message: response.data.message, severity: 'success' });
      setPreviewData(null);
      setConfirmDeleteOpen(false);

      // Always reload the failed-documents list and its statistics. This used to
      // run only when tab 0 was active, so a deletion made from another tab left
      // that list and the tab's count showing documents that no longer exist.
      fetchFailedDocuments();
    } catch (error) {
      console.error('Failed to delete low confidence documents:', error);
      setSnackbar({ open: true, message: 'Failed to delete low confidence documents', severity: 'error' });
    } finally {
      setLowConfidenceLoading(false);
    }
  };

  // Failed documents handlers

  /** Requests a preview of the failed-OCR deletion and stores the result. */
  const handlePreviewFailedDocuments = async () => {
    try {
      setFailedDocsLoading(true);
      const response = await documentService.deleteFailedOcr(true);
      setFailedPreviewData(response.data);
    } catch (error) {
      console.error('Failed to preview failed documents:', error);
      setSnackbar({ open: true, message: 'Failed to preview failed documents', severity: 'error' });
    } finally {
      setFailedDocsLoading(false);
    }
  };

  /**
   * Deletes the documents whose OCR processing failed. On success the preview
   * is cleared, the confirmation dialog is closed and the failed-documents list
   * is reloaded.
   */
  const handleDeleteFailedDocuments = async () => {
    try {
      setFailedDocsLoading(true);
      const response = await documentService.deleteFailedOcr(false);
      setSnackbar({ open: true, message: response.data.message, severity: 'success' });
      setFailedPreviewData(null);
      setConfirmDeleteFailedOpen(false);

      // Always reload: the deleted documents are the ones shown on the
      // failed-documents tab, whichever tab is active right now.
      fetchFailedDocuments();
    } catch (error) {
      console.error('Failed to delete failed documents:', error);
      setSnackbar({ open: true, message: 'Failed to delete failed documents', severity: 
'error' }); } finally { setFailedDocsLoading(false); } }; if (loading && (!documents || documents.length === 0)) { return ( ); } return ( Document Management } label={`Failed Documents${statistics ? ` (${statistics.total_failed})` : ''}`} iconPosition="start" /> } label={`Document Cleanup${(previewData?.matched_count || 0) + (failedPreviewData?.matched_count || 0) > 0 ? ` (${(previewData?.matched_count || 0) + (failedPreviewData?.matched_count || 0)})` : ''}`} iconPosition="start" /> } label={`Duplicate Files${duplicateStatistics ? ` (${duplicateStatistics.total_duplicate_groups})` : ''}`} iconPosition="start" /> } label={`Ignored Files${ignoredFilesStats ? ` (${ignoredFilesStats.total_ignored_files})` : ''}`} iconPosition="start" /> {/* Failed OCR Tab Content */} {currentTab === 0 && ( <> {/* Statistics Overview */} {statistics && ( Total Failed {statistics.total_failed} Failure Categories {statistics?.by_reason ? Object.entries(statistics.by_reason).map(([reason, count]) => ( )) : ( No failure data available )} )} {/* Advanced Retry Components */} Advanced Retry Options Use advanced filtering and selection options to retry specific subsets of failed documents based on file type, failure reason, size, and more. {/* Filter Controls */} Filter Options setFailedDocumentsFilters(prev => ({ ...prev, stage: e.target.value || undefined }))} fullWidth > All Stages OCR Processing Document Ingestion Validation File Storage Processing Synchronization setFailedDocumentsFilters(prev => ({ ...prev, reason: e.target.value || undefined }))} fullWidth > All Reasons Duplicate Content Low OCR Confidence Unsupported Format File Too Large File Corrupted OCR Timeout PDF Parsing Error Other {(!documents || documents.length === 0) ? ( Great news! No documents have failed OCR processing. All your documents are processing successfully. ) : ( <> Failed Documents Overview These documents failed at various stages of processing: ingestion, validation, OCR, storage, etc. 
Use the filters above to narrow down by failure stage or specific reason. You can retry processing for recoverable failures. Document Failure Type Retry Count Last Failed Actions {(documents || []).map((document) => ( toggleRowExpansion(document.id)} > {expandedRows.has(document.id) ? : } {document.filename} {formatFileSize(document.file_size)} • {document.mime_type} {document.retry_count} attempts {document.updated_at ? format(new Date(document.updated_at), 'MMM dd, yyyy HH:mm') : 'Unknown'} handleRetryOcr(document)} disabled={retrying === document.id || !document.can_retry} > {retrying === document.id ? ( ) : ( )} showDocumentDetails(document)} > handleShowRetryHistory(document.id)} > { try { await documentService.downloadFile(document.id, document.original_filename || document.filename); } catch (error) { console.error('Download failed:', error); } }} > theme.palette.mode === 'dark' ? 'grey.900' : 'grey.50', borderRadius: 1 }}> Error Details Failure Reason: {document.failure_reason || document.ocr_failure_reason || 'Not specified'} {/* Show OCR confidence and word count for low confidence failures */} {(document.failure_reason === 'low_ocr_confidence' || document.ocr_failure_reason === 'low_ocr_confidence') && ( <> OCR Results: {document.ocr_confidence !== undefined && document.ocr_confidence !== null && ( } label={`${document.ocr_confidence.toFixed(1)}% confidence`} color="warning" variant="outlined" /> )} {document.ocr_word_count !== undefined && ( } label={`${document.ocr_word_count} words found`} color="info" variant="outlined" /> )} )} Error Message: theme.palette.mode === 'dark' ? 'grey.800' : 'grey.100', p: 1, borderRadius: 1, fontSize: '0.75rem', wordBreak: 'break-word' }} > {document.error_message || document.ocr_error || 'No error message available'} Last Attempt: {document.last_attempt_at ? format(new Date(document.last_attempt_at), 'PPpp') : 'No previous attempts'} File Created: {format(new Date(document.created_at), 'PPpp')} ))}
{/* Pagination */} {totalPages > 1 && ( setPagination(prev => ({ ...prev, page }))} color="primary" /> )} )} )} {/* Duplicate Files Tab Content */} {currentTab === 2 && ( <> {/* Duplicate Statistics Overview */} {duplicateStatistics && ( Total Duplicate Groups {duplicateStatistics.total_duplicate_groups} )} {duplicatesLoading ? ( ) : duplicates.length === 0 ? ( No duplicates found! You don't have any duplicate documents. All your files have unique content. ) : ( <> Duplicate Documents Found These documents have identical content but may have different filenames. You can expand each group to see all files with the same content and choose which ones to keep. What should you do?
  • Review each group: Click to expand and see all duplicate files
  • Keep the best version: Choose the file with the most descriptive name
  • Check content: Use View/Download to verify files are truly identical
  • Note for admin: Consider implementing bulk delete functionality for duplicates
  • Content Hash Duplicate Count First Uploaded Last Uploaded Actions {duplicates.map((group) => ( toggleDuplicateGroupExpansion(group.file_hash)} > {expandedDuplicateGroups.has(group.file_hash) ? : } {group.file_hash.substring(0, 16)}... {format(new Date(group.first_uploaded), 'MMM dd, yyyy')} {format(new Date(group.last_uploaded), 'MMM dd, yyyy')} View files below Duplicate Files ({group.duplicate_count} total) Storage Impact: These {group.duplicate_count} files contain identical content. Consider keeping only the best-named version to save space. {group.documents.map((doc, index) => ( {doc.filename} {index === 0 && ( )} {doc.original_filename !== doc.filename && ( Original: {doc.original_filename} )} {formatFileSize(doc.file_size)} • {doc.mime_type} Uploaded: {format(new Date(doc.created_at), 'MMM dd, yyyy HH:mm')} window.open(`/api/documents/${doc.id}/view`, '_blank')} sx={{ color: theme.palette.primary.main }} > { try { await documentService.downloadFile(doc.id, doc.original_filename || doc.filename); } catch (error) { console.error('Download failed:', error); } }} sx={{ color: theme.palette.secondary.main }} > ))} ))}
    {/* Duplicates Pagination */} {duplicatesTotalPages > 1 && ( setDuplicatesPagination(prev => ({ ...prev, page }))} color="primary" /> )} )} )} {/* Document Cleanup Tab Content - Merged Low Quality Manager and Bulk Cleanup */} {currentTab === 1 && ( <> Document Cleanup Center Clean up your document library by removing problematic documents. You can delete:
  • Documents with low OCR confidence scores (below a threshold you set)
  • Documents where OCR processing failed completely
  • Always use the preview feature before deleting to see which documents will be affected.
    {/* Low Confidence Documents Section */} Low Confidence Documents setConfidenceThreshold(Math.max(0, Math.min(100, Number(e.target.value))))} fullWidth inputProps={{ min: 0, max: 100, step: 1 }} helperText="Documents with confidence below this value will be deleted" /> {/* Preview Results */} {previewData && ( Preview Results 0 ? 'warning.main' : 'success.main'}> {previewData.message} {previewData.matched_count > 0 && previewData.documents && ( Documents that would be deleted: Filename Size OCR Confidence Status Date {previewData.documents.slice(0, 20).map((doc: any) => ( {doc.original_filename || doc.filename} {formatFileSize(doc.file_size)} {doc.ocr_confidence ? `${doc.ocr_confidence.toFixed(1)}%` : 'N/A'} {new Date(doc.created_at).toLocaleDateString()} ))}
    {previewData.documents.length > 20 && ( ... and {previewData.documents.length - 20} more documents )}
    )}
    )} {/* Loading State */} {lowConfidenceLoading && !previewData && ( Processing request... )} {/* Divider between sections */} {/* Failed Documents Section */} Failed OCR Documents Delete Failed OCR Documents This section allows you to delete all documents where OCR processing failed completely. This includes documents with NULL confidence values or explicit failure status. {/* Preview Results for Failed Documents */} {failedPreviewData && ( Preview Results 0 ? 'error.main' : 'success.main'}> {failedPreviewData.message} {failedPreviewData.matched_count > 0 && ( Document IDs that would be deleted: {failedPreviewData.document_ids.slice(0, 10).join(', ')} {failedPreviewData.document_ids.length > 10 && ` ... and ${failedPreviewData.document_ids.length - 10} more`} )} )} {/* Loading State for Failed Documents */} {failedDocsLoading && !failedPreviewData && ( Processing request... )} )} {/* Ignored Files Tab Content */} {currentTab === 3 && ( <> Ignored Files Management Files that have been marked as ignored during sync operations from various sources. You can remove files from the ignored list to allow them to be synced again. {/* Statistics Cards */} {ignoredFilesStats && ( Total Ignored {ignoredFilesStats.total_ignored_files} Total Size {formatFileSize(ignoredFilesStats.total_size_bytes)} {ignoredFilesStats.most_recent_ignored_at && ( Most Recent {format(new Date(ignoredFilesStats.most_recent_ignored_at), 'MMM dd, yyyy')} )} )} {/* Filters and Search */} { setIgnoredFilesSearchTerm(e.target.value); setIgnoredFilesPagination(prev => ({ ...prev, page: 1 })); }} InputProps={{ startAdornment: ( ), }} sx={{ flexGrow: 1, minWidth: '200px' }} /> { setIgnoredFilesSourceTypeFilter(e.target.value); setIgnoredFilesPagination(prev => ({ ...prev, page: 1 })); }} sx={{ minWidth: '150px' }} > All Sources WebDAV Local Folder S3 {/* Bulk Actions */} {selectedIgnoredFiles.size > 0 && ( {selectedIgnoredFiles.size} file{selectedIgnoredFiles.size !== 1 ? 
's' : ''} selected )} {ignoredFilesLoading ? ( ) : ignoredFiles.length === 0 ? ( No ignored files found! You don't have any files in the ignored list. All your files are being processed normally. ) : ( <> 0 && selectedIgnoredFiles.size < ignoredFiles.length} checked={ignoredFiles.length > 0 && selectedIgnoredFiles.size === ignoredFiles.length} onChange={handleIgnoredFilesSelectAll} /> Filename Source Size Ignored Date Reason Actions {ignoredFiles.map((file) => ( handleIgnoredFileSelect(file.id)} /> {file.filename} {file.filename !== file.original_filename && ( Original: {file.original_filename} )} {file.mime_type} {getSourceIcon(file.source_type)} {getSourceTypeDisplay(file.source_type)} {file.source_path && ( {file.source_path} )} {formatFileSize(file.file_size)} {format(new Date(file.ignored_at), 'MMM dd, yyyy')} {format(new Date(file.ignored_at), 'HH:mm')} {file.reason || 'No reason provided'} handleDeleteSingleIgnoredFile(file.id)} color="success" > ))}
    {/* Pagination */} {ignoredFilesTotalPages > 1 && ( setIgnoredFilesPagination(prev => ({ ...prev, page }))} color="primary" /> )} )} )} {/* Confirmation Dialog */} setConfirmDeleteOpen(false)} maxWidth="sm" fullWidth > Confirm Low Confidence Document Deletion Are you sure you want to delete {previewData?.matched_count || 0} documents with OCR confidence below {confidenceThreshold}%? This action cannot be undone. The documents and their files will be permanently deleted. {/* Confirmation Dialog for Failed Documents */} setConfirmDeleteFailedOpen(false)} maxWidth="sm" fullWidth > Confirm Failed Document Deletion Are you sure you want to delete {failedPreviewData?.matched_count || 0} documents with failed OCR processing? This action cannot be undone. The documents and their files will be permanently deleted. {/* Document Details Dialog */} setDetailsOpen(false)} maxWidth="lg" fullWidth > Document Details: {selectedDocument?.filename} {selectedDocument && ( {/* File Preview Section */} File Preview { if (selectedDocument) { navigate(`/documents/${selectedDocument.id}`); } }} sx={{ cursor: 'pointer', border: '2px dashed', borderColor: 'primary.main', borderRadius: 2, p: 1, transition: 'all 0.2s ease-in-out', '&:hover': { borderColor: 'primary.dark', boxShadow: 2, }, }} > Click to open full document details page {/* Document Information Section */} Document Information Original Filename: {selectedDocument.original_filename} File Size: {formatFileSize(selectedDocument.file_size)} MIME Type: {selectedDocument.mime_type} Failure Category: {/* Source Metadata Section */} {selectedDocument.original_created_at && ( <> Original Created: {format(new Date(selectedDocument.original_created_at), 'PPpp')} )} {selectedDocument.original_modified_at && ( <> Original Modified: {format(new Date(selectedDocument.original_modified_at), 'PPpp')} )} {selectedDocument.source_metadata && Object.keys(selectedDocument.source_metadata).length > 0 && ( )} Retry Count: 
{selectedDocument.retry_count} attempts Created: {format(new Date(selectedDocument.created_at), 'PPpp')} Last Updated: {format(new Date(selectedDocument.updated_at), 'PPpp')} Tags: {selectedDocument.tags.length > 0 ? ( selectedDocument.tags.map((tag) => ( )) ) : ( No tags )} {/* Error Details Section */} Error Details Full Error Message: theme.palette.mode === 'dark' ? 'grey.800' : 'grey.50', borderRadius: 1 }}> {selectedDocument.error_message || selectedDocument.ocr_error || 'No error message available'} )} {selectedDocument?.can_retry && ( )} {/* Bulk Delete Ignored Files Confirmation Dialog */} setBulkDeleteIgnoredDialog(false)}> Confirm Bulk Delete Are you sure you want to remove {selectedIgnoredFiles.size} file{selectedIgnoredFiles.size !== 1 ? 's' : ''} from the ignored list? These files will be eligible for syncing again if encountered from their sources. This action allows them to be re-imported during future syncs. {/* Confirm Retry All Documents Dialog */} setConfirmRetryAllOpen(false)}> Retry All Documents This will retry OCR processing for all documents in your library, regardless of their current OCR status. This includes documents that have already been successfully processed. Note: This is a resource-intensive operation that may take a significant amount of time depending on the number of documents. {/* Advanced Retry Modal */} setBulkRetryModalOpen(false)} onSuccess={handleBulkRetrySuccess} selectedDocumentIds={selectedDocumentIds} /> {/* Retry History Modal */} setRetryHistoryModalOpen(false)} documentId={selectedDocumentForHistory || ''} documentName={selectedDocumentForHistory ? documents.find(d => d.id === selectedDocumentForHistory)?.filename : undefined} /> {/* Success/Error Snackbar */} setSnackbar(prev => ({ ...prev, open: false }))} > setSnackbar(prev => ({ ...prev, open: false }))} severity={snackbar.severity} sx={{ width: '100%' }} > {snackbar.message}
    ); }; export default DocumentManagementPage;