import React, { useState, useEffect } from 'react';
import { useNavigate } from 'react-router-dom';
import { useTranslation } from 'react-i18next';
import {
  Box,
  Typography,
  Card,
  CardContent,
  Button,
  Chip,
  Alert,
  AlertTitle,
  Table,
  TableBody,
  TableCell,
  TableContainer,
  TableHead,
  TableRow,
  Paper,
  Dialog,
  DialogTitle,
  DialogContent,
  DialogContentText,
  DialogActions,
  Pagination,
  CircularProgress,
  Tooltip,
  IconButton,
  Collapse,
  LinearProgress,
  Snackbar,
  Tabs,
  Tab,
  TextField,
  MenuItem,
  useTheme,
  Divider,
  InputAdornment,
  Checkbox,
} from '@mui/material';
import Grid from '@mui/material/GridLegacy';
import {
  Refresh as RefreshIcon,
  Error as ErrorIcon,
  Info as InfoIcon,
  ExpandMore as ExpandMoreIcon,
  ExpandLess as ExpandLessIcon,
  Schedule as ScheduleIcon,
  Visibility as VisibilityIcon,
  Download as DownloadIcon,
  FileCopy as FileCopyIcon,
  Delete as DeleteIcon,
  FindInPage as FindInPageIcon,
  OpenInNew as OpenInNewIcon,
  Warning as WarningIcon,
  Block as BlockIcon,
  History as HistoryIcon,
} from '@mui/icons-material';
import { format } from 'date-fns';
import { api, documentService, queueService, BulkOcrRetryResponse, ErrorHelper, ErrorCodes } from '../services/api';
import DocumentViewer from '../components/DocumentViewer';
import FailedDocumentViewer from '../components/FailedDocumentViewer';
import MetadataDisplay from '../components/MetadataDisplay';
import { BulkRetryModal } from '../components/BulkRetryModal';
import { RetryRecommendations } from '../components/RetryRecommendations';
import { RetryHistoryModal } from '../components/RetryHistoryModal';

/** One row of the failed-documents table, as rendered by this page. */
interface FailedDocument {
  id: string;
  filename: string;
  original_filename: string;
  file_size: number;
  mime_type: string;
  created_at: string;
  updated_at: string;
  tags: string[];
  ocr_status: string;
  ocr_error: string;
  ocr_failure_reason: string;
  ocr_completed_at?: string;
  retry_count: number;
  last_attempt_at?: string;
  /** When false the per-row retry button is disabled. */
  can_retry: boolean;
  failure_category: string;
  ocr_confidence?: number;
  ocr_word_count?: number;
  failure_reason: string;
  error_message?: string;
  original_created_at?: string;
  original_modified_at?: string;
  source_metadata?: any;
}

// NOTE(review): not referenced anywhere in the visible part of this file.
interface FailureCategory {
  reason: string;
  display_name: string;
  count: number;
}

/** Payload read by `fetchFailedDocuments` from `documentService.getFailedDocuments`. */
interface FailedOcrResponse {
  documents: FailedDocument[];
  pagination: {
    total: number;
    limit: number;
    offset: number;
    total_pages: number;
  };
  statistics: {
    total_failed: number;
    // The page renders this with Object.entries(...) as [reason, count] pairs.
    by_reason: Record<string, number>;
    // NOTE(review): value type assumed to mirror by_reason; not read in the
    // visible code, so confirm against the API.
    by_stage: Record<string, number>;
  };
}

/** Fields match what `handleRetryOcr` reads from the retry response. */
interface RetryResponse {
  success: boolean;
  message: string;
  queue_id?: string;
  estimated_wait_minutes?: number;
}

/** A single file inside a duplicate group. */
interface DuplicateDocument {
  id: string;
  filename: string;
  original_filename: string;
  file_size: number;
  mime_type: string;
  created_at: string;
  user_id: string;
}

/** Files grouped under one `file_hash` on the duplicates tab. */
interface DuplicateGroup {
  file_hash: string;
  duplicate_count: number;
  first_uploaded: string;
  last_uploaded: string;
  documents: DuplicateDocument[];
}

/** Payload read by `fetchDuplicates` from `documentService.getDuplicates`. */
interface DuplicatesResponse {
  duplicates: DuplicateGroup[];
  pagination: {
    total: number;
    limit: number;
    offset: number;
    has_more: boolean;
  };
  statistics: {
    total_duplicate_groups: number;
  };
}

/** One row of the ignored-files table. */
interface IgnoredFile {
  id: string;
  file_hash: string;
  filename: string;
  original_filename: string;
  file_path: string;
  file_size: number;
  mime_type: string;
  source_type?: string;
  source_path?: string;
  source_identifier?: string;
  ignored_at: string;
  ignored_by: string;
  ignored_by_username?: string;
  reason?: string;
  created_at: string;
}

/** Summary stored from `GET /ignored-files/stats`; feeds the tab label and stat cards. */
interface IgnoredFilesStats {
  total_ignored_files: number;
  by_source_type: Array<{
    source_type?: string;
    count: number;
    total_size_bytes: number;
  }>;
  total_size_bytes: number;
  most_recent_ignored_at?: string;
}

/**
 * Document management page. `currentTab` selects one of four tab bodies:
 * 0 = failed documents, 1 = cleanup, 2 = duplicates, 3 = ignored files.
 */
const DocumentManagementPage: React.FC = () => {
const { t } = useTranslation();
const theme = useTheme();
const navigate = useNavigate();
const [currentTab, setCurrentTab] = useState(0);
// Explicit element types: a bare `useState([])` infers `never[]`.
const [documents, setDocuments] = useState<FailedDocument[]>([]);
const [duplicates, setDuplicates] = useState<DuplicateGroup[]>([]);
const [loading, setLoading] = useState(true);
const [duplicatesLoading, setDuplicatesLoading] = useState(false);
const [failedDocumentsFilters, setFailedDocumentsFilters] = useState<{ stage?: string; reason?: string }>({});
const [selectedFailedDocument, setSelectedFailedDocument] = useState(null);
// Id of the document whose single retry is in flight, or null.
const [retrying, setRetrying] = useState<string | null>(null);
const [retryingAll, setRetryingAll] = useState(false);
const [statistics, setStatistics] = useState<FailedOcrResponse['statistics'] | null>(null);
const [duplicateStatistics, setDuplicateStatistics] = useState<DuplicatesResponse['statistics'] | null>(null);
const [pagination, setPagination] = useState({ page: 1, limit: 25 });
const [duplicatesPagination, setDuplicatesPagination] = useState({ page: 1, limit: 25 });
const [totalPages, setTotalPages] = useState(0);
const [duplicatesTotalPages, setDuplicatesTotalPages] = useState(0);
const [selectedDocument, setSelectedDocument] = useState<FailedDocument | null>(null);
const [detailsOpen, setDetailsOpen] = useState(false);
// Ids / hashes of the rows that are currently expanded.
const [expandedRows, setExpandedRows] = useState<Set<string>>(new Set());
const [expandedDuplicateGroups, setExpandedDuplicateGroups] = useState<Set<string>>(new Set());
const [snackbar, setSnackbar] = useState<{ open: boolean; message: string; severity: 'success' | 'error' | 'info' | 'warning' }>({ open: false, message: '', severity: 'success' });

// Low confidence documents state
const [confidenceThreshold, setConfidenceThreshold] = useState(30);
const [lowConfidenceLoading, setLowConfidenceLoading] = useState(false);
const [previewData, setPreviewData] = useState(null);
const [confirmDeleteOpen, setConfirmDeleteOpen] = useState(false);

// Failed documents deletion state
const [failedDocsLoading, setFailedDocsLoading] = useState(false);
const [failedPreviewData, setFailedPreviewData] = useState(null);
const [confirmDeleteFailedOpen, setConfirmDeleteFailedOpen] = useState(false);

// Ignored files state
const [ignoredFiles, setIgnoredFiles] = useState<IgnoredFile[]>([]);
const [ignoredFilesStats, setIgnoredFilesStats] = useState<IgnoredFilesStats | null>(null);
const [ignoredFilesLoading, setIgnoredFilesLoading] = useState(false);
const [ignoredFilesPagination, setIgnoredFilesPagination] = useState({ page: 1, limit: 25 });
const [ignoredFilesTotalPages, setIgnoredFilesTotalPages] = useState(0);
const [ignoredFilesSearchTerm, setIgnoredFilesSearchTerm] = useState('');
const [ignoredFilesSourceTypeFilter, setIgnoredFilesSourceTypeFilter] = useState('');
const [selectedIgnoredFiles, setSelectedIgnoredFiles] = useState<Set<string>>(new Set());
const [bulkDeleteIgnoredDialog, setBulkDeleteIgnoredDialog] = useState(false);
const [deletingIgnoredFiles, setDeletingIgnoredFiles] = useState(false);

// Advanced retry functionality state
const [bulkRetryModalOpen, setBulkRetryModalOpen] = useState(false);
const [retryHistoryModalOpen, setRetryHistoryModalOpen] = useState(false);
const [selectedDocumentForHistory, setSelectedDocumentForHistory] = useState<string | null>(null);
const [selectedDocumentIds, setSelectedDocumentIds] = useState([]);
const [confirmRetryAllOpen, setConfirmRetryAllOpen] = useState(false);

/**
 * Loads the current page of failed documents using the active stage/reason
 * filters, then stores the rows, the statistics and the page count.
 * Failures are logged and surfaced through the snackbar; nothing is rethrown.
 */
const fetchFailedDocuments = async () => {
  try {
    setLoading(true);
    const offset = (pagination.page - 1) * pagination.limit;
    // Use the comprehensive API that supports filtering
    const response = await documentService.getFailedDocuments(
      pagination.limit,
      offset,
      failedDocumentsFilters.stage,
      failedDocumentsFilters.reason
    );
    if (response?.data) {
      setDocuments(response.data.documents || []);
      setStatistics(response.data.statistics || null);
      if (response.data.pagination) {
        setTotalPages(Math.ceil(response.data.pagination.total / pagination.limit));
      }
    }
  } catch (error) {
    console.error('Failed to fetch failed documents:', error);
    const errorInfo = ErrorHelper.formatErrorForDisplay(error, true);
    let errorMessage = t('documentManagement.errors.loadFailedDocuments');
    // Handle specific document management errors
    if (ErrorHelper.isErrorCode(error, ErrorCodes.USER_SESSION_EXPIRED) || ErrorHelper.isErrorCode(error, ErrorCodes.USER_TOKEN_EXPIRED)) {
      errorMessage = t('documentManagement.errors.sessionExpired');
    } else if (ErrorHelper.isErrorCode(error, ErrorCodes.USER_PERMISSION_DENIED)) {
      errorMessage = t('documentManagement.errors.permissionDenied');
    } else if (ErrorHelper.isErrorCode(error, ErrorCodes.DOCUMENT_NOT_FOUND)) {
      errorMessage = t('documentManagement.errors.noFailedDocumentsFound');
    } else if (errorInfo.category === 'network') {
      errorMessage = t('documentManagement.errors.networkError');
    } else if (errorInfo.category === 'server') {
      errorMessage = t('documentManagement.errors.serverError');
    } else {
      errorMessage = errorInfo.message || t('documentManagement.errors.loadFailedDocuments');
    }
    setSnackbar({ open: true, message: errorMessage, severity: 'error' });
  } finally {
    setLoading(false);
  }
};

/**
 * Loads the current page of duplicate groups and stores the groups, their
 * statistics and the page count. Failures are logged and surfaced through
 * the snackbar; nothing is rethrown.
 */
const fetchDuplicates = async () => {
  try {
    setDuplicatesLoading(true);
    const offset = (duplicatesPagination.page - 1) * duplicatesPagination.limit;
    const response = await documentService.getDuplicates(duplicatesPagination.limit, offset);
    if (response?.data) {
      setDuplicates(response.data.duplicates || []);
      setDuplicateStatistics(response.data.statistics || null);
      if (response.data.pagination) {
        setDuplicatesTotalPages(Math.ceil(response.data.pagination.total / duplicatesPagination.limit));
      }
    }
  } catch (error) {
    console.error('Failed to fetch duplicates:', error);
    const errorInfo = ErrorHelper.formatErrorForDisplay(error, true);
    let errorMessage = t('documentManagement.errors.loadDuplicates');
    // Handle specific duplicate fetch errors
    if (ErrorHelper.isErrorCode(error, ErrorCodes.USER_SESSION_EXPIRED) || ErrorHelper.isErrorCode(error, ErrorCodes.USER_TOKEN_EXPIRED)) {
      errorMessage = t('documentManagement.errors.sessionExpired');
    } else if (ErrorHelper.isErrorCode(error, ErrorCodes.USER_PERMISSION_DENIED)) {
      errorMessage = t('documentManagement.errors.permissionDeniedDuplicates');
    } else if (errorInfo.category === 'network') {
      errorMessage = t('documentManagement.errors.networkError');
    } else if (errorInfo.category === 'server') {
      errorMessage = t('documentManagement.errors.serverError');
    } else {
      errorMessage = errorInfo.message || t('documentManagement.errors.loadDuplicates');
    }
    setSnackbar({ open: true, message: errorMessage, severity: 'error' });
  } finally {
    setDuplicatesLoading(false);
  }
};

// Reload failed documents whenever the page or the filters change.
useEffect(() => {
  fetchFailedDocuments();
  // Also fetch ignored files stats for the tab label
  fetchIgnoredFilesStats();
}, [pagination.page, failedDocumentsFilters]);

// Load data for the lazily-fetched tabs. Indices follow the rendered tab
// bodies: 2 = duplicates, 3 = ignored files. The ignored-files branch used to
// test for index 4, which no tab has, so that list never loaded on tab switch.
useEffect(() => {
  if (currentTab === 2) {
    fetchDuplicates();
  } else if (currentTab === 3) {
    fetchIgnoredFiles();
  }
}, [currentTab, duplicatesPagination.page, ignoredFilesPagination.page, ignoredFilesSearchTerm, ignoredFilesSourceTypeFilter]);

/** Maps a machine-readable failure reason to a chip colour name. */
const getFailureReasonColor = (reason: string): "error" | "warning" | "info" | "default" => {
  switch (reason) {
    case 'low_ocr_confidence':
    case 'ocr_timeout':
    case 'ocr_memory_limit':
    case 'pdf_parsing_error':
    case 'file_corrupted':
    case 'access_denied':
    case 'permission_denied':
      return 'error';
    case 'duplicate_content':
    case 'unsupported_format':
    case 'file_too_large':
      return 'warning';
    default:
      return 'default';
  }
};

/**
 * Re-queues OCR for one document. `retrying` holds the document id while the
 * request is in flight; on success the failed-documents list is reloaded.
 */
const handleRetryOcr = async (document: FailedDocument) => {
  try {
    setRetrying(document.id);
    const response = await documentService.retryOcr(document.id);
    if (response.data.success) {
      setSnackbar({
        open: true,
        message: t('documentManagement.retry.queuedSuccess', { filename: document.filename, minutes: response.data.estimated_wait_minutes || t('documentManagement.retry.unknown') }),
        severity: 'success'
      });
      // Refresh the list to update retry counts and status
      await fetchFailedDocuments();
    } else {
      setSnackbar({ open: true, message: response.data.message || t('documentManagement.retry.failed'), severity: 'error' });
    }
  } catch (error) {
    console.error('Failed to retry OCR:', error);
    const errorInfo = ErrorHelper.formatErrorForDisplay(error, true);
    let errorMessage = t('documentManagement.retry.processingFailed');
    // Handle specific OCR retry errors
    if (ErrorHelper.isErrorCode(error, ErrorCodes.DOCUMENT_NOT_FOUND)) {
      errorMessage =
t('documentManagement.errors.documentNotFound');
    } else if (ErrorHelper.isErrorCode(error, ErrorCodes.DOCUMENT_OCR_FAILED)) {
      errorMessage = t('documentManagement.errors.cannotRetry');
    } else if (ErrorHelper.isErrorCode(error, ErrorCodes.USER_SESSION_EXPIRED) || ErrorHelper.isErrorCode(error, ErrorCodes.USER_TOKEN_EXPIRED)) {
      errorMessage = t('documentManagement.errors.sessionExpired');
    } else if (ErrorHelper.isErrorCode(error, ErrorCodes.USER_PERMISSION_DENIED)) {
      errorMessage = t('documentManagement.errors.permissionDeniedRetry');
    } else if (errorInfo.category === 'server') {
      errorMessage = t('documentManagement.errors.serverErrorSupport');
    } else if (errorInfo.category === 'network') {
      errorMessage = t('documentManagement.errors.networkError');
    } else {
      errorMessage = errorInfo.message || t('documentManagement.retry.processingFailed');
    }
    setSnackbar({ open: true, message: errorMessage, severity: 'error' });
  } finally {
    setRetrying(null);
  }
};

/**
 * Sends a bulk OCR retry request with mode 'all' and reports how many
 * documents were queued. `retryingAll` is true while the request is in flight.
 */
const handleRetryAllDocuments = async () => {
  try {
    setRetryingAll(true);
    const response = await documentService.bulkRetryOcr({ mode: 'all', preview_only: false });
    if (response.data.queued_count > 0) {
      setSnackbar({
        open: true,
        message: t('documentManagement.retry.bulkSuccess', { count: response.data.queued_count, minutes: Math.ceil(response.data.estimated_total_time_minutes) }),
        severity: 'success'
      });
      // Refresh the tab that is currently visible (only that one is reloaded).
      await refreshCurrentTab();
    } else {
      setSnackbar({ open: true, message: t('documentManagement.retry.noDocuments'), severity: 'info' });
    }
  } catch (error) {
    console.error('Error retrying all documents:', error);
    setSnackbar({ open: true, message: t('documentManagement.retry.bulkFailed'), severity: 'error' });
  } finally {
    setRetryingAll(false);
  }
};

/**
 * Asks the queue service to requeue failed items and reports the requeued
 * count; reloads the failed-documents list when at least one was requeued.
 */
const handleRetryAllFailed = async () => {
  try {
    setRetryingAll(true);
    const response = await queueService.requeueFailed();
    if (response.data.requeued_count > 0) {
      setSnackbar({
        open: true,
        message: t('documentManagement.retry.requeuedSuccess', { count: response.data.requeued_count }),
        severity: 'success'
      });
      // Refresh the list to update status
      await fetchFailedDocuments();
    } else {
      setSnackbar({ open: true, message: t('documentManagement.retry.noFailedDocuments'), severity: 'info' });
    }
  } catch (error) {
    console.error('Failed to retry all failed OCR:', error);
    setSnackbar({ open: true, message: t('documentManagement.retry.requeuedFailed'), severity: 'error' });
  } finally {
    setRetryingAll(false);
  }
};

// Advanced retry functionality handlers

/** Shows the outcome of an advanced bulk retry and reloads the failed list. */
const handleBulkRetrySuccess = (result: BulkOcrRetryResponse) => {
  setSnackbar({
    open: true,
    message: t('documentManagement.retry.advancedSuccess', { queued: result.queued_count, matched: result.matched_count, minutes: Math.round(result.estimated_total_time_minutes) }),
    severity: 'success'
  });
  fetchFailedDocuments(); // Refresh the list
};

/** Opens the retry-history modal for the given document id. */
const handleShowRetryHistory = (documentId: string) => {
  setSelectedDocumentForHistory(documentId);
  setRetryHistoryModalOpen(true);
};

/**
 * Formats a byte count using 1024-based units with at most two decimals,
 * e.g. 1536 -> "1.5 KB".
 *
 * The unit index is clamped to the unit table, so values of 1 TiB and above
 * no longer render as "… undefined". Zero, negative and non-finite inputs
 * render as "0 B".
 */
const formatFileSize = (bytes: number): string => {
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';
  const k = 1024;
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'];
  const rawIndex = Math.floor(Math.log(bytes) / Math.log(k));
  // Clamp: fractional byte counts give a negative index, huge ones overflow the table.
  const i = Math.min(sizes.length - 1, Math.max(0, rawIndex));
  return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
};

/** Maps a human-readable failure category to a chip colour name. */
const getFailureCategoryColor = (category: string): "error" | "warning" | "info" | "default" => {
  switch (category) {
    case 'PDF Font Issues':
    case 'PDF Corruption':
    case 'PDF Parsing Error':
      return 'warning';
    case 'Timeout':
    case 'Memory Limit':
      return 'error';
    case 'Low OCR Confidence':
      return 'warning';
    case 'Unknown Error':
      return 'info';
    default:
      return 'default';
  }
};

/** Expands or collapses the detail row of one failed document. */
const toggleRowExpansion = (documentId: string) => {
  // Functional update: derive the next set from the latest state, not the closure.
  setExpandedRows(prev => {
    const next = new Set(prev);
    if (!next.delete(documentId)) {
      next.add(documentId);
    }
    return next;
  });
};

/** Selects a document and opens the details dialog for it. */
const showDocumentDetails = (document: FailedDocument) => {
setSelectedDocument(document);
  setDetailsOpen(true);
};

// Ignored Files functions

/**
 * Loads the current page of ignored files, applying the filename search term
 * and source-type filter when set. Failures are logged and surfaced through
 * the snackbar; nothing is rethrown.
 */
const fetchIgnoredFiles = async () => {
  try {
    setIgnoredFilesLoading(true);
    const offset = (ignoredFilesPagination.page - 1) * ignoredFilesPagination.limit;
    const params = new URLSearchParams({
      limit: ignoredFilesPagination.limit.toString(),
      offset: offset.toString(),
    });
    if (ignoredFilesSearchTerm) {
      params.append('filename', ignoredFilesSearchTerm);
    }
    if (ignoredFilesSourceTypeFilter) {
      params.append('source_type', ignoredFilesSourceTypeFilter);
    }
    const response = await api.get(`/ignored-files?${params}`);
    if (response?.data) {
      setIgnoredFiles(response.data.ignored_files || []);
      // Default a missing total to 0 so the page count never becomes NaN.
      setIgnoredFilesTotalPages(Math.ceil((response.data.total ?? 0) / ignoredFilesPagination.limit));
    }
  } catch (error) {
    console.error('Failed to fetch ignored files:', error);
    const errorInfo = ErrorHelper.formatErrorForDisplay(error, true);
    let errorMessage = t('documentManagement.errors.loadIgnoredFiles');
    // Handle specific ignored files errors
    if (ErrorHelper.isErrorCode(error, ErrorCodes.USER_SESSION_EXPIRED) || ErrorHelper.isErrorCode(error, ErrorCodes.USER_TOKEN_EXPIRED)) {
      errorMessage = t('documentManagement.errors.sessionExpired');
    } else if (ErrorHelper.isErrorCode(error, ErrorCodes.USER_PERMISSION_DENIED)) {
      errorMessage = t('documentManagement.errors.permissionDeniedIgnored');
    } else if (errorInfo.category === 'network') {
      errorMessage = t('documentManagement.errors.networkError');
    } else if (errorInfo.category === 'server') {
      errorMessage = t('documentManagement.errors.serverError');
    } else {
      errorMessage = errorInfo.message || t('documentManagement.errors.loadIgnoredFiles');
    }
    setSnackbar({ open: true, message: errorMessage, severity: 'error' });
  } finally {
    setIgnoredFilesLoading(false);
  }
};

/** Loads the ignored-files summary; failures are only logged (no snackbar). */
const fetchIgnoredFilesStats = async () => {
  try {
    const response = await api.get('/ignored-files/stats');
    if (response?.data) {
      setIgnoredFilesStats(response.data);
    }
  } catch (error) {
    console.error('Failed to fetch ignored files stats:', error);
  }
};

/** Toggles one ignored file in the bulk-selection set. */
const handleIgnoredFileSelect = (fileId: string) => {
  // Functional update: derive the next set from the latest state, not the closure.
  setSelectedIgnoredFiles(prev => {
    const next = new Set(prev);
    if (!next.delete(fileId)) {
      next.add(fileId);
    }
    return next;
  });
};

/** Selects every file on the current page, or clears the selection if all are already selected. */
const handleIgnoredFilesSelectAll = () => {
  if (selectedIgnoredFiles.size === ignoredFiles.length) {
    setSelectedIgnoredFiles(new Set());
  } else {
    setSelectedIgnoredFiles(new Set(ignoredFiles.map(file => file.id)));
  }
};

/**
 * Removes all selected entries from the ignored list in one request, then
 * clears the selection, closes the confirmation dialog and reloads list and stats.
 */
const handleDeleteSelectedIgnoredFiles = async () => {
  if (selectedIgnoredFiles.size === 0) return;
  setDeletingIgnoredFiles(true);
  try {
    const response = await api.delete('/ignored-files/bulk-delete', {
      data: { ignored_file_ids: Array.from(selectedIgnoredFiles) }
    });
    setSnackbar({ open: true, message: response.data.message || t('documentManagement.ignoredFiles.removedSuccess'), severity: 'success' });
    setSelectedIgnoredFiles(new Set());
    setBulkDeleteIgnoredDialog(false);
    fetchIgnoredFiles();
    fetchIgnoredFilesStats();
  } catch (error: any) {
    setSnackbar({ open: true, message: error.response?.data?.message || t('documentManagement.ignoredFiles.deleteFailed'), severity: 'error' });
  } finally {
    setDeletingIgnoredFiles(false);
  }
};

/** Removes one entry from the ignored list and reloads list and stats. */
const handleDeleteSingleIgnoredFile = async (fileId: string) => {
  try {
    const response = await api.delete(`/ignored-files/${fileId}`);
    setSnackbar({ open: true, message: response.data.message || t('documentManagement.ignoredFiles.fileRemovedSuccess'), severity: 'success' });
    // Drop the removed id from the bulk selection; otherwise the selected
    // count goes stale and a later bulk delete would send an id that is gone.
    setSelectedIgnoredFiles(prev => {
      if (!prev.has(fileId)) return prev;
      const next = new Set(prev);
      next.delete(fileId);
      return next;
    });
    fetchIgnoredFiles();
    fetchIgnoredFilesStats();
  } catch (error: any) {
    setSnackbar({ open: true, message: error.response?.data?.message || t('documentManagement.ignoredFiles.fileDeleteFailed'), severity: 'error' });
  }
};

// NOTE(review): every branch below returns nothing, yet the ignored-files
// table renders this function's result next to the source name. The icon
// elements appear to be missing; confirm and restore them.
const getSourceIcon = (sourceType?: string) => {
  switch (sourceType) {
    case 'webdav': return ;
    case 'local_folder': return ;
    case 's3': return ;
    default: return ;
  }
};

/** Human-readable label for a source type; unrecognised values are shown as-is, missing ones as 'Unknown'. */
const getSourceTypeDisplay = (sourceType?: string) => {
  switch (sourceType) {
    case 'webdav':
      return 'WebDAV';
    case 'local_folder':
      return 'Local Folder';
    case 's3':
      return 'S3';
    default:
      return sourceType || 'Unknown';
  }
};

/** Expands or collapses one duplicate group, keyed by its content hash. */
const toggleDuplicateGroupExpansion = (groupHash: string) => {
  // Functional update: derive the next set from the latest state, not the closure.
  setExpandedDuplicateGroups(prev => {
    const next = new Set(prev);
    if (!next.delete(groupHash)) {
      next.add(groupHash);
    }
    return next;
  });
};

/** Tabs change handler; the event argument is unused. */
const handleTabChange = (event: React.SyntheticEvent, newValue: number) => {
  setCurrentTab(newValue);
};

/** Re-runs the loaders of the visible tab. The loaders are started but not awaited. */
const refreshCurrentTab = () => {
  if (currentTab === 0) {
    fetchFailedDocuments();
  } else if (currentTab === 1) {
    // Refresh both low confidence and failed documents for the merged cleanup tab
    handlePreviewLowConfidence();
    handlePreviewFailedDocuments();
  } else if (currentTab === 2) {
    fetchDuplicates();
  } else if (currentTab === 3) {
    fetchIgnoredFiles();
    fetchIgnoredFilesStats();
  }
};

// Low confidence document handlers

/**
 * Requests a preview for the current confidence threshold and stores it in
 * `previewData`. The second argument to deleteLowConfidence is presumably a
 * preview/dry-run flag; confirm against documentService.
 */
const handlePreviewLowConfidence = async () => {
  try {
    setLowConfidenceLoading(true);
    const response = await documentService.deleteLowConfidence(confidenceThreshold, true);
    setPreviewData(response.data);
    setSnackbar({ open: true, message: response.data.message, severity: 'info' });
  } catch (error) {
    setSnackbar({ open: true, message: t('documentManagement.cleanup.previewFailed'), severity: 'error' });
  } finally {
    setLowConfidenceLoading(false);
  }
};

/**
 * Deletes the documents below the confidence threshold. Refuses to run
 * without a preview that matched at least one document.
 */
const handleDeleteLowConfidence = async () => {
  if (!previewData || previewData.matched_count === 0) {
    setSnackbar({ open: true, message: t('documentManagement.cleanup.noDocuments'), severity: 'warning' });
    return;
  }
  try {
    setLowConfidenceLoading(true);
    const response = await documentService.deleteLowConfidence(confidenceThreshold, false);
    setSnackbar({ open: true, message: response.data.message, severity: 'success' });
    setPreviewData(null);
    setConfirmDeleteOpen(false);
    // Refresh other tabs if they have data affected
    if (currentTab === 0) {
      fetchFailedDocuments();
    }
  } catch (error) {
    setSnackbar({ open: true,
message: t('documentManagement.cleanup.deleteFailed'), severity: 'error' });
  } finally {
    setLowConfidenceLoading(false);
  }
};

// Failed documents handlers

// Requests a preview from deleteFailedOcr and stores the response in
// `failedPreviewData`. Unlike the low-confidence preview, no snackbar is shown
// on success. NOTE(review): the boolean argument is presumably a
// preview/dry-run flag; confirm against documentService.
const handlePreviewFailedDocuments = async () => {
  try {
    setFailedDocsLoading(true);
    const response = await documentService.deleteFailedOcr(true);
    setFailedPreviewData(response.data);
  } catch (error) {
    setSnackbar({ open: true, message: t('documentManagement.cleanup.previewFailedDocs'), severity: 'error' });
  } finally {
    setFailedDocsLoading(false);
  }
};

// Calls deleteFailedOcr(false), shows the server's message, clears the stored
// preview and closes the confirmation dialog. There is no guard requiring a
// prior preview here (the low-confidence delete handler has one).
const handleDeleteFailedDocuments = async () => {
  try {
    setFailedDocsLoading(true);
    const response = await documentService.deleteFailedOcr(false);
    setSnackbar({ open: true, message: response.data.message, severity: 'success' });
    setFailedPreviewData(null);
    setConfirmDeleteFailedOpen(false);
    // Refresh failed OCR tab if currently viewing it
    if (currentTab === 0) {
      fetchFailedDocuments();
    }
  } catch (error) {
    setSnackbar({ open: true, message: t('documentManagement.cleanup.deleteFailedDocs'), severity: 'error' });
  } finally {
    setFailedDocsLoading(false);
  }
};

// Early return taken only while `loading` is true and no failed documents are
// held in state yet; the markup it returns is not visible in this copy of the file.
if (loading && (!documents || documents.length === 0)) { return ( ); } return ( {t('documentManagement.title')} } label={t('documentManagement.tabs.failedDocuments', { count: statistics ? statistics.total_failed : 0, showCount: statistics ? true : false })} iconPosition="start" /> } label={t('documentManagement.tabs.cleanup', { count: (previewData?.matched_count || 0) + (failedPreviewData?.matched_count || 0), showCount: (previewData?.matched_count || 0) + (failedPreviewData?.matched_count || 0) > 0 })} iconPosition="start" /> } label={t('documentManagement.tabs.duplicates', { count: duplicateStatistics ? duplicateStatistics.total_duplicate_groups : 0, showCount: duplicateStatistics ? true : false })} iconPosition="start" /> } label={t('documentManagement.tabs.ignoredFiles', { count: ignoredFilesStats ? ignoredFilesStats.total_ignored_files : 0, showCount: ignoredFilesStats ? 
true : false })} iconPosition="start" /> {/* Failed OCR Tab Content */} {currentTab === 0 && ( <> {/* Statistics Overview */} {statistics && ( {t('documentManagement.stats.totalFailed')} {statistics.total_failed} {t('documentManagement.stats.failureCategories')} {statistics?.by_reason ? Object.entries(statistics.by_reason).map(([reason, count]) => ( )) : ( {t('documentManagement.stats.noFailureData')} )} )} {/* Advanced Retry Components */} {t('documentManagement.advancedRetry.title')} {t('documentManagement.advancedRetry.description')} {/* Filter Controls */} {t('documentManagement.filters.title')} setFailedDocumentsFilters(prev => ({ ...prev, stage: e.target.value || undefined }))} fullWidth > {t('documentManagement.filters.allStages')} {t('documentManagement.filters.stages.ocr')} {t('documentManagement.filters.stages.ingestion')} {t('documentManagement.filters.stages.validation')} {t('documentManagement.filters.stages.storage')} {t('documentManagement.filters.stages.processing')} {t('documentManagement.filters.stages.sync')} setFailedDocumentsFilters(prev => ({ ...prev, reason: e.target.value || undefined }))} fullWidth > {t('documentManagement.filters.allReasons')} {t('documentManagement.filters.reasons.duplicateContent')} {t('documentManagement.filters.reasons.lowConfidence')} {t('documentManagement.filters.reasons.unsupportedFormat')} {t('documentManagement.filters.reasons.fileTooLarge')} {t('documentManagement.filters.reasons.fileCorrupted')} {t('documentManagement.filters.reasons.ocrTimeout')} {t('documentManagement.filters.reasons.pdfParsingError')} {t('documentManagement.filters.reasons.other')} {(!documents || documents.length === 0) ? 
( {t('documentManagement.alerts.noFailedTitle')} {t('documentManagement.alerts.noFailedMessage')} ) : ( <> {t('documentManagement.alerts.overviewTitle')} {t('documentManagement.alerts.overviewMessage')} {t('documentManagement.table.document')} {t('documentManagement.table.failureType')} {t('documentManagement.table.retryCount')} {t('documentManagement.table.lastFailed')} {t('documentManagement.table.actions')} {(documents || []).map((document) => ( toggleRowExpansion(document.id)} > {expandedRows.has(document.id) ? : } {document.filename} {formatFileSize(document.file_size)} • {document.mime_type} {t('documentManagement.table.attempts', { count: document.retry_count })} {document.updated_at ? format(new Date(document.updated_at), 'MMM dd, yyyy HH:mm') : t('documentManagement.table.unknown')} handleRetryOcr(document)} disabled={retrying === document.id || !document.can_retry} > {retrying === document.id ? ( ) : ( )} showDocumentDetails(document)} > handleShowRetryHistory(document.id)} > { try { await documentService.downloadFile(document.id, document.original_filename || document.filename); } catch (error) { console.error('Download failed:', error); } }} > theme.palette.mode === 'dark' ? 
'grey.900' : 'grey.50', borderRadius: 1 }}> {t('documentManagement.details.errorDetails')} {t('documentManagement.details.failureReason')}: {document.failure_reason || document.ocr_failure_reason || t('documentManagement.details.notSpecified')} {/* Show OCR confidence and word count for low confidence failures */} {(document.failure_reason === 'low_ocr_confidence' || document.ocr_failure_reason === 'low_ocr_confidence') && ( <> {t('documentManagement.details.ocrResults')}: {document.ocr_confidence !== undefined && document.ocr_confidence !== null && ( } label={t('documentManagement.details.confidencePercent', { percent: document.ocr_confidence.toFixed(1) })} color="warning" variant="outlined" /> )} {document.ocr_word_count !== undefined && document.ocr_word_count !== null && ( } label={t('documentManagement.details.wordsFound', { count: document.ocr_word_count })} color="info" variant="outlined" /> )} )} {t('documentManagement.details.errorMessage')}: theme.palette.mode === 'dark' ? 'grey.800' : 'grey.100', p: 1, borderRadius: 1, fontSize: '0.75rem', wordBreak: 'break-word' }} > {document.error_message || document.ocr_error || t('documentManagement.details.noErrorMessage')} {t('documentManagement.details.lastAttempt')}: {document.last_attempt_at ? format(new Date(document.last_attempt_at), 'PPpp') : t('documentManagement.details.noPreviousAttempts')} {t('documentManagement.details.fileCreated')}: {format(new Date(document.created_at), 'PPpp')} ))}
{/* Pagination */} {totalPages > 1 && ( setPagination(prev => ({ ...prev, page }))} color="primary" /> )} )} )} {/* Duplicate Files Tab Content */} {currentTab === 2 && ( <> {/* Duplicate Statistics Overview */} {duplicateStatistics && ( Total Duplicate Groups {duplicateStatistics.total_duplicate_groups} )} {duplicatesLoading ? ( ) : duplicates.length === 0 ? ( No duplicates found! You don't have any duplicate documents. All your files have unique content. ) : ( <> Duplicate Documents Found These documents have identical content but may have different filenames. You can expand each group to see all files with the same content and choose which ones to keep. What should you do?
  • Review each group: Click to expand and see all duplicate files
  • Keep the best version: Choose the file with the most descriptive name
  • Check content: Use View/Download to verify files are truly identical
  • Note for admin: Consider implementing bulk delete functionality for duplicates
  • Content Hash Duplicate Count First Uploaded Last Uploaded Actions {duplicates.map((group) => ( toggleDuplicateGroupExpansion(group.file_hash)} > {expandedDuplicateGroups.has(group.file_hash) ? : } {group.file_hash.substring(0, 16)}... {format(new Date(group.first_uploaded), 'MMM dd, yyyy')} {format(new Date(group.last_uploaded), 'MMM dd, yyyy')} View files below Duplicate Files ({group.duplicate_count} total) Storage Impact: These {group.duplicate_count} files contain identical content. Consider keeping only the best-named version to save space. {group.documents.map((doc, index) => ( {doc.filename} {index === 0 && ( )} {doc.original_filename !== doc.filename && ( Original: {doc.original_filename} )} {formatFileSize(doc.file_size)} • {doc.mime_type} Uploaded: {format(new Date(doc.created_at), 'MMM dd, yyyy HH:mm')} window.open(`/api/documents/${doc.id}/view`, '_blank')} sx={{ color: theme.palette.primary.main }} > { try { await documentService.downloadFile(doc.id, doc.original_filename || doc.filename); } catch (error) { console.error('Download failed:', error); } }} sx={{ color: theme.palette.secondary.main }} > ))} ))}
    {/* Duplicates Pagination */} {duplicatesTotalPages > 1 && ( setDuplicatesPagination(prev => ({ ...prev, page }))} color="primary" /> )} )} )} {/* Document Cleanup Tab Content - Merged Low Quality Manager and Bulk Cleanup */} {currentTab === 1 && ( <> Document Cleanup Center Clean up your document library by removing problematic documents. You can delete:
  • Documents with low OCR confidence scores (below a threshold you set)
  • Documents where OCR processing failed completely
  • Always use the preview feature before deleting to see which documents will be affected.
    {/* Low Confidence Documents Section */} Low Confidence Documents setConfidenceThreshold(Math.max(0, Math.min(100, Number(e.target.value))))} fullWidth inputProps={{ min: 0, max: 100, step: 1 }} helperText="Documents with confidence below this value will be deleted" /> {/* Preview Results */} {previewData && ( Preview Results 0 ? 'warning.main' : 'success.main'}> {previewData.message} {previewData.matched_count > 0 && previewData.documents && ( Documents that would be deleted: Filename Size OCR Confidence Status Date {previewData.documents.slice(0, 20).map((doc: any) => ( {doc.original_filename || doc.filename} {formatFileSize(doc.file_size)} {doc.ocr_confidence !== undefined && doc.ocr_confidence !== null ? `${doc.ocr_confidence.toFixed(1)}%` : 'N/A'} {new Date(doc.created_at).toLocaleDateString()} ))}
    {previewData.documents.length > 20 && ( ... and {previewData.documents.length - 20} more documents )}
    )}
    )} {/* Loading State */} {lowConfidenceLoading && !previewData && ( Processing request... )} {/* Divider between sections */} {/* Failed Documents Section */} Failed OCR Documents Delete Failed OCR Documents This section allows you to delete all documents where OCR processing failed completely. This includes documents with NULL confidence values or explicit failure status. {/* Preview Results for Failed Documents */} {failedPreviewData && ( Preview Results 0 ? 'error.main' : 'success.main'}> {failedPreviewData.message} {failedPreviewData.matched_count > 0 && ( Document IDs that would be deleted: {failedPreviewData.document_ids.slice(0, 10).join(', ')} {failedPreviewData.document_ids.length > 10 && ` ... and ${failedPreviewData.document_ids.length - 10} more`} )} )} {/* Loading State for Failed Documents */} {failedDocsLoading && !failedPreviewData && ( Processing request... )} )} {/* Ignored Files Tab Content */} {currentTab === 3 && ( <> Ignored Files Management Files that have been marked as ignored during sync operations from various sources. You can remove files from the ignored list to allow them to be synced again. {/* Statistics Cards */} {ignoredFilesStats && ( Total Ignored {ignoredFilesStats.total_ignored_files} Total Size {formatFileSize(ignoredFilesStats.total_size_bytes)} {ignoredFilesStats.most_recent_ignored_at && ( Most Recent {format(new Date(ignoredFilesStats.most_recent_ignored_at), 'MMM dd, yyyy')} )} )} {/* Filters and Search */} { setIgnoredFilesSearchTerm(e.target.value); setIgnoredFilesPagination(prev => ({ ...prev, page: 1 })); }} InputProps={{ startAdornment: ( ), }} sx={{ flexGrow: 1, minWidth: '200px' }} /> { setIgnoredFilesSourceTypeFilter(e.target.value); setIgnoredFilesPagination(prev => ({ ...prev, page: 1 })); }} sx={{ minWidth: '150px' }} > All Sources WebDAV Local Folder S3 {/* Bulk Actions */} {selectedIgnoredFiles.size > 0 && ( {selectedIgnoredFiles.size} file{selectedIgnoredFiles.size !== 1 ? 
's' : ''} selected )} {ignoredFilesLoading ? ( ) : ignoredFiles.length === 0 ? ( No ignored files found! You don't have any files in the ignored list. All your files are being processed normally. ) : ( <> 0 && selectedIgnoredFiles.size < ignoredFiles.length} checked={ignoredFiles.length > 0 && selectedIgnoredFiles.size === ignoredFiles.length} onChange={handleIgnoredFilesSelectAll} /> Filename Source Size Ignored Date Reason Actions {ignoredFiles.map((file) => ( handleIgnoredFileSelect(file.id)} /> {file.filename} {file.filename !== file.original_filename && ( Original: {file.original_filename} )} {file.mime_type} {getSourceIcon(file.source_type)} {getSourceTypeDisplay(file.source_type)} {file.source_path && ( {file.source_path} )} {formatFileSize(file.file_size)} {format(new Date(file.ignored_at), 'MMM dd, yyyy')} {format(new Date(file.ignored_at), 'HH:mm')} {file.reason || 'No reason provided'} handleDeleteSingleIgnoredFile(file.id)} color="success" > ))}
    {/* Pagination */} {ignoredFilesTotalPages > 1 && ( setIgnoredFilesPagination(prev => ({ ...prev, page }))} color="primary" /> )} )} )} {/* Confirmation Dialog */} setConfirmDeleteOpen(false)} maxWidth="sm" fullWidth > Confirm Low Confidence Document Deletion Are you sure you want to delete {previewData?.matched_count || 0} documents with OCR confidence below {confidenceThreshold}%? This action cannot be undone. The documents and their files will be permanently deleted. {/* Confirmation Dialog for Failed Documents */} setConfirmDeleteFailedOpen(false)} maxWidth="sm" fullWidth > Confirm Failed Document Deletion Are you sure you want to delete {failedPreviewData?.matched_count || 0} documents with failed OCR processing? This action cannot be undone. The documents and their files will be permanently deleted. {/* Document Details Dialog */} setDetailsOpen(false)} maxWidth="lg" fullWidth > Document Details: {selectedDocument?.filename} {selectedDocument && ( {/* File Preview Section */} File Preview { if (selectedDocument) { navigate(`/documents/${selectedDocument.id}`); } }} sx={{ cursor: 'pointer', border: '2px dashed', borderColor: 'primary.main', borderRadius: 2, p: 1, transition: 'all 0.2s ease-in-out', '&:hover': { borderColor: 'primary.dark', boxShadow: 2, }, }} > Click to open full document details page {/* Document Information Section */} Document Information Original Filename: {selectedDocument.original_filename} File Size: {formatFileSize(selectedDocument.file_size)} MIME Type: {selectedDocument.mime_type} Failure Category: {/* Source Metadata Section */} {selectedDocument.original_created_at && ( <> Original Created: {format(new Date(selectedDocument.original_created_at), 'PPpp')} )} {selectedDocument.original_modified_at && ( <> Original Modified: {format(new Date(selectedDocument.original_modified_at), 'PPpp')} )} {selectedDocument.source_metadata && Object.keys(selectedDocument.source_metadata).length > 0 && ( )} Retry Count: 
{selectedDocument.retry_count} attempts Created: {format(new Date(selectedDocument.created_at), 'PPpp')} Last Updated: {format(new Date(selectedDocument.updated_at), 'PPpp')} Tags: {selectedDocument.tags.length > 0 ? ( selectedDocument.tags.map((tag) => ( )) ) : ( No tags )} {/* Error Details Section */} Error Details Full Error Message: theme.palette.mode === 'dark' ? 'grey.800' : 'grey.50', borderRadius: 1 }}> {selectedDocument.error_message || selectedDocument.ocr_error || 'No error message available'} )} {selectedDocument?.can_retry && ( )} {/* Bulk Delete Ignored Files Confirmation Dialog */} setBulkDeleteIgnoredDialog(false)}> Confirm Bulk Delete Are you sure you want to remove {selectedIgnoredFiles.size} file{selectedIgnoredFiles.size !== 1 ? 's' : ''} from the ignored list? These files will be eligible for syncing again if encountered from their sources. This action allows them to be re-imported during future syncs. {/* Confirm Retry All Documents Dialog */} setConfirmRetryAllOpen(false)}> Retry All Documents This will retry OCR processing for all documents in your library, regardless of their current OCR status. This includes documents that have already been successfully processed. Note: This is a resource-intensive operation that may take a significant amount of time depending on the number of documents. {/* Advanced Retry Modal */} setBulkRetryModalOpen(false)} onSuccess={handleBulkRetrySuccess} selectedDocumentIds={selectedDocumentIds} /> {/* Retry History Modal */} setRetryHistoryModalOpen(false)} documentId={selectedDocumentForHistory || ''} documentName={selectedDocumentForHistory ? documents.find(d => d.id === selectedDocumentForHistory)?.filename : undefined} /> {/* Success/Error Snackbar */} setSnackbar(prev => ({ ...prev, open: false }))} > setSnackbar(prev => ({ ...prev, open: false }))} severity={snackbar.severity} sx={{ width: '100%' }} > {snackbar.message}
    ); }; export default DocumentManagementPage;