diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index 3c5de8f..30b0371 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -18,6 +18,7 @@ import WatchFolderPage from './pages/WatchFolderPage';
import DocumentManagementPage from './pages/DocumentManagementPage';
import LabelsPage from './pages/LabelsPage';
import IgnoredFilesPage from './pages/IgnoredFilesPage';
+import DebugPage from './pages/DebugPage';
function App(): React.ReactElement {
const { user, loading } = useAuth();
@@ -77,6 +78,7 @@ function App(): React.ReactElement {
} />
} />
} />
+ } />
Profile Page - Coming Soon} />
diff --git a/frontend/src/components/Layout/AppLayout.tsx b/frontend/src/components/Layout/AppLayout.tsx
index 1fb91aa..5855e11 100644
--- a/frontend/src/components/Layout/AppLayout.tsx
+++ b/frontend/src/components/Layout/AppLayout.tsx
@@ -37,6 +37,7 @@ import {
Block as BlockIcon,
Api as ApiIcon,
ManageAccounts as ManageIcon,
+ BugReport as BugReportIcon,
} from '@mui/icons-material';
import { useNavigate, useLocation } from 'react-router-dom';
import { useAuth } from '../../contexts/AuthContext';
@@ -72,6 +73,7 @@ const navigationItems: NavigationItem[] = [
{ text: 'Watch Folder', icon: FolderIcon, path: '/watch' },
{ text: 'Document Management', icon: ManageIcon, path: '/documents/management' },
{ text: 'Ignored Files', icon: BlockIcon, path: '/ignored-files' },
+ { text: 'Debug', icon: BugReportIcon, path: '/debug' },
];
const AppLayout: React.FC = ({ children }) => {
diff --git a/frontend/src/pages/DebugPage.tsx b/frontend/src/pages/DebugPage.tsx
new file mode 100644
index 0000000..790205e
--- /dev/null
+++ b/frontend/src/pages/DebugPage.tsx
@@ -0,0 +1,1057 @@
+import React, { useState, useCallback, useEffect } from 'react';
+import {
+ Box,
+ Card,
+ CardContent,
+ Typography,
+ TextField,
+ Button,
+ Grid,
+ Paper,
+ Stepper,
+ Step,
+ StepLabel,
+ StepContent,
+ Alert,
+ Chip,
+ Table,
+ TableBody,
+ TableCell,
+ TableContainer,
+ TableHead,
+ TableRow,
+ Accordion,
+ AccordionSummary,
+ AccordionDetails,
+ CircularProgress,
+ Container,
+ Tabs,
+ Tab,
+ LinearProgress,
+ Divider,
+} from '@mui/material';
+import {
+ ExpandMore as ExpandMoreIcon,
+ BugReport as BugReportIcon,
+ CheckCircle as CheckCircleIcon,
+ Error as ErrorIcon,
+ Warning as WarningIcon,
+ Pending as PendingIcon,
+ PlayArrow as PlayArrowIcon,
+ CloudUpload as UploadIcon,
+ Search as SearchIcon,
+ Refresh as RefreshIcon,
+ Visibility as PreviewIcon,
+} from '@mui/icons-material';
+import { api } from '../services/api';
+
+interface DebugStep {
+ step: number;
+ name: string;
+ status: string;
+ details: any;
+ success: boolean;
+ error?: string;
+}
+
+interface DebugInfo {
+ document_id: string;
+ filename: string;
+ overall_status: string;
+ pipeline_steps: DebugStep[];
+ failed_document_info?: any;
+ user_settings: any;
+ debug_timestamp: string;
+}
+
+const DebugPage: React.FC = () => {
+ const [activeTab, setActiveTab] = useState(0);
+ const [documentId, setDocumentId] = useState('');
+ const [debugInfo, setDebugInfo] = useState(null);
+ const [loading, setLoading] = useState(false);
+ const [error, setError] = useState('');
+
+ // Upload functionality
+ const [selectedFile, setSelectedFile] = useState(null);
+ const [uploading, setUploading] = useState(false);
+ const [uploadProgress, setUploadProgress] = useState(0);
+ const [uploadedDocumentId, setUploadedDocumentId] = useState('');
+ const [monitoringInterval, setMonitoringInterval] = useState(null);
+ const [processingStatus, setProcessingStatus] = useState('');
+
+ const getStepIcon = (status: string, success: boolean) => {
+ if (status === 'processing') return ;
+ if (success || status === 'completed' || status === 'passed') return ;
+ if (status === 'failed' || status === 'error') return ;
+ if (status === 'pending' || status === 'not_reached') return ;
+ if (status === 'not_queued' || status === 'ocr_disabled') return ;
+ return ;
+ };
+
+ const getStatusColor = (status: string, success: boolean): "default" | "primary" | "secondary" | "error" | "info" | "success" | "warning" => {
+ if (status === 'processing') return 'info';
+ if (success || status === 'completed' || status === 'passed') return 'success';
+ if (status === 'failed' || status === 'error') return 'error';
+ if (status === 'pending' || status === 'not_reached') return 'default';
+ if (status === 'not_queued' || status === 'ocr_disabled') return 'warning';
+ return 'primary';
+ };
+
+ const fetchDebugInfo = useCallback(async (docId?: string, retryCount = 0) => {
+ const targetDocId = docId || documentId;
+ if (!targetDocId.trim()) {
+ setError('Please enter a document ID');
+ return;
+ }
+
+ setLoading(true);
+ if (retryCount === 0) {
+ setError(''); // Only clear error on first attempt
+ }
+
+ try {
+ const response = await api.get(`/documents/${targetDocId}/debug`);
+ setDebugInfo(response.data);
+ setError(''); // Clear any previous errors
+ } catch (err: any) {
+ console.error('Debug fetch error:', err);
+
+ // If it's a 404 and we haven't retried much, try again after a short delay
+ if (err.response?.status === 404 && retryCount < 3) {
+ console.log(`Document not found, retrying in ${(retryCount + 1) * 1000}ms... (attempt ${retryCount + 1})`);
+ setTimeout(() => {
+ fetchDebugInfo(docId, retryCount + 1);
+ }, (retryCount + 1) * 1000);
+ return;
+ }
+
+ const errorMessage = err.response?.status === 404
+ ? `Document ${targetDocId} not found. It may still be processing or may have been moved to failed documents.`
+ : err.response?.data?.message || `Failed to fetch debug information: ${err.message}`;
+ setError(errorMessage);
+ setDebugInfo(null);
+ } finally {
+ if (retryCount === 0) {
+ setLoading(false);
+ }
+ }
+ }, [documentId]);
+
+ const handleFileSelect = (event: React.ChangeEvent) => {
+ const file = event.target.files?.[0];
+ if (file) {
+ setSelectedFile(file);
+ setError('');
+ }
+ };
+
+ const uploadDocument = useCallback(async () => {
+ if (!selectedFile) {
+ setError('Please select a file to upload');
+ return;
+ }
+
+ setUploading(true);
+ setUploadProgress(0);
+ setError('');
+ setProcessingStatus('Uploading file...');
+
+ try {
+ const formData = new FormData();
+ formData.append('file', selectedFile);
+
+ const response = await api.post('/documents', formData, {
+ headers: {
+ 'Content-Type': 'multipart/form-data',
+ },
+ onUploadProgress: (progressEvent) => {
+ const progress = progressEvent.total
+ ? Math.round((progressEvent.loaded * 100) / progressEvent.total)
+ : 0;
+ setUploadProgress(progress);
+ },
+ });
+
+ const uploadedDoc = response.data;
+ setUploadedDocumentId(uploadedDoc.id);
+ setDocumentId(uploadedDoc.id);
+ setProcessingStatus('Document uploaded successfully. Starting OCR processing...');
+
+ // Start monitoring the processing
+ startProcessingMonitor(uploadedDoc.id);
+ } catch (err: any) {
+ setError(err.response?.data?.message || 'Failed to upload document');
+ setProcessingStatus('Upload failed');
+ } finally {
+ setUploading(false);
+ setUploadProgress(0);
+ }
+ }, [selectedFile]);
+
+ const startProcessingMonitor = useCallback((docId: string) => {
+ // Clear any existing interval
+ if (monitoringInterval) {
+ clearInterval(monitoringInterval);
+ }
+
+ const interval = setInterval(async () => {
+ try {
+ const response = await api.get(`/documents/${docId}`);
+ const doc = response.data;
+
+ if (doc.ocr_status === 'completed' || doc.ocr_status === 'failed') {
+ setProcessingStatus(`Processing ${doc.ocr_status}!`);
+ clearInterval(interval);
+ setMonitoringInterval(null);
+
+ // Auto-fetch debug info when processing is complete OR failed (but don't switch tabs)
+ setTimeout(() => {
+ fetchDebugInfo(docId);
+ // Don't auto-switch tabs - let user decide when to view debug info
+ }, 2000); // Give it a bit more time to ensure document is saved
+ } else if (doc.ocr_status === 'processing') {
+ setProcessingStatus('OCR processing in progress...');
+ } else if (doc.ocr_status === 'pending') {
+ setProcessingStatus('Document queued for OCR processing...');
+ } else {
+ setProcessingStatus('Checking processing status...');
+ }
+ } catch (err) {
+ console.error('Error monitoring processing:', err);
+ }
+ }, 2000); // Check every 2 seconds
+
+ setMonitoringInterval(interval);
+
+ // Auto-clear monitoring after 5 minutes
+ setTimeout(() => {
+ clearInterval(interval);
+ setMonitoringInterval(null);
+ setProcessingStatus('Monitoring stopped (timeout)');
+ }, 300000);
+ }, [monitoringInterval, fetchDebugInfo]);
+
+ // Cleanup interval on unmount
+ useEffect(() => {
+ return () => {
+ if (monitoringInterval) {
+ clearInterval(monitoringInterval);
+ }
+ };
+ }, [monitoringInterval]);
+
+ const renderStepDetails = (step: DebugStep) => {
+ const details = step.details;
+
+ return (
+
+ {step.error && (
+
+ {step.error}
+
+ )}
+
+ {step.step === 1 && ( // File Upload & Ingestion
+
+
+
+
+ File Information
+ Filename: {details.filename}
+ Original: {details.original_filename}
+ Size: {(details.file_size / 1024 / 1024).toFixed(2)} MB
+ MIME Type: {details.mime_type}
+ File Exists:
+
+
+
+
+ File Metadata
+ {details.file_metadata ? (
+ <>
+ Actual Size: {(details.file_metadata.size / 1024 / 1024).toFixed(2)} MB
+ Is File: {details.file_metadata.is_file ? 'Yes' : 'No'}
+ Modified: {details.file_metadata.modified ? new Date(details.file_metadata.modified.secs_since_epoch * 1000).toLocaleString() : 'Unknown'}
+ >
+ ) : (
+ File metadata not available
+ )}
+ Created: {new Date(details.created_at).toLocaleString()}
+
+
+
+
+ {details.file_analysis && (
+
+ Detailed File Analysis
+
+
+
+ Basic Analysis
+ File Type: {details.file_analysis.file_type}
+ Size: {(details.file_analysis.file_size_bytes / 1024 / 1024).toFixed(2)} MB
+ Readable:
+ {details.file_analysis.error_details && (
+
+ File Error: {details.file_analysis.error_details}
+
+ )}
+
+
+
+ {details.file_analysis.pdf_info ? (
+
+ PDF Analysis
+ Valid PDF:
+ PDF Version: {details.file_analysis.pdf_info.pdf_version || 'Unknown'}
+ Pages: {details.file_analysis.pdf_info.page_count || 'Unknown'}
+ Has Text:
+ Has Images:
+ Encrypted:
+ Font Count: {details.file_analysis.pdf_info.font_count}
+ Text Length: {details.file_analysis.pdf_info.estimated_text_length} chars
+ {details.file_analysis.pdf_info.text_extraction_error && (
+
+ PDF Text Extraction Error: {details.file_analysis.pdf_info.text_extraction_error}
+
+ )}
+
+ ) : details.file_analysis.text_preview ? (
+
+ Text Preview
+
+ {details.file_analysis.text_preview}
+
+
+ ) : (
+
+ File Content
+ No preview available for this file type
+
+ )}
+
+
+
+ )}
+
+ )}
+
+ {step.step === 2 && ( // OCR Queue Enrollment
+
+
+
+
+ Queue Status
+ User OCR Enabled:
+ Queue Entries: {details.queue_entries_count}
+
+
+
+
+ {details.queue_history && details.queue_history.length > 0 && (
+
+ Queue History
+
+
+
+
+ Status
+ Priority
+ Created
+ Started
+ Completed
+ Attempts
+ Worker
+
+
+
+ {details.queue_history.map((entry: any, index: number) => (
+
+
+
+
+ {entry.priority}
+ {new Date(entry.created_at).toLocaleString()}
+ {entry.started_at ? new Date(entry.started_at).toLocaleString() : '-'}
+ {entry.completed_at ? new Date(entry.completed_at).toLocaleString() : '-'}
+ {entry.attempts}
+ {entry.worker_id || '-'}
+
+ ))}
+
+
+
+
+ )}
+
+ )}
+
+ {step.step === 3 && ( // OCR Processing
+
+
+
+ OCR Results
+ Text Length: {details.ocr_text_length} characters
+ Confidence: {details.ocr_confidence ? `${details.ocr_confidence.toFixed(1)}%` : 'N/A'}
+ Word Count: {details.ocr_word_count || 0}
+ Processing Time: {details.ocr_processing_time_ms ? `${details.ocr_processing_time_ms}ms` : 'N/A'}
+ Completed: {details.ocr_completed_at ? new Date(details.ocr_completed_at).toLocaleString() : 'Not completed'}
+
+
+
+
+ Processing Details
+ Has Processed Image:
+ {details.processed_image_info && (
+ <>
+ Image Size: {details.processed_image_info.image_width}x{details.processed_image_info.image_height}
+ File Size: {(details.processed_image_info.file_size / 1024).toFixed(1)} KB
+ Processing Steps: {details.processed_image_info.processing_steps?.join(', ') || 'None'}
+ {details.processed_image_info.processing_parameters && (
+ Processing Parameters: {JSON.stringify(details.processed_image_info.processing_parameters)}
+ )}
+ >
+ )}
+
+
+
+ )}
+
+ {step.step === 4 && ( // Quality Validation
+
+
+
+
+ Quality Thresholds
+ Min Confidence: {details.quality_thresholds.min_confidence}%
+ Brightness: {details.quality_thresholds.brightness_threshold}
+ Contrast: {details.quality_thresholds.contrast_threshold}
+ Noise: {details.quality_thresholds.noise_threshold}
+ Sharpness: {details.quality_thresholds.sharpness_threshold}
+
+
+
+
+ Actual Values
+ Confidence: {details.actual_values.confidence ? `${details.actual_values.confidence.toFixed(1)}%` : 'N/A'}
+ Word Count: {details.actual_values.word_count || 0}
+ Processed Image Available:
+ {details.actual_values.processing_parameters && (
+ Processing Parameters: {JSON.stringify(details.actual_values.processing_parameters)}
+ )}
+
+
+
+
+
+ Quality Checks
+
+ {Object.entries(details.quality_checks).map(([check, passed]: [string, any]) => (
+
+ : passed === false ? : }
+ />
+
+ ))}
+
+
+
+ )}
+
+ );
+ };
+
+ const renderUploadTab = () => (
+
+
+
+
+ Upload Document for Debug Analysis
+
+
+ Upload a PDF or image file to analyze the processing pipeline in real-time.
+
+
+
+
+
+
+ {selectedFile && (
+
+
+ Selected: {selectedFile.name} ({(selectedFile.size / 1024 / 1024).toFixed(2)} MB)
+
+
+ )}
+
+ {selectedFile && (
+ : }
+ sx={{ mt: 2 }}
+ >
+ {uploading ? 'Uploading...' : 'Upload & Debug'}
+
+ )}
+
+
+ {uploading && uploadProgress > 0 && (
+
+
+ Upload Progress: {uploadProgress}%
+
+
+
+ )}
+
+ {processingStatus && (
+
+ {processingStatus}
+ {monitoringInterval && (
+
+
+
+ )}
+
+ )}
+
+ {uploadedDocumentId && (
+
+
+ Document ID: {uploadedDocumentId}
+
+
+
+
+
+
+
+ )}
+
+ {selectedFile && selectedFile.type.startsWith('image/') && (
+
+ Preview
+
+
+ )}
+
+
+
+ );
+
+ const renderSearchTab = () => (
+
+
+
+
+ Debug Existing Document
+
+
+ Enter a document ID to analyze the processing pipeline for an existing document.
+
+
+
+ setDocumentId(e.target.value)}
+ placeholder="e.g., 123e4567-e89b-12d3-a456-426614174000"
+ fullWidth
+ size="small"
+ />
+
+
+
+ {error && (
+
+ {error}
+
+ )}
+
+
+
+ );
+
+ return (
+
+
+
+
+ Document Processing Debug
+
+
+ Upload documents or analyze existing ones to troubleshoot OCR processing issues.
+
+
+
+
+
+ setActiveTab(newValue)}>
+ }
+ iconPosition="start"
+ />
+ }
+ iconPosition="start"
+ />
+ {debugInfo && (
+ }
+ iconPosition="start"
+ />
+ )}
+
+
+
+
+ {activeTab === 0 && renderUploadTab()}
+ {activeTab === 1 && renderSearchTab()}
+
+
+
+ {error && (
+
+ Debug Error
+ {error}
+
+ )}
+
+ {debugInfo && activeTab === 2 && (
+
+
+
+
+ Document: {debugInfo.filename}
+
+
+
+
+ Debug run at: {new Date(debugInfo.debug_timestamp).toLocaleString()}
+
+
+
+
+
+
+
+
+ Processing Pipeline
+
+
+ {debugInfo.pipeline_steps.map((step) => (
+
+
+
+ {step.name}
+
+
+
+
+ {renderStepDetails(step)}
+
+
+ ))}
+
+
+
+
+ {debugInfo.failed_document_info && (
+
+
+
+ Failed Document Information
+
+
+
+
+ Failure Details
+ Failure Reason: {debugInfo.failed_document_info.failure_reason}
+ Failure Stage: {debugInfo.failed_document_info.failure_stage}
+ Retry Count: {debugInfo.failed_document_info.retry_count || 0}
+ Created: {new Date(debugInfo.failed_document_info.created_at).toLocaleString()}
+ {debugInfo.failed_document_info.last_retry_at && (
+ Last Retry: {new Date(debugInfo.failed_document_info.last_retry_at).toLocaleString()}
+ )}
+
+
+
+
+ Failed OCR Results
+ {debugInfo.failed_document_info.failed_ocr_text ? (
+ <>
+ OCR Text Length: {debugInfo.failed_document_info.failed_ocr_text.length} chars
+ OCR Confidence: {debugInfo.failed_document_info.failed_ocr_confidence?.toFixed(1)}%
+ Word Count: {debugInfo.failed_document_info.failed_ocr_word_count || 0}
+ Processing Time: {debugInfo.failed_document_info.failed_ocr_processing_time_ms || 0}ms
+ >
+ ) : (
+ No OCR results available
+ )}
+
+
+ {debugInfo.failed_document_info.error_message && (
+
+
+ Error Message: {debugInfo.failed_document_info.error_message}
+
+
+ )}
+ {debugInfo.failed_document_info.content_preview && (
+
+
+ Content Preview
+
+ {debugInfo.failed_document_info.content_preview}
+
+
+
+ )}
+
+
+
+ )}
+
+ {debugInfo.detailed_processing_logs && debugInfo.detailed_processing_logs.length > 0 && (
+
+
+
+ Detailed Processing Logs
+
+
+ Complete history of all OCR processing attempts for this document.
+
+
+
+
+
+ Attempt
+ Status
+ Priority
+ Created
+ Started
+ Completed
+ Duration
+ Wait Time
+ Attempts
+ Worker
+ Error
+
+
+
+ {debugInfo.detailed_processing_logs.map((log: any, index: number) => (
+
+ {index + 1}
+
+
+
+ {log.priority}
+ {new Date(log.created_at).toLocaleString()}
+ {log.started_at ? new Date(log.started_at).toLocaleString() : '-'}
+ {log.completed_at ? new Date(log.completed_at).toLocaleString() : '-'}
+ {log.processing_duration_ms ? `${log.processing_duration_ms}ms` : '-'}
+ {log.queue_wait_time_ms ? `${log.queue_wait_time_ms}ms` : '-'}
+ {log.attempts || 0}
+ {log.worker_id || '-'}
+
+ {log.error_message ? (
+
+ {log.error_message}
+
+ ) : '-'}
+
+
+ ))}
+
+
+
+
+
+ )}
+
+ {debugInfo.file_analysis && (
+
+
+
+ File Analysis Summary
+
+
+
+
+ File Properties
+ File Type: {debugInfo.file_analysis.file_type}
+ Size: {(debugInfo.file_analysis.file_size_bytes / 1024 / 1024).toFixed(2)} MB
+ Readable:
+
+
+
+ {debugInfo.file_analysis.pdf_info && (
+
+ PDF Properties
+ Valid PDF:
+ Has Text Content:
+ Text Length: {debugInfo.file_analysis.pdf_info.estimated_text_length} chars
+ Page Count: {debugInfo.file_analysis.pdf_info.page_count || 'Unknown'}
+ Encrypted:
+
+ )}
+
+ {debugInfo.file_analysis.pdf_info?.text_extraction_error && (
+
+
+ PDF Text Extraction Issue: {debugInfo.file_analysis.pdf_info.text_extraction_error}
+
+
+ )}
+
+
+
+ )}
+
+ {debugInfo.pipeline_steps.some(step => step.step === 3 && step.details.has_processed_image) && (
+
+
+
+ Processed Images
+
+
+
+
+ Original Document
+
+
+
+
+
+ Processed Image (OCR Input)
+ {
+ (e.target as HTMLImageElement).style.display = 'none';
+ (e.target as HTMLImageElement).parentNode?.appendChild(
+ document.createTextNode('Processed image not available')
+ );
+ }}
+ sx={{
+ maxWidth: '100%',
+ maxHeight: '300px',
+ objectFit: 'contain',
+ border: '1px solid',
+ borderColor: 'divider',
+ borderRadius: 1
+ }}
+ />
+
+
+
+
+
+ )}
+
+
+
+
+ }>
+ User Settings
+
+
+
+
+
+ OCR Settings
+ Background OCR: {debugInfo.user_settings.enable_background_ocr ? 'Enabled' : 'Disabled'}
+ Min Confidence: {debugInfo.user_settings.ocr_min_confidence}%
+ Max File Size: {debugInfo.user_settings.max_file_size_mb} MB
+
+
+
+
+ Quality Thresholds
+ Brightness: {debugInfo.user_settings.quality_thresholds.brightness}
+ Contrast: {debugInfo.user_settings.quality_thresholds.contrast}
+ Noise: {debugInfo.user_settings.quality_thresholds.noise}
+ Sharpness: {debugInfo.user_settings.quality_thresholds.sharpness}
+
+
+
+
+
+
+
+
+ )}
+
+ );
+};
+
+export default DebugPage;
\ No newline at end of file
diff --git a/src/routes/documents.rs b/src/routes/documents.rs
index f8c5a43..cec3549 100644
--- a/src/routes/documents.rs
+++ b/src/routes/documents.rs
@@ -58,6 +58,7 @@ pub fn router() -> Router> {
.route("/{id}/ocr", get(get_document_ocr))
.route("/{id}/processed-image", get(get_processed_image))
.route("/{id}/retry-ocr", post(retry_ocr))
+ .route("/{id}/debug", get(get_document_debug_info))
.route("/duplicates", get(get_user_duplicates))
.route("/failed", get(get_failed_documents))
.route("/failed/{id}/view", get(view_failed_document))
@@ -645,6 +646,560 @@ async fn retry_ocr(
}
}
+#[utoipa::path(
+ get,
+ path = "/api/documents/{id}/debug",
+ tag = "documents",
+ security(
+ ("bearer_auth" = [])
+ ),
+ params(
+ ("id" = uuid::Uuid, Path, description = "Document ID")
+ ),
+ responses(
+ (status = 200, description = "Debug information for document processing pipeline", body = String),
+ (status = 404, description = "Document not found"),
+ (status = 401, description = "Unauthorized")
+ )
+)]
+async fn get_document_debug_info(
+ State(state): State>,
+ auth_user: AuthUser,
+ Path(document_id): Path,
+) -> Result, StatusCode> {
+ tracing::info!("Starting debug analysis for document {} by user {}", document_id, auth_user.user.id);
+
+ // Get the document
+ let document = match state
+ .db
+ .get_document_by_id(document_id, auth_user.user.id, auth_user.user.role)
+ .await
+ {
+ Ok(Some(doc)) => {
+ tracing::info!("Found document: {} ({})", doc.filename, doc.mime_type);
+ doc
+ }
+ Ok(None) => {
+ tracing::warn!("Document {} not found for user {}", document_id, auth_user.user.id);
+ return Err(StatusCode::NOT_FOUND);
+ }
+ Err(e) => {
+ tracing::error!("Database error fetching document {}: {}", document_id, e);
+ return Err(StatusCode::INTERNAL_SERVER_ERROR);
+ }
+ };
+
+ // Get user settings
+ tracing::info!("Fetching user settings for user {}", auth_user.user.id);
+ let settings = match state
+ .db
+ .get_user_settings(auth_user.user.id)
+ .await
+ {
+ Ok(Some(s)) => {
+ tracing::info!("Found user settings: OCR enabled={}, min_confidence={}", s.enable_background_ocr, s.ocr_min_confidence);
+ s
+ }
+ Ok(None) => {
+ tracing::info!("No user settings found, using defaults");
+ crate::models::Settings::default()
+ }
+ Err(e) => {
+ tracing::error!("Error fetching user settings: {}", e);
+ return Err(StatusCode::INTERNAL_SERVER_ERROR);
+ }
+ };
+
+ // Get OCR queue history for this document
+ tracing::info!("Fetching OCR queue history for document {}", document_id);
+ let queue_history = match sqlx::query(
+ r#"
+ SELECT id, status, priority, created_at, started_at, completed_at,
+ error_message, attempts, worker_id
+ FROM ocr_queue
+ WHERE document_id = $1
+ ORDER BY created_at DESC
+ LIMIT 10
+ "#
+ )
+ .bind(document_id)
+ .fetch_all(state.db.get_pool())
+ .await {
+ Ok(history) => {
+ tracing::info!("Queue history query successful, found {} entries", history.len());
+ history
+ },
+ Err(e) => {
+ tracing::error!("Queue history query error: {}", e);
+ return Err(StatusCode::INTERNAL_SERVER_ERROR);
+ }
+ };
+
+ // Get processed image info if it exists
+ tracing::info!("Fetching processed image for document {}", document_id);
+ let processed_image = match state
+ .db
+ .get_processed_image_by_document_id(document_id, auth_user.user.id)
+ .await {
+ Ok(Some(img)) => {
+ tracing::info!("Found processed image for document {}", document_id);
+ Some(img)
+ },
+ Ok(None) => {
+ tracing::info!("No processed image found for document {}", document_id);
+ None
+ },
+ Err(e) => {
+ tracing::warn!("Error fetching processed image for document {}: {}", document_id, e);
+ None
+ }
+ };
+
+ // Get failed document record if it exists
+ tracing::info!("Fetching failed document record for document {}", document_id);
+ let failed_document = match sqlx::query(
+ r#"
+ SELECT failure_reason, failure_stage, error_message, retry_count,
+ last_retry_at, created_at, content, ocr_text, ocr_confidence,
+ ocr_word_count, ocr_processing_time_ms
+ FROM failed_documents
+ WHERE id = $1 OR existing_document_id = $1
+ ORDER BY created_at DESC
+ LIMIT 1
+ "#
+ )
+ .bind(document_id)
+ .fetch_optional(state.db.get_pool())
+ .await {
+ Ok(result) => {
+ tracing::info!("Failed document query successful, found: {}", result.is_some());
+ result
+ },
+ Err(e) => {
+ tracing::error!("Failed document query error: {}", e);
+ return Err(StatusCode::INTERNAL_SERVER_ERROR);
+ }
+ };
+
+ // Get detailed OCR processing logs and attempts
+ tracing::info!("Fetching detailed OCR processing logs for document {}", document_id);
+ let ocr_processing_logs = match sqlx::query(
+ r#"
+ SELECT id, status, priority, created_at, started_at, completed_at,
+ error_message, attempts, worker_id, processing_time_ms, file_size
+ FROM ocr_queue
+ WHERE document_id = $1
+ ORDER BY created_at ASC
+ "#
+ )
+ .bind(document_id)
+ .fetch_all(state.db.get_pool())
+ .await {
+ Ok(logs) => {
+ tracing::info!("OCR processing logs query successful, found {} entries", logs.len());
+ logs
+ },
+ Err(e) => {
+ tracing::error!("OCR processing logs query error: {}", e);
+ return Err(StatusCode::INTERNAL_SERVER_ERROR);
+ }
+ };
+
+ // File service for file info
+ let file_service = FileService::new(state.config.upload_path.clone());
+
+ // Check if file exists
+ let file_exists = tokio::fs::metadata(&document.file_path).await.is_ok();
+ let file_metadata = if file_exists {
+ tokio::fs::metadata(&document.file_path).await.ok()
+ } else {
+ None
+ };
+
+ // Try to analyze file content for additional diagnostic info
+ tracing::info!("Analyzing file content for document {} (exists: {})", document_id, file_exists);
+ let file_analysis = if file_exists {
+ match analyze_file_content(&document.file_path, &document.mime_type).await {
+ Ok(analysis) => {
+ tracing::info!("File analysis successful for document {}", document_id);
+ analysis
+ },
+ Err(e) => {
+ tracing::warn!("Failed to analyze file content for {}: {}", document_id, e);
+ FileAnalysis {
+ error_details: Some(format!("File analysis failed: {}", e)),
+ ..Default::default()
+ }
+ }
+ }
+ } else {
+ tracing::warn!("File does not exist for document {}, skipping analysis", document_id);
+ FileAnalysis::default()
+ };
+
+ // Pipeline steps analysis
+ let mut pipeline_steps = Vec::new();
+
+ // Step 1: File Upload & Ingestion
+ pipeline_steps.push(serde_json::json!({
+ "step": 1,
+ "name": "File Upload & Ingestion",
+ "status": "completed", // Document exists if we got this far
+ "details": {
+ "filename": document.filename,
+ "original_filename": document.original_filename,
+ "file_size": document.file_size,
+ "mime_type": document.mime_type,
+ "file_exists": file_exists,
+ "file_path": document.file_path,
+ "created_at": document.created_at,
+ "file_metadata": file_metadata.as_ref().map(|m| serde_json::json!({
+ "size": m.len(),
+ "modified": m.modified().ok(),
+ "is_file": m.is_file(),
+ "is_dir": m.is_dir()
+ })),
+ "file_analysis": file_analysis
+ },
+ "success": true,
+ "error": None::
+ }));
+
+ // Step 2: OCR Queue Enrollment
+ let queue_enrollment_status = if queue_history.is_empty() {
+ if settings.enable_background_ocr {
+ "not_queued"
+ } else {
+ "ocr_disabled"
+ }
+ } else {
+ "queued"
+ };
+
+ pipeline_steps.push(serde_json::json!({
+ "step": 2,
+ "name": "OCR Queue Enrollment",
+ "status": queue_enrollment_status,
+ "details": {
+ "user_ocr_enabled": settings.enable_background_ocr,
+ "queue_entries_count": queue_history.len(),
+ "queue_history": queue_history.iter().map(|row| serde_json::json!({
+ "id": row.get::("id"),
+ "status": row.get::("status"),
+ "priority": row.get::("priority"),
+ "created_at": row.get::, _>("created_at"),
+ "started_at": row.get::