Merge pull request #89 from readur/feat/setup-debug-functionality

feat(debug): set up debug functionality
This commit is contained in:
Jon Fuller 2025-07-02 15:06:16 -07:00 committed by GitHub
commit 9034bf5d70
26 changed files with 624 additions and 76 deletions

View File

@ -19,6 +19,7 @@ import {
Dialog,
DialogTitle,
DialogContent,
DialogContentText,
DialogActions,
Pagination,
CircularProgress,
@ -233,6 +234,7 @@ const DocumentManagementPage: React.FC = () => {
const [retryHistoryModalOpen, setRetryHistoryModalOpen] = useState(false);
const [selectedDocumentForHistory, setSelectedDocumentForHistory] = useState<string | null>(null);
const [selectedDocumentIds, setSelectedDocumentIds] = useState<string[]>([]);
const [confirmRetryAllOpen, setConfirmRetryAllOpen] = useState(false);
const fetchFailedDocuments = async () => {
try {
@ -358,6 +360,42 @@ const DocumentManagementPage: React.FC = () => {
}
};
// Requests an OCR retry for every document through the bulk retry endpoint
// (mode 'all', not a preview) and reports the outcome in the snackbar.
// The `retryingAll` flag is raised for the duration of the call and is always
// lowered again in `finally`, whether the request succeeded or threw.
const handleRetryAllDocuments = async () => {
  try {
    setRetryingAll(true);

    const { data: retrySummary } = await documentService.bulkRetryOcr({
      mode: 'all',
      preview_only: false
    });
    const queuedCount = retrySummary.queued_count;

    // Nothing was queued: tell the user and stop here (`finally` still runs).
    if (!(queuedCount > 0)) {
      setSnackbar({
        open: true,
        message: 'No documents found to retry',
        severity: 'info'
      });
      return;
    }

    // Round the estimate up so the message never shows a fractional minute.
    const estimatedMinutes = Math.ceil(retrySummary.estimated_total_time_minutes);
    setSnackbar({
      open: true,
      message: `Successfully queued ${queuedCount} documents for OCR retry. Estimated processing time: ${estimatedMinutes} minutes.`,
      severity: 'success'
    });

    // Reload the currently visible tab so it reflects the newly queued work.
    await refreshCurrentTab();
  } catch (error) {
    // Any failure above (request or refresh) ends up here.
    console.error('Error retrying all documents:', error);
    setSnackbar({
      open: true,
      message: 'Failed to retry documents. Please try again.',
      severity: 'error'
    });
  } finally {
    setRetryingAll(false);
  }
};
const handleRetryAllFailed = async () => {
try {
setRetryingAll(true);
@ -735,14 +773,33 @@ const DocumentManagementPage: React.FC = () => {
<Typography variant="h4" component="h1">
Document Management
</Typography>
<Button
variant="outlined"
startIcon={<RefreshIcon />}
onClick={refreshCurrentTab}
disabled={loading || duplicatesLoading || retryingAll}
>
Refresh
</Button>
<Box display="flex" gap={2}>
<Button
variant="contained"
color="primary"
size="large"
startIcon={retryingAll ? <CircularProgress size={20} color="inherit" /> : <RefreshIcon />}
onClick={() => setConfirmRetryAllOpen(true)}
disabled={retryingAll}
sx={{
minWidth: 200,
boxShadow: 3,
'&:hover': {
boxShadow: 6,
}
}}
>
{retryingAll ? 'Retrying All...' : 'Retry All Documents'}
</Button>
<Button
variant="outlined"
startIcon={<RefreshIcon />}
onClick={refreshCurrentTab}
disabled={loading || duplicatesLoading || retryingAll}
>
Refresh
</Button>
</Box>
</Box>
<Paper sx={{ mb: 3, borderRadius: 2, overflow: 'hidden' }}>
@ -825,7 +882,7 @@ const DocumentManagementPage: React.FC = () => {
size="small"
fullWidth
>
{retryingAll ? 'Retrying All...' : 'Retry All Failed OCR'}
{retryingAll ? 'Retrying...' : 'Retry Failed Only'}
</Button>
</Box>
</CardContent>
@ -2219,6 +2276,43 @@ const DocumentManagementPage: React.FC = () => {
</DialogActions>
</Dialog>
{/* Confirm Retry All Documents Dialog */}
<Dialog open={confirmRetryAllOpen} onClose={() => setConfirmRetryAllOpen(false)}>
<DialogTitle>
<Box display="flex" alignItems="center">
<RefreshIcon sx={{ mr: 1, color: 'primary.main' }} />
Retry All Documents
</Box>
</DialogTitle>
<DialogContent>
<DialogContentText>
This will retry OCR processing for <strong>all documents</strong> in your library, regardless of their current OCR status.
This includes documents that have already been successfully processed.
</DialogContentText>
<Box sx={{ mt: 2, p: 2, bgcolor: 'warning.light', borderRadius: 1 }}>
<Typography variant="body2" color="warning.dark">
<strong>Note:</strong> This is a resource-intensive operation that may take a significant amount of time depending on the number of documents.
</Typography>
</Box>
</DialogContent>
<DialogActions>
<Button onClick={() => setConfirmRetryAllOpen(false)}>
Cancel
</Button>
<Button
onClick={() => {
setConfirmRetryAllOpen(false);
handleRetryAllDocuments();
}}
variant="contained"
color="primary"
startIcon={<RefreshIcon />}
>
Retry All Documents
</Button>
</DialogActions>
</Dialog>
{/* Advanced Retry Modal */}
<BulkRetryModal
open={bulkRetryModalOpen}

View File

@ -0,0 +1,9 @@
-- OCR retry tracking columns for the documents table.
-- The Document struct already carries these fields; this migration adds the
-- matching columns, which were missing from the database schema.
-- IF NOT EXISTS on each clause keeps the statement safe to run when a column
-- is already present.
ALTER TABLE documents
    ADD COLUMN IF NOT EXISTS ocr_retry_count INTEGER DEFAULT 0,
    ADD COLUMN IF NOT EXISTS ocr_failure_reason TEXT DEFAULT NULL;
-- Describe the new columns in the catalog
COMMENT ON COLUMN documents.ocr_retry_count IS 'Number of times OCR processing has been retried for this document';
COMMENT ON COLUMN documents.ocr_failure_reason IS 'Reason for the most recent OCR failure, if any';

View File

@ -10,9 +10,9 @@ impl Database {
pub async fn create_document(&self, document: Document) -> Result<Document> {
let row = sqlx::query(
r#"
INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22)
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24)
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
"#
)
.bind(document.id)
@ -29,6 +29,8 @@ impl Database {
.bind(&document.ocr_status)
.bind(&document.ocr_error)
.bind(document.ocr_completed_at)
.bind(document.ocr_retry_count)
.bind(&document.ocr_failure_reason)
.bind(&document.tags)
.bind(document.created_at)
.bind(document.updated_at)
@ -55,6 +57,8 @@ impl Database {
ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
@ -70,7 +74,7 @@ impl Database {
let query = if user_role == crate::models::UserRole::Admin {
// Admins can see all documents
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
ORDER BY created_at DESC
LIMIT $1 OFFSET $2
@ -78,7 +82,7 @@ impl Database {
} else {
// Regular users can only see their own documents
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
WHERE user_id = $3
ORDER BY created_at DESC
@ -118,6 +122,8 @@ impl Database {
ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
@ -138,7 +144,7 @@ impl Database {
// Admin with OCR filter
sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
WHERE ocr_status = $3
ORDER BY created_at DESC
@ -155,7 +161,7 @@ impl Database {
// Admin without OCR filter
sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
ORDER BY created_at DESC
LIMIT $1 OFFSET $2
@ -170,7 +176,7 @@ impl Database {
// Regular user with OCR filter
sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
WHERE user_id = $3 AND ocr_status = $4
ORDER BY created_at DESC
@ -188,7 +194,7 @@ impl Database {
// Regular user without OCR filter
sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
WHERE user_id = $3
ORDER BY created_at DESC
@ -220,6 +226,8 @@ impl Database {
ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
@ -280,7 +288,7 @@ impl Database {
pub async fn get_documents_by_user(&self, user_id: Uuid, limit: i64, offset: i64) -> Result<Vec<Document>> {
let rows = sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
WHERE user_id = $1
ORDER BY created_at DESC
@ -310,6 +318,8 @@ impl Database {
ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
@ -327,7 +337,7 @@ impl Database {
pub async fn find_documents_by_filename(&self, filename: &str) -> Result<Vec<Document>> {
let rows = sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
WHERE filename = $1 OR original_filename = $1
ORDER BY created_at DESC
@ -354,6 +364,8 @@ impl Database {
ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
@ -371,7 +383,7 @@ impl Database {
pub async fn search_documents(&self, user_id: Uuid, search: SearchRequest) -> Result<(Vec<Document>, i64)> {
let mut query_builder = QueryBuilder::new(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
ts_rank(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')), plainto_tsquery('english', "#
);
@ -428,6 +440,8 @@ impl Database {
ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
@ -477,7 +491,7 @@ impl Database {
// Use trigram similarity for substring matching
let mut builder = QueryBuilder::new(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
GREATEST(
similarity(filename, "#
);
@ -520,7 +534,7 @@ impl Database {
let mut builder = QueryBuilder::new(&format!(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
GREATEST(
CASE WHEN filename ILIKE '%' || "#
));
@ -666,7 +680,7 @@ impl Database {
// Use trigram similarity for substring matching
let mut builder = QueryBuilder::new(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
GREATEST(
similarity(filename, "#
);
@ -705,7 +719,7 @@ impl Database {
let mut builder = QueryBuilder::new(&format!(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
GREATEST(
CASE WHEN filename ILIKE '%' || "#
));
@ -982,7 +996,7 @@ impl Database {
pub async fn get_recent_documents_for_source(&self, source_id: Uuid, limit: i64) -> Result<Vec<Document>> {
let rows = sqlx::query(
r#"SELECT * FROM documents
r#"SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata FROM documents
WHERE source_id = $1
ORDER BY created_at DESC
LIMIT $2"#
@ -1009,6 +1023,8 @@ impl Database {
ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
@ -1105,14 +1121,14 @@ impl Database {
let query = if user_role == crate::models::UserRole::Admin {
// Admins can see any document
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
WHERE id = $1
"#
} else {
// Regular users can only see their own documents
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
WHERE id = $1 AND user_id = $2
"#
@ -1147,6 +1163,8 @@ impl Database {
ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
@ -1164,7 +1182,7 @@ impl Database {
pub async fn get_document_by_user_and_hash(&self, user_id: Uuid, file_hash: &str) -> Result<Option<Document>> {
let row = sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
WHERE user_id = $1 AND file_hash = $2
LIMIT 1
@ -1191,6 +1209,8 @@ impl Database {
ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
@ -1396,7 +1416,7 @@ impl Database {
r#"
DELETE FROM documents
WHERE id = $1
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
"#,
)
.bind(document_id)
@ -1418,6 +1438,8 @@ impl Database {
ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"),
created_at: r.get("created_at"),
updated_at: r.get("updated_at"),
@ -1432,7 +1454,7 @@ impl Database {
r#"
DELETE FROM documents
WHERE id = $1 AND user_id = $2
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
"#,
)
.bind(document_id)
@ -1455,6 +1477,8 @@ impl Database {
ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"),
created_at: r.get("created_at"),
updated_at: r.get("updated_at"),
@ -1479,7 +1503,7 @@ impl Database {
r#"
DELETE FROM documents
WHERE id = ANY($1)
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
"#,
)
.bind(document_ids)
@ -1501,6 +1525,8 @@ impl Database {
ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"),
created_at: r.get("created_at"),
updated_at: r.get("updated_at"),
@ -1515,7 +1541,7 @@ impl Database {
r#"
DELETE FROM documents
WHERE id = ANY($1) AND user_id = $2
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
"#,
)
.bind(document_ids)
@ -1538,6 +1564,8 @@ impl Database {
ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"),
created_at: r.get("created_at"),
updated_at: r.get("updated_at"),
@ -1557,7 +1585,7 @@ impl Database {
let documents = if user_role == crate::models::UserRole::Admin {
let rows = sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
WHERE ocr_confidence IS NOT NULL AND ocr_confidence < $1
ORDER BY ocr_confidence ASC, created_at DESC
@ -1582,6 +1610,8 @@ impl Database {
ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"),
created_at: r.get("created_at"),
updated_at: r.get("updated_at"),
@ -1594,7 +1624,7 @@ impl Database {
} else {
let rows = sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
WHERE ocr_confidence IS NOT NULL AND ocr_confidence < $1 AND user_id = $2
ORDER BY ocr_confidence ASC, created_at DESC
@ -1620,6 +1650,8 @@ impl Database {
ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"),
created_at: r.get("created_at"),
updated_at: r.get("updated_at"),
@ -1639,7 +1671,7 @@ impl Database {
let documents = if user_role == crate::models::UserRole::Admin {
let rows = sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
WHERE ocr_status = 'failed' OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing')
ORDER BY created_at DESC
@ -1663,6 +1695,8 @@ impl Database {
ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"),
created_at: r.get("created_at"),
updated_at: r.get("updated_at"),
@ -1675,7 +1709,7 @@ impl Database {
} else {
let rows = sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
WHERE (ocr_status = 'failed' OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing')) AND user_id = $1
ORDER BY created_at DESC
@ -1700,6 +1734,8 @@ impl Database {
ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"),
created_at: r.get("created_at"),
updated_at: r.get("updated_at"),
@ -1719,7 +1755,7 @@ impl Database {
let documents = if user_role == crate::models::UserRole::Admin {
let rows = sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
WHERE (ocr_confidence IS NOT NULL AND ocr_confidence < $1)
OR ocr_status = 'failed'
@ -1747,6 +1783,8 @@ impl Database {
ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"),
created_at: r.get("created_at"),
updated_at: r.get("updated_at"),
@ -1759,7 +1797,7 @@ impl Database {
} else {
let rows = sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents
WHERE ((ocr_confidence IS NOT NULL AND ocr_confidence < $1)
OR ocr_status = 'failed')
@ -1789,6 +1827,8 @@ impl Database {
ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"),
created_at: r.get("created_at"),
updated_at: r.get("updated_at"),

View File

@ -27,7 +27,21 @@ pub async fn record_ocr_retry(
priority: i32,
queue_id: Option<Uuid>,
) -> Result<Uuid> {
crate::debug_log!("OCR_RETRY_HISTORY",
"document_id" => document_id,
"user_id" => user_id,
"retry_reason" => retry_reason,
"priority" => priority,
"queue_id" => queue_id.unwrap_or_default(),
"message" => "Recording OCR retry attempt"
);
// First get the current OCR status
crate::debug_log!("OCR_RETRY_HISTORY",
"document_id" => document_id,
"message" => "Fetching current OCR status"
);
let current_status = sqlx::query(
r#"
SELECT ocr_status, ocr_failure_reason, ocr_error
@ -37,19 +51,38 @@ pub async fn record_ocr_retry(
)
.bind(document_id)
.fetch_optional(pool)
.await?;
.await
.map_err(|e| {
crate::debug_error!("OCR_RETRY_HISTORY", format!("Failed to fetch current status for document {}: {}", document_id, e));
e
})?;
let (previous_status, previous_failure_reason, previous_error) = if let Some(row) = current_status {
(
row.get::<Option<String>, _>("ocr_status"),
row.get::<Option<String>, _>("ocr_failure_reason"),
row.get::<Option<String>, _>("ocr_error"),
)
let status = row.get::<Option<String>, _>("ocr_status");
let failure = row.get::<Option<String>, _>("ocr_failure_reason");
let error = row.get::<Option<String>, _>("ocr_error");
crate::debug_log!("OCR_RETRY_HISTORY",
"document_id" => document_id,
"status" => status.as_deref().unwrap_or("none"),
"failure_reason" => failure.as_deref().unwrap_or("none"),
"has_error" => error.is_some(),
"message" => "Found current document status"
);
(status, failure, error)
} else {
crate::debug_warn!("OCR_RETRY_HISTORY", "Document not found when recording retry history");
(None, None, None)
};
// Insert retry history record
crate::debug_log!("OCR_RETRY_HISTORY",
"document_id" => document_id,
"previous_status" => previous_status.as_deref().unwrap_or("none"),
"message" => "Inserting retry history record"
);
let retry_id: Uuid = sqlx::query_scalar(
r#"
INSERT INTO ocr_retry_history (
@ -63,15 +96,25 @@ pub async fn record_ocr_retry(
.bind(document_id)
.bind(user_id)
.bind(retry_reason)
.bind(previous_status)
.bind(previous_failure_reason)
.bind(previous_error)
.bind(&previous_status)
.bind(&previous_failure_reason)
.bind(&previous_error)
.bind(priority)
.bind(queue_id)
.fetch_one(pool)
.await?;
.await
.map_err(|e| {
crate::debug_error!("OCR_RETRY_HISTORY", format!("Failed to insert retry history for document {}: {}", document_id, e));
e
})?;
// Increment retry count
crate::debug_log!("OCR_RETRY_HISTORY",
"document_id" => document_id,
"retry_id" => retry_id,
"message" => "Incrementing retry count"
);
sqlx::query(
r#"
UPDATE documents
@ -82,7 +125,18 @@ pub async fn record_ocr_retry(
)
.bind(document_id)
.execute(pool)
.await?;
.await
.map_err(|e| {
crate::debug_error!("OCR_RETRY_HISTORY", format!("Failed to increment retry count for document {}: {}", document_id, e));
e
})?;
crate::debug_log!("OCR_RETRY_HISTORY",
"document_id" => document_id,
"retry_id" => retry_id,
"user_id" => user_id,
"message" => "Successfully recorded retry history"
);
Ok(retry_id)
}

View File

@ -12,6 +12,7 @@ pub mod scheduling;
pub mod seed;
pub mod services;
pub mod swagger;
pub mod utils;
pub mod webdav_xml_parser;
#[cfg(test)]

View File

@ -129,6 +129,8 @@ pub struct Document {
pub ocr_status: Option<String>,
pub ocr_error: Option<String>,
pub ocr_completed_at: Option<DateTime<Utc>>,
pub ocr_retry_count: Option<i32>,
pub ocr_failure_reason: Option<String>,
pub tags: Vec<String>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,

View File

@ -75,6 +75,13 @@ impl OcrQueueService {
/// Add a document to the OCR queue
pub async fn enqueue_document(&self, document_id: Uuid, priority: i32, file_size: i64) -> Result<Uuid> {
crate::debug_log!("OCR_QUEUE",
"document_id" => document_id,
"priority" => priority,
"file_size" => file_size,
"message" => "Enqueueing document"
);
let row = sqlx::query(
r#"
INSERT INTO ocr_queue (document_id, priority, file_size)
@ -86,10 +93,22 @@ impl OcrQueueService {
.bind(priority)
.bind(file_size)
.fetch_one(&self.pool)
.await?;
.await
.map_err(|e| {
crate::debug_error!("OCR_QUEUE", format!("Failed to insert document {} into queue: {}", document_id, e));
e
})?;
let id: Uuid = row.get("id");
crate::debug_log!("OCR_QUEUE",
"document_id" => document_id,
"queue_id" => id,
"priority" => priority,
"file_size" => file_size,
"message" => "Successfully enqueued document"
);
info!("Enqueued document {} with priority {} for OCR processing", document_id, priority);
Ok(id)
}

View File

@ -571,28 +571,55 @@ async fn retry_ocr(
auth_user: AuthUser,
Path(document_id): Path<uuid::Uuid>,
) -> Result<Json<serde_json::Value>, StatusCode> {
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"user_id" => auth_user.user.id,
"message" => "Starting OCR retry request"
);
// Check if document exists and belongs to user
let document = state
.db
.get_document_by_id(document_id, auth_user.user.id, auth_user.user.role)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.ok_or(StatusCode::NOT_FOUND)?;
.map_err(|e| {
crate::debug_error!("OCR_RETRY", format!("Failed to get document {}: {}", document_id, e));
StatusCode::INTERNAL_SERVER_ERROR
})?
.ok_or_else(|| {
crate::debug_log!("OCR_RETRY", &format!("Document {} not found or access denied for user {}", document_id, auth_user.user.id));
StatusCode::NOT_FOUND
})?;
// Check if document is eligible for OCR retry (failed or not processed)
let eligible = document.ocr_status.as_ref().map_or(true, |status| {
status == "failed" || status == "pending"
});
// Check if document is eligible for OCR retry (all documents are now retryable)
let current_status = document.ocr_status.as_deref().unwrap_or("unknown");
let eligible = true; // All documents are retryable
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"filename" => &document.filename,
"current_status" => current_status,
"eligible" => eligible,
"file_size" => document.file_size,
"retry_count" => document.ocr_retry_count.unwrap_or(0),
"message" => "Checking document eligibility"
);
if !eligible {
crate::debug_log!("OCR_RETRY", &format!("Document {} is not eligible for retry - current status: {}", document_id, current_status));
return Ok(Json(serde_json::json!({
"success": false,
"message": "Document is not eligible for OCR retry. Current status: {}",
"message": format!("Document is not eligible for OCR retry. Current status: {}", current_status),
"current_status": document.ocr_status
})));
}
// Reset document OCR fields
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"message" => "Resetting document OCR fields"
);
let reset_result = sqlx::query(
r#"
UPDATE documents
@ -611,12 +638,22 @@ async fn retry_ocr(
.bind(document_id)
.execute(state.db.get_pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
.map_err(|e| {
crate::debug_error!("OCR_RETRY", format!("Failed to reset OCR fields for document {}: {}", document_id, e));
StatusCode::INTERNAL_SERVER_ERROR
})?;
if reset_result.rows_affected() == 0 {
crate::debug_error!("OCR_RETRY", format!("No rows affected when resetting OCR fields for document {}", document_id));
return Err(StatusCode::NOT_FOUND);
}
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"rows_affected" => reset_result.rows_affected(),
"message" => "Successfully reset OCR fields"
);
// Calculate priority based on file size (higher priority for retries)
let priority = match document.file_size {
0..=1048576 => 15, // <= 1MB: highest priority (boosted for retry)
@ -626,10 +663,38 @@ async fn retry_ocr(
_ => 6, // > 50MB: lowest priority
};
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"file_size" => document.file_size,
"priority" => priority,
"message" => "Calculated retry priority"
);
// Add to OCR queue with detailed logging
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"priority" => priority,
"file_size" => document.file_size,
"message" => "Enqueueing document for OCR processing"
);
match state.queue_service.enqueue_document(document_id, priority, document.file_size).await {
Ok(queue_id) => {
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"queue_id" => queue_id,
"priority" => priority,
"message" => "Successfully enqueued document"
);
// Record retry history
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"user_id" => auth_user.user.id,
"queue_id" => queue_id,
"message" => "Recording retry history"
);
if let Err(e) = crate::db::ocr_retry::record_ocr_retry(
state.db.get_pool(),
document_id,
@ -638,9 +703,25 @@ async fn retry_ocr(
priority,
Some(queue_id),
).await {
crate::debug_error!("OCR_RETRY", format!("Failed to record retry history for document {}: {}", document_id, e));
tracing::warn!("Failed to record retry history for document {}: {}", document_id, e);
} else {
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"queue_id" => queue_id,
"message" => "Successfully recorded retry history"
);
}
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"filename" => &document.filename,
"queue_id" => queue_id,
"priority" => priority,
"file_size" => document.file_size,
"message" => "OCR retry process completed successfully"
);
tracing::info!(
"OCR retry queued for document {} ({}): queue_id={}, priority={}, size={}",
document_id, document.filename, queue_id, priority, document.file_size
@ -656,6 +737,7 @@ async fn retry_ocr(
})))
}
Err(e) => {
crate::debug_error!("OCR_RETRY", format!("Failed to enqueue document {}: {}", document_id, e));
tracing::error!("Failed to queue OCR retry for document {}: {}", document_id, e);
Err(StatusCode::INTERNAL_SERVER_ERROR)
}

View File

@ -101,62 +101,120 @@ pub async fn bulk_retry_ocr(
auth_user: AuthUser,
Json(request): Json<BulkOcrRetryRequest>,
) -> Result<Json<BulkOcrRetryResponse>, StatusCode> {
crate::debug_log!("BULK_OCR_RETRY",
"user_id" => auth_user.user.id,
"mode" => format!("{:?}", request.mode),
"preview_only" => request.preview_only.unwrap_or(false),
"priority_override" => request.priority_override.unwrap_or(-1),
"message" => "Starting bulk OCR retry request"
);
info!("Bulk OCR retry requested by user {} with mode: {:?}", auth_user.user.id, request.mode);
let preview_only = request.preview_only.unwrap_or(false);
// Build query based on selection mode
crate::debug_log!("BULK_OCR_RETRY", "Building document query based on selection mode");
let documents = match request.mode {
SelectionMode::All => {
crate::debug_log!("BULK_OCR_RETRY", "Fetching all documents for retry");
get_all_failed_ocr_documents(&state, &auth_user).await?
}
SelectionMode::Specific => {
if let Some(ids) = request.document_ids {
get_specific_documents(&state, &auth_user, ids).await?
if let Some(ids) = &request.document_ids {
crate::debug_log!("BULK_OCR_RETRY",
"document_count" => ids.len(),
"message" => "Fetching specific documents"
);
get_specific_documents(&state, &auth_user, ids.clone()).await?
} else {
crate::debug_error!("BULK_OCR_RETRY", "Specific mode requested but no document IDs provided");
return Err(StatusCode::BAD_REQUEST);
}
}
SelectionMode::Filter => {
if let Some(filter) = request.filter {
get_filtered_documents(&state, &auth_user, filter).await?
if let Some(filter) = &request.filter {
crate::debug_log!("BULK_OCR_RETRY",
"filter_mime_types" => filter.mime_types.as_ref().map(|v| v.len()).unwrap_or(0),
"filter_failure_reasons" => filter.failure_reasons.as_ref().map(|v| v.len()).unwrap_or(0),
"message" => "Fetching filtered documents"
);
get_filtered_documents(&state, &auth_user, filter.clone()).await?
} else {
crate::debug_error!("BULK_OCR_RETRY", "Filter mode requested but no filter provided");
return Err(StatusCode::BAD_REQUEST);
}
}
};
let matched_count = documents.len();
crate::debug_log!("BULK_OCR_RETRY",
"matched_count" => matched_count,
"message" => "Document query completed"
);
let mut retry_documents = Vec::new();
let mut queued_count = 0;
let mut total_estimated_time = 0.0;
for doc in documents {
for (index, doc) in documents.iter().enumerate() {
let priority = calculate_priority(doc.file_size, request.priority_override);
crate::debug_log!("BULK_OCR_RETRY",
"index" => index + 1,
"total" => matched_count,
"document_id" => doc.id,
"filename" => &doc.filename,
"file_size" => doc.file_size,
"priority" => priority,
"failure_reason" => doc.ocr_failure_reason.as_deref().unwrap_or("none"),
"message" => "Processing document"
);
let mut doc_info = OcrRetryDocumentInfo {
id: doc.id,
filename: doc.filename.clone(),
file_size: doc.file_size,
mime_type: doc.mime_type,
ocr_failure_reason: doc.ocr_failure_reason,
mime_type: doc.mime_type.clone(),
ocr_failure_reason: doc.ocr_failure_reason.clone(),
priority,
queue_id: None,
};
if !preview_only {
// Reset OCR fields
crate::debug_log!("BULK_OCR_RETRY",
"document_id" => doc.id,
"message" => "Resetting OCR status for document"
);
if let Err(e) = reset_document_ocr_status(&state, doc.id).await {
crate::debug_error!("BULK_OCR_RETRY", format!("Failed to reset OCR status for document {}: {}", doc.id, e));
warn!("Failed to reset OCR status for document {}: {}", doc.id, e);
continue;
}
// Queue for OCR
crate::debug_log!("BULK_OCR_RETRY",
"document_id" => doc.id,
"priority" => priority,
"file_size" => doc.file_size,
"message" => "Enqueueing document for OCR"
);
match state.queue_service.enqueue_document(doc.id, priority, doc.file_size).await {
Ok(queue_id) => {
doc_info.queue_id = Some(queue_id);
queued_count += 1;
crate::debug_log!("BULK_OCR_RETRY",
"document_id" => doc.id,
"queue_id" => queue_id,
"priority" => priority,
"queued_count" => queued_count,
"message" => "Successfully enqueued document"
);
// Record retry history
let retry_reason = match &request.mode {
SelectionMode::All => "bulk_retry_all",
@ -164,6 +222,13 @@ pub async fn bulk_retry_ocr(
SelectionMode::Filter => "bulk_retry_filtered",
};
crate::debug_log!("BULK_OCR_RETRY",
"document_id" => doc.id,
"retry_reason" => retry_reason,
"queue_id" => queue_id,
"message" => "Recording retry history"
);
if let Err(e) = crate::db::ocr_retry::record_ocr_retry(
state.db.get_pool(),
doc.id,
@ -172,12 +237,20 @@ pub async fn bulk_retry_ocr(
priority,
Some(queue_id),
).await {
crate::debug_error!("BULK_OCR_RETRY", format!("Failed to record retry history for document {}: {}", doc.id, e));
warn!("Failed to record retry history for document {}: {}", doc.id, e);
} else {
crate::debug_log!("BULK_OCR_RETRY",
"document_id" => doc.id,
"queue_id" => queue_id,
"message" => "Successfully recorded retry history"
);
}
info!("Queued document {} for OCR retry with priority {}", doc.id, priority);
}
Err(e) => {
crate::debug_error!("BULK_OCR_RETRY", format!("Failed to enqueue document {}: {}", doc.id, e));
error!("Failed to queue document {} for OCR retry: {}", doc.id, e);
}
}
@ -188,6 +261,15 @@ pub async fn bulk_retry_ocr(
retry_documents.push(doc_info);
}
crate::debug_log!("BULK_OCR_RETRY",
"matched_count" => matched_count,
"queued_count" => queued_count,
"preview_only" => preview_only,
"estimated_time_minutes" => (total_estimated_time / 60.0) as i32,
"user_id" => auth_user.user.id,
"message" => "Bulk retry operation completed"
);
let response = BulkOcrRetryResponse {
success: true,
message: if preview_only {
@ -303,8 +385,7 @@ pub async fn get_ocr_retry_stats(
MIN(created_at) as first_occurrence,
MAX(updated_at) as last_occurrence
FROM documents
WHERE ocr_status = 'failed'
AND ($1::uuid IS NULL OR user_id = $1)
WHERE ($1::uuid IS NULL OR user_id = $1)
GROUP BY ocr_failure_reason
ORDER BY count DESC
"#
@ -322,8 +403,7 @@ pub async fn get_ocr_retry_stats(
COUNT(*) as count,
AVG(file_size) as avg_file_size
FROM documents
WHERE ocr_status = 'failed'
AND ($1::uuid IS NULL OR user_id = $1)
WHERE ($1::uuid IS NULL OR user_id = $1)
GROUP BY mime_type
ORDER BY count DESC
"#
@ -441,8 +521,7 @@ async fn get_all_failed_ocr_documents(
r#"
SELECT id, filename, file_size, mime_type, ocr_failure_reason
FROM documents
WHERE ocr_status = 'failed'
AND ($1::uuid IS NULL OR user_id = $1)
WHERE ($1::uuid IS NULL OR user_id = $1)
ORDER BY created_at DESC
"#
)
@ -465,12 +544,33 @@ async fn get_specific_documents(
Some(auth_user.user.id)
};
// First let's debug what documents we're looking for and their current status
for doc_id in &document_ids {
if let Ok(Some(row)) = sqlx::query("SELECT id, filename, ocr_status FROM documents WHERE id = $1")
.bind(doc_id)
.fetch_optional(state.db.get_pool())
.await {
let status: Option<String> = row.get("ocr_status");
let filename: String = row.get("filename");
crate::debug_log!("BULK_OCR_RETRY",
"requested_document_id" => doc_id,
"filename" => &filename,
"current_ocr_status" => status.as_deref().unwrap_or("NULL"),
"message" => "Document found in database"
);
} else {
crate::debug_log!("BULK_OCR_RETRY",
"requested_document_id" => doc_id,
"message" => "Document NOT found in database"
);
}
}
let documents = sqlx::query_as::<_, DocumentInfo>(
r#"
SELECT id, filename, file_size, mime_type, ocr_failure_reason
FROM documents
WHERE id = ANY($1)
AND ocr_status = 'failed'
AND ($2::uuid IS NULL OR user_id = $2)
"#
)
@ -489,7 +589,7 @@ async fn get_filtered_documents(
filter: OcrRetryFilter
) -> Result<Vec<DocumentInfo>, StatusCode> {
let mut query = sqlx::QueryBuilder::new(
"SELECT id, filename, file_size, mime_type, ocr_failure_reason FROM documents WHERE ocr_status = 'failed'"
"SELECT id, filename, file_size, mime_type, ocr_failure_reason FROM documents WHERE 1=1"
);
// User filter
@ -585,6 +685,7 @@ async fn reset_document_ocr_status(state: &Arc<AppState>, document_id: Uuid) ->
ocr_text = NULL,
ocr_error = NULL,
ocr_failure_reason = NULL,
ocr_retry_count = NULL,
ocr_confidence = NULL,
ocr_word_count = NULL,
ocr_processing_time_ms = NULL,

View File

@ -177,6 +177,8 @@ impl FileService {
ocr_status: Some("pending".to_string()),
ocr_error: None,
ocr_completed_at: None,
ocr_retry_count: None,
ocr_failure_reason: None,
tags: Vec::new(),
created_at: Utc::now(),
updated_at: Utc::now(),

View File

@ -52,6 +52,8 @@ mod tests {
original_created_at: None,
original_modified_at: None,
source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
}
}

View File

@ -103,6 +103,8 @@ mod tests {
original_created_at: Some(DateTime::parse_from_rfc3339("2023-12-01T10:00:00Z").unwrap().with_timezone(&Utc)),
original_modified_at: Some(DateTime::parse_from_rfc3339("2023-12-15T15:30:00Z").unwrap().with_timezone(&Utc)),
source_metadata: Some(serde_json::json!({"permissions": "644", "owner": "user1"})),
ocr_retry_count: None,
ocr_failure_reason: None,
};
// Convert to DocumentResponse

View File

@ -63,6 +63,8 @@ mod document_routes_deletion_tests {
original_created_at: None,
original_modified_at: None,
source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
}
}
@ -400,6 +402,8 @@ mod document_routes_deletion_tests {
original_created_at: None,
original_modified_at: None,
source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
}
}

View File

@ -29,6 +29,8 @@ fn create_test_document(user_id: Uuid) -> Document {
original_created_at: None,
original_modified_at: None,
source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
}
}
@ -57,6 +59,8 @@ fn create_test_document_without_ocr(user_id: Uuid) -> Document {
original_created_at: None,
original_modified_at: None,
source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
}
}
@ -85,6 +89,8 @@ fn create_test_document_with_ocr_error(user_id: Uuid) -> Document {
original_created_at: None,
original_modified_at: None,
source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
}
}
@ -1564,6 +1570,8 @@ mod deletion_error_handling_tests {
original_created_at: None,
original_modified_at: None,
source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
}
}

View File

@ -942,6 +942,8 @@ mod tests {
original_created_at: None,
original_modified_at: None,
source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
};
db.create_document(document).await.unwrap();

View File

@ -195,6 +195,8 @@ mod file_deletion_tests {
original_created_at: None,
original_modified_at: None,
source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
};
(
@ -333,6 +335,8 @@ mod file_deletion_tests {
original_created_at: None,
original_modified_at: None,
source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
};
// Try to delete nonexistent files (should not fail)
@ -387,6 +391,8 @@ mod file_deletion_tests {
original_created_at: None,
original_modified_at: None,
source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
};
// Verify files exist
@ -445,6 +451,8 @@ mod file_deletion_tests {
original_created_at: None,
original_modified_at: None,
source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
};
// Verify files exist
@ -494,6 +502,8 @@ mod file_deletion_tests {
original_created_at: None,
original_modified_at: None,
source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
};
// Verify file exists

View File

@ -84,6 +84,8 @@ mod tests {
original_created_at: None,
original_modified_at: None,
source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
};
sqlx::query("INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)")

85
src/utils/debug.rs Normal file
View File

@ -0,0 +1,85 @@
use std::env;
use tracing::{debug, info, warn, error};
/// Report whether verbose debug output is enabled via the `DEBUG` environment variable.
///
/// Debugging is considered on when `DEBUG` is set to any non-empty value other
/// than `0` or `false` (the latter matched case-insensitively). If the variable
/// cannot be read (for example because it is unset), this returns `false`.
///
/// The variable is read on every call, so a change made while the process is
/// running is picked up by the next call.
pub fn is_debug_enabled() -> bool {
    match env::var("DEBUG") {
        // `eq_ignore_ascii_case` compares in place, so no lowercased copy of the
        // value is allocated each time a log call checks the flag.
        Ok(val) => !(val.is_empty() || val == "0" || val.eq_ignore_ascii_case("false")),
        Err(_) => false,
    }
}
/// Emit `message` through `info!` with the debug marker prefix.
///
/// Nothing is logged unless [`is_debug_enabled`] returns `true`.
pub fn debug_log(message: &str) {
    if !is_debug_enabled() {
        return;
    }
    info!("🐛 DEBUG: {}", message);
}
/// Emit `message` through `info!`, tagged with `context` in square brackets.
///
/// Nothing is logged unless [`is_debug_enabled`] returns `true`.
pub fn debug_log_context(context: &str, message: &str) {
    if !is_debug_enabled() {
        return;
    }
    info!("🐛 DEBUG [{}]: {}", context, message);
}
/// Emit a list of key/value pairs through `info!`, tagged with `context`.
///
/// Each pair is rendered as `key=value` using the value's `Display`
/// implementation, and the pairs are joined with `", "` in the order given.
/// Nothing is formatted or logged unless [`is_debug_enabled`] returns `true`.
pub fn debug_log_structured(context: &str, key_values: &[(&str, &dyn std::fmt::Display)]) {
    if !is_debug_enabled() {
        return;
    }

    let rendered = key_values
        .iter()
        .map(|(key, value)| format!("{}={}", key, value))
        .collect::<Vec<_>>()
        .join(", ");

    info!("🐛 DEBUG [{}]: {}", context, rendered);
}
/// Emit `error` through `error!`, tagged with `context`.
///
/// The error is logged whether or not debug mode is on; when
/// [`is_debug_enabled`] returns `true` the line additionally carries the
/// debug marker prefix.
pub fn debug_error(context: &str, error: &dyn std::fmt::Display) {
    if !is_debug_enabled() {
        error!("[{}]: {}", context, error);
        return;
    }
    error!("🐛 DEBUG ERROR [{}]: {}", context, error);
}
/// Emit `message` through `warn!`, tagged with `context`.
///
/// The warning is logged whether or not debug mode is on; when
/// [`is_debug_enabled`] returns `true` the line additionally carries the
/// debug marker prefix.
pub fn debug_warn(context: &str, message: &str) {
    if !is_debug_enabled() {
        warn!("[{}]: {}", context, message);
        return;
    }
    warn!("🐛 DEBUG WARN [{}]: {}", context, message);
}
/// Debug-logging front end with three call shapes:
///
/// * `debug_log!(msg)` forwards to `utils::debug::debug_log`.
/// * `debug_log!(context, msg)` forwards to `utils::debug::debug_log_context`.
/// * `debug_log!(context, "key" => value, ...)` forwards to
///   `utils::debug::debug_log_structured`; each value is passed by reference
///   and a trailing comma is accepted.
///
/// Paths are written with `$crate` so that the expansion always refers to the
/// crate defining this macro, even when the exported macro is invoked from
/// another crate.
#[macro_export]
macro_rules! debug_log {
    ($msg:expr) => {
        $crate::utils::debug::debug_log($msg)
    };
    ($context:expr, $msg:expr) => {
        $crate::utils::debug::debug_log_context($context, $msg)
    };
    ($context:expr, $($key:expr => $value:expr),+ $(,)?) => {
        $crate::utils::debug::debug_log_structured($context, &[$(($key, &$value)),+])
    };
}
/// Error-logging front end: `debug_error!(context, error)` forwards to
/// `utils::debug::debug_error`, passing `error` by reference so any
/// `Display` value (including a temporary such as `format!(...)`) is accepted.
///
/// The path is written with `$crate` so that the expansion always refers to
/// the crate defining this macro, even when the exported macro is invoked from
/// another crate.
#[macro_export]
macro_rules! debug_error {
    ($context:expr, $error:expr) => {
        $crate::utils::debug::debug_error($context, &$error)
    };
}
/// Warning-logging front end: `debug_warn!(context, msg)` forwards both
/// arguments unchanged to `utils::debug::debug_warn`.
///
/// The path is written with `$crate` so that the expansion always refers to
/// the crate defining this macro, even when the exported macro is invoked from
/// another crate.
#[macro_export]
macro_rules! debug_warn {
    ($context:expr, $msg:expr) => {
        $crate::utils::debug::debug_warn($context, $msg)
    };
}

1
src/utils/mod.rs Normal file
View File

@ -0,0 +1 @@
pub mod debug;

View File

@ -109,6 +109,13 @@ async fn debug_ocr_content() {
.await
.expect("Upload should work");
println!("📤 Document 1 upload response status: {}", doc1_response.status());
if !doc1_response.status().is_success() {
let status = doc1_response.status();
let error_text = doc1_response.text().await.unwrap_or_else(|_| "No response body".to_string());
panic!("Document 1 upload failed with status {}: {}", status, error_text);
}
let doc2_response = client
.post(&format!("{}/api/documents", get_base_url()))
.header("Authorization", format!("Bearer {}", token))
@ -117,8 +124,15 @@ async fn debug_ocr_content() {
.await
.expect("Upload should work");
let doc1: DocumentResponse = doc1_response.json().await.expect("Valid JSON");
let doc2: DocumentResponse = doc2_response.json().await.expect("Valid JSON");
println!("📤 Document 2 upload response status: {}", doc2_response.status());
if !doc2_response.status().is_success() {
let status = doc2_response.status();
let error_text = doc2_response.text().await.unwrap_or_else(|_| "No response body".to_string());
panic!("Document 2 upload failed with status {}: {}", status, error_text);
}
let doc1: DocumentResponse = doc1_response.json().await.expect("Valid JSON for doc1");
let doc2: DocumentResponse = doc2_response.json().await.expect("Valid JSON for doc2");
println!("📄 Document 1: {}", doc1.id);
println!("📄 Document 2: {}", doc2.id);

View File

@ -36,6 +36,8 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: String) -> Doc
ocr_status: Some("pending".to_string()),
ocr_error: None,
ocr_completed_at: None,
ocr_retry_count: None,
ocr_failure_reason: None,
tags: Vec::new(),
created_at: Utc::now(),
updated_at: Utc::now(),

View File

@ -54,6 +54,8 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: Option<String>
ocr_status: Some("pending".to_string()),
ocr_error: None,
ocr_completed_at: None,
ocr_retry_count: None,
ocr_failure_reason: None,
tags: Vec::new(),
created_at: Utc::now(),
updated_at: Utc::now(),

View File

@ -351,6 +351,8 @@ async fn test_create_ignored_file_from_document() -> Result<()> {
ocr_status: Some("completed".to_string()),
ocr_error: None,
ocr_completed_at: Some(chrono::Utc::now()),
ocr_retry_count: None,
ocr_failure_reason: None,
tags: vec!["test".to_string()],
created_at: chrono::Utc::now(),
updated_at: chrono::Utc::now(),

View File

@ -54,6 +54,8 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: String) -> Doc
ocr_status: Some("pending".to_string()),
ocr_error: None,
ocr_completed_at: None,
ocr_retry_count: None,
ocr_failure_reason: None,
tags: Vec::new(),
created_at: Utc::now(),
updated_at: Utc::now(),

View File

@ -54,6 +54,8 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: String) -> Doc
ocr_status: Some("pending".to_string()),
ocr_error: None,
ocr_completed_at: None,
ocr_retry_count: None,
ocr_failure_reason: None,
tags: Vec::new(),
created_at: Utc::now(),
updated_at: Utc::now(),

View File

@ -20,6 +20,8 @@ fn test_document_response_conversion_with_ocr() {
ocr_status: Some("completed".to_string()),
ocr_error: None,
ocr_completed_at: Some(Utc::now()),
ocr_retry_count: None,
ocr_failure_reason: None,
tags: vec!["test".to_string()],
created_at: Utc::now(),
updated_at: Utc::now(),
@ -57,6 +59,8 @@ fn test_document_response_conversion_without_ocr() {
ocr_status: Some("pending".to_string()),
ocr_error: None,
ocr_completed_at: None,
ocr_retry_count: None,
ocr_failure_reason: None,
tags: vec![],
created_at: Utc::now(),
updated_at: Utc::now(),