Merge pull request #89 from readur/feat/setup-debug-functionality

feat(debug): setup debug functionality
This commit is contained in:
Jon Fuller 2025-07-02 15:06:16 -07:00 committed by GitHub
commit 9034bf5d70
26 changed files with 624 additions and 76 deletions

View File

@ -19,6 +19,7 @@ import {
Dialog, Dialog,
DialogTitle, DialogTitle,
DialogContent, DialogContent,
DialogContentText,
DialogActions, DialogActions,
Pagination, Pagination,
CircularProgress, CircularProgress,
@ -233,6 +234,7 @@ const DocumentManagementPage: React.FC = () => {
const [retryHistoryModalOpen, setRetryHistoryModalOpen] = useState(false); const [retryHistoryModalOpen, setRetryHistoryModalOpen] = useState(false);
const [selectedDocumentForHistory, setSelectedDocumentForHistory] = useState<string | null>(null); const [selectedDocumentForHistory, setSelectedDocumentForHistory] = useState<string | null>(null);
const [selectedDocumentIds, setSelectedDocumentIds] = useState<string[]>([]); const [selectedDocumentIds, setSelectedDocumentIds] = useState<string[]>([]);
const [confirmRetryAllOpen, setConfirmRetryAllOpen] = useState(false);
const fetchFailedDocuments = async () => { const fetchFailedDocuments = async () => {
try { try {
@ -358,6 +360,42 @@ const DocumentManagementPage: React.FC = () => {
} }
}; };
/**
 * Queue an OCR retry for every document by calling the bulk retry endpoint
 * with `mode: 'all'`, then report the outcome in the snackbar.
 *
 * The `retryingAll` flag is raised for the duration of the request and is
 * always lowered again in `finally`, whether the call succeeds or fails.
 */
const handleRetryAllDocuments = async () => {
  try {
    setRetryingAll(true);

    const { data } = await documentService.bulkRetryOcr({
      mode: 'all',
      preview_only: false
    });

    // Nothing was queued: tell the user and stop (the `finally` below still runs).
    if (!(data.queued_count > 0)) {
      setSnackbar({
        open: true,
        message: 'No documents found to retry',
        severity: 'info'
      });
      return;
    }

    const estimatedMinutes = Math.ceil(data.estimated_total_time_minutes);
    setSnackbar({
      open: true,
      message: `Successfully queued ${data.queued_count} documents for OCR retry. Estimated processing time: ${estimatedMinutes} minutes.`,
      severity: 'success'
    });

    // Reload the tab currently in view so it reflects the newly queued retries.
    await refreshCurrentTab();
  } catch (error) {
    // Any failure above (request or refresh) ends up here.
    console.error('Error retrying all documents:', error);
    setSnackbar({
      open: true,
      message: 'Failed to retry documents. Please try again.',
      severity: 'error'
    });
  } finally {
    setRetryingAll(false);
  }
};
const handleRetryAllFailed = async () => { const handleRetryAllFailed = async () => {
try { try {
setRetryingAll(true); setRetryingAll(true);
@ -735,14 +773,33 @@ const DocumentManagementPage: React.FC = () => {
<Typography variant="h4" component="h1"> <Typography variant="h4" component="h1">
Document Management Document Management
</Typography> </Typography>
<Button <Box display="flex" gap={2}>
variant="outlined" <Button
startIcon={<RefreshIcon />} variant="contained"
onClick={refreshCurrentTab} color="primary"
disabled={loading || duplicatesLoading || retryingAll} size="large"
> startIcon={retryingAll ? <CircularProgress size={20} color="inherit" /> : <RefreshIcon />}
Refresh onClick={() => setConfirmRetryAllOpen(true)}
</Button> disabled={retryingAll}
sx={{
minWidth: 200,
boxShadow: 3,
'&:hover': {
boxShadow: 6,
}
}}
>
{retryingAll ? 'Retrying All...' : 'Retry All Documents'}
</Button>
<Button
variant="outlined"
startIcon={<RefreshIcon />}
onClick={refreshCurrentTab}
disabled={loading || duplicatesLoading || retryingAll}
>
Refresh
</Button>
</Box>
</Box> </Box>
<Paper sx={{ mb: 3, borderRadius: 2, overflow: 'hidden' }}> <Paper sx={{ mb: 3, borderRadius: 2, overflow: 'hidden' }}>
@ -825,7 +882,7 @@ const DocumentManagementPage: React.FC = () => {
size="small" size="small"
fullWidth fullWidth
> >
{retryingAll ? 'Retrying All...' : 'Retry All Failed OCR'} {retryingAll ? 'Retrying...' : 'Retry Failed Only'}
</Button> </Button>
</Box> </Box>
</CardContent> </CardContent>
@ -2219,6 +2276,43 @@ const DocumentManagementPage: React.FC = () => {
</DialogActions> </DialogActions>
</Dialog> </Dialog>
{/* Confirm Retry All Documents Dialog */}
<Dialog open={confirmRetryAllOpen} onClose={() => setConfirmRetryAllOpen(false)}>
<DialogTitle>
<Box display="flex" alignItems="center">
<RefreshIcon sx={{ mr: 1, color: 'primary.main' }} />
Retry All Documents
</Box>
</DialogTitle>
<DialogContent>
<DialogContentText>
This will retry OCR processing for <strong>all documents</strong> in your library, regardless of their current OCR status.
This includes documents that have already been successfully processed.
</DialogContentText>
<Box sx={{ mt: 2, p: 2, bgcolor: 'warning.light', borderRadius: 1 }}>
<Typography variant="body2" color="warning.dark">
<strong>Note:</strong> This is a resource-intensive operation that may take a significant amount of time depending on the number of documents.
</Typography>
</Box>
</DialogContent>
<DialogActions>
<Button onClick={() => setConfirmRetryAllOpen(false)}>
Cancel
</Button>
<Button
onClick={() => {
setConfirmRetryAllOpen(false);
handleRetryAllDocuments();
}}
variant="contained"
color="primary"
startIcon={<RefreshIcon />}
>
Retry All Documents
</Button>
</DialogActions>
</Dialog>
{/* Advanced Retry Modal */} {/* Advanced Retry Modal */}
<BulkRetryModal <BulkRetryModal
open={bulkRetryModalOpen} open={bulkRetryModalOpen}

View File

@ -0,0 +1,9 @@
-- Add OCR retry tracking fields to the documents table.
-- These fields were added to the Document struct but were missing from the database schema.
-- Both statements use ADD COLUMN IF NOT EXISTS, so a column that is already present is left untouched.
-- ocr_retry_count defaults to 0; ocr_failure_reason defaults to NULL.
ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_retry_count INTEGER DEFAULT 0;
ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_failure_reason TEXT DEFAULT NULL;
-- Attach human-readable descriptions to the new columns in the database catalog.
COMMENT ON COLUMN documents.ocr_retry_count IS 'Number of times OCR processing has been retried for this document';
COMMENT ON COLUMN documents.ocr_failure_reason IS 'Reason for the most recent OCR failure, if any';

View File

@ -10,9 +10,9 @@ impl Database {
pub async fn create_document(&self, document: Document) -> Result<Document> { pub async fn create_document(&self, document: Document) -> Result<Document> {
let row = sqlx::query( let row = sqlx::query(
r#" r#"
INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata) INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24)
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
"# "#
) )
.bind(document.id) .bind(document.id)
@ -29,6 +29,8 @@ impl Database {
.bind(&document.ocr_status) .bind(&document.ocr_status)
.bind(&document.ocr_error) .bind(&document.ocr_error)
.bind(document.ocr_completed_at) .bind(document.ocr_completed_at)
.bind(document.ocr_retry_count)
.bind(&document.ocr_failure_reason)
.bind(&document.tags) .bind(&document.tags)
.bind(document.created_at) .bind(document.created_at)
.bind(document.updated_at) .bind(document.updated_at)
@ -55,6 +57,8 @@ impl Database {
ocr_status: row.get("ocr_status"), ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"), ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"), ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"), tags: row.get("tags"),
created_at: row.get("created_at"), created_at: row.get("created_at"),
updated_at: row.get("updated_at"), updated_at: row.get("updated_at"),
@ -70,7 +74,7 @@ impl Database {
let query = if user_role == crate::models::UserRole::Admin { let query = if user_role == crate::models::UserRole::Admin {
// Admins can see all documents // Admins can see all documents
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
ORDER BY created_at DESC ORDER BY created_at DESC
LIMIT $1 OFFSET $2 LIMIT $1 OFFSET $2
@ -78,7 +82,7 @@ impl Database {
} else { } else {
// Regular users can only see their own documents // Regular users can only see their own documents
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
WHERE user_id = $3 WHERE user_id = $3
ORDER BY created_at DESC ORDER BY created_at DESC
@ -118,6 +122,8 @@ impl Database {
ocr_status: row.get("ocr_status"), ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"), ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"), ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"), tags: row.get("tags"),
created_at: row.get("created_at"), created_at: row.get("created_at"),
updated_at: row.get("updated_at"), updated_at: row.get("updated_at"),
@ -138,7 +144,7 @@ impl Database {
// Admin with OCR filter // Admin with OCR filter
sqlx::query( sqlx::query(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
WHERE ocr_status = $3 WHERE ocr_status = $3
ORDER BY created_at DESC ORDER BY created_at DESC
@ -155,7 +161,7 @@ impl Database {
// Admin without OCR filter // Admin without OCR filter
sqlx::query( sqlx::query(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
ORDER BY created_at DESC ORDER BY created_at DESC
LIMIT $1 OFFSET $2 LIMIT $1 OFFSET $2
@ -170,7 +176,7 @@ impl Database {
// Regular user with OCR filter // Regular user with OCR filter
sqlx::query( sqlx::query(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
WHERE user_id = $3 AND ocr_status = $4 WHERE user_id = $3 AND ocr_status = $4
ORDER BY created_at DESC ORDER BY created_at DESC
@ -188,7 +194,7 @@ impl Database {
// Regular user without OCR filter // Regular user without OCR filter
sqlx::query( sqlx::query(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
WHERE user_id = $3 WHERE user_id = $3
ORDER BY created_at DESC ORDER BY created_at DESC
@ -220,6 +226,8 @@ impl Database {
ocr_status: row.get("ocr_status"), ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"), ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"), ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"), tags: row.get("tags"),
created_at: row.get("created_at"), created_at: row.get("created_at"),
updated_at: row.get("updated_at"), updated_at: row.get("updated_at"),
@ -280,7 +288,7 @@ impl Database {
pub async fn get_documents_by_user(&self, user_id: Uuid, limit: i64, offset: i64) -> Result<Vec<Document>> { pub async fn get_documents_by_user(&self, user_id: Uuid, limit: i64, offset: i64) -> Result<Vec<Document>> {
let rows = sqlx::query( let rows = sqlx::query(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
WHERE user_id = $1 WHERE user_id = $1
ORDER BY created_at DESC ORDER BY created_at DESC
@ -310,6 +318,8 @@ impl Database {
ocr_status: row.get("ocr_status"), ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"), ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"), ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"), tags: row.get("tags"),
created_at: row.get("created_at"), created_at: row.get("created_at"),
updated_at: row.get("updated_at"), updated_at: row.get("updated_at"),
@ -327,7 +337,7 @@ impl Database {
pub async fn find_documents_by_filename(&self, filename: &str) -> Result<Vec<Document>> { pub async fn find_documents_by_filename(&self, filename: &str) -> Result<Vec<Document>> {
let rows = sqlx::query( let rows = sqlx::query(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
WHERE filename = $1 OR original_filename = $1 WHERE filename = $1 OR original_filename = $1
ORDER BY created_at DESC ORDER BY created_at DESC
@ -354,6 +364,8 @@ impl Database {
ocr_status: row.get("ocr_status"), ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"), ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"), ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"), tags: row.get("tags"),
created_at: row.get("created_at"), created_at: row.get("created_at"),
updated_at: row.get("updated_at"), updated_at: row.get("updated_at"),
@ -371,7 +383,7 @@ impl Database {
pub async fn search_documents(&self, user_id: Uuid, search: SearchRequest) -> Result<(Vec<Document>, i64)> { pub async fn search_documents(&self, user_id: Uuid, search: SearchRequest) -> Result<(Vec<Document>, i64)> {
let mut query_builder = QueryBuilder::new( let mut query_builder = QueryBuilder::new(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata, SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
ts_rank(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')), plainto_tsquery('english', "# ts_rank(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')), plainto_tsquery('english', "#
); );
@ -428,6 +440,8 @@ impl Database {
ocr_status: row.get("ocr_status"), ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"), ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"), ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"), tags: row.get("tags"),
created_at: row.get("created_at"), created_at: row.get("created_at"),
updated_at: row.get("updated_at"), updated_at: row.get("updated_at"),
@ -477,7 +491,7 @@ impl Database {
// Use trigram similarity for substring matching // Use trigram similarity for substring matching
let mut builder = QueryBuilder::new( let mut builder = QueryBuilder::new(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata, SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
GREATEST( GREATEST(
similarity(filename, "# similarity(filename, "#
); );
@ -520,7 +534,7 @@ impl Database {
let mut builder = QueryBuilder::new(&format!( let mut builder = QueryBuilder::new(&format!(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata, SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
GREATEST( GREATEST(
CASE WHEN filename ILIKE '%' || "# CASE WHEN filename ILIKE '%' || "#
)); ));
@ -666,7 +680,7 @@ impl Database {
// Use trigram similarity for substring matching // Use trigram similarity for substring matching
let mut builder = QueryBuilder::new( let mut builder = QueryBuilder::new(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata, SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
GREATEST( GREATEST(
similarity(filename, "# similarity(filename, "#
); );
@ -705,7 +719,7 @@ impl Database {
let mut builder = QueryBuilder::new(&format!( let mut builder = QueryBuilder::new(&format!(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata, SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
GREATEST( GREATEST(
CASE WHEN filename ILIKE '%' || "# CASE WHEN filename ILIKE '%' || "#
)); ));
@ -982,7 +996,7 @@ impl Database {
pub async fn get_recent_documents_for_source(&self, source_id: Uuid, limit: i64) -> Result<Vec<Document>> { pub async fn get_recent_documents_for_source(&self, source_id: Uuid, limit: i64) -> Result<Vec<Document>> {
let rows = sqlx::query( let rows = sqlx::query(
r#"SELECT * FROM documents r#"SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata FROM documents
WHERE source_id = $1 WHERE source_id = $1
ORDER BY created_at DESC ORDER BY created_at DESC
LIMIT $2"# LIMIT $2"#
@ -1009,6 +1023,8 @@ impl Database {
ocr_status: row.get("ocr_status"), ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"), ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"), ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"), tags: row.get("tags"),
created_at: row.get("created_at"), created_at: row.get("created_at"),
updated_at: row.get("updated_at"), updated_at: row.get("updated_at"),
@ -1105,14 +1121,14 @@ impl Database {
let query = if user_role == crate::models::UserRole::Admin { let query = if user_role == crate::models::UserRole::Admin {
// Admins can see any document // Admins can see any document
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
WHERE id = $1 WHERE id = $1
"# "#
} else { } else {
// Regular users can only see their own documents // Regular users can only see their own documents
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
WHERE id = $1 AND user_id = $2 WHERE id = $1 AND user_id = $2
"# "#
@ -1147,6 +1163,8 @@ impl Database {
ocr_status: row.get("ocr_status"), ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"), ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"), ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"), tags: row.get("tags"),
created_at: row.get("created_at"), created_at: row.get("created_at"),
updated_at: row.get("updated_at"), updated_at: row.get("updated_at"),
@ -1164,7 +1182,7 @@ impl Database {
pub async fn get_document_by_user_and_hash(&self, user_id: Uuid, file_hash: &str) -> Result<Option<Document>> { pub async fn get_document_by_user_and_hash(&self, user_id: Uuid, file_hash: &str) -> Result<Option<Document>> {
let row = sqlx::query( let row = sqlx::query(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
WHERE user_id = $1 AND file_hash = $2 WHERE user_id = $1 AND file_hash = $2
LIMIT 1 LIMIT 1
@ -1191,6 +1209,8 @@ impl Database {
ocr_status: row.get("ocr_status"), ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"), ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"), ocr_completed_at: row.get("ocr_completed_at"),
ocr_retry_count: row.get("ocr_retry_count"),
ocr_failure_reason: row.get("ocr_failure_reason"),
tags: row.get("tags"), tags: row.get("tags"),
created_at: row.get("created_at"), created_at: row.get("created_at"),
updated_at: row.get("updated_at"), updated_at: row.get("updated_at"),
@ -1396,7 +1416,7 @@ impl Database {
r#" r#"
DELETE FROM documents DELETE FROM documents
WHERE id = $1 WHERE id = $1
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
"#, "#,
) )
.bind(document_id) .bind(document_id)
@ -1418,6 +1438,8 @@ impl Database {
ocr_status: r.get("ocr_status"), ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"), ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"), ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"), tags: r.get("tags"),
created_at: r.get("created_at"), created_at: r.get("created_at"),
updated_at: r.get("updated_at"), updated_at: r.get("updated_at"),
@ -1432,7 +1454,7 @@ impl Database {
r#" r#"
DELETE FROM documents DELETE FROM documents
WHERE id = $1 AND user_id = $2 WHERE id = $1 AND user_id = $2
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
"#, "#,
) )
.bind(document_id) .bind(document_id)
@ -1455,6 +1477,8 @@ impl Database {
ocr_status: r.get("ocr_status"), ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"), ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"), ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"), tags: r.get("tags"),
created_at: r.get("created_at"), created_at: r.get("created_at"),
updated_at: r.get("updated_at"), updated_at: r.get("updated_at"),
@ -1479,7 +1503,7 @@ impl Database {
r#" r#"
DELETE FROM documents DELETE FROM documents
WHERE id = ANY($1) WHERE id = ANY($1)
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
"#, "#,
) )
.bind(document_ids) .bind(document_ids)
@ -1501,6 +1525,8 @@ impl Database {
ocr_status: r.get("ocr_status"), ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"), ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"), ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"), tags: r.get("tags"),
created_at: r.get("created_at"), created_at: r.get("created_at"),
updated_at: r.get("updated_at"), updated_at: r.get("updated_at"),
@ -1515,7 +1541,7 @@ impl Database {
r#" r#"
DELETE FROM documents DELETE FROM documents
WHERE id = ANY($1) AND user_id = $2 WHERE id = ANY($1) AND user_id = $2
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
"#, "#,
) )
.bind(document_ids) .bind(document_ids)
@ -1538,6 +1564,8 @@ impl Database {
ocr_status: r.get("ocr_status"), ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"), ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"), ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"), tags: r.get("tags"),
created_at: r.get("created_at"), created_at: r.get("created_at"),
updated_at: r.get("updated_at"), updated_at: r.get("updated_at"),
@ -1557,7 +1585,7 @@ impl Database {
let documents = if user_role == crate::models::UserRole::Admin { let documents = if user_role == crate::models::UserRole::Admin {
let rows = sqlx::query( let rows = sqlx::query(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
WHERE ocr_confidence IS NOT NULL AND ocr_confidence < $1 WHERE ocr_confidence IS NOT NULL AND ocr_confidence < $1
ORDER BY ocr_confidence ASC, created_at DESC ORDER BY ocr_confidence ASC, created_at DESC
@ -1582,6 +1610,8 @@ impl Database {
ocr_status: r.get("ocr_status"), ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"), ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"), ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"), tags: r.get("tags"),
created_at: r.get("created_at"), created_at: r.get("created_at"),
updated_at: r.get("updated_at"), updated_at: r.get("updated_at"),
@ -1594,7 +1624,7 @@ impl Database {
} else { } else {
let rows = sqlx::query( let rows = sqlx::query(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
WHERE ocr_confidence IS NOT NULL AND ocr_confidence < $1 AND user_id = $2 WHERE ocr_confidence IS NOT NULL AND ocr_confidence < $1 AND user_id = $2
ORDER BY ocr_confidence ASC, created_at DESC ORDER BY ocr_confidence ASC, created_at DESC
@ -1620,6 +1650,8 @@ impl Database {
ocr_status: r.get("ocr_status"), ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"), ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"), ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"), tags: r.get("tags"),
created_at: r.get("created_at"), created_at: r.get("created_at"),
updated_at: r.get("updated_at"), updated_at: r.get("updated_at"),
@ -1639,7 +1671,7 @@ impl Database {
let documents = if user_role == crate::models::UserRole::Admin { let documents = if user_role == crate::models::UserRole::Admin {
let rows = sqlx::query( let rows = sqlx::query(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
WHERE ocr_status = 'failed' OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing') WHERE ocr_status = 'failed' OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing')
ORDER BY created_at DESC ORDER BY created_at DESC
@ -1663,6 +1695,8 @@ impl Database {
ocr_status: r.get("ocr_status"), ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"), ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"), ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"), tags: r.get("tags"),
created_at: r.get("created_at"), created_at: r.get("created_at"),
updated_at: r.get("updated_at"), updated_at: r.get("updated_at"),
@ -1675,7 +1709,7 @@ impl Database {
} else { } else {
let rows = sqlx::query( let rows = sqlx::query(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
WHERE (ocr_status = 'failed' OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing')) AND user_id = $1 WHERE (ocr_status = 'failed' OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing')) AND user_id = $1
ORDER BY created_at DESC ORDER BY created_at DESC
@ -1700,6 +1734,8 @@ impl Database {
ocr_status: r.get("ocr_status"), ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"), ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"), ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"), tags: r.get("tags"),
created_at: r.get("created_at"), created_at: r.get("created_at"),
updated_at: r.get("updated_at"), updated_at: r.get("updated_at"),
@ -1719,7 +1755,7 @@ impl Database {
let documents = if user_role == crate::models::UserRole::Admin { let documents = if user_role == crate::models::UserRole::Admin {
let rows = sqlx::query( let rows = sqlx::query(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
WHERE (ocr_confidence IS NOT NULL AND ocr_confidence < $1) WHERE (ocr_confidence IS NOT NULL AND ocr_confidence < $1)
OR ocr_status = 'failed' OR ocr_status = 'failed'
@ -1747,6 +1783,8 @@ impl Database {
ocr_status: r.get("ocr_status"), ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"), ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"), ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"), tags: r.get("tags"),
created_at: r.get("created_at"), created_at: r.get("created_at"),
updated_at: r.get("updated_at"), updated_at: r.get("updated_at"),
@ -1759,7 +1797,7 @@ impl Database {
} else { } else {
let rows = sqlx::query( let rows = sqlx::query(
r#" r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
FROM documents FROM documents
WHERE ((ocr_confidence IS NOT NULL AND ocr_confidence < $1) WHERE ((ocr_confidence IS NOT NULL AND ocr_confidence < $1)
OR ocr_status = 'failed') OR ocr_status = 'failed')
@ -1789,6 +1827,8 @@ impl Database {
ocr_status: r.get("ocr_status"), ocr_status: r.get("ocr_status"),
ocr_error: r.get("ocr_error"), ocr_error: r.get("ocr_error"),
ocr_completed_at: r.get("ocr_completed_at"), ocr_completed_at: r.get("ocr_completed_at"),
ocr_retry_count: r.get("ocr_retry_count"),
ocr_failure_reason: r.get("ocr_failure_reason"),
tags: r.get("tags"), tags: r.get("tags"),
created_at: r.get("created_at"), created_at: r.get("created_at"),
updated_at: r.get("updated_at"), updated_at: r.get("updated_at"),

View File

@ -27,7 +27,21 @@ pub async fn record_ocr_retry(
priority: i32, priority: i32,
queue_id: Option<Uuid>, queue_id: Option<Uuid>,
) -> Result<Uuid> { ) -> Result<Uuid> {
crate::debug_log!("OCR_RETRY_HISTORY",
"document_id" => document_id,
"user_id" => user_id,
"retry_reason" => retry_reason,
"priority" => priority,
"queue_id" => queue_id.unwrap_or_default(),
"message" => "Recording OCR retry attempt"
);
// First get the current OCR status // First get the current OCR status
crate::debug_log!("OCR_RETRY_HISTORY",
"document_id" => document_id,
"message" => "Fetching current OCR status"
);
let current_status = sqlx::query( let current_status = sqlx::query(
r#" r#"
SELECT ocr_status, ocr_failure_reason, ocr_error SELECT ocr_status, ocr_failure_reason, ocr_error
@ -37,19 +51,38 @@ pub async fn record_ocr_retry(
) )
.bind(document_id) .bind(document_id)
.fetch_optional(pool) .fetch_optional(pool)
.await?; .await
.map_err(|e| {
crate::debug_error!("OCR_RETRY_HISTORY", format!("Failed to fetch current status for document {}: {}", document_id, e));
e
})?;
let (previous_status, previous_failure_reason, previous_error) = if let Some(row) = current_status { let (previous_status, previous_failure_reason, previous_error) = if let Some(row) = current_status {
( let status = row.get::<Option<String>, _>("ocr_status");
row.get::<Option<String>, _>("ocr_status"), let failure = row.get::<Option<String>, _>("ocr_failure_reason");
row.get::<Option<String>, _>("ocr_failure_reason"), let error = row.get::<Option<String>, _>("ocr_error");
row.get::<Option<String>, _>("ocr_error"),
) crate::debug_log!("OCR_RETRY_HISTORY",
"document_id" => document_id,
"status" => status.as_deref().unwrap_or("none"),
"failure_reason" => failure.as_deref().unwrap_or("none"),
"has_error" => error.is_some(),
"message" => "Found current document status"
);
(status, failure, error)
} else { } else {
crate::debug_warn!("OCR_RETRY_HISTORY", "Document not found when recording retry history");
(None, None, None) (None, None, None)
}; };
// Insert retry history record // Insert retry history record
crate::debug_log!("OCR_RETRY_HISTORY",
"document_id" => document_id,
"previous_status" => previous_status.as_deref().unwrap_or("none"),
"message" => "Inserting retry history record"
);
let retry_id: Uuid = sqlx::query_scalar( let retry_id: Uuid = sqlx::query_scalar(
r#" r#"
INSERT INTO ocr_retry_history ( INSERT INTO ocr_retry_history (
@ -63,15 +96,25 @@ pub async fn record_ocr_retry(
.bind(document_id) .bind(document_id)
.bind(user_id) .bind(user_id)
.bind(retry_reason) .bind(retry_reason)
.bind(previous_status) .bind(&previous_status)
.bind(previous_failure_reason) .bind(&previous_failure_reason)
.bind(previous_error) .bind(&previous_error)
.bind(priority) .bind(priority)
.bind(queue_id) .bind(queue_id)
.fetch_one(pool) .fetch_one(pool)
.await?; .await
.map_err(|e| {
crate::debug_error!("OCR_RETRY_HISTORY", format!("Failed to insert retry history for document {}: {}", document_id, e));
e
})?;
// Increment retry count // Increment retry count
crate::debug_log!("OCR_RETRY_HISTORY",
"document_id" => document_id,
"retry_id" => retry_id,
"message" => "Incrementing retry count"
);
sqlx::query( sqlx::query(
r#" r#"
UPDATE documents UPDATE documents
@ -82,7 +125,18 @@ pub async fn record_ocr_retry(
) )
.bind(document_id) .bind(document_id)
.execute(pool) .execute(pool)
.await?; .await
.map_err(|e| {
crate::debug_error!("OCR_RETRY_HISTORY", format!("Failed to increment retry count for document {}: {}", document_id, e));
e
})?;
crate::debug_log!("OCR_RETRY_HISTORY",
"document_id" => document_id,
"retry_id" => retry_id,
"user_id" => user_id,
"message" => "Successfully recorded retry history"
);
Ok(retry_id) Ok(retry_id)
} }

View File

@ -12,6 +12,7 @@ pub mod scheduling;
pub mod seed; pub mod seed;
pub mod services; pub mod services;
pub mod swagger; pub mod swagger;
pub mod utils;
pub mod webdav_xml_parser; pub mod webdav_xml_parser;
#[cfg(test)] #[cfg(test)]

View File

@ -129,6 +129,8 @@ pub struct Document {
pub ocr_status: Option<String>, pub ocr_status: Option<String>,
pub ocr_error: Option<String>, pub ocr_error: Option<String>,
pub ocr_completed_at: Option<DateTime<Utc>>, pub ocr_completed_at: Option<DateTime<Utc>>,
pub ocr_retry_count: Option<i32>,
pub ocr_failure_reason: Option<String>,
pub tags: Vec<String>, pub tags: Vec<String>,
pub created_at: DateTime<Utc>, pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>, pub updated_at: DateTime<Utc>,

View File

@ -75,6 +75,13 @@ impl OcrQueueService {
/// Add a document to the OCR queue /// Add a document to the OCR queue
pub async fn enqueue_document(&self, document_id: Uuid, priority: i32, file_size: i64) -> Result<Uuid> { pub async fn enqueue_document(&self, document_id: Uuid, priority: i32, file_size: i64) -> Result<Uuid> {
crate::debug_log!("OCR_QUEUE",
"document_id" => document_id,
"priority" => priority,
"file_size" => file_size,
"message" => "Enqueueing document"
);
let row = sqlx::query( let row = sqlx::query(
r#" r#"
INSERT INTO ocr_queue (document_id, priority, file_size) INSERT INTO ocr_queue (document_id, priority, file_size)
@ -86,10 +93,22 @@ impl OcrQueueService {
.bind(priority) .bind(priority)
.bind(file_size) .bind(file_size)
.fetch_one(&self.pool) .fetch_one(&self.pool)
.await?; .await
.map_err(|e| {
crate::debug_error!("OCR_QUEUE", format!("Failed to insert document {} into queue: {}", document_id, e));
e
})?;
let id: Uuid = row.get("id"); let id: Uuid = row.get("id");
crate::debug_log!("OCR_QUEUE",
"document_id" => document_id,
"queue_id" => id,
"priority" => priority,
"file_size" => file_size,
"message" => "Successfully enqueued document"
);
info!("Enqueued document {} with priority {} for OCR processing", document_id, priority); info!("Enqueued document {} with priority {} for OCR processing", document_id, priority);
Ok(id) Ok(id)
} }

View File

@ -571,28 +571,55 @@ async fn retry_ocr(
auth_user: AuthUser, auth_user: AuthUser,
Path(document_id): Path<uuid::Uuid>, Path(document_id): Path<uuid::Uuid>,
) -> Result<Json<serde_json::Value>, StatusCode> { ) -> Result<Json<serde_json::Value>, StatusCode> {
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"user_id" => auth_user.user.id,
"message" => "Starting OCR retry request"
);
// Check if document exists and belongs to user // Check if document exists and belongs to user
let document = state let document = state
.db .db
.get_document_by_id(document_id, auth_user.user.id, auth_user.user.role) .get_document_by_id(document_id, auth_user.user.id, auth_user.user.role)
.await .await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)? .map_err(|e| {
.ok_or(StatusCode::NOT_FOUND)?; crate::debug_error!("OCR_RETRY", format!("Failed to get document {}: {}", document_id, e));
StatusCode::INTERNAL_SERVER_ERROR
})?
.ok_or_else(|| {
crate::debug_log!("OCR_RETRY", &format!("Document {} not found or access denied for user {}", document_id, auth_user.user.id));
StatusCode::NOT_FOUND
})?;
// Check if document is eligible for OCR retry (failed or not processed) // Check if document is eligible for OCR retry (all documents are now retryable)
let eligible = document.ocr_status.as_ref().map_or(true, |status| { let current_status = document.ocr_status.as_deref().unwrap_or("unknown");
status == "failed" || status == "pending" let eligible = true; // All documents are retryable
});
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"filename" => &document.filename,
"current_status" => current_status,
"eligible" => eligible,
"file_size" => document.file_size,
"retry_count" => document.ocr_retry_count.unwrap_or(0),
"message" => "Checking document eligibility"
);
if !eligible { if !eligible {
crate::debug_log!("OCR_RETRY", &format!("Document {} is not eligible for retry - current status: {}", document_id, current_status));
return Ok(Json(serde_json::json!({ return Ok(Json(serde_json::json!({
"success": false, "success": false,
"message": "Document is not eligible for OCR retry. Current status: {}", "message": format!("Document is not eligible for OCR retry. Current status: {}", current_status),
"current_status": document.ocr_status "current_status": document.ocr_status
}))); })));
} }
// Reset document OCR fields // Reset document OCR fields
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"message" => "Resetting document OCR fields"
);
let reset_result = sqlx::query( let reset_result = sqlx::query(
r#" r#"
UPDATE documents UPDATE documents
@ -611,12 +638,22 @@ async fn retry_ocr(
.bind(document_id) .bind(document_id)
.execute(state.db.get_pool()) .execute(state.db.get_pool())
.await .await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; .map_err(|e| {
crate::debug_error!("OCR_RETRY", format!("Failed to reset OCR fields for document {}: {}", document_id, e));
StatusCode::INTERNAL_SERVER_ERROR
})?;
if reset_result.rows_affected() == 0 { if reset_result.rows_affected() == 0 {
crate::debug_error!("OCR_RETRY", format!("No rows affected when resetting OCR fields for document {}", document_id));
return Err(StatusCode::NOT_FOUND); return Err(StatusCode::NOT_FOUND);
} }
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"rows_affected" => reset_result.rows_affected(),
"message" => "Successfully reset OCR fields"
);
// Calculate priority based on file size (higher priority for retries) // Calculate priority based on file size (higher priority for retries)
let priority = match document.file_size { let priority = match document.file_size {
0..=1048576 => 15, // <= 1MB: highest priority (boosted for retry) 0..=1048576 => 15, // <= 1MB: highest priority (boosted for retry)
@ -626,10 +663,38 @@ async fn retry_ocr(
_ => 6, // > 50MB: lowest priority _ => 6, // > 50MB: lowest priority
}; };
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"file_size" => document.file_size,
"priority" => priority,
"message" => "Calculated retry priority"
);
// Add to OCR queue with detailed logging // Add to OCR queue with detailed logging
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"priority" => priority,
"file_size" => document.file_size,
"message" => "Enqueueing document for OCR processing"
);
match state.queue_service.enqueue_document(document_id, priority, document.file_size).await { match state.queue_service.enqueue_document(document_id, priority, document.file_size).await {
Ok(queue_id) => { Ok(queue_id) => {
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"queue_id" => queue_id,
"priority" => priority,
"message" => "Successfully enqueued document"
);
// Record retry history // Record retry history
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"user_id" => auth_user.user.id,
"queue_id" => queue_id,
"message" => "Recording retry history"
);
if let Err(e) = crate::db::ocr_retry::record_ocr_retry( if let Err(e) = crate::db::ocr_retry::record_ocr_retry(
state.db.get_pool(), state.db.get_pool(),
document_id, document_id,
@ -638,9 +703,25 @@ async fn retry_ocr(
priority, priority,
Some(queue_id), Some(queue_id),
).await { ).await {
crate::debug_error!("OCR_RETRY", format!("Failed to record retry history for document {}: {}", document_id, e));
tracing::warn!("Failed to record retry history for document {}: {}", document_id, e); tracing::warn!("Failed to record retry history for document {}: {}", document_id, e);
} else {
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"queue_id" => queue_id,
"message" => "Successfully recorded retry history"
);
} }
crate::debug_log!("OCR_RETRY",
"document_id" => document_id,
"filename" => &document.filename,
"queue_id" => queue_id,
"priority" => priority,
"file_size" => document.file_size,
"message" => "OCR retry process completed successfully"
);
tracing::info!( tracing::info!(
"OCR retry queued for document {} ({}): queue_id={}, priority={}, size={}", "OCR retry queued for document {} ({}): queue_id={}, priority={}, size={}",
document_id, document.filename, queue_id, priority, document.file_size document_id, document.filename, queue_id, priority, document.file_size
@ -656,6 +737,7 @@ async fn retry_ocr(
}))) })))
} }
Err(e) => { Err(e) => {
crate::debug_error!("OCR_RETRY", format!("Failed to enqueue document {}: {}", document_id, e));
tracing::error!("Failed to queue OCR retry for document {}: {}", document_id, e); tracing::error!("Failed to queue OCR retry for document {}: {}", document_id, e);
Err(StatusCode::INTERNAL_SERVER_ERROR) Err(StatusCode::INTERNAL_SERVER_ERROR)
} }

View File

@ -101,62 +101,120 @@ pub async fn bulk_retry_ocr(
auth_user: AuthUser, auth_user: AuthUser,
Json(request): Json<BulkOcrRetryRequest>, Json(request): Json<BulkOcrRetryRequest>,
) -> Result<Json<BulkOcrRetryResponse>, StatusCode> { ) -> Result<Json<BulkOcrRetryResponse>, StatusCode> {
crate::debug_log!("BULK_OCR_RETRY",
"user_id" => auth_user.user.id,
"mode" => format!("{:?}", request.mode),
"preview_only" => request.preview_only.unwrap_or(false),
"priority_override" => request.priority_override.unwrap_or(-1),
"message" => "Starting bulk OCR retry request"
);
info!("Bulk OCR retry requested by user {} with mode: {:?}", auth_user.user.id, request.mode); info!("Bulk OCR retry requested by user {} with mode: {:?}", auth_user.user.id, request.mode);
let preview_only = request.preview_only.unwrap_or(false); let preview_only = request.preview_only.unwrap_or(false);
// Build query based on selection mode // Build query based on selection mode
crate::debug_log!("BULK_OCR_RETRY", "Building document query based on selection mode");
let documents = match request.mode { let documents = match request.mode {
SelectionMode::All => { SelectionMode::All => {
crate::debug_log!("BULK_OCR_RETRY", "Fetching all documents for retry");
get_all_failed_ocr_documents(&state, &auth_user).await? get_all_failed_ocr_documents(&state, &auth_user).await?
} }
SelectionMode::Specific => { SelectionMode::Specific => {
if let Some(ids) = request.document_ids { if let Some(ids) = &request.document_ids {
get_specific_documents(&state, &auth_user, ids).await? crate::debug_log!("BULK_OCR_RETRY",
"document_count" => ids.len(),
"message" => "Fetching specific documents"
);
get_specific_documents(&state, &auth_user, ids.clone()).await?
} else { } else {
crate::debug_error!("BULK_OCR_RETRY", "Specific mode requested but no document IDs provided");
return Err(StatusCode::BAD_REQUEST); return Err(StatusCode::BAD_REQUEST);
} }
} }
SelectionMode::Filter => { SelectionMode::Filter => {
if let Some(filter) = request.filter { if let Some(filter) = &request.filter {
get_filtered_documents(&state, &auth_user, filter).await? crate::debug_log!("BULK_OCR_RETRY",
"filter_mime_types" => filter.mime_types.as_ref().map(|v| v.len()).unwrap_or(0),
"filter_failure_reasons" => filter.failure_reasons.as_ref().map(|v| v.len()).unwrap_or(0),
"message" => "Fetching filtered documents"
);
get_filtered_documents(&state, &auth_user, filter.clone()).await?
} else { } else {
crate::debug_error!("BULK_OCR_RETRY", "Filter mode requested but no filter provided");
return Err(StatusCode::BAD_REQUEST); return Err(StatusCode::BAD_REQUEST);
} }
} }
}; };
let matched_count = documents.len(); let matched_count = documents.len();
crate::debug_log!("BULK_OCR_RETRY",
"matched_count" => matched_count,
"message" => "Document query completed"
);
let mut retry_documents = Vec::new(); let mut retry_documents = Vec::new();
let mut queued_count = 0; let mut queued_count = 0;
let mut total_estimated_time = 0.0; let mut total_estimated_time = 0.0;
for doc in documents { for (index, doc) in documents.iter().enumerate() {
let priority = calculate_priority(doc.file_size, request.priority_override); let priority = calculate_priority(doc.file_size, request.priority_override);
crate::debug_log!("BULK_OCR_RETRY",
"index" => index + 1,
"total" => matched_count,
"document_id" => doc.id,
"filename" => &doc.filename,
"file_size" => doc.file_size,
"priority" => priority,
"failure_reason" => doc.ocr_failure_reason.as_deref().unwrap_or("none"),
"message" => "Processing document"
);
let mut doc_info = OcrRetryDocumentInfo { let mut doc_info = OcrRetryDocumentInfo {
id: doc.id, id: doc.id,
filename: doc.filename.clone(), filename: doc.filename.clone(),
file_size: doc.file_size, file_size: doc.file_size,
mime_type: doc.mime_type, mime_type: doc.mime_type.clone(),
ocr_failure_reason: doc.ocr_failure_reason, ocr_failure_reason: doc.ocr_failure_reason.clone(),
priority, priority,
queue_id: None, queue_id: None,
}; };
if !preview_only { if !preview_only {
// Reset OCR fields // Reset OCR fields
crate::debug_log!("BULK_OCR_RETRY",
"document_id" => doc.id,
"message" => "Resetting OCR status for document"
);
if let Err(e) = reset_document_ocr_status(&state, doc.id).await { if let Err(e) = reset_document_ocr_status(&state, doc.id).await {
crate::debug_error!("BULK_OCR_RETRY", format!("Failed to reset OCR status for document {}: {}", doc.id, e));
warn!("Failed to reset OCR status for document {}: {}", doc.id, e); warn!("Failed to reset OCR status for document {}: {}", doc.id, e);
continue; continue;
} }
// Queue for OCR // Queue for OCR
crate::debug_log!("BULK_OCR_RETRY",
"document_id" => doc.id,
"priority" => priority,
"file_size" => doc.file_size,
"message" => "Enqueueing document for OCR"
);
match state.queue_service.enqueue_document(doc.id, priority, doc.file_size).await { match state.queue_service.enqueue_document(doc.id, priority, doc.file_size).await {
Ok(queue_id) => { Ok(queue_id) => {
doc_info.queue_id = Some(queue_id); doc_info.queue_id = Some(queue_id);
queued_count += 1; queued_count += 1;
crate::debug_log!("BULK_OCR_RETRY",
"document_id" => doc.id,
"queue_id" => queue_id,
"priority" => priority,
"queued_count" => queued_count,
"message" => "Successfully enqueued document"
);
// Record retry history // Record retry history
let retry_reason = match &request.mode { let retry_reason = match &request.mode {
SelectionMode::All => "bulk_retry_all", SelectionMode::All => "bulk_retry_all",
@ -164,6 +222,13 @@ pub async fn bulk_retry_ocr(
SelectionMode::Filter => "bulk_retry_filtered", SelectionMode::Filter => "bulk_retry_filtered",
}; };
crate::debug_log!("BULK_OCR_RETRY",
"document_id" => doc.id,
"retry_reason" => retry_reason,
"queue_id" => queue_id,
"message" => "Recording retry history"
);
if let Err(e) = crate::db::ocr_retry::record_ocr_retry( if let Err(e) = crate::db::ocr_retry::record_ocr_retry(
state.db.get_pool(), state.db.get_pool(),
doc.id, doc.id,
@ -172,12 +237,20 @@ pub async fn bulk_retry_ocr(
priority, priority,
Some(queue_id), Some(queue_id),
).await { ).await {
crate::debug_error!("BULK_OCR_RETRY", format!("Failed to record retry history for document {}: {}", doc.id, e));
warn!("Failed to record retry history for document {}: {}", doc.id, e); warn!("Failed to record retry history for document {}: {}", doc.id, e);
} else {
crate::debug_log!("BULK_OCR_RETRY",
"document_id" => doc.id,
"queue_id" => queue_id,
"message" => "Successfully recorded retry history"
);
} }
info!("Queued document {} for OCR retry with priority {}", doc.id, priority); info!("Queued document {} for OCR retry with priority {}", doc.id, priority);
} }
Err(e) => { Err(e) => {
crate::debug_error!("BULK_OCR_RETRY", format!("Failed to enqueue document {}: {}", doc.id, e));
error!("Failed to queue document {} for OCR retry: {}", doc.id, e); error!("Failed to queue document {} for OCR retry: {}", doc.id, e);
} }
} }
@ -188,6 +261,15 @@ pub async fn bulk_retry_ocr(
retry_documents.push(doc_info); retry_documents.push(doc_info);
} }
crate::debug_log!("BULK_OCR_RETRY",
"matched_count" => matched_count,
"queued_count" => queued_count,
"preview_only" => preview_only,
"estimated_time_minutes" => (total_estimated_time / 60.0) as i32,
"user_id" => auth_user.user.id,
"message" => "Bulk retry operation completed"
);
let response = BulkOcrRetryResponse { let response = BulkOcrRetryResponse {
success: true, success: true,
message: if preview_only { message: if preview_only {
@ -303,8 +385,7 @@ pub async fn get_ocr_retry_stats(
MIN(created_at) as first_occurrence, MIN(created_at) as first_occurrence,
MAX(updated_at) as last_occurrence MAX(updated_at) as last_occurrence
FROM documents FROM documents
WHERE ocr_status = 'failed' WHERE ($1::uuid IS NULL OR user_id = $1)
AND ($1::uuid IS NULL OR user_id = $1)
GROUP BY ocr_failure_reason GROUP BY ocr_failure_reason
ORDER BY count DESC ORDER BY count DESC
"# "#
@ -322,8 +403,7 @@ pub async fn get_ocr_retry_stats(
COUNT(*) as count, COUNT(*) as count,
AVG(file_size) as avg_file_size AVG(file_size) as avg_file_size
FROM documents FROM documents
WHERE ocr_status = 'failed' WHERE ($1::uuid IS NULL OR user_id = $1)
AND ($1::uuid IS NULL OR user_id = $1)
GROUP BY mime_type GROUP BY mime_type
ORDER BY count DESC ORDER BY count DESC
"# "#
@ -441,8 +521,7 @@ async fn get_all_failed_ocr_documents(
r#" r#"
SELECT id, filename, file_size, mime_type, ocr_failure_reason SELECT id, filename, file_size, mime_type, ocr_failure_reason
FROM documents FROM documents
WHERE ocr_status = 'failed' WHERE ($1::uuid IS NULL OR user_id = $1)
AND ($1::uuid IS NULL OR user_id = $1)
ORDER BY created_at DESC ORDER BY created_at DESC
"# "#
) )
@ -465,12 +544,33 @@ async fn get_specific_documents(
Some(auth_user.user.id) Some(auth_user.user.id)
}; };
// First let's debug what documents we're looking for and their current status
for doc_id in &document_ids {
if let Ok(Some(row)) = sqlx::query("SELECT id, filename, ocr_status FROM documents WHERE id = $1")
.bind(doc_id)
.fetch_optional(state.db.get_pool())
.await {
let status: Option<String> = row.get("ocr_status");
let filename: String = row.get("filename");
crate::debug_log!("BULK_OCR_RETRY",
"requested_document_id" => doc_id,
"filename" => &filename,
"current_ocr_status" => status.as_deref().unwrap_or("NULL"),
"message" => "Document found in database"
);
} else {
crate::debug_log!("BULK_OCR_RETRY",
"requested_document_id" => doc_id,
"message" => "Document NOT found in database"
);
}
}
let documents = sqlx::query_as::<_, DocumentInfo>( let documents = sqlx::query_as::<_, DocumentInfo>(
r#" r#"
SELECT id, filename, file_size, mime_type, ocr_failure_reason SELECT id, filename, file_size, mime_type, ocr_failure_reason
FROM documents FROM documents
WHERE id = ANY($1) WHERE id = ANY($1)
AND ocr_status = 'failed'
AND ($2::uuid IS NULL OR user_id = $2) AND ($2::uuid IS NULL OR user_id = $2)
"# "#
) )
@ -489,7 +589,7 @@ async fn get_filtered_documents(
filter: OcrRetryFilter filter: OcrRetryFilter
) -> Result<Vec<DocumentInfo>, StatusCode> { ) -> Result<Vec<DocumentInfo>, StatusCode> {
let mut query = sqlx::QueryBuilder::new( let mut query = sqlx::QueryBuilder::new(
"SELECT id, filename, file_size, mime_type, ocr_failure_reason FROM documents WHERE ocr_status = 'failed'" "SELECT id, filename, file_size, mime_type, ocr_failure_reason FROM documents WHERE 1=1"
); );
// User filter // User filter
@ -585,6 +685,7 @@ async fn reset_document_ocr_status(state: &Arc<AppState>, document_id: Uuid) ->
ocr_text = NULL, ocr_text = NULL,
ocr_error = NULL, ocr_error = NULL,
ocr_failure_reason = NULL, ocr_failure_reason = NULL,
ocr_retry_count = NULL,
ocr_confidence = NULL, ocr_confidence = NULL,
ocr_word_count = NULL, ocr_word_count = NULL,
ocr_processing_time_ms = NULL, ocr_processing_time_ms = NULL,

View File

@ -177,6 +177,8 @@ impl FileService {
ocr_status: Some("pending".to_string()), ocr_status: Some("pending".to_string()),
ocr_error: None, ocr_error: None,
ocr_completed_at: None, ocr_completed_at: None,
ocr_retry_count: None,
ocr_failure_reason: None,
tags: Vec::new(), tags: Vec::new(),
created_at: Utc::now(), created_at: Utc::now(),
updated_at: Utc::now(), updated_at: Utc::now(),

View File

@ -52,6 +52,8 @@ mod tests {
original_created_at: None, original_created_at: None,
original_modified_at: None, original_modified_at: None,
source_metadata: None, source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
} }
} }

View File

@ -103,6 +103,8 @@ mod tests {
original_created_at: Some(DateTime::parse_from_rfc3339("2023-12-01T10:00:00Z").unwrap().with_timezone(&Utc)), original_created_at: Some(DateTime::parse_from_rfc3339("2023-12-01T10:00:00Z").unwrap().with_timezone(&Utc)),
original_modified_at: Some(DateTime::parse_from_rfc3339("2023-12-15T15:30:00Z").unwrap().with_timezone(&Utc)), original_modified_at: Some(DateTime::parse_from_rfc3339("2023-12-15T15:30:00Z").unwrap().with_timezone(&Utc)),
source_metadata: Some(serde_json::json!({"permissions": "644", "owner": "user1"})), source_metadata: Some(serde_json::json!({"permissions": "644", "owner": "user1"})),
ocr_retry_count: None,
ocr_failure_reason: None,
}; };
// Convert to DocumentResponse // Convert to DocumentResponse

View File

@ -63,6 +63,8 @@ mod document_routes_deletion_tests {
original_created_at: None, original_created_at: None,
original_modified_at: None, original_modified_at: None,
source_metadata: None, source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
} }
} }
@ -400,6 +402,8 @@ mod document_routes_deletion_tests {
original_created_at: None, original_created_at: None,
original_modified_at: None, original_modified_at: None,
source_metadata: None, source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
} }
} }

View File

@ -29,6 +29,8 @@ fn create_test_document(user_id: Uuid) -> Document {
original_created_at: None, original_created_at: None,
original_modified_at: None, original_modified_at: None,
source_metadata: None, source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
} }
} }
@ -57,6 +59,8 @@ fn create_test_document_without_ocr(user_id: Uuid) -> Document {
original_created_at: None, original_created_at: None,
original_modified_at: None, original_modified_at: None,
source_metadata: None, source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
} }
} }
@ -85,6 +89,8 @@ fn create_test_document_with_ocr_error(user_id: Uuid) -> Document {
original_created_at: None, original_created_at: None,
original_modified_at: None, original_modified_at: None,
source_metadata: None, source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
} }
} }
@ -1564,6 +1570,8 @@ mod deletion_error_handling_tests {
original_created_at: None, original_created_at: None,
original_modified_at: None, original_modified_at: None,
source_metadata: None, source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
} }
} }

View File

@ -942,6 +942,8 @@ mod tests {
original_created_at: None, original_created_at: None,
original_modified_at: None, original_modified_at: None,
source_metadata: None, source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
}; };
db.create_document(document).await.unwrap(); db.create_document(document).await.unwrap();

View File

@ -195,6 +195,8 @@ mod file_deletion_tests {
original_created_at: None, original_created_at: None,
original_modified_at: None, original_modified_at: None,
source_metadata: None, source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
}; };
( (
@ -333,6 +335,8 @@ mod file_deletion_tests {
original_created_at: None, original_created_at: None,
original_modified_at: None, original_modified_at: None,
source_metadata: None, source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
}; };
// Try to delete nonexistent files (should not fail) // Try to delete nonexistent files (should not fail)
@ -387,6 +391,8 @@ mod file_deletion_tests {
original_created_at: None, original_created_at: None,
original_modified_at: None, original_modified_at: None,
source_metadata: None, source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
}; };
// Verify files exist // Verify files exist
@ -445,6 +451,8 @@ mod file_deletion_tests {
original_created_at: None, original_created_at: None,
original_modified_at: None, original_modified_at: None,
source_metadata: None, source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
}; };
// Verify files exist // Verify files exist
@ -494,6 +502,8 @@ mod file_deletion_tests {
original_created_at: None, original_created_at: None,
original_modified_at: None, original_modified_at: None,
source_metadata: None, source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
}; };
// Verify file exists // Verify file exists

View File

@ -84,6 +84,8 @@ mod tests {
original_created_at: None, original_created_at: None,
original_modified_at: None, original_modified_at: None,
source_metadata: None, source_metadata: None,
ocr_retry_count: None,
ocr_failure_reason: None,
}; };
sqlx::query("INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)") sqlx::query("INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)")

85
src/utils/debug.rs Normal file
View File

@ -0,0 +1,85 @@
use std::env;
use tracing::{debug, info, warn, error};
/// Reports whether verbose debug output has been requested through the
/// `DEBUG` environment variable.
///
/// Returns `false` when the variable cannot be read (unset or not valid
/// Unicode), is empty, is exactly `"0"`, or spells `"false"` in any letter
/// case. Any other value turns debug output on. The variable is looked up on
/// every call.
pub fn is_debug_enabled() -> bool {
    match env::var("DEBUG") {
        Ok(setting) => {
            // Only these three spellings count as "off"; everything else is "on".
            let switched_off =
                setting.is_empty() || setting == "0" || setting.eq_ignore_ascii_case("false");
            !switched_off
        }
        Err(_) => false,
    }
}
/// Emits `message` at `info` level with the debug prefix.
///
/// Does nothing unless `is_debug_enabled()` returns `true`.
pub fn debug_log(message: &str) {
    if !is_debug_enabled() {
        return;
    }
    info!("🐛 DEBUG: {}", message);
}
/// Emits `message` at `info` level, tagged with `context` in square brackets
/// after the debug prefix.
///
/// Does nothing unless `is_debug_enabled()` returns `true`.
pub fn debug_log_context(context: &str, message: &str) {
    if !is_debug_enabled() {
        return;
    }
    info!("🐛 DEBUG [{}]: {}", context, message);
}
/// Emits a list of key/value pairs at `info` level, tagged with `context`.
///
/// Each pair is rendered as `key=value` using the value's `Display`
/// implementation, and the pairs are joined with `", "` in slice order.
/// Does nothing (and formats nothing) unless `is_debug_enabled()` returns
/// `true`.
pub fn debug_log_structured(context: &str, key_values: &[(&str, &dyn std::fmt::Display)]) {
    if !is_debug_enabled() {
        return;
    }

    let rendered = key_values
        .iter()
        .map(|(key, value)| format!("{}={}", key, value))
        .collect::<Vec<_>>()
        .join(", ");

    info!("🐛 DEBUG [{}]: {}", context, rendered);
}
/// Logs `error` at `error` level, tagged with `context`.
///
/// The event is always emitted; the debug flag only selects the prefix. With
/// debug output disabled the plain `[context]: error` form is used, otherwise
/// the debug-marked form.
pub fn debug_error(context: &str, error: &dyn std::fmt::Display) {
    if !is_debug_enabled() {
        error!("[{}]: {}", context, error);
        return;
    }
    error!("🐛 DEBUG ERROR [{}]: {}", context, error);
}
/// Logs `message` at `warn` level, tagged with `context`.
///
/// The event is always emitted; the debug flag only selects the prefix. With
/// debug output disabled the plain `[context]: message` form is used,
/// otherwise the debug-marked form.
pub fn debug_warn(context: &str, message: &str) {
    if !is_debug_enabled() {
        warn!("[{}]: {}", context, message);
        return;
    }
    warn!("🐛 DEBUG WARN [{}]: {}", context, message);
}
/// Emit a debug log entry through the helpers in `utils::debug`.
///
/// Three call shapes are accepted:
/// - `debug_log!(msg)` forwards to `debug_log`.
/// - `debug_log!(context, msg)` forwards to `debug_log_context`.
/// - `debug_log!(context, "key" => value, ...)` (trailing comma allowed)
///   forwards to `debug_log_structured`, passing each value by reference.
///
/// Paths use `$crate` rather than `crate` so the expansion always resolves to
/// the helpers of the crate that defines this macro, even when the exported
/// macro is invoked from a different crate.
#[macro_export]
macro_rules! debug_log {
    ($msg:expr) => {
        $crate::utils::debug::debug_log($msg)
    };
    ($context:expr, $msg:expr) => {
        $crate::utils::debug::debug_log_context($context, $msg)
    };
    ($context:expr, $($key:expr => $value:expr),+ $(,)?) => {
        $crate::utils::debug::debug_log_structured($context, &[$(($key, &$value)),+])
    };
}
/// Log an error through `utils::debug::debug_error`.
///
/// `debug_error!(context, error)` passes `error` by reference, so any
/// expression whose type implements `Display` can be supplied directly.
///
/// The path uses `$crate` rather than `crate` so the expansion resolves to the
/// defining crate's helper even when the exported macro is invoked from a
/// different crate.
#[macro_export]
macro_rules! debug_error {
    ($context:expr, $error:expr) => {
        $crate::utils::debug::debug_error($context, &$error)
    };
}
/// Log a warning through `utils::debug::debug_warn`.
///
/// `debug_warn!(context, msg)` forwards both arguments unchanged; the helper
/// takes them as `&str`.
///
/// The path uses `$crate` rather than `crate` so the expansion resolves to the
/// defining crate's helper even when the exported macro is invoked from a
/// different crate.
#[macro_export]
macro_rules! debug_warn {
    ($context:expr, $msg:expr) => {
        $crate::utils::debug::debug_warn($context, $msg)
    };
}

1
src/utils/mod.rs Normal file
View File

@ -0,0 +1 @@
pub mod debug;

View File

@ -109,6 +109,13 @@ async fn debug_ocr_content() {
.await .await
.expect("Upload should work"); .expect("Upload should work");
println!("📤 Document 1 upload response status: {}", doc1_response.status());
if !doc1_response.status().is_success() {
let status = doc1_response.status();
let error_text = doc1_response.text().await.unwrap_or_else(|_| "No response body".to_string());
panic!("Document 1 upload failed with status {}: {}", status, error_text);
}
let doc2_response = client let doc2_response = client
.post(&format!("{}/api/documents", get_base_url())) .post(&format!("{}/api/documents", get_base_url()))
.header("Authorization", format!("Bearer {}", token)) .header("Authorization", format!("Bearer {}", token))
@ -117,8 +124,15 @@ async fn debug_ocr_content() {
.await .await
.expect("Upload should work"); .expect("Upload should work");
let doc1: DocumentResponse = doc1_response.json().await.expect("Valid JSON"); println!("📤 Document 2 upload response status: {}", doc2_response.status());
let doc2: DocumentResponse = doc2_response.json().await.expect("Valid JSON"); if !doc2_response.status().is_success() {
let status = doc2_response.status();
let error_text = doc2_response.text().await.unwrap_or_else(|_| "No response body".to_string());
panic!("Document 2 upload failed with status {}: {}", status, error_text);
}
let doc1: DocumentResponse = doc1_response.json().await.expect("Valid JSON for doc1");
let doc2: DocumentResponse = doc2_response.json().await.expect("Valid JSON for doc2");
println!("📄 Document 1: {}", doc1.id); println!("📄 Document 1: {}", doc1.id);
println!("📄 Document 2: {}", doc2.id); println!("📄 Document 2: {}", doc2.id);

View File

@ -36,6 +36,8 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: String) -> Doc
ocr_status: Some("pending".to_string()), ocr_status: Some("pending".to_string()),
ocr_error: None, ocr_error: None,
ocr_completed_at: None, ocr_completed_at: None,
ocr_retry_count: None,
ocr_failure_reason: None,
tags: Vec::new(), tags: Vec::new(),
created_at: Utc::now(), created_at: Utc::now(),
updated_at: Utc::now(), updated_at: Utc::now(),

View File

@ -54,6 +54,8 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: Option<String>
ocr_status: Some("pending".to_string()), ocr_status: Some("pending".to_string()),
ocr_error: None, ocr_error: None,
ocr_completed_at: None, ocr_completed_at: None,
ocr_retry_count: None,
ocr_failure_reason: None,
tags: Vec::new(), tags: Vec::new(),
created_at: Utc::now(), created_at: Utc::now(),
updated_at: Utc::now(), updated_at: Utc::now(),

View File

@ -351,6 +351,8 @@ async fn test_create_ignored_file_from_document() -> Result<()> {
ocr_status: Some("completed".to_string()), ocr_status: Some("completed".to_string()),
ocr_error: None, ocr_error: None,
ocr_completed_at: Some(chrono::Utc::now()), ocr_completed_at: Some(chrono::Utc::now()),
ocr_retry_count: None,
ocr_failure_reason: None,
tags: vec!["test".to_string()], tags: vec!["test".to_string()],
created_at: chrono::Utc::now(), created_at: chrono::Utc::now(),
updated_at: chrono::Utc::now(), updated_at: chrono::Utc::now(),

View File

@ -54,6 +54,8 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: String) -> Doc
ocr_status: Some("pending".to_string()), ocr_status: Some("pending".to_string()),
ocr_error: None, ocr_error: None,
ocr_completed_at: None, ocr_completed_at: None,
ocr_retry_count: None,
ocr_failure_reason: None,
tags: Vec::new(), tags: Vec::new(),
created_at: Utc::now(), created_at: Utc::now(),
updated_at: Utc::now(), updated_at: Utc::now(),

View File

@ -54,6 +54,8 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: String) -> Doc
ocr_status: Some("pending".to_string()), ocr_status: Some("pending".to_string()),
ocr_error: None, ocr_error: None,
ocr_completed_at: None, ocr_completed_at: None,
ocr_retry_count: None,
ocr_failure_reason: None,
tags: Vec::new(), tags: Vec::new(),
created_at: Utc::now(), created_at: Utc::now(),
updated_at: Utc::now(), updated_at: Utc::now(),

View File

@ -20,6 +20,8 @@ fn test_document_response_conversion_with_ocr() {
ocr_status: Some("completed".to_string()), ocr_status: Some("completed".to_string()),
ocr_error: None, ocr_error: None,
ocr_completed_at: Some(Utc::now()), ocr_completed_at: Some(Utc::now()),
ocr_retry_count: None,
ocr_failure_reason: None,
tags: vec!["test".to_string()], tags: vec!["test".to_string()],
created_at: Utc::now(), created_at: Utc::now(),
updated_at: Utc::now(), updated_at: Utc::now(),
@ -57,6 +59,8 @@ fn test_document_response_conversion_without_ocr() {
ocr_status: Some("pending".to_string()), ocr_status: Some("pending".to_string()),
ocr_error: None, ocr_error: None,
ocr_completed_at: None, ocr_completed_at: None,
ocr_retry_count: None,
ocr_failure_reason: None,
tags: vec![], tags: vec![],
created_at: Utc::now(), created_at: Utc::now(),
updated_at: Utc::now(), updated_at: Utc::now(),