Merge pull request #89 from readur/feat/setup-debug-functionality
feat(debug): setup debug functionality
This commit is contained in:
commit
9034bf5d70
|
|
@ -19,6 +19,7 @@ import {
|
|||
Dialog,
|
||||
DialogTitle,
|
||||
DialogContent,
|
||||
DialogContentText,
|
||||
DialogActions,
|
||||
Pagination,
|
||||
CircularProgress,
|
||||
|
|
@ -233,6 +234,7 @@ const DocumentManagementPage: React.FC = () => {
|
|||
const [retryHistoryModalOpen, setRetryHistoryModalOpen] = useState(false);
|
||||
const [selectedDocumentForHistory, setSelectedDocumentForHistory] = useState<string | null>(null);
|
||||
const [selectedDocumentIds, setSelectedDocumentIds] = useState<string[]>([]);
|
||||
const [confirmRetryAllOpen, setConfirmRetryAllOpen] = useState(false);
|
||||
|
||||
const fetchFailedDocuments = async () => {
|
||||
try {
|
||||
|
|
@ -358,6 +360,42 @@ const DocumentManagementPage: React.FC = () => {
|
|||
}
|
||||
};
|
||||
|
||||
/**
 * Queue an OCR retry for every document.
 *
 * Calls documentService.bulkRetryOcr with mode 'all' and preview_only false.
 * If the response reports queued_count > 0, a success snackbar shows the
 * queued count and the estimated minutes (rounded up with Math.ceil), and
 * refreshCurrentTab() is awaited. Otherwise an info snackbar reports that
 * nothing was found to retry. Any thrown error is logged to the console and
 * surfaced as an error snackbar; it is not rethrown.
 */
const handleRetryAllDocuments = async () => {
|
||||
try {
|
||||
// Flag the bulk retry as in progress; cleared again in the finally block.
setRetryingAll(true);
|
||||
const response = await documentService.bulkRetryOcr({
|
||||
mode: 'all',
|
||||
preview_only: false
|
||||
});
|
||||
|
||||
if (response.data.queued_count > 0) {
|
||||
setSnackbar({
|
||||
open: true,
|
||||
message: `Successfully queued ${response.data.queued_count} documents for OCR retry. Estimated processing time: ${Math.ceil(response.data.estimated_total_time_minutes)} minutes.`,
|
||||
severity: 'success'
|
||||
});
|
||||
|
||||
// Refresh via refreshCurrentTab() now that retries have been queued.
// NOTE(review): the retry covers all documents, but judging by its name this
// call refreshes only the current tab - confirm that is intended.
await refreshCurrentTab();
|
||||
} else {
|
||||
setSnackbar({
|
||||
open: true,
|
||||
message: 'No documents found to retry',
|
||||
severity: 'info'
|
||||
});
|
||||
}
|
||||
} catch (error) {
|
||||
// Log the error, then show a generic failure message to the user.
console.error('Error retrying all documents:', error);
|
||||
setSnackbar({
|
||||
open: true,
|
||||
message: 'Failed to retry documents. Please try again.',
|
||||
severity: 'error'
|
||||
});
|
||||
} finally {
|
||||
// Runs on both success and failure so the in-progress flag is always reset.
setRetryingAll(false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleRetryAllFailed = async () => {
|
||||
try {
|
||||
setRetryingAll(true);
|
||||
|
|
@ -735,14 +773,33 @@ const DocumentManagementPage: React.FC = () => {
|
|||
<Typography variant="h4" component="h1">
|
||||
Document Management
|
||||
</Typography>
|
||||
<Button
|
||||
variant="outlined"
|
||||
startIcon={<RefreshIcon />}
|
||||
onClick={refreshCurrentTab}
|
||||
disabled={loading || duplicatesLoading || retryingAll}
|
||||
>
|
||||
Refresh
|
||||
</Button>
|
||||
<Box display="flex" gap={2}>
|
||||
<Button
|
||||
variant="contained"
|
||||
color="primary"
|
||||
size="large"
|
||||
startIcon={retryingAll ? <CircularProgress size={20} color="inherit" /> : <RefreshIcon />}
|
||||
onClick={() => setConfirmRetryAllOpen(true)}
|
||||
disabled={retryingAll}
|
||||
sx={{
|
||||
minWidth: 200,
|
||||
boxShadow: 3,
|
||||
'&:hover': {
|
||||
boxShadow: 6,
|
||||
}
|
||||
}}
|
||||
>
|
||||
{retryingAll ? 'Retrying All...' : 'Retry All Documents'}
|
||||
</Button>
|
||||
<Button
|
||||
variant="outlined"
|
||||
startIcon={<RefreshIcon />}
|
||||
onClick={refreshCurrentTab}
|
||||
disabled={loading || duplicatesLoading || retryingAll}
|
||||
>
|
||||
Refresh
|
||||
</Button>
|
||||
</Box>
|
||||
</Box>
|
||||
|
||||
<Paper sx={{ mb: 3, borderRadius: 2, overflow: 'hidden' }}>
|
||||
|
|
@ -825,7 +882,7 @@ const DocumentManagementPage: React.FC = () => {
|
|||
size="small"
|
||||
fullWidth
|
||||
>
|
||||
{retryingAll ? 'Retrying All...' : 'Retry All Failed OCR'}
|
||||
{retryingAll ? 'Retrying...' : 'Retry Failed Only'}
|
||||
</Button>
|
||||
</Box>
|
||||
</CardContent>
|
||||
|
|
@ -2219,6 +2276,43 @@ const DocumentManagementPage: React.FC = () => {
|
|||
</DialogActions>
|
||||
</Dialog>
|
||||
|
||||
{/* Confirm Retry All Documents Dialog */}
|
||||
<Dialog open={confirmRetryAllOpen} onClose={() => setConfirmRetryAllOpen(false)}>
|
||||
<DialogTitle>
|
||||
<Box display="flex" alignItems="center">
|
||||
<RefreshIcon sx={{ mr: 1, color: 'primary.main' }} />
|
||||
Retry All Documents
|
||||
</Box>
|
||||
</DialogTitle>
|
||||
<DialogContent>
|
||||
<DialogContentText>
|
||||
This will retry OCR processing for <strong>all documents</strong> in your library, regardless of their current OCR status.
|
||||
This includes documents that have already been successfully processed.
|
||||
</DialogContentText>
|
||||
<Box sx={{ mt: 2, p: 2, bgcolor: 'warning.light', borderRadius: 1 }}>
|
||||
<Typography variant="body2" color="warning.dark">
|
||||
<strong>Note:</strong> This is a resource-intensive operation that may take a significant amount of time depending on the number of documents.
|
||||
</Typography>
|
||||
</Box>
|
||||
</DialogContent>
|
||||
<DialogActions>
|
||||
<Button onClick={() => setConfirmRetryAllOpen(false)}>
|
||||
Cancel
|
||||
</Button>
|
||||
<Button
|
||||
onClick={() => {
|
||||
setConfirmRetryAllOpen(false);
|
||||
handleRetryAllDocuments();
|
||||
}}
|
||||
variant="contained"
|
||||
color="primary"
|
||||
startIcon={<RefreshIcon />}
|
||||
>
|
||||
Retry All Documents
|
||||
</Button>
|
||||
</DialogActions>
|
||||
</Dialog>
|
||||
|
||||
{/* Advanced Retry Modal */}
|
||||
<BulkRetryModal
|
||||
open={bulkRetryModalOpen}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,9 @@
|
|||
-- Add OCR retry tracking fields to the documents table.
|
||||
-- These fields were added to the Document struct but were missing from the database schema.
-- Each ADD COLUMN uses IF NOT EXISTS, so a column that is already present is left as is.
-- Neither column declares NOT NULL, so both accept NULL values.
|
||||
|
||||
ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_retry_count INTEGER DEFAULT 0;
|
||||
ALTER TABLE documents ADD COLUMN IF NOT EXISTS ocr_failure_reason TEXT DEFAULT NULL;
|
||||
|
||||
-- Attach a description to each new column.
|
||||
COMMENT ON COLUMN documents.ocr_retry_count IS 'Number of times OCR processing has been retried for this document';
|
||||
COMMENT ON COLUMN documents.ocr_failure_reason IS 'Reason for the most recent OCR failure, if any';
|
||||
|
|
@ -10,9 +10,9 @@ impl Database {
|
|||
pub async fn create_document(&self, document: Document) -> Result<Document> {
|
||||
let row = sqlx::query(
|
||||
r#"
|
||||
INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22)
|
||||
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24)
|
||||
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
"#
|
||||
)
|
||||
.bind(document.id)
|
||||
|
|
@ -29,6 +29,8 @@ impl Database {
|
|||
.bind(&document.ocr_status)
|
||||
.bind(&document.ocr_error)
|
||||
.bind(document.ocr_completed_at)
|
||||
.bind(document.ocr_retry_count)
|
||||
.bind(&document.ocr_failure_reason)
|
||||
.bind(&document.tags)
|
||||
.bind(document.created_at)
|
||||
.bind(document.updated_at)
|
||||
|
|
@ -55,6 +57,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -70,7 +74,7 @@ impl Database {
|
|||
let query = if user_role == crate::models::UserRole::Admin {
|
||||
// Admins can see all documents
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
ORDER BY created_at DESC
|
||||
LIMIT $1 OFFSET $2
|
||||
|
|
@ -78,7 +82,7 @@ impl Database {
|
|||
} else {
|
||||
// Regular users can only see their own documents
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE user_id = $3
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -118,6 +122,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -138,7 +144,7 @@ impl Database {
|
|||
// Admin with OCR filter
|
||||
sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE ocr_status = $3
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -155,7 +161,7 @@ impl Database {
|
|||
// Admin without OCR filter
|
||||
sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
ORDER BY created_at DESC
|
||||
LIMIT $1 OFFSET $2
|
||||
|
|
@ -170,7 +176,7 @@ impl Database {
|
|||
// Regular user with OCR filter
|
||||
sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE user_id = $3 AND ocr_status = $4
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -188,7 +194,7 @@ impl Database {
|
|||
// Regular user without OCR filter
|
||||
sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE user_id = $3
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -220,6 +226,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -280,7 +288,7 @@ impl Database {
|
|||
pub async fn get_documents_by_user(&self, user_id: Uuid, limit: i64, offset: i64) -> Result<Vec<Document>> {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE user_id = $1
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -310,6 +318,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -327,7 +337,7 @@ impl Database {
|
|||
pub async fn find_documents_by_filename(&self, filename: &str) -> Result<Vec<Document>> {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE filename = $1 OR original_filename = $1
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -354,6 +364,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -371,7 +383,7 @@ impl Database {
|
|||
pub async fn search_documents(&self, user_id: Uuid, search: SearchRequest) -> Result<(Vec<Document>, i64)> {
|
||||
let mut query_builder = QueryBuilder::new(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
ts_rank(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')), plainto_tsquery('english', "#
|
||||
);
|
||||
|
||||
|
|
@ -428,6 +440,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -477,7 +491,7 @@ impl Database {
|
|||
// Use trigram similarity for substring matching
|
||||
let mut builder = QueryBuilder::new(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
GREATEST(
|
||||
similarity(filename, "#
|
||||
);
|
||||
|
|
@ -520,7 +534,7 @@ impl Database {
|
|||
|
||||
let mut builder = QueryBuilder::new(&format!(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
GREATEST(
|
||||
CASE WHEN filename ILIKE '%' || "#
|
||||
));
|
||||
|
|
@ -666,7 +680,7 @@ impl Database {
|
|||
// Use trigram similarity for substring matching
|
||||
let mut builder = QueryBuilder::new(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
GREATEST(
|
||||
similarity(filename, "#
|
||||
);
|
||||
|
|
@ -705,7 +719,7 @@ impl Database {
|
|||
|
||||
let mut builder = QueryBuilder::new(&format!(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
GREATEST(
|
||||
CASE WHEN filename ILIKE '%' || "#
|
||||
));
|
||||
|
|
@ -982,7 +996,7 @@ impl Database {
|
|||
|
||||
pub async fn get_recent_documents_for_source(&self, source_id: Uuid, limit: i64) -> Result<Vec<Document>> {
|
||||
let rows = sqlx::query(
|
||||
r#"SELECT * FROM documents
|
||||
r#"SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata FROM documents
|
||||
WHERE source_id = $1
|
||||
ORDER BY created_at DESC
|
||||
LIMIT $2"#
|
||||
|
|
@ -1009,6 +1023,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -1105,14 +1121,14 @@ impl Database {
|
|||
let query = if user_role == crate::models::UserRole::Admin {
|
||||
// Admins can see any document
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE id = $1
|
||||
"#
|
||||
} else {
|
||||
// Regular users can only see their own documents
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE id = $1 AND user_id = $2
|
||||
"#
|
||||
|
|
@ -1147,6 +1163,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -1164,7 +1182,7 @@ impl Database {
|
|||
pub async fn get_document_by_user_and_hash(&self, user_id: Uuid, file_hash: &str) -> Result<Option<Document>> {
|
||||
let row = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE user_id = $1 AND file_hash = $2
|
||||
LIMIT 1
|
||||
|
|
@ -1191,6 +1209,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -1396,7 +1416,7 @@ impl Database {
|
|||
r#"
|
||||
DELETE FROM documents
|
||||
WHERE id = $1
|
||||
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
"#,
|
||||
)
|
||||
.bind(document_id)
|
||||
|
|
@ -1418,6 +1438,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1432,7 +1454,7 @@ impl Database {
|
|||
r#"
|
||||
DELETE FROM documents
|
||||
WHERE id = $1 AND user_id = $2
|
||||
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
"#,
|
||||
)
|
||||
.bind(document_id)
|
||||
|
|
@ -1455,6 +1477,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1479,7 +1503,7 @@ impl Database {
|
|||
r#"
|
||||
DELETE FROM documents
|
||||
WHERE id = ANY($1)
|
||||
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
"#,
|
||||
)
|
||||
.bind(document_ids)
|
||||
|
|
@ -1501,6 +1525,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1515,7 +1541,7 @@ impl Database {
|
|||
r#"
|
||||
DELETE FROM documents
|
||||
WHERE id = ANY($1) AND user_id = $2
|
||||
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
"#,
|
||||
)
|
||||
.bind(document_ids)
|
||||
|
|
@ -1538,6 +1564,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1557,7 +1585,7 @@ impl Database {
|
|||
let documents = if user_role == crate::models::UserRole::Admin {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE ocr_confidence IS NOT NULL AND ocr_confidence < $1
|
||||
ORDER BY ocr_confidence ASC, created_at DESC
|
||||
|
|
@ -1582,6 +1610,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1594,7 +1624,7 @@ impl Database {
|
|||
} else {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE ocr_confidence IS NOT NULL AND ocr_confidence < $1 AND user_id = $2
|
||||
ORDER BY ocr_confidence ASC, created_at DESC
|
||||
|
|
@ -1620,6 +1650,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1639,7 +1671,7 @@ impl Database {
|
|||
let documents = if user_role == crate::models::UserRole::Admin {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE ocr_status = 'failed' OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing')
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -1663,6 +1695,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1675,7 +1709,7 @@ impl Database {
|
|||
} else {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE (ocr_status = 'failed' OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing')) AND user_id = $1
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -1700,6 +1734,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1719,7 +1755,7 @@ impl Database {
|
|||
let documents = if user_role == crate::models::UserRole::Admin {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE (ocr_confidence IS NOT NULL AND ocr_confidence < $1)
|
||||
OR ocr_status = 'failed'
|
||||
|
|
@ -1747,6 +1783,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1759,7 +1797,7 @@ impl Database {
|
|||
} else {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE ((ocr_confidence IS NOT NULL AND ocr_confidence < $1)
|
||||
OR ocr_status = 'failed')
|
||||
|
|
@ -1789,6 +1827,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
|
|||
|
|
@ -27,7 +27,21 @@ pub async fn record_ocr_retry(
|
|||
priority: i32,
|
||||
queue_id: Option<Uuid>,
|
||||
) -> Result<Uuid> {
|
||||
crate::debug_log!("OCR_RETRY_HISTORY",
|
||||
"document_id" => document_id,
|
||||
"user_id" => user_id,
|
||||
"retry_reason" => retry_reason,
|
||||
"priority" => priority,
|
||||
"queue_id" => queue_id.unwrap_or_default(),
|
||||
"message" => "Recording OCR retry attempt"
|
||||
);
|
||||
|
||||
// First get the current OCR status
|
||||
crate::debug_log!("OCR_RETRY_HISTORY",
|
||||
"document_id" => document_id,
|
||||
"message" => "Fetching current OCR status"
|
||||
);
|
||||
|
||||
let current_status = sqlx::query(
|
||||
r#"
|
||||
SELECT ocr_status, ocr_failure_reason, ocr_error
|
||||
|
|
@ -37,19 +51,38 @@ pub async fn record_ocr_retry(
|
|||
)
|
||||
.bind(document_id)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
.await
|
||||
.map_err(|e| {
|
||||
crate::debug_error!("OCR_RETRY_HISTORY", format!("Failed to fetch current status for document {}: {}", document_id, e));
|
||||
e
|
||||
})?;
|
||||
|
||||
let (previous_status, previous_failure_reason, previous_error) = if let Some(row) = current_status {
|
||||
(
|
||||
row.get::<Option<String>, _>("ocr_status"),
|
||||
row.get::<Option<String>, _>("ocr_failure_reason"),
|
||||
row.get::<Option<String>, _>("ocr_error"),
|
||||
)
|
||||
let status = row.get::<Option<String>, _>("ocr_status");
|
||||
let failure = row.get::<Option<String>, _>("ocr_failure_reason");
|
||||
let error = row.get::<Option<String>, _>("ocr_error");
|
||||
|
||||
crate::debug_log!("OCR_RETRY_HISTORY",
|
||||
"document_id" => document_id,
|
||||
"status" => status.as_deref().unwrap_or("none"),
|
||||
"failure_reason" => failure.as_deref().unwrap_or("none"),
|
||||
"has_error" => error.is_some(),
|
||||
"message" => "Found current document status"
|
||||
);
|
||||
|
||||
(status, failure, error)
|
||||
} else {
|
||||
crate::debug_warn!("OCR_RETRY_HISTORY", "Document not found when recording retry history");
|
||||
(None, None, None)
|
||||
};
|
||||
|
||||
// Insert retry history record
|
||||
crate::debug_log!("OCR_RETRY_HISTORY",
|
||||
"document_id" => document_id,
|
||||
"previous_status" => previous_status.as_deref().unwrap_or("none"),
|
||||
"message" => "Inserting retry history record"
|
||||
);
|
||||
|
||||
let retry_id: Uuid = sqlx::query_scalar(
|
||||
r#"
|
||||
INSERT INTO ocr_retry_history (
|
||||
|
|
@ -63,15 +96,25 @@ pub async fn record_ocr_retry(
|
|||
.bind(document_id)
|
||||
.bind(user_id)
|
||||
.bind(retry_reason)
|
||||
.bind(previous_status)
|
||||
.bind(previous_failure_reason)
|
||||
.bind(previous_error)
|
||||
.bind(&previous_status)
|
||||
.bind(&previous_failure_reason)
|
||||
.bind(&previous_error)
|
||||
.bind(priority)
|
||||
.bind(queue_id)
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
.await
|
||||
.map_err(|e| {
|
||||
crate::debug_error!("OCR_RETRY_HISTORY", format!("Failed to insert retry history for document {}: {}", document_id, e));
|
||||
e
|
||||
})?;
|
||||
|
||||
// Increment retry count
|
||||
crate::debug_log!("OCR_RETRY_HISTORY",
|
||||
"document_id" => document_id,
|
||||
"retry_id" => retry_id,
|
||||
"message" => "Incrementing retry count"
|
||||
);
|
||||
|
||||
sqlx::query(
|
||||
r#"
|
||||
UPDATE documents
|
||||
|
|
@ -82,7 +125,18 @@ pub async fn record_ocr_retry(
|
|||
)
|
||||
.bind(document_id)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
.await
|
||||
.map_err(|e| {
|
||||
crate::debug_error!("OCR_RETRY_HISTORY", format!("Failed to increment retry count for document {}: {}", document_id, e));
|
||||
e
|
||||
})?;
|
||||
|
||||
crate::debug_log!("OCR_RETRY_HISTORY",
|
||||
"document_id" => document_id,
|
||||
"retry_id" => retry_id,
|
||||
"user_id" => user_id,
|
||||
"message" => "Successfully recorded retry history"
|
||||
);
|
||||
|
||||
Ok(retry_id)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ pub mod scheduling;
|
|||
pub mod seed;
|
||||
pub mod services;
|
||||
pub mod swagger;
|
||||
pub mod utils;
|
||||
pub mod webdav_xml_parser;
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
|
|
@ -129,6 +129,8 @@ pub struct Document {
|
|||
pub ocr_status: Option<String>,
|
||||
pub ocr_error: Option<String>,
|
||||
pub ocr_completed_at: Option<DateTime<Utc>>,
|
||||
pub ocr_retry_count: Option<i32>,
|
||||
pub ocr_failure_reason: Option<String>,
|
||||
pub tags: Vec<String>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
|
|
|
|||
|
|
@ -75,6 +75,13 @@ impl OcrQueueService {
|
|||
|
||||
/// Add a document to the OCR queue
|
||||
pub async fn enqueue_document(&self, document_id: Uuid, priority: i32, file_size: i64) -> Result<Uuid> {
|
||||
crate::debug_log!("OCR_QUEUE",
|
||||
"document_id" => document_id,
|
||||
"priority" => priority,
|
||||
"file_size" => file_size,
|
||||
"message" => "Enqueueing document"
|
||||
);
|
||||
|
||||
let row = sqlx::query(
|
||||
r#"
|
||||
INSERT INTO ocr_queue (document_id, priority, file_size)
|
||||
|
|
@ -86,10 +93,22 @@ impl OcrQueueService {
|
|||
.bind(priority)
|
||||
.bind(file_size)
|
||||
.fetch_one(&self.pool)
|
||||
.await?;
|
||||
.await
|
||||
.map_err(|e| {
|
||||
crate::debug_error!("OCR_QUEUE", format!("Failed to insert document {} into queue: {}", document_id, e));
|
||||
e
|
||||
})?;
|
||||
|
||||
let id: Uuid = row.get("id");
|
||||
|
||||
crate::debug_log!("OCR_QUEUE",
|
||||
"document_id" => document_id,
|
||||
"queue_id" => id,
|
||||
"priority" => priority,
|
||||
"file_size" => file_size,
|
||||
"message" => "Successfully enqueued document"
|
||||
);
|
||||
|
||||
info!("Enqueued document {} with priority {} for OCR processing", document_id, priority);
|
||||
Ok(id)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -571,28 +571,55 @@ async fn retry_ocr(
|
|||
auth_user: AuthUser,
|
||||
Path(document_id): Path<uuid::Uuid>,
|
||||
) -> Result<Json<serde_json::Value>, StatusCode> {
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"user_id" => auth_user.user.id,
|
||||
"message" => "Starting OCR retry request"
|
||||
);
|
||||
|
||||
// Check if document exists and belongs to user
|
||||
let document = state
|
||||
.db
|
||||
.get_document_by_id(document_id, auth_user.user.id, auth_user.user.role)
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||
.ok_or(StatusCode::NOT_FOUND)?;
|
||||
.map_err(|e| {
|
||||
crate::debug_error!("OCR_RETRY", format!("Failed to get document {}: {}", document_id, e));
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?
|
||||
.ok_or_else(|| {
|
||||
crate::debug_log!("OCR_RETRY", &format!("Document {} not found or access denied for user {}", document_id, auth_user.user.id));
|
||||
StatusCode::NOT_FOUND
|
||||
})?;
|
||||
|
||||
// Check if document is eligible for OCR retry (failed or not processed)
|
||||
let eligible = document.ocr_status.as_ref().map_or(true, |status| {
|
||||
status == "failed" || status == "pending"
|
||||
});
|
||||
// Check if document is eligible for OCR retry (all documents are now retryable)
|
||||
let current_status = document.ocr_status.as_deref().unwrap_or("unknown");
|
||||
let eligible = true; // All documents are retryable
|
||||
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"filename" => &document.filename,
|
||||
"current_status" => current_status,
|
||||
"eligible" => eligible,
|
||||
"file_size" => document.file_size,
|
||||
"retry_count" => document.ocr_retry_count.unwrap_or(0),
|
||||
"message" => "Checking document eligibility"
|
||||
);
|
||||
|
||||
if !eligible {
|
||||
crate::debug_log!("OCR_RETRY", &format!("Document {} is not eligible for retry - current status: {}", document_id, current_status));
|
||||
return Ok(Json(serde_json::json!({
|
||||
"success": false,
|
||||
"message": "Document is not eligible for OCR retry. Current status: {}",
|
||||
"message": format!("Document is not eligible for OCR retry. Current status: {}", current_status),
|
||||
"current_status": document.ocr_status
|
||||
})));
|
||||
}
|
||||
|
||||
// Reset document OCR fields
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"message" => "Resetting document OCR fields"
|
||||
);
|
||||
|
||||
let reset_result = sqlx::query(
|
||||
r#"
|
||||
UPDATE documents
|
||||
|
|
@ -611,12 +638,22 @@ async fn retry_ocr(
|
|||
.bind(document_id)
|
||||
.execute(state.db.get_pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
.map_err(|e| {
|
||||
crate::debug_error!("OCR_RETRY", format!("Failed to reset OCR fields for document {}: {}", document_id, e));
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
if reset_result.rows_affected() == 0 {
|
||||
crate::debug_error!("OCR_RETRY", format!("No rows affected when resetting OCR fields for document {}", document_id));
|
||||
return Err(StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"rows_affected" => reset_result.rows_affected(),
|
||||
"message" => "Successfully reset OCR fields"
|
||||
);
|
||||
|
||||
// Calculate priority based on file size (higher priority for retries)
|
||||
let priority = match document.file_size {
|
||||
0..=1048576 => 15, // <= 1MB: highest priority (boosted for retry)
|
||||
|
|
@ -626,10 +663,38 @@ async fn retry_ocr(
|
|||
_ => 6, // > 50MB: lowest priority
|
||||
};
|
||||
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"file_size" => document.file_size,
|
||||
"priority" => priority,
|
||||
"message" => "Calculated retry priority"
|
||||
);
|
||||
|
||||
// Add to OCR queue with detailed logging
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"priority" => priority,
|
||||
"file_size" => document.file_size,
|
||||
"message" => "Enqueueing document for OCR processing"
|
||||
);
|
||||
|
||||
match state.queue_service.enqueue_document(document_id, priority, document.file_size).await {
|
||||
Ok(queue_id) => {
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"queue_id" => queue_id,
|
||||
"priority" => priority,
|
||||
"message" => "Successfully enqueued document"
|
||||
);
|
||||
|
||||
// Record retry history
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"user_id" => auth_user.user.id,
|
||||
"queue_id" => queue_id,
|
||||
"message" => "Recording retry history"
|
||||
);
|
||||
|
||||
if let Err(e) = crate::db::ocr_retry::record_ocr_retry(
|
||||
state.db.get_pool(),
|
||||
document_id,
|
||||
|
|
@ -638,9 +703,25 @@ async fn retry_ocr(
|
|||
priority,
|
||||
Some(queue_id),
|
||||
).await {
|
||||
crate::debug_error!("OCR_RETRY", format!("Failed to record retry history for document {}: {}", document_id, e));
|
||||
tracing::warn!("Failed to record retry history for document {}: {}", document_id, e);
|
||||
} else {
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"queue_id" => queue_id,
|
||||
"message" => "Successfully recorded retry history"
|
||||
);
|
||||
}
|
||||
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"filename" => &document.filename,
|
||||
"queue_id" => queue_id,
|
||||
"priority" => priority,
|
||||
"file_size" => document.file_size,
|
||||
"message" => "OCR retry process completed successfully"
|
||||
);
|
||||
|
||||
tracing::info!(
|
||||
"OCR retry queued for document {} ({}): queue_id={}, priority={}, size={}",
|
||||
document_id, document.filename, queue_id, priority, document.file_size
|
||||
|
|
@ -656,6 +737,7 @@ async fn retry_ocr(
|
|||
})))
|
||||
}
|
||||
Err(e) => {
|
||||
crate::debug_error!("OCR_RETRY", format!("Failed to enqueue document {}: {}", document_id, e));
|
||||
tracing::error!("Failed to queue OCR retry for document {}: {}", document_id, e);
|
||||
Err(StatusCode::INTERNAL_SERVER_ERROR)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -101,62 +101,120 @@ pub async fn bulk_retry_ocr(
|
|||
auth_user: AuthUser,
|
||||
Json(request): Json<BulkOcrRetryRequest>,
|
||||
) -> Result<Json<BulkOcrRetryResponse>, StatusCode> {
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"user_id" => auth_user.user.id,
|
||||
"mode" => format!("{:?}", request.mode),
|
||||
"preview_only" => request.preview_only.unwrap_or(false),
|
||||
"priority_override" => request.priority_override.unwrap_or(-1),
|
||||
"message" => "Starting bulk OCR retry request"
|
||||
);
|
||||
|
||||
info!("Bulk OCR retry requested by user {} with mode: {:?}", auth_user.user.id, request.mode);
|
||||
|
||||
let preview_only = request.preview_only.unwrap_or(false);
|
||||
|
||||
// Build query based on selection mode
|
||||
crate::debug_log!("BULK_OCR_RETRY", "Building document query based on selection mode");
|
||||
|
||||
let documents = match request.mode {
|
||||
SelectionMode::All => {
|
||||
crate::debug_log!("BULK_OCR_RETRY", "Fetching all documents for retry");
|
||||
get_all_failed_ocr_documents(&state, &auth_user).await?
|
||||
}
|
||||
SelectionMode::Specific => {
|
||||
if let Some(ids) = request.document_ids {
|
||||
get_specific_documents(&state, &auth_user, ids).await?
|
||||
if let Some(ids) = &request.document_ids {
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"document_count" => ids.len(),
|
||||
"message" => "Fetching specific documents"
|
||||
);
|
||||
get_specific_documents(&state, &auth_user, ids.clone()).await?
|
||||
} else {
|
||||
crate::debug_error!("BULK_OCR_RETRY", "Specific mode requested but no document IDs provided");
|
||||
return Err(StatusCode::BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
SelectionMode::Filter => {
|
||||
if let Some(filter) = request.filter {
|
||||
get_filtered_documents(&state, &auth_user, filter).await?
|
||||
if let Some(filter) = &request.filter {
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"filter_mime_types" => filter.mime_types.as_ref().map(|v| v.len()).unwrap_or(0),
|
||||
"filter_failure_reasons" => filter.failure_reasons.as_ref().map(|v| v.len()).unwrap_or(0),
|
||||
"message" => "Fetching filtered documents"
|
||||
);
|
||||
get_filtered_documents(&state, &auth_user, filter.clone()).await?
|
||||
} else {
|
||||
crate::debug_error!("BULK_OCR_RETRY", "Filter mode requested but no filter provided");
|
||||
return Err(StatusCode::BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let matched_count = documents.len();
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"matched_count" => matched_count,
|
||||
"message" => "Document query completed"
|
||||
);
|
||||
let mut retry_documents = Vec::new();
|
||||
let mut queued_count = 0;
|
||||
let mut total_estimated_time = 0.0;
|
||||
|
||||
for doc in documents {
|
||||
for (index, doc) in documents.iter().enumerate() {
|
||||
let priority = calculate_priority(doc.file_size, request.priority_override);
|
||||
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"index" => index + 1,
|
||||
"total" => matched_count,
|
||||
"document_id" => doc.id,
|
||||
"filename" => &doc.filename,
|
||||
"file_size" => doc.file_size,
|
||||
"priority" => priority,
|
||||
"failure_reason" => doc.ocr_failure_reason.as_deref().unwrap_or("none"),
|
||||
"message" => "Processing document"
|
||||
);
|
||||
|
||||
let mut doc_info = OcrRetryDocumentInfo {
|
||||
id: doc.id,
|
||||
filename: doc.filename.clone(),
|
||||
file_size: doc.file_size,
|
||||
mime_type: doc.mime_type,
|
||||
ocr_failure_reason: doc.ocr_failure_reason,
|
||||
mime_type: doc.mime_type.clone(),
|
||||
ocr_failure_reason: doc.ocr_failure_reason.clone(),
|
||||
priority,
|
||||
queue_id: None,
|
||||
};
|
||||
|
||||
if !preview_only {
|
||||
// Reset OCR fields
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"document_id" => doc.id,
|
||||
"message" => "Resetting OCR status for document"
|
||||
);
|
||||
|
||||
if let Err(e) = reset_document_ocr_status(&state, doc.id).await {
|
||||
crate::debug_error!("BULK_OCR_RETRY", format!("Failed to reset OCR status for document {}: {}", doc.id, e));
|
||||
warn!("Failed to reset OCR status for document {}: {}", doc.id, e);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Queue for OCR
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"document_id" => doc.id,
|
||||
"priority" => priority,
|
||||
"file_size" => doc.file_size,
|
||||
"message" => "Enqueueing document for OCR"
|
||||
);
|
||||
|
||||
match state.queue_service.enqueue_document(doc.id, priority, doc.file_size).await {
|
||||
Ok(queue_id) => {
|
||||
doc_info.queue_id = Some(queue_id);
|
||||
queued_count += 1;
|
||||
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"document_id" => doc.id,
|
||||
"queue_id" => queue_id,
|
||||
"priority" => priority,
|
||||
"queued_count" => queued_count,
|
||||
"message" => "Successfully enqueued document"
|
||||
);
|
||||
|
||||
// Record retry history
|
||||
let retry_reason = match &request.mode {
|
||||
SelectionMode::All => "bulk_retry_all",
|
||||
|
|
@ -164,6 +222,13 @@ pub async fn bulk_retry_ocr(
|
|||
SelectionMode::Filter => "bulk_retry_filtered",
|
||||
};
|
||||
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"document_id" => doc.id,
|
||||
"retry_reason" => retry_reason,
|
||||
"queue_id" => queue_id,
|
||||
"message" => "Recording retry history"
|
||||
);
|
||||
|
||||
if let Err(e) = crate::db::ocr_retry::record_ocr_retry(
|
||||
state.db.get_pool(),
|
||||
doc.id,
|
||||
|
|
@ -172,12 +237,20 @@ pub async fn bulk_retry_ocr(
|
|||
priority,
|
||||
Some(queue_id),
|
||||
).await {
|
||||
crate::debug_error!("BULK_OCR_RETRY", format!("Failed to record retry history for document {}: {}", doc.id, e));
|
||||
warn!("Failed to record retry history for document {}: {}", doc.id, e);
|
||||
} else {
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"document_id" => doc.id,
|
||||
"queue_id" => queue_id,
|
||||
"message" => "Successfully recorded retry history"
|
||||
);
|
||||
}
|
||||
|
||||
info!("Queued document {} for OCR retry with priority {}", doc.id, priority);
|
||||
}
|
||||
Err(e) => {
|
||||
crate::debug_error!("BULK_OCR_RETRY", format!("Failed to enqueue document {}: {}", doc.id, e));
|
||||
error!("Failed to queue document {} for OCR retry: {}", doc.id, e);
|
||||
}
|
||||
}
|
||||
|
|
@ -188,6 +261,15 @@ pub async fn bulk_retry_ocr(
|
|||
retry_documents.push(doc_info);
|
||||
}
|
||||
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"matched_count" => matched_count,
|
||||
"queued_count" => queued_count,
|
||||
"preview_only" => preview_only,
|
||||
"estimated_time_minutes" => (total_estimated_time / 60.0) as i32,
|
||||
"user_id" => auth_user.user.id,
|
||||
"message" => "Bulk retry operation completed"
|
||||
);
|
||||
|
||||
let response = BulkOcrRetryResponse {
|
||||
success: true,
|
||||
message: if preview_only {
|
||||
|
|
@ -303,8 +385,7 @@ pub async fn get_ocr_retry_stats(
|
|||
MIN(created_at) as first_occurrence,
|
||||
MAX(updated_at) as last_occurrence
|
||||
FROM documents
|
||||
WHERE ocr_status = 'failed'
|
||||
AND ($1::uuid IS NULL OR user_id = $1)
|
||||
WHERE ($1::uuid IS NULL OR user_id = $1)
|
||||
GROUP BY ocr_failure_reason
|
||||
ORDER BY count DESC
|
||||
"#
|
||||
|
|
@ -322,8 +403,7 @@ pub async fn get_ocr_retry_stats(
|
|||
COUNT(*) as count,
|
||||
AVG(file_size) as avg_file_size
|
||||
FROM documents
|
||||
WHERE ocr_status = 'failed'
|
||||
AND ($1::uuid IS NULL OR user_id = $1)
|
||||
WHERE ($1::uuid IS NULL OR user_id = $1)
|
||||
GROUP BY mime_type
|
||||
ORDER BY count DESC
|
||||
"#
|
||||
|
|
@ -441,8 +521,7 @@ async fn get_all_failed_ocr_documents(
|
|||
r#"
|
||||
SELECT id, filename, file_size, mime_type, ocr_failure_reason
|
||||
FROM documents
|
||||
WHERE ocr_status = 'failed'
|
||||
AND ($1::uuid IS NULL OR user_id = $1)
|
||||
WHERE ($1::uuid IS NULL OR user_id = $1)
|
||||
ORDER BY created_at DESC
|
||||
"#
|
||||
)
|
||||
|
|
@ -465,12 +544,33 @@ async fn get_specific_documents(
|
|||
Some(auth_user.user.id)
|
||||
};
|
||||
|
||||
// First let's debug what documents we're looking for and their current status
|
||||
for doc_id in &document_ids {
|
||||
if let Ok(Some(row)) = sqlx::query("SELECT id, filename, ocr_status FROM documents WHERE id = $1")
|
||||
.bind(doc_id)
|
||||
.fetch_optional(state.db.get_pool())
|
||||
.await {
|
||||
let status: Option<String> = row.get("ocr_status");
|
||||
let filename: String = row.get("filename");
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"requested_document_id" => doc_id,
|
||||
"filename" => &filename,
|
||||
"current_ocr_status" => status.as_deref().unwrap_or("NULL"),
|
||||
"message" => "Document found in database"
|
||||
);
|
||||
} else {
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"requested_document_id" => doc_id,
|
||||
"message" => "Document NOT found in database"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let documents = sqlx::query_as::<_, DocumentInfo>(
|
||||
r#"
|
||||
SELECT id, filename, file_size, mime_type, ocr_failure_reason
|
||||
FROM documents
|
||||
WHERE id = ANY($1)
|
||||
AND ocr_status = 'failed'
|
||||
AND ($2::uuid IS NULL OR user_id = $2)
|
||||
"#
|
||||
)
|
||||
|
|
@ -489,7 +589,7 @@ async fn get_filtered_documents(
|
|||
filter: OcrRetryFilter
|
||||
) -> Result<Vec<DocumentInfo>, StatusCode> {
|
||||
let mut query = sqlx::QueryBuilder::new(
|
||||
"SELECT id, filename, file_size, mime_type, ocr_failure_reason FROM documents WHERE ocr_status = 'failed'"
|
||||
"SELECT id, filename, file_size, mime_type, ocr_failure_reason FROM documents WHERE 1=1"
|
||||
);
|
||||
|
||||
// User filter
|
||||
|
|
@ -585,6 +685,7 @@ async fn reset_document_ocr_status(state: &Arc<AppState>, document_id: Uuid) ->
|
|||
ocr_text = NULL,
|
||||
ocr_error = NULL,
|
||||
ocr_failure_reason = NULL,
|
||||
ocr_retry_count = NULL,
|
||||
ocr_confidence = NULL,
|
||||
ocr_word_count = NULL,
|
||||
ocr_processing_time_ms = NULL,
|
||||
|
|
|
|||
|
|
@ -177,6 +177,8 @@ impl FileService {
|
|||
ocr_status: Some("pending".to_string()),
|
||||
ocr_error: None,
|
||||
ocr_completed_at: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
tags: Vec::new(),
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
|
|
|
|||
|
|
@ -52,6 +52,8 @@ mod tests {
|
|||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -103,6 +103,8 @@ mod tests {
|
|||
original_created_at: Some(DateTime::parse_from_rfc3339("2023-12-01T10:00:00Z").unwrap().with_timezone(&Utc)),
|
||||
original_modified_at: Some(DateTime::parse_from_rfc3339("2023-12-15T15:30:00Z").unwrap().with_timezone(&Utc)),
|
||||
source_metadata: Some(serde_json::json!({"permissions": "644", "owner": "user1"})),
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
};
|
||||
|
||||
// Convert to DocumentResponse
|
||||
|
|
|
|||
|
|
@ -63,6 +63,8 @@ mod document_routes_deletion_tests {
|
|||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -400,6 +402,8 @@ mod document_routes_deletion_tests {
|
|||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,8 @@ fn create_test_document(user_id: Uuid) -> Document {
|
|||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -57,6 +59,8 @@ fn create_test_document_without_ocr(user_id: Uuid) -> Document {
|
|||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -85,6 +89,8 @@ fn create_test_document_with_ocr_error(user_id: Uuid) -> Document {
|
|||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1564,6 +1570,8 @@ mod deletion_error_handling_tests {
|
|||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -942,6 +942,8 @@ mod tests {
|
|||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
};
|
||||
|
||||
db.create_document(document).await.unwrap();
|
||||
|
|
|
|||
|
|
@ -195,6 +195,8 @@ mod file_deletion_tests {
|
|||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
};
|
||||
|
||||
(
|
||||
|
|
@ -333,6 +335,8 @@ mod file_deletion_tests {
|
|||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
};
|
||||
|
||||
// Try to delete nonexistent files (should not fail)
|
||||
|
|
@ -387,6 +391,8 @@ mod file_deletion_tests {
|
|||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
};
|
||||
|
||||
// Verify files exist
|
||||
|
|
@ -445,6 +451,8 @@ mod file_deletion_tests {
|
|||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
};
|
||||
|
||||
// Verify files exist
|
||||
|
|
@ -494,6 +502,8 @@ mod file_deletion_tests {
|
|||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
};
|
||||
|
||||
// Verify file exists
|
||||
|
|
|
|||
|
|
@ -84,6 +84,8 @@ mod tests {
|
|||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
};
|
||||
|
||||
sqlx::query("INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)")
|
||||
|
|
|
|||
|
|
@ -0,0 +1,85 @@
|
|||
use std::env;
|
||||
use tracing::{debug, info, warn, error};
|
||||
|
||||
/// Reports whether verbose debug output has been requested through the
/// `DEBUG` environment variable.
///
/// Returns `true` when `DEBUG` is set to anything other than the empty
/// string, `"0"`, or a value that lowercases to `"false"`. Returns `false`
/// when the variable is unset or `env::var` fails to read it.
///
/// The environment is consulted on every call; nothing is cached.
pub fn is_debug_enabled() -> bool {
    match env::var("DEBUG") {
        Ok(raw) => {
            // The three spellings that explicitly switch debugging off.
            let switched_off = raw.is_empty() || raw == "0" || raw.to_lowercase() == "false";
            !switched_off
        }
        Err(_) => false,
    }
}
|
||||
|
||||
/// Log debug message only if DEBUG environment variable is set
|
||||
pub fn debug_log(message: &str) {
|
||||
if is_debug_enabled() {
|
||||
info!("🐛 DEBUG: {}", message);
|
||||
}
|
||||
}
|
||||
|
||||
/// Log debug message with context only if DEBUG environment variable is set
|
||||
pub fn debug_log_context(context: &str, message: &str) {
|
||||
if is_debug_enabled() {
|
||||
info!("🐛 DEBUG [{}]: {}", context, message);
|
||||
}
|
||||
}
|
||||
|
||||
/// Log debug message with structured data only if DEBUG environment variable is set
|
||||
pub fn debug_log_structured(context: &str, key_values: &[(&str, &dyn std::fmt::Display)]) {
|
||||
if is_debug_enabled() {
|
||||
let mut formatted = String::new();
|
||||
for (i, (key, value)) in key_values.iter().enumerate() {
|
||||
if i > 0 {
|
||||
formatted.push_str(", ");
|
||||
}
|
||||
formatted.push_str(&format!("{}={}", key, value));
|
||||
}
|
||||
info!("🐛 DEBUG [{}]: {}", context, formatted);
|
||||
}
|
||||
}
|
||||
|
||||
/// Log error with debug context
|
||||
pub fn debug_error(context: &str, error: &dyn std::fmt::Display) {
|
||||
if is_debug_enabled() {
|
||||
error!("🐛 DEBUG ERROR [{}]: {}", context, error);
|
||||
} else {
|
||||
error!("[{}]: {}", context, error);
|
||||
}
|
||||
}
|
||||
|
||||
/// Log warning with debug context
|
||||
pub fn debug_warn(context: &str, message: &str) {
|
||||
if is_debug_enabled() {
|
||||
warn!("🐛 DEBUG WARN [{}]: {}", context, message);
|
||||
} else {
|
||||
warn!("[{}]: {}", context, message);
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience front-end for the `debug_log*` functions in `utils::debug`.
///
/// Three invocation forms are accepted:
///
/// * `debug_log!(msg)` forwards to `debug_log(msg)`.
/// * `debug_log!(context, msg)` forwards to `debug_log_context(context, msg)`.
/// * `debug_log!(context, "key" => value, ...)` forwards to
///   `debug_log_structured`, passing each pair as `(key, &value)`; a trailing
///   comma is allowed.
///
/// The context is always supplied by the caller; the macro does not derive
/// one automatically.
///
/// Paths are written with `$crate` so the expansion resolves to the crate
/// that defines the macro. Because the macro is `#[macro_export]`ed, a plain
/// `crate::` path would be looked up in whichever crate invokes it.
#[macro_export]
macro_rules! debug_log {
    ($msg:expr) => {
        $crate::utils::debug::debug_log($msg)
    };
    ($context:expr, $msg:expr) => {
        $crate::utils::debug::debug_log_context($context, $msg)
    };
    ($context:expr, $($key:expr => $value:expr),+ $(,)?) => {
        $crate::utils::debug::debug_log_structured($context, &[$(($key, &$value)),+])
    };
}
|
||||
|
||||
/// Shorthand for calling `utils::debug::debug_error`.
///
/// `debug_error!(context, error)` expands to `debug_error(context, &error)`,
/// so the error value is borrowed rather than moved.
///
/// The path is written with `$crate` so the expansion resolves to the crate
/// that defines the macro. Because the macro is `#[macro_export]`ed, a plain
/// `crate::` path would be looked up in whichever crate invokes it.
#[macro_export]
macro_rules! debug_error {
    ($context:expr, $error:expr) => {
        $crate::utils::debug::debug_error($context, &$error)
    };
}
|
||||
|
||||
/// Shorthand for calling `utils::debug::debug_warn`.
///
/// `debug_warn!(context, msg)` expands to `debug_warn(context, msg)`.
///
/// The path is written with `$crate` so the expansion resolves to the crate
/// that defines the macro. Because the macro is `#[macro_export]`ed, a plain
/// `crate::` path would be looked up in whichever crate invokes it.
#[macro_export]
macro_rules! debug_warn {
    ($context:expr, $msg:expr) => {
        $crate::utils::debug::debug_warn($context, $msg)
    };
}
|
||||
|
|
@ -0,0 +1 @@
|
|||
pub mod debug;
|
||||
|
|
@ -109,6 +109,13 @@ async fn debug_ocr_content() {
|
|||
.await
|
||||
.expect("Upload should work");
|
||||
|
||||
println!("📤 Document 1 upload response status: {}", doc1_response.status());
|
||||
if !doc1_response.status().is_success() {
|
||||
let status = doc1_response.status();
|
||||
let error_text = doc1_response.text().await.unwrap_or_else(|_| "No response body".to_string());
|
||||
panic!("Document 1 upload failed with status {}: {}", status, error_text);
|
||||
}
|
||||
|
||||
let doc2_response = client
|
||||
.post(&format!("{}/api/documents", get_base_url()))
|
||||
.header("Authorization", format!("Bearer {}", token))
|
||||
|
|
@ -117,8 +124,15 @@ async fn debug_ocr_content() {
|
|||
.await
|
||||
.expect("Upload should work");
|
||||
|
||||
let doc1: DocumentResponse = doc1_response.json().await.expect("Valid JSON");
|
||||
let doc2: DocumentResponse = doc2_response.json().await.expect("Valid JSON");
|
||||
println!("📤 Document 2 upload response status: {}", doc2_response.status());
|
||||
if !doc2_response.status().is_success() {
|
||||
let status = doc2_response.status();
|
||||
let error_text = doc2_response.text().await.unwrap_or_else(|_| "No response body".to_string());
|
||||
panic!("Document 2 upload failed with status {}: {}", status, error_text);
|
||||
}
|
||||
|
||||
let doc1: DocumentResponse = doc1_response.json().await.expect("Valid JSON for doc1");
|
||||
let doc2: DocumentResponse = doc2_response.json().await.expect("Valid JSON for doc2");
|
||||
|
||||
println!("📄 Document 1: {}", doc1.id);
|
||||
println!("📄 Document 2: {}", doc2.id);
|
||||
|
|
|
|||
|
|
@ -36,6 +36,8 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: String) -> Doc
|
|||
ocr_status: Some("pending".to_string()),
|
||||
ocr_error: None,
|
||||
ocr_completed_at: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
tags: Vec::new(),
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
|
|
|
|||
|
|
@ -54,6 +54,8 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: Option<String>
|
|||
ocr_status: Some("pending".to_string()),
|
||||
ocr_error: None,
|
||||
ocr_completed_at: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
tags: Vec::new(),
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
|
|
|
|||
|
|
@ -351,6 +351,8 @@ async fn test_create_ignored_file_from_document() -> Result<()> {
|
|||
ocr_status: Some("completed".to_string()),
|
||||
ocr_error: None,
|
||||
ocr_completed_at: Some(chrono::Utc::now()),
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
tags: vec!["test".to_string()],
|
||||
created_at: chrono::Utc::now(),
|
||||
updated_at: chrono::Utc::now(),
|
||||
|
|
|
|||
|
|
@ -54,6 +54,8 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: String) -> Doc
|
|||
ocr_status: Some("pending".to_string()),
|
||||
ocr_error: None,
|
||||
ocr_completed_at: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
tags: Vec::new(),
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
|
|
|
|||
|
|
@ -54,6 +54,8 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: String) -> Doc
|
|||
ocr_status: Some("pending".to_string()),
|
||||
ocr_error: None,
|
||||
ocr_completed_at: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
tags: Vec::new(),
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
|
|
|
|||
|
|
@ -20,6 +20,8 @@ fn test_document_response_conversion_with_ocr() {
|
|||
ocr_status: Some("completed".to_string()),
|
||||
ocr_error: None,
|
||||
ocr_completed_at: Some(Utc::now()),
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
tags: vec!["test".to_string()],
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
|
|
@ -57,6 +59,8 @@ fn test_document_response_conversion_without_ocr() {
|
|||
ocr_status: Some("pending".to_string()),
|
||||
ocr_error: None,
|
||||
ocr_completed_at: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
tags: vec![],
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
|
|
|
|||
Loading…
Reference in New Issue