feat(server): implement DEBUG environment variable
This commit is contained in:
parent
befd46ad76
commit
6bdd6f4a56
|
|
@ -10,9 +10,9 @@ impl Database {
|
|||
pub async fn create_document(&self, document: Document) -> Result<Document> {
|
||||
let row = sqlx::query(
|
||||
r#"
|
||||
INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22)
|
||||
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24)
|
||||
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
"#
|
||||
)
|
||||
.bind(document.id)
|
||||
|
|
@ -36,6 +36,8 @@ impl Database {
|
|||
.bind(&document.file_hash)
|
||||
.bind(document.original_created_at)
|
||||
.bind(document.original_modified_at)
|
||||
.bind(document.ocr_retry_count)
|
||||
.bind(&document.ocr_failure_reason)
|
||||
.bind(&document.source_metadata)
|
||||
.fetch_one(&self.pool)
|
||||
.await?;
|
||||
|
|
@ -55,6 +57,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -70,7 +74,7 @@ impl Database {
|
|||
let query = if user_role == crate::models::UserRole::Admin {
|
||||
// Admins can see all documents
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
ORDER BY created_at DESC
|
||||
LIMIT $1 OFFSET $2
|
||||
|
|
@ -78,7 +82,7 @@ impl Database {
|
|||
} else {
|
||||
// Regular users can only see their own documents
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE user_id = $3
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -118,6 +122,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -138,7 +144,7 @@ impl Database {
|
|||
// Admin with OCR filter
|
||||
sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE ocr_status = $3
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -155,7 +161,7 @@ impl Database {
|
|||
// Admin without OCR filter
|
||||
sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
ORDER BY created_at DESC
|
||||
LIMIT $1 OFFSET $2
|
||||
|
|
@ -170,7 +176,7 @@ impl Database {
|
|||
// Regular user with OCR filter
|
||||
sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE user_id = $3 AND ocr_status = $4
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -188,7 +194,7 @@ impl Database {
|
|||
// Regular user without OCR filter
|
||||
sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE user_id = $3
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -220,6 +226,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -280,7 +288,7 @@ impl Database {
|
|||
pub async fn get_documents_by_user(&self, user_id: Uuid, limit: i64, offset: i64) -> Result<Vec<Document>> {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE user_id = $1
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -310,6 +318,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -327,7 +337,7 @@ impl Database {
|
|||
pub async fn find_documents_by_filename(&self, filename: &str) -> Result<Vec<Document>> {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE filename = $1 OR original_filename = $1
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -354,6 +364,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -371,7 +383,7 @@ impl Database {
|
|||
pub async fn search_documents(&self, user_id: Uuid, search: SearchRequest) -> Result<(Vec<Document>, i64)> {
|
||||
let mut query_builder = QueryBuilder::new(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
ts_rank(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')), plainto_tsquery('english', "#
|
||||
);
|
||||
|
||||
|
|
@ -428,6 +440,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -477,7 +491,7 @@ impl Database {
|
|||
// Use trigram similarity for substring matching
|
||||
let mut builder = QueryBuilder::new(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
GREATEST(
|
||||
similarity(filename, "#
|
||||
);
|
||||
|
|
@ -520,7 +534,7 @@ impl Database {
|
|||
|
||||
let mut builder = QueryBuilder::new(&format!(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
GREATEST(
|
||||
CASE WHEN filename ILIKE '%' || "#
|
||||
));
|
||||
|
|
@ -666,7 +680,7 @@ impl Database {
|
|||
// Use trigram similarity for substring matching
|
||||
let mut builder = QueryBuilder::new(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
GREATEST(
|
||||
similarity(filename, "#
|
||||
);
|
||||
|
|
@ -705,7 +719,7 @@ impl Database {
|
|||
|
||||
let mut builder = QueryBuilder::new(&format!(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
GREATEST(
|
||||
CASE WHEN filename ILIKE '%' || "#
|
||||
));
|
||||
|
|
@ -982,7 +996,7 @@ impl Database {
|
|||
|
||||
pub async fn get_recent_documents_for_source(&self, source_id: Uuid, limit: i64) -> Result<Vec<Document>> {
|
||||
let rows = sqlx::query(
|
||||
r#"SELECT * FROM documents
|
||||
r#"SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata FROM documents
|
||||
WHERE source_id = $1
|
||||
ORDER BY created_at DESC
|
||||
LIMIT $2"#
|
||||
|
|
@ -1009,6 +1023,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -1105,14 +1121,14 @@ impl Database {
|
|||
let query = if user_role == crate::models::UserRole::Admin {
|
||||
// Admins can see any document
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE id = $1
|
||||
"#
|
||||
} else {
|
||||
// Regular users can only see their own documents
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE id = $1 AND user_id = $2
|
||||
"#
|
||||
|
|
@ -1147,6 +1163,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -1164,7 +1182,7 @@ impl Database {
|
|||
pub async fn get_document_by_user_and_hash(&self, user_id: Uuid, file_hash: &str) -> Result<Option<Document>> {
|
||||
let row = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE user_id = $1 AND file_hash = $2
|
||||
LIMIT 1
|
||||
|
|
@ -1191,6 +1209,8 @@ impl Database {
|
|||
ocr_status: row.get("ocr_status"),
|
||||
ocr_error: row.get("ocr_error"),
|
||||
ocr_completed_at: row.get("ocr_completed_at"),
|
||||
ocr_retry_count: row.get("ocr_retry_count"),
|
||||
ocr_failure_reason: row.get("ocr_failure_reason"),
|
||||
tags: row.get("tags"),
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
|
|
@ -1418,6 +1438,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1455,6 +1477,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1501,6 +1525,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1538,6 +1564,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1557,7 +1585,7 @@ impl Database {
|
|||
let documents = if user_role == crate::models::UserRole::Admin {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE ocr_confidence IS NOT NULL AND ocr_confidence < $1
|
||||
ORDER BY ocr_confidence ASC, created_at DESC
|
||||
|
|
@ -1582,6 +1610,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1594,7 +1624,7 @@ impl Database {
|
|||
} else {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE ocr_confidence IS NOT NULL AND ocr_confidence < $1 AND user_id = $2
|
||||
ORDER BY ocr_confidence ASC, created_at DESC
|
||||
|
|
@ -1620,6 +1650,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1639,7 +1671,7 @@ impl Database {
|
|||
let documents = if user_role == crate::models::UserRole::Admin {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE ocr_status = 'failed' OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing')
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -1663,6 +1695,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1675,7 +1709,7 @@ impl Database {
|
|||
} else {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE (ocr_status = 'failed' OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing')) AND user_id = $1
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -1700,6 +1734,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1719,7 +1755,7 @@ impl Database {
|
|||
let documents = if user_role == crate::models::UserRole::Admin {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE (ocr_confidence IS NOT NULL AND ocr_confidence < $1)
|
||||
OR ocr_status = 'failed'
|
||||
|
|
@ -1747,6 +1783,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
@ -1759,7 +1797,7 @@ impl Database {
|
|||
} else {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE ((ocr_confidence IS NOT NULL AND ocr_confidence < $1)
|
||||
OR ocr_status = 'failed')
|
||||
|
|
@ -1789,6 +1827,8 @@ impl Database {
|
|||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
ocr_retry_count: r.get("ocr_retry_count"),
|
||||
ocr_failure_reason: r.get("ocr_failure_reason"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
|
|
|
|||
|
|
@ -27,7 +27,21 @@ pub async fn record_ocr_retry(
|
|||
priority: i32,
|
||||
queue_id: Option<Uuid>,
|
||||
) -> Result<Uuid> {
|
||||
crate::debug_log!("OCR_RETRY_HISTORY",
|
||||
"document_id" => document_id,
|
||||
"user_id" => user_id,
|
||||
"retry_reason" => retry_reason,
|
||||
"priority" => priority,
|
||||
"queue_id" => queue_id.unwrap_or_default(),
|
||||
"message" => "Recording OCR retry attempt"
|
||||
);
|
||||
|
||||
// First get the current OCR status
|
||||
crate::debug_log!("OCR_RETRY_HISTORY",
|
||||
"document_id" => document_id,
|
||||
"message" => "Fetching current OCR status"
|
||||
);
|
||||
|
||||
let current_status = sqlx::query(
|
||||
r#"
|
||||
SELECT ocr_status, ocr_failure_reason, ocr_error
|
||||
|
|
@ -37,19 +51,38 @@ pub async fn record_ocr_retry(
|
|||
)
|
||||
.bind(document_id)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
.await
|
||||
.map_err(|e| {
|
||||
crate::debug_error!("OCR_RETRY_HISTORY", format!("Failed to fetch current status for document {}: {}", document_id, e));
|
||||
e
|
||||
})?;
|
||||
|
||||
let (previous_status, previous_failure_reason, previous_error) = if let Some(row) = current_status {
|
||||
(
|
||||
row.get::<Option<String>, _>("ocr_status"),
|
||||
row.get::<Option<String>, _>("ocr_failure_reason"),
|
||||
row.get::<Option<String>, _>("ocr_error"),
|
||||
)
|
||||
let status = row.get::<Option<String>, _>("ocr_status");
|
||||
let failure = row.get::<Option<String>, _>("ocr_failure_reason");
|
||||
let error = row.get::<Option<String>, _>("ocr_error");
|
||||
|
||||
crate::debug_log!("OCR_RETRY_HISTORY",
|
||||
"document_id" => document_id,
|
||||
"status" => status.as_deref().unwrap_or("none"),
|
||||
"failure_reason" => failure.as_deref().unwrap_or("none"),
|
||||
"has_error" => error.is_some(),
|
||||
"message" => "Found current document status"
|
||||
);
|
||||
|
||||
(status, failure, error)
|
||||
} else {
|
||||
crate::debug_warn!("OCR_RETRY_HISTORY", "Document not found when recording retry history");
|
||||
(None, None, None)
|
||||
};
|
||||
|
||||
// Insert retry history record
|
||||
crate::debug_log!("OCR_RETRY_HISTORY",
|
||||
"document_id" => document_id,
|
||||
"previous_status" => previous_status.as_deref().unwrap_or("none"),
|
||||
"message" => "Inserting retry history record"
|
||||
);
|
||||
|
||||
let retry_id: Uuid = sqlx::query_scalar(
|
||||
r#"
|
||||
INSERT INTO ocr_retry_history (
|
||||
|
|
@ -63,15 +96,25 @@ pub async fn record_ocr_retry(
|
|||
.bind(document_id)
|
||||
.bind(user_id)
|
||||
.bind(retry_reason)
|
||||
.bind(previous_status)
|
||||
.bind(previous_failure_reason)
|
||||
.bind(previous_error)
|
||||
.bind(&previous_status)
|
||||
.bind(&previous_failure_reason)
|
||||
.bind(&previous_error)
|
||||
.bind(priority)
|
||||
.bind(queue_id)
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
.await
|
||||
.map_err(|e| {
|
||||
crate::debug_error!("OCR_RETRY_HISTORY", format!("Failed to insert retry history for document {}: {}", document_id, e));
|
||||
e
|
||||
})?;
|
||||
|
||||
// Increment retry count
|
||||
crate::debug_log!("OCR_RETRY_HISTORY",
|
||||
"document_id" => document_id,
|
||||
"retry_id" => retry_id,
|
||||
"message" => "Incrementing retry count"
|
||||
);
|
||||
|
||||
sqlx::query(
|
||||
r#"
|
||||
UPDATE documents
|
||||
|
|
@ -82,7 +125,18 @@ pub async fn record_ocr_retry(
|
|||
)
|
||||
.bind(document_id)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
.await
|
||||
.map_err(|e| {
|
||||
crate::debug_error!("OCR_RETRY_HISTORY", format!("Failed to increment retry count for document {}: {}", document_id, e));
|
||||
e
|
||||
})?;
|
||||
|
||||
crate::debug_log!("OCR_RETRY_HISTORY",
|
||||
"document_id" => document_id,
|
||||
"retry_id" => retry_id,
|
||||
"user_id" => user_id,
|
||||
"message" => "Successfully recorded retry history"
|
||||
);
|
||||
|
||||
Ok(retry_id)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ pub mod scheduling;
|
|||
pub mod seed;
|
||||
pub mod services;
|
||||
pub mod swagger;
|
||||
pub mod utils;
|
||||
pub mod webdav_xml_parser;
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
|
|
@ -129,6 +129,8 @@ pub struct Document {
|
|||
pub ocr_status: Option<String>,
|
||||
pub ocr_error: Option<String>,
|
||||
pub ocr_completed_at: Option<DateTime<Utc>>,
|
||||
pub ocr_retry_count: Option<i32>,
|
||||
pub ocr_failure_reason: Option<String>,
|
||||
pub tags: Vec<String>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
|
|
|
|||
|
|
@ -75,6 +75,13 @@ impl OcrQueueService {
|
|||
|
||||
/// Add a document to the OCR queue
|
||||
pub async fn enqueue_document(&self, document_id: Uuid, priority: i32, file_size: i64) -> Result<Uuid> {
|
||||
crate::debug_log!("OCR_QUEUE",
|
||||
"document_id" => document_id,
|
||||
"priority" => priority,
|
||||
"file_size" => file_size,
|
||||
"message" => "Enqueueing document"
|
||||
);
|
||||
|
||||
let row = sqlx::query(
|
||||
r#"
|
||||
INSERT INTO ocr_queue (document_id, priority, file_size)
|
||||
|
|
@ -86,10 +93,22 @@ impl OcrQueueService {
|
|||
.bind(priority)
|
||||
.bind(file_size)
|
||||
.fetch_one(&self.pool)
|
||||
.await?;
|
||||
.await
|
||||
.map_err(|e| {
|
||||
crate::debug_error!("OCR_QUEUE", format!("Failed to insert document {} into queue: {}", document_id, e));
|
||||
e
|
||||
})?;
|
||||
|
||||
let id: Uuid = row.get("id");
|
||||
|
||||
crate::debug_log!("OCR_QUEUE",
|
||||
"document_id" => document_id,
|
||||
"queue_id" => id,
|
||||
"priority" => priority,
|
||||
"file_size" => file_size,
|
||||
"message" => "Successfully enqueued document"
|
||||
);
|
||||
|
||||
info!("Enqueued document {} with priority {} for OCR processing", document_id, priority);
|
||||
Ok(id)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -571,28 +571,57 @@ async fn retry_ocr(
|
|||
auth_user: AuthUser,
|
||||
Path(document_id): Path<uuid::Uuid>,
|
||||
) -> Result<Json<serde_json::Value>, StatusCode> {
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"user_id" => auth_user.user.id,
|
||||
"message" => "Starting OCR retry request"
|
||||
);
|
||||
|
||||
// Check if document exists and belongs to user
|
||||
let document = state
|
||||
.db
|
||||
.get_document_by_id(document_id, auth_user.user.id, auth_user.user.role)
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||
.ok_or(StatusCode::NOT_FOUND)?;
|
||||
.map_err(|e| {
|
||||
crate::debug_error!("OCR_RETRY", format!("Failed to get document {}: {}", document_id, e));
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?
|
||||
.ok_or_else(|| {
|
||||
crate::debug_log!("OCR_RETRY", &format!("Document {} not found or access denied for user {}", document_id, auth_user.user.id));
|
||||
StatusCode::NOT_FOUND
|
||||
})?;
|
||||
|
||||
// Check if document is eligible for OCR retry (failed or not processed)
|
||||
let current_status = document.ocr_status.as_deref().unwrap_or("unknown");
|
||||
let eligible = document.ocr_status.as_ref().map_or(true, |status| {
|
||||
status == "failed" || status == "pending"
|
||||
});
|
||||
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"filename" => &document.filename,
|
||||
"current_status" => current_status,
|
||||
"eligible" => eligible,
|
||||
"file_size" => document.file_size,
|
||||
"retry_count" => document.ocr_retry_count.unwrap_or(0),
|
||||
"message" => "Checking document eligibility"
|
||||
);
|
||||
|
||||
if !eligible {
|
||||
crate::debug_log!("OCR_RETRY", &format!("Document {} is not eligible for retry - current status: {}", document_id, current_status));
|
||||
return Ok(Json(serde_json::json!({
|
||||
"success": false,
|
||||
"message": "Document is not eligible for OCR retry. Current status: {}",
|
||||
"message": format!("Document is not eligible for OCR retry. Current status: {}", current_status),
|
||||
"current_status": document.ocr_status
|
||||
})));
|
||||
}
|
||||
|
||||
// Reset document OCR fields
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"message" => "Resetting document OCR fields"
|
||||
);
|
||||
|
||||
let reset_result = sqlx::query(
|
||||
r#"
|
||||
UPDATE documents
|
||||
|
|
@ -611,12 +640,22 @@ async fn retry_ocr(
|
|||
.bind(document_id)
|
||||
.execute(state.db.get_pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
.map_err(|e| {
|
||||
crate::debug_error!("OCR_RETRY", format!("Failed to reset OCR fields for document {}: {}", document_id, e));
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
if reset_result.rows_affected() == 0 {
|
||||
crate::debug_error!("OCR_RETRY", format!("No rows affected when resetting OCR fields for document {}", document_id));
|
||||
return Err(StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"rows_affected" => reset_result.rows_affected(),
|
||||
"message" => "Successfully reset OCR fields"
|
||||
);
|
||||
|
||||
// Calculate priority based on file size (higher priority for retries)
|
||||
let priority = match document.file_size {
|
||||
0..=1048576 => 15, // <= 1MB: highest priority (boosted for retry)
|
||||
|
|
@ -626,10 +665,38 @@ async fn retry_ocr(
|
|||
_ => 6, // > 50MB: lowest priority
|
||||
};
|
||||
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"file_size" => document.file_size,
|
||||
"priority" => priority,
|
||||
"message" => "Calculated retry priority"
|
||||
);
|
||||
|
||||
// Add to OCR queue with detailed logging
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"priority" => priority,
|
||||
"file_size" => document.file_size,
|
||||
"message" => "Enqueueing document for OCR processing"
|
||||
);
|
||||
|
||||
match state.queue_service.enqueue_document(document_id, priority, document.file_size).await {
|
||||
Ok(queue_id) => {
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"queue_id" => queue_id,
|
||||
"priority" => priority,
|
||||
"message" => "Successfully enqueued document"
|
||||
);
|
||||
|
||||
// Record retry history
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"user_id" => auth_user.user.id,
|
||||
"queue_id" => queue_id,
|
||||
"message" => "Recording retry history"
|
||||
);
|
||||
|
||||
if let Err(e) = crate::db::ocr_retry::record_ocr_retry(
|
||||
state.db.get_pool(),
|
||||
document_id,
|
||||
|
|
@ -638,9 +705,25 @@ async fn retry_ocr(
|
|||
priority,
|
||||
Some(queue_id),
|
||||
).await {
|
||||
crate::debug_error!("OCR_RETRY", format!("Failed to record retry history for document {}: {}", document_id, e));
|
||||
tracing::warn!("Failed to record retry history for document {}: {}", document_id, e);
|
||||
} else {
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"queue_id" => queue_id,
|
||||
"message" => "Successfully recorded retry history"
|
||||
);
|
||||
}
|
||||
|
||||
crate::debug_log!("OCR_RETRY",
|
||||
"document_id" => document_id,
|
||||
"filename" => &document.filename,
|
||||
"queue_id" => queue_id,
|
||||
"priority" => priority,
|
||||
"file_size" => document.file_size,
|
||||
"message" => "OCR retry process completed successfully"
|
||||
);
|
||||
|
||||
tracing::info!(
|
||||
"OCR retry queued for document {} ({}): queue_id={}, priority={}, size={}",
|
||||
document_id, document.filename, queue_id, priority, document.file_size
|
||||
|
|
@ -656,6 +739,7 @@ async fn retry_ocr(
|
|||
})))
|
||||
}
|
||||
Err(e) => {
|
||||
crate::debug_error!("OCR_RETRY", format!("Failed to enqueue document {}: {}", document_id, e));
|
||||
tracing::error!("Failed to queue OCR retry for document {}: {}", document_id, e);
|
||||
Err(StatusCode::INTERNAL_SERVER_ERROR)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -101,62 +101,120 @@ pub async fn bulk_retry_ocr(
|
|||
auth_user: AuthUser,
|
||||
Json(request): Json<BulkOcrRetryRequest>,
|
||||
) -> Result<Json<BulkOcrRetryResponse>, StatusCode> {
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"user_id" => auth_user.user.id,
|
||||
"mode" => format!("{:?}", request.mode),
|
||||
"preview_only" => request.preview_only.unwrap_or(false),
|
||||
"priority_override" => request.priority_override.unwrap_or(-1),
|
||||
"message" => "Starting bulk OCR retry request"
|
||||
);
|
||||
|
||||
info!("Bulk OCR retry requested by user {} with mode: {:?}", auth_user.user.id, request.mode);
|
||||
|
||||
let preview_only = request.preview_only.unwrap_or(false);
|
||||
|
||||
// Build query based on selection mode
|
||||
crate::debug_log!("BULK_OCR_RETRY", "Building document query based on selection mode");
|
||||
|
||||
let documents = match request.mode {
|
||||
SelectionMode::All => {
|
||||
crate::debug_log!("BULK_OCR_RETRY", "Fetching all failed OCR documents");
|
||||
get_all_failed_ocr_documents(&state, &auth_user).await?
|
||||
}
|
||||
SelectionMode::Specific => {
|
||||
if let Some(ids) = request.document_ids {
|
||||
get_specific_documents(&state, &auth_user, ids).await?
|
||||
if let Some(ids) = &request.document_ids {
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"document_count" => ids.len(),
|
||||
"message" => "Fetching specific documents"
|
||||
);
|
||||
get_specific_documents(&state, &auth_user, ids.clone()).await?
|
||||
} else {
|
||||
crate::debug_error!("BULK_OCR_RETRY", "Specific mode requested but no document IDs provided");
|
||||
return Err(StatusCode::BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
SelectionMode::Filter => {
|
||||
if let Some(filter) = request.filter {
|
||||
get_filtered_documents(&state, &auth_user, filter).await?
|
||||
if let Some(filter) = &request.filter {
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"filter_mime_types" => filter.mime_types.as_ref().map(|v| v.len()).unwrap_or(0),
|
||||
"filter_failure_reasons" => filter.failure_reasons.as_ref().map(|v| v.len()).unwrap_or(0),
|
||||
"message" => "Fetching filtered documents"
|
||||
);
|
||||
get_filtered_documents(&state, &auth_user, filter.clone()).await?
|
||||
} else {
|
||||
crate::debug_error!("BULK_OCR_RETRY", "Filter mode requested but no filter provided");
|
||||
return Err(StatusCode::BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let matched_count = documents.len();
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"matched_count" => matched_count,
|
||||
"message" => "Document query completed"
|
||||
);
|
||||
let mut retry_documents = Vec::new();
|
||||
let mut queued_count = 0;
|
||||
let mut total_estimated_time = 0.0;
|
||||
|
||||
for doc in documents {
|
||||
for (index, doc) in documents.iter().enumerate() {
|
||||
let priority = calculate_priority(doc.file_size, request.priority_override);
|
||||
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"index" => index + 1,
|
||||
"total" => matched_count,
|
||||
"document_id" => doc.id,
|
||||
"filename" => &doc.filename,
|
||||
"file_size" => doc.file_size,
|
||||
"priority" => priority,
|
||||
"failure_reason" => doc.ocr_failure_reason.as_deref().unwrap_or("none"),
|
||||
"message" => "Processing document"
|
||||
);
|
||||
|
||||
let mut doc_info = OcrRetryDocumentInfo {
|
||||
id: doc.id,
|
||||
filename: doc.filename.clone(),
|
||||
file_size: doc.file_size,
|
||||
mime_type: doc.mime_type,
|
||||
ocr_failure_reason: doc.ocr_failure_reason,
|
||||
mime_type: doc.mime_type.clone(),
|
||||
ocr_failure_reason: doc.ocr_failure_reason.clone(),
|
||||
priority,
|
||||
queue_id: None,
|
||||
};
|
||||
|
||||
if !preview_only {
|
||||
// Reset OCR fields
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"document_id" => doc.id,
|
||||
"message" => "Resetting OCR status for document"
|
||||
);
|
||||
|
||||
if let Err(e) = reset_document_ocr_status(&state, doc.id).await {
|
||||
crate::debug_error!("BULK_OCR_RETRY", format!("Failed to reset OCR status for document {}: {}", doc.id, e));
|
||||
warn!("Failed to reset OCR status for document {}: {}", doc.id, e);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Queue for OCR
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"document_id" => doc.id,
|
||||
"priority" => priority,
|
||||
"file_size" => doc.file_size,
|
||||
"message" => "Enqueueing document for OCR"
|
||||
);
|
||||
|
||||
match state.queue_service.enqueue_document(doc.id, priority, doc.file_size).await {
|
||||
Ok(queue_id) => {
|
||||
doc_info.queue_id = Some(queue_id);
|
||||
queued_count += 1;
|
||||
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"document_id" => doc.id,
|
||||
"queue_id" => queue_id,
|
||||
"priority" => priority,
|
||||
"queued_count" => queued_count,
|
||||
"message" => "Successfully enqueued document"
|
||||
);
|
||||
|
||||
// Record retry history
|
||||
let retry_reason = match &request.mode {
|
||||
SelectionMode::All => "bulk_retry_all",
|
||||
|
|
@ -164,6 +222,13 @@ pub async fn bulk_retry_ocr(
|
|||
SelectionMode::Filter => "bulk_retry_filtered",
|
||||
};
|
||||
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"document_id" => doc.id,
|
||||
"retry_reason" => retry_reason,
|
||||
"queue_id" => queue_id,
|
||||
"message" => "Recording retry history"
|
||||
);
|
||||
|
||||
if let Err(e) = crate::db::ocr_retry::record_ocr_retry(
|
||||
state.db.get_pool(),
|
||||
doc.id,
|
||||
|
|
@ -172,12 +237,20 @@ pub async fn bulk_retry_ocr(
|
|||
priority,
|
||||
Some(queue_id),
|
||||
).await {
|
||||
crate::debug_error!("BULK_OCR_RETRY", format!("Failed to record retry history for document {}: {}", doc.id, e));
|
||||
warn!("Failed to record retry history for document {}: {}", doc.id, e);
|
||||
} else {
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"document_id" => doc.id,
|
||||
"queue_id" => queue_id,
|
||||
"message" => "Successfully recorded retry history"
|
||||
);
|
||||
}
|
||||
|
||||
info!("Queued document {} for OCR retry with priority {}", doc.id, priority);
|
||||
}
|
||||
Err(e) => {
|
||||
crate::debug_error!("BULK_OCR_RETRY", format!("Failed to enqueue document {}: {}", doc.id, e));
|
||||
error!("Failed to queue document {} for OCR retry: {}", doc.id, e);
|
||||
}
|
||||
}
|
||||
|
|
@ -188,6 +261,15 @@ pub async fn bulk_retry_ocr(
|
|||
retry_documents.push(doc_info);
|
||||
}
|
||||
|
||||
crate::debug_log!("BULK_OCR_RETRY",
|
||||
"matched_count" => matched_count,
|
||||
"queued_count" => queued_count,
|
||||
"preview_only" => preview_only,
|
||||
"estimated_time_minutes" => (total_estimated_time / 60.0) as i32,
|
||||
"user_id" => auth_user.user.id,
|
||||
"message" => "Bulk retry operation completed"
|
||||
);
|
||||
|
||||
let response = BulkOcrRetryResponse {
|
||||
success: true,
|
||||
message: if preview_only {
|
||||
|
|
@ -585,6 +667,7 @@ async fn reset_document_ocr_status(state: &Arc<AppState>, document_id: Uuid) ->
|
|||
ocr_text = NULL,
|
||||
ocr_error = NULL,
|
||||
ocr_failure_reason = NULL,
|
||||
ocr_retry_count = NULL,
|
||||
ocr_confidence = NULL,
|
||||
ocr_word_count = NULL,
|
||||
ocr_processing_time_ms = NULL,
|
||||
|
|
|
|||
|
|
@ -177,6 +177,8 @@ impl FileService {
|
|||
ocr_status: Some("pending".to_string()),
|
||||
ocr_error: None,
|
||||
ocr_completed_at: None,
|
||||
ocr_retry_count: None,
|
||||
ocr_failure_reason: None,
|
||||
tags: Vec::new(),
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
|
|
|
|||
|
|
@ -0,0 +1,85 @@
|
|||
use std::env;
|
||||
use tracing::{debug, info, warn, error};
|
||||
|
||||
/// Reports whether verbose debug output is enabled via the `DEBUG` environment variable.
///
/// Returns `true` when `DEBUG` is set to any value other than the empty string,
/// `"0"`, or `"false"` (compared ASCII case-insensitively). Returns `false` when
/// the variable is unset or `env::var` cannot read it as Unicode.
///
/// The environment is consulted on every call, so a change to `DEBUG` made while
/// the process is running is observed by the next call.
pub fn is_debug_enabled() -> bool {
    match env::var("DEBUG") {
        // `eq_ignore_ascii_case` compares in place; this runs on every debug log
        // call, so avoid allocating a lowercased copy of the value each time.
        Ok(val) => !(val.is_empty() || val == "0" || val.eq_ignore_ascii_case("false")),
        // Unset or unreadable: debug output stays off.
        Err(_) => false,
    }
}
|
||||
|
||||
/// Log debug message only if DEBUG environment variable is set
|
||||
pub fn debug_log(message: &str) {
|
||||
if is_debug_enabled() {
|
||||
info!("🐛 DEBUG: {}", message);
|
||||
}
|
||||
}
|
||||
|
||||
/// Log debug message with context only if DEBUG environment variable is set
|
||||
pub fn debug_log_context(context: &str, message: &str) {
|
||||
if is_debug_enabled() {
|
||||
info!("🐛 DEBUG [{}]: {}", context, message);
|
||||
}
|
||||
}
|
||||
|
||||
/// Log debug message with structured data only if DEBUG environment variable is set
|
||||
pub fn debug_log_structured(context: &str, key_values: &[(&str, &dyn std::fmt::Display)]) {
|
||||
if is_debug_enabled() {
|
||||
let mut formatted = String::new();
|
||||
for (i, (key, value)) in key_values.iter().enumerate() {
|
||||
if i > 0 {
|
||||
formatted.push_str(", ");
|
||||
}
|
||||
formatted.push_str(&format!("{}={}", key, value));
|
||||
}
|
||||
info!("🐛 DEBUG [{}]: {}", context, formatted);
|
||||
}
|
||||
}
|
||||
|
||||
/// Log error with debug context
|
||||
pub fn debug_error(context: &str, error: &dyn std::fmt::Display) {
|
||||
if is_debug_enabled() {
|
||||
error!("🐛 DEBUG ERROR [{}]: {}", context, error);
|
||||
} else {
|
||||
error!("[{}]: {}", context, error);
|
||||
}
|
||||
}
|
||||
|
||||
/// Log warning with debug context
|
||||
pub fn debug_warn(context: &str, message: &str) {
|
||||
if is_debug_enabled() {
|
||||
warn!("🐛 DEBUG WARN [{}]: {}", context, message);
|
||||
} else {
|
||||
warn!("[{}]: {}", context, message);
|
||||
}
|
||||
}
|
||||
|
||||
/// Debug logging front end that dispatches on the shape of its arguments.
///
/// * `debug_log!(msg)` forwards to `utils::debug::debug_log`.
/// * `debug_log!(context, msg)` forwards to `utils::debug::debug_log_context`.
/// * `debug_log!(context, "key" => value, ...)` forwards to
///   `utils::debug::debug_log_structured`; each value is passed by reference,
///   and a trailing comma is accepted.
///
/// Paths are written with `$crate` so that this exported macro resolves the
/// helpers in the crate that defines it, regardless of which crate invokes it.
#[macro_export]
macro_rules! debug_log {
    ($msg:expr) => {
        $crate::utils::debug::debug_log($msg)
    };
    ($context:expr, $msg:expr) => {
        $crate::utils::debug::debug_log_context($context, $msg)
    };
    ($context:expr, $($key:expr => $value:expr),+ $(,)?) => {
        $crate::utils::debug::debug_log_structured($context, &[$(($key, &$value)),+])
    };
}
|
||||
|
||||
/// Error logging front end: `debug_error!(context, error)`.
///
/// Forwards to `utils::debug::debug_error`, passing `error` by reference so any
/// `Display` value (including a temporary such as `format!(...)`) can be given
/// directly.
///
/// The path is written with `$crate` so that this exported macro resolves the
/// helper in the crate that defines it, regardless of which crate invokes it.
#[macro_export]
macro_rules! debug_error {
    ($context:expr, $error:expr) => {
        $crate::utils::debug::debug_error($context, &$error)
    };
}
|
||||
|
||||
/// Warning logging front end: `debug_warn!(context, msg)`.
///
/// Forwards both arguments unchanged to `utils::debug::debug_warn`.
///
/// The path is written with `$crate` so that this exported macro resolves the
/// helper in the crate that defines it, regardless of which crate invokes it.
#[macro_export]
macro_rules! debug_warn {
    ($context:expr, $msg:expr) => {
        $crate::utils::debug::debug_warn($context, $msg)
    };
}
|
||||
|
|
@ -0,0 +1 @@
|
|||
pub mod debug;
|
||||
Loading…
Reference in New Issue