# Patch summary
#
# 1. migrations/20240101000007_fix_ocr_queue_stats_types.sql (new file)
#    Redefines get_ocr_queue_stats() with CREATE OR REPLACE. The function
#    returns one aggregate row over ocr_queue: four COUNT(*) FILTER counters
#    (pending, processing, failed with attempts >= max_attempts, completed
#    with completed_at >= CURRENT_DATE) and two durations in minutes
#    (EXTRACT(EPOCH FROM ...) divided by 60). Both duration expressions are
#    wrapped in CAST(... AS DOUBLE PRECISION) so they match the column types
#    declared in RETURNS TABLE.
#
# 2. src/routes/documents.rs
#    Registers GET "/:id/ocr" on the documents router and adds the
#    get_document_ocr handler. The handler calls
#    get_documents_by_user(user_id, 1000, 0) for the authenticated user, picks
#    the document whose id equals the path parameter (404 when none matches,
#    500 when the query fails) and returns its OCR fields as a JSON object.
#
# Review notes
#  - NOTE(review): generic type arguments look stripped in this copy of the
#    patch (`Router>`, `State>`, `Path`, `Result, StatusCode>`); confirm
#    against the original commit before applying.
#  - NOTE(review): the arguments 1000 and 0 are presumably limit and offset.
#    If so, a document outside the user's first 1000 rows would be reported
#    as 404 even though it exists. TODO confirm; consider a lookup by id.
#  - NOTE(review): the utoipa annotation declares `body = String` for the 200
#    response, while the handler returns `Json(serde_json::json!({...}))`.
#
diff --git a/migrations/20240101000007_fix_ocr_queue_stats_types.sql b/migrations/20240101000007_fix_ocr_queue_stats_types.sql
new file mode 100644
index 0000000..f3f1417
--- /dev/null
+++ b/migrations/20240101000007_fix_ocr_queue_stats_types.sql
@@ -0,0 +1,24 @@
+-- Fix type mismatch in get_ocr_queue_stats function
+-- The AVG() and MAX() functions return NUMERIC but we need DOUBLE PRECISION
+
+CREATE OR REPLACE FUNCTION get_ocr_queue_stats()
+RETURNS TABLE (
+    pending_count BIGINT,
+    processing_count BIGINT,
+    failed_count BIGINT,
+    completed_today BIGINT,
+    avg_wait_time_minutes DOUBLE PRECISION,
+    oldest_pending_minutes DOUBLE PRECISION
+) AS $$
+BEGIN
+    RETURN QUERY
+    SELECT
+        COUNT(*) FILTER (WHERE status = 'pending') as pending_count,
+        COUNT(*) FILTER (WHERE status = 'processing') as processing_count,
+        COUNT(*) FILTER (WHERE status = 'failed' AND attempts >= max_attempts) as failed_count,
+        COUNT(*) FILTER (WHERE status = 'completed' AND completed_at >= CURRENT_DATE) as completed_today,
+        CAST(AVG(EXTRACT(EPOCH FROM (COALESCE(started_at, NOW()) - created_at))/60) FILTER (WHERE status IN ('processing', 'completed')) AS DOUBLE PRECISION) as avg_wait_time_minutes,
+        CAST(MAX(EXTRACT(EPOCH FROM (NOW() - created_at))/60) FILTER (WHERE status = 'pending') AS DOUBLE PRECISION) as oldest_pending_minutes
+    FROM ocr_queue;
+END;
+$$ LANGUAGE plpgsql;
\ No newline at end of file
diff --git a/src/routes/documents.rs b/src/routes/documents.rs
index c4376b2..bd76133 100644
--- a/src/routes/documents.rs
+++ b/src/routes/documents.rs
@@ -28,6 +28,7 @@ pub fn router() -> Router> {
         .route("/", post(upload_document))
         .route("/", get(list_documents))
         .route("/:id/download", get(download_document))
+        .route("/:id/ocr", get(get_document_ocr))
 }
 
 #[utoipa::path(
@@ -209,4 +210,52 @@ async fn download_document(
         .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
 
     Ok(file_data)
+}
+
+#[utoipa::path(
+    get,
+    path = "/api/documents/{id}/ocr",
+    tag = "documents",
+    security(
+        ("bearer_auth" = [])
+    ),
+    params(
+        ("id" = uuid::Uuid, Path, description = "Document ID")
+    ),
+    responses(
+        (status = 200, description = "OCR extracted text and metadata", body = String),
+        (status = 404, description = "Document not found"),
+        (status = 401, description = "Unauthorized"),
+        (status = 500, description = "Internal server error")
+    )
+)]
+async fn get_document_ocr(
+    State(state): State>,
+    auth_user: AuthUser,
+    Path(document_id): Path,
+) -> Result, StatusCode> {
+    let documents = state
+        .db
+        .get_documents_by_user(auth_user.user.id, 1000, 0)
+        .await
+        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
+
+    let document = documents
+        .into_iter()
+        .find(|doc| doc.id == document_id)
+        .ok_or(StatusCode::NOT_FOUND)?;
+
+    // Return OCR text and metadata
+    Ok(Json(serde_json::json!({
+        "document_id": document.id,
+        "filename": document.filename,
+        "has_ocr_text": document.ocr_text.is_some(),
+        "ocr_text": document.ocr_text,
+        "ocr_confidence": document.ocr_confidence,
+        "ocr_word_count": document.ocr_word_count,
+        "ocr_processing_time_ms": document.ocr_processing_time_ms,
+        "ocr_status": document.ocr_status,
+        "ocr_error": document.ocr_error,
+        "ocr_completed_at": document.ocr_completed_at
+    })))
 }
\ No newline at end of file