diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml index a53e4f8..5231fd3 100644 --- a/.github/workflows/test-e2e.yml +++ b/.github/workflows/test-e2e.yml @@ -42,6 +42,11 @@ jobs: sudo apt-get install -y \ tesseract-ocr \ tesseract-ocr-eng \ + tesseract-ocr-spa \ + tesseract-ocr-fra \ + tesseract-ocr-deu \ + tesseract-ocr-ita \ + tesseract-ocr-por \ libtesseract-dev \ libleptonica-dev \ pkg-config \ diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 3c1bbab..83bcec9 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -50,6 +50,11 @@ jobs: sudo apt-get install -y \ tesseract-ocr \ tesseract-ocr-eng \ + tesseract-ocr-spa \ + tesseract-ocr-fra \ + tesseract-ocr-deu \ + tesseract-ocr-ita \ + tesseract-ocr-por \ libtesseract-dev \ libleptonica-dev \ pkg-config \ diff --git a/.github/workflows/test-unit.yml b/.github/workflows/test-unit.yml index d2c7c07..1ed8d92 100644 --- a/.github/workflows/test-unit.yml +++ b/.github/workflows/test-unit.yml @@ -27,6 +27,11 @@ jobs: sudo apt-get install -y \ tesseract-ocr \ tesseract-ocr-eng \ + tesseract-ocr-spa \ + tesseract-ocr-fra \ + tesseract-ocr-deu \ + tesseract-ocr-ita \ + tesseract-ocr-por \ libtesseract-dev \ libleptonica-dev \ pkg-config \ diff --git a/src/routes/documents/ocr.rs b/src/routes/documents/ocr.rs index 7cc62d0..7a0b8bc 100644 --- a/src/routes/documents/ocr.rs +++ b/src/routes/documents/ocr.rs @@ -1,7 +1,7 @@ use axum::{ - extract::{Path, State}, + extract::{Json, Path, State}, http::StatusCode, - response::Json, + response::Json as ResponseJson, }; use std::sync::Arc; use tracing::{debug, error, info, warn}; @@ -34,7 +34,7 @@ pub async fn get_document_ocr( State(state): State>, auth_user: AuthUser, Path(document_id): Path, -) -> Result, StatusCode> { +) -> Result, StatusCode> { let document = state .db .get_document_by_id(document_id, auth_user.user.id, auth_user.user.role) @@ -57,7 +57,7 @@ pub async fn get_document_ocr( pages_processed: None, // This would need to be stored separately if needed }; - Ok(Json(response)) + Ok(ResponseJson(response)) } /// Retry OCR processing for a document @@ -85,7 +85,7 @@ pub async fn retry_ocr( auth_user: AuthUser, Path(document_id): Path, Json(request): Json, -) -> Result, StatusCode> { +) -> Result, StatusCode> { debug!("OCR retry request for document {} by user {}", document_id, auth_user.user.id); debug!("Request data: language={:?}, languages={:?}", request.language, request.languages); // Get document first to check if it exists and user has access @@ -102,7 +102,7 @@ pub async fn retry_ocr( // Check if OCR is already in progress if let Some(ref status) = document.ocr_status { if status == "processing" { - return Ok(Json(serde_json::json!({ + return Ok(ResponseJson(serde_json::json!({ "success": false, "message": "OCR is already in progress for this document" }))); @@ -156,7 +156,7 @@ pub async fn retry_ocr( match state.queue_service.enqueue_document(document.id, 5, document.file_size).await { Ok(_) => { info!("Document {} queued for OCR retry", document_id); - Ok(Json(serde_json::json!({ + Ok(ResponseJson(serde_json::json!({ "success": true, "message": "Document queued for OCR processing" }))) @@ -173,7 +173,7 @@ pub async fn get_ocr_status_batch( State(state): State>, auth_user: AuthUser, Json(document_ids): Json>, -) -> Result, StatusCode> { +) -> Result, StatusCode> { if document_ids.len() > 100 { return Err(StatusCode::BAD_REQUEST); } @@ -211,7 +211,7 @@ pub async fn get_ocr_status_batch( } } - Ok(Json(serde_json::json!({ + Ok(ResponseJson(serde_json::json!({ "results": results }))) } @@ -221,7 +221,7 @@ pub async fn cancel_ocr( State(state): State>, auth_user: AuthUser, Path(document_id): Path, -) -> Result, StatusCode> { +) -> Result, StatusCode> { // Verify user has access to the document let _document = state .db @@ -235,7 +235,7 @@ pub async fn cancel_ocr( // Note: OCR queue removal not implemented in current queue service info!("Stop OCR processing requested for document {}", document_id); - Ok(Json(serde_json::json!({ + Ok(ResponseJson(serde_json::json!({ "success": true, "message": "OCR processing stop requested" }))) @@ -245,7 +245,7 @@ pub async fn cancel_ocr( pub async fn get_ocr_stats( State(state): State>, auth_user: AuthUser, -) -> Result, StatusCode> { +) -> Result, StatusCode> { let (total, pending, completed, failed) = state .db .count_documents_by_ocr_status(auth_user.user.id, auth_user.user.role) @@ -265,7 +265,7 @@ pub async fn get_ocr_stats( StatusCode::INTERNAL_SERVER_ERROR })?; - Ok(Json(serde_json::json!({ + Ok(ResponseJson(serde_json::json!({ "total_documents": total, "pending_ocr": pending, "completed_ocr": completed, @@ -282,7 +282,7 @@ pub async fn update_ocr_settings( auth_user: AuthUser, Path(document_id): Path, Json(settings): Json, -) -> Result, StatusCode> { +) -> Result, StatusCode> { // Verify user has access to the document let _document = state .db @@ -297,7 +297,7 @@ pub async fn update_ocr_settings( // For now, just return success - OCR settings would be stored in metadata debug!("OCR settings updated for document {}: {:?}", document_id, settings); - Ok(Json(serde_json::json!({ + Ok(ResponseJson(serde_json::json!({ "success": true, "message": "OCR settings updated" })))