fix(ci): add missing languages to ci

This commit is contained in:
perf3ct 2025-07-21 19:50:06 +00:00
parent 6fcae1e752
commit 95e5186b78
4 changed files with 30 additions and 15 deletions

View File

@@ -42,6 +42,11 @@ jobs:
 sudo apt-get install -y \
 tesseract-ocr \
 tesseract-ocr-eng \
+tesseract-ocr-spa \
+tesseract-ocr-fra \
+tesseract-ocr-deu \
+tesseract-ocr-ita \
+tesseract-ocr-por \
 libtesseract-dev \
 libleptonica-dev \
 pkg-config \

View File

@@ -50,6 +50,11 @@ jobs:
 sudo apt-get install -y \
 tesseract-ocr \
 tesseract-ocr-eng \
+tesseract-ocr-spa \
+tesseract-ocr-fra \
+tesseract-ocr-deu \
+tesseract-ocr-ita \
+tesseract-ocr-por \
 libtesseract-dev \
 libleptonica-dev \
 pkg-config \

View File

@@ -27,6 +27,11 @@ jobs:
 sudo apt-get install -y \
 tesseract-ocr \
 tesseract-ocr-eng \
+tesseract-ocr-spa \
+tesseract-ocr-fra \
+tesseract-ocr-deu \
+tesseract-ocr-ita \
+tesseract-ocr-por \
 libtesseract-dev \
 libleptonica-dev \
 pkg-config \

View File

@@ -1,7 +1,7 @@
 use axum::{
-extract::{Path, State},
+extract::{Json, Path, State},
 http::StatusCode,
-response::Json,
+response::Json as ResponseJson,
 };
 use std::sync::Arc;
 use tracing::{debug, error, info, warn};
@@ -34,7 +34,7 @@ pub async fn get_document_ocr(
 State(state): State<Arc<AppState>>,
 auth_user: AuthUser,
 Path(document_id): Path<uuid::Uuid>,
-) -> Result<Json<DocumentOcrResponse>, StatusCode> {
+) -> Result<ResponseJson<DocumentOcrResponse>, StatusCode> {
 let document = state
 .db
 .get_document_by_id(document_id, auth_user.user.id, auth_user.user.role)
@@ -57,7 +57,7 @@ pub async fn get_document_ocr(
 pages_processed: None, // This would need to be stored separately if needed
 };
-Ok(Json(response))
+Ok(ResponseJson(response))
 }
 /// Retry OCR processing for a document
@@ -85,7 +85,7 @@ pub async fn retry_ocr(
 auth_user: AuthUser,
 Path(document_id): Path<uuid::Uuid>,
 Json(request): Json<super::types::RetryOcrRequest>,
-) -> Result<Json<serde_json::Value>, StatusCode> {
+) -> Result<ResponseJson<serde_json::Value>, StatusCode> {
 debug!("OCR retry request for document {} by user {}", document_id, auth_user.user.id);
 debug!("Request data: language={:?}, languages={:?}", request.language, request.languages);
 // Get document first to check if it exists and user has access
@@ -102,7 +102,7 @@ pub async fn retry_ocr(
 // Check if OCR is already in progress
 if let Some(ref status) = document.ocr_status {
 if status == "processing" {
-return Ok(Json(serde_json::json!({
+return Ok(ResponseJson(serde_json::json!({
 "success": false,
 "message": "OCR is already in progress for this document"
 })));
@@ -156,7 +156,7 @@ pub async fn retry_ocr(
 match state.queue_service.enqueue_document(document.id, 5, document.file_size).await {
 Ok(_) => {
 info!("Document {} queued for OCR retry", document_id);
-Ok(Json(serde_json::json!({
+Ok(ResponseJson(serde_json::json!({
 "success": true,
 "message": "Document queued for OCR processing"
 })))
@@ -173,7 +173,7 @@ pub async fn get_ocr_status_batch(
 State(state): State<Arc<AppState>>,
 auth_user: AuthUser,
 Json(document_ids): Json<Vec<uuid::Uuid>>,
-) -> Result<Json<serde_json::Value>, StatusCode> {
+) -> Result<ResponseJson<serde_json::Value>, StatusCode> {
 if document_ids.len() > 100 {
 return Err(StatusCode::BAD_REQUEST);
 }
@@ -211,7 +211,7 @@ pub async fn get_ocr_status_batch(
 }
 }
-Ok(Json(serde_json::json!({
+Ok(ResponseJson(serde_json::json!({
 "results": results
 })))
 }
@@ -221,7 +221,7 @@ pub async fn cancel_ocr(
 State(state): State<Arc<AppState>>,
 auth_user: AuthUser,
 Path(document_id): Path<uuid::Uuid>,
-) -> Result<Json<serde_json::Value>, StatusCode> {
+) -> Result<ResponseJson<serde_json::Value>, StatusCode> {
 // Verify user has access to the document
 let _document = state
 .db
@@ -235,7 +235,7 @@ pub async fn cancel_ocr(
 // Note: OCR queue removal not implemented in current queue service
 info!("Stop OCR processing requested for document {}", document_id);
-Ok(Json(serde_json::json!({
+Ok(ResponseJson(serde_json::json!({
 "success": true,
 "message": "OCR processing stop requested"
 })))
@@ -245,7 +245,7 @@ pub async fn cancel_ocr(
 pub async fn get_ocr_stats(
 State(state): State<Arc<AppState>>,
 auth_user: AuthUser,
-) -> Result<Json<serde_json::Value>, StatusCode> {
+) -> Result<ResponseJson<serde_json::Value>, StatusCode> {
 let (total, pending, completed, failed) = state
 .db
 .count_documents_by_ocr_status(auth_user.user.id, auth_user.user.role)
@@ -265,7 +265,7 @@ pub async fn get_ocr_stats(
 StatusCode::INTERNAL_SERVER_ERROR
 })?;
-Ok(Json(serde_json::json!({
+Ok(ResponseJson(serde_json::json!({
 "total_documents": total,
 "pending_ocr": pending,
 "completed_ocr": completed,
@@ -282,7 +282,7 @@ pub async fn update_ocr_settings(
 auth_user: AuthUser,
 Path(document_id): Path<uuid::Uuid>,
 Json(settings): Json<serde_json::Value>,
-) -> Result<Json<serde_json::Value>, StatusCode> {
+) -> Result<ResponseJson<serde_json::Value>, StatusCode> {
 // Verify user has access to the document
 let _document = state
 .db
@@ -297,7 +297,7 @@ pub async fn update_ocr_settings(
 // For now, just return success - OCR settings would be stored in metadata
 debug!("OCR settings updated for document {}: {:?}", document_id, settings);
-Ok(Json(serde_json::json!({
+Ok(ResponseJson(serde_json::json!({
 "success": true,
 "message": "OCR settings updated"
 })))