363 lines
12 KiB
Rust
363 lines
12 KiB
Rust
use axum::{
|
|
extract::{Multipart, Path, Query, State},
|
|
http::{StatusCode, header::CONTENT_TYPE},
|
|
response::{Json, Response},
|
|
routing::{get, post},
|
|
Router,
|
|
};
|
|
use serde::Deserialize;
|
|
use std::sync::Arc;
|
|
use utoipa::ToSchema;
|
|
|
|
use crate::{
|
|
auth::AuthUser,
|
|
file_service::FileService,
|
|
models::DocumentResponse,
|
|
ocr_queue::OcrQueueService,
|
|
AppState,
|
|
};
|
|
|
|
#[derive(Deserialize, ToSchema)]
|
|
struct PaginationQuery {
|
|
limit: Option<i64>,
|
|
offset: Option<i64>,
|
|
}
|
|
|
|
pub fn router() -> Router<Arc<AppState>> {
|
|
Router::new()
|
|
.route("/", post(upload_document))
|
|
.route("/", get(list_documents))
|
|
.route("/{id}/download", get(download_document))
|
|
.route("/{id}/view", get(view_document))
|
|
.route("/{id}/thumbnail", get(get_document_thumbnail))
|
|
.route("/{id}/ocr", get(get_document_ocr))
|
|
}
|
|
|
|
#[utoipa::path(
|
|
post,
|
|
path = "/api/documents",
|
|
tag = "documents",
|
|
security(
|
|
("bearer_auth" = [])
|
|
),
|
|
request_body(content = String, description = "Multipart form data with file. Supported formats: PDF, PNG, JPG, JPEG, TIFF, BMP, TXT. OCR will be automatically performed on image and PDF files.", content_type = "multipart/form-data"),
|
|
responses(
|
|
(status = 200, description = "Document uploaded successfully. OCR processing will begin automatically if enabled in user settings.", body = DocumentResponse),
|
|
(status = 400, description = "Bad request - invalid file type or malformed data"),
|
|
(status = 413, description = "Payload too large - file exceeds size limit"),
|
|
(status = 401, description = "Unauthorized - valid authentication required")
|
|
)
|
|
)]
|
|
async fn upload_document(
|
|
State(state): State<Arc<AppState>>,
|
|
auth_user: AuthUser,
|
|
mut multipart: Multipart,
|
|
) -> Result<Json<DocumentResponse>, StatusCode> {
|
|
let file_service = FileService::new(state.config.upload_path.clone());
|
|
|
|
// Get user settings for file upload restrictions
|
|
let settings = state
|
|
.db
|
|
.get_user_settings(auth_user.user.id)
|
|
.await
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
|
.unwrap_or_else(|| crate::models::Settings::default());
|
|
|
|
while let Some(field) = multipart.next_field().await.map_err(|_| StatusCode::BAD_REQUEST)? {
|
|
let name = field.name().unwrap_or("").to_string();
|
|
|
|
if name == "file" {
|
|
let filename = field
|
|
.file_name()
|
|
.ok_or(StatusCode::BAD_REQUEST)?
|
|
.to_string();
|
|
|
|
if !file_service.is_allowed_file_type(&filename, &settings.allowed_file_types) {
|
|
return Err(StatusCode::BAD_REQUEST);
|
|
}
|
|
|
|
let data = field.bytes().await.map_err(|_| StatusCode::BAD_REQUEST)?;
|
|
let file_size = data.len() as i64;
|
|
|
|
// Check file size limit
|
|
let max_size_bytes = (settings.max_file_size_mb as i64) * 1024 * 1024;
|
|
if file_size > max_size_bytes {
|
|
return Err(StatusCode::PAYLOAD_TOO_LARGE);
|
|
}
|
|
|
|
let mime_type = mime_guess::from_path(&filename)
|
|
.first_or_octet_stream()
|
|
.to_string();
|
|
|
|
let file_path = file_service
|
|
.save_file(&filename, &data)
|
|
.await
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
|
|
|
let document = file_service.create_document(
|
|
&filename,
|
|
&filename,
|
|
&file_path,
|
|
file_size,
|
|
&mime_type,
|
|
auth_user.user.id,
|
|
);
|
|
|
|
let saved_document = state
|
|
.db
|
|
.create_document(document)
|
|
.await
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
|
|
|
let document_id = saved_document.id;
|
|
let enable_background_ocr = settings.enable_background_ocr;
|
|
|
|
if enable_background_ocr {
|
|
let queue_service = OcrQueueService::new(state.db.clone(), state.db.pool.clone(), 1);
|
|
|
|
// Calculate priority based on file size
|
|
let priority = match file_size {
|
|
0..=1048576 => 10, // <= 1MB: highest priority
|
|
..=5242880 => 8, // 1-5MB: high priority
|
|
..=10485760 => 6, // 5-10MB: medium priority
|
|
..=52428800 => 4, // 10-50MB: low priority
|
|
_ => 2, // > 50MB: lowest priority
|
|
};
|
|
|
|
queue_service.enqueue_document(document_id, priority, file_size).await
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
|
}
|
|
|
|
return Ok(Json(saved_document.into()));
|
|
}
|
|
}
|
|
|
|
Err(StatusCode::BAD_REQUEST)
|
|
}
|
|
|
|
#[utoipa::path(
|
|
get,
|
|
path = "/api/documents",
|
|
tag = "documents",
|
|
security(
|
|
("bearer_auth" = [])
|
|
),
|
|
params(
|
|
("limit" = Option<i64>, Query, description = "Number of documents to return (default: 50)"),
|
|
("offset" = Option<i64>, Query, description = "Number of documents to skip (default: 0)")
|
|
),
|
|
responses(
|
|
(status = 200, description = "List of user documents", body = Vec<DocumentResponse>),
|
|
(status = 401, description = "Unauthorized")
|
|
)
|
|
)]
|
|
async fn list_documents(
|
|
State(state): State<Arc<AppState>>,
|
|
auth_user: AuthUser,
|
|
Query(pagination): Query<PaginationQuery>,
|
|
) -> Result<Json<Vec<DocumentResponse>>, StatusCode> {
|
|
let limit = pagination.limit.unwrap_or(50);
|
|
let offset = pagination.offset.unwrap_or(0);
|
|
|
|
let documents = state
|
|
.db
|
|
.get_documents_by_user_with_role(auth_user.user.id, auth_user.user.role, limit, offset)
|
|
.await
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
|
|
|
let response: Vec<DocumentResponse> = documents.into_iter().map(|doc| doc.into()).collect();
|
|
|
|
Ok(Json(response))
|
|
}
|
|
|
|
#[utoipa::path(
|
|
get,
|
|
path = "/api/documents/{id}/download",
|
|
tag = "documents",
|
|
security(
|
|
("bearer_auth" = [])
|
|
),
|
|
params(
|
|
("id" = uuid::Uuid, Path, description = "Document ID")
|
|
),
|
|
responses(
|
|
(status = 200, description = "Document file content", content_type = "application/octet-stream"),
|
|
(status = 404, description = "Document not found"),
|
|
(status = 401, description = "Unauthorized")
|
|
)
|
|
)]
|
|
async fn download_document(
|
|
State(state): State<Arc<AppState>>,
|
|
auth_user: AuthUser,
|
|
Path(document_id): Path<uuid::Uuid>,
|
|
) -> Result<Vec<u8>, StatusCode> {
|
|
let documents = state
|
|
.db
|
|
.get_documents_by_user_with_role(auth_user.user.id, auth_user.user.role, 1000, 0)
|
|
.await
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
|
|
|
let document = documents
|
|
.into_iter()
|
|
.find(|doc| doc.id == document_id)
|
|
.ok_or(StatusCode::NOT_FOUND)?;
|
|
|
|
let file_service = FileService::new(state.config.upload_path.clone());
|
|
let file_data = file_service
|
|
.read_file(&document.file_path)
|
|
.await
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
|
|
|
Ok(file_data)
|
|
}
|
|
|
|
#[utoipa::path(
|
|
get,
|
|
path = "/api/documents/{id}/view",
|
|
tag = "documents",
|
|
security(
|
|
("bearer_auth" = [])
|
|
),
|
|
params(
|
|
("id" = uuid::Uuid, Path, description = "Document ID")
|
|
),
|
|
responses(
|
|
(status = 200, description = "Document content for viewing in browser"),
|
|
(status = 404, description = "Document not found"),
|
|
(status = 401, description = "Unauthorized")
|
|
)
|
|
)]
|
|
async fn view_document(
|
|
State(state): State<Arc<AppState>>,
|
|
auth_user: AuthUser,
|
|
Path(document_id): Path<uuid::Uuid>,
|
|
) -> Result<Response, StatusCode> {
|
|
let documents = state
|
|
.db
|
|
.get_documents_by_user_with_role(auth_user.user.id, auth_user.user.role, 1000, 0)
|
|
.await
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
|
|
|
let document = documents
|
|
.into_iter()
|
|
.find(|doc| doc.id == document_id)
|
|
.ok_or(StatusCode::NOT_FOUND)?;
|
|
|
|
let file_service = FileService::new(state.config.upload_path.clone());
|
|
let file_data = file_service
|
|
.read_file(&document.file_path)
|
|
.await
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
|
|
|
// Determine content type from file extension
|
|
let content_type = mime_guess::from_path(&document.filename)
|
|
.first_or_octet_stream()
|
|
.to_string();
|
|
|
|
let response = Response::builder()
|
|
.header(CONTENT_TYPE, content_type)
|
|
.header("Content-Length", file_data.len())
|
|
.body(file_data.into())
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
|
|
|
Ok(response)
|
|
}
|
|
|
|
#[utoipa::path(
|
|
get,
|
|
path = "/api/documents/{id}/thumbnail",
|
|
tag = "documents",
|
|
security(
|
|
("bearer_auth" = [])
|
|
),
|
|
params(
|
|
("id" = uuid::Uuid, Path, description = "Document ID")
|
|
),
|
|
responses(
|
|
(status = 200, description = "Document thumbnail image", content_type = "image/jpeg"),
|
|
(status = 404, description = "Document not found or thumbnail not available"),
|
|
(status = 401, description = "Unauthorized")
|
|
)
|
|
)]
|
|
async fn get_document_thumbnail(
|
|
State(state): State<Arc<AppState>>,
|
|
auth_user: AuthUser,
|
|
Path(document_id): Path<uuid::Uuid>,
|
|
) -> Result<Response, StatusCode> {
|
|
let documents = state
|
|
.db
|
|
.get_documents_by_user_with_role(auth_user.user.id, auth_user.user.role, 1000, 0)
|
|
.await
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
|
|
|
let document = documents
|
|
.into_iter()
|
|
.find(|doc| doc.id == document_id)
|
|
.ok_or(StatusCode::NOT_FOUND)?;
|
|
|
|
let file_service = FileService::new(state.config.upload_path.clone());
|
|
|
|
// Try to generate or get cached thumbnail
|
|
match file_service.get_or_generate_thumbnail(&document.file_path, &document.filename).await {
|
|
Ok(thumbnail_data) => {
|
|
Ok(Response::builder()
|
|
.header(CONTENT_TYPE, "image/jpeg")
|
|
.header("Content-Length", thumbnail_data.len())
|
|
.header("Cache-Control", "public, max-age=3600") // Cache for 1 hour
|
|
.body(thumbnail_data.into())
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?)
|
|
}
|
|
Err(_) => {
|
|
// Return a placeholder thumbnail or 404
|
|
Err(StatusCode::NOT_FOUND)
|
|
}
|
|
}
|
|
}
|
|
|
|
#[utoipa::path(
|
|
get,
|
|
path = "/api/documents/{id}/ocr",
|
|
tag = "documents",
|
|
security(
|
|
("bearer_auth" = [])
|
|
),
|
|
params(
|
|
("id" = uuid::Uuid, Path, description = "Document ID")
|
|
),
|
|
responses(
|
|
(status = 200, description = "OCR extracted text and metadata", body = String),
|
|
(status = 404, description = "Document not found"),
|
|
(status = 401, description = "Unauthorized"),
|
|
(status = 500, description = "Internal server error")
|
|
)
|
|
)]
|
|
async fn get_document_ocr(
|
|
State(state): State<Arc<AppState>>,
|
|
auth_user: AuthUser,
|
|
Path(document_id): Path<uuid::Uuid>,
|
|
) -> Result<Json<serde_json::Value>, StatusCode> {
|
|
let documents = state
|
|
.db
|
|
.get_documents_by_user_with_role(auth_user.user.id, auth_user.user.role, 1000, 0)
|
|
.await
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
|
|
|
let document = documents
|
|
.into_iter()
|
|
.find(|doc| doc.id == document_id)
|
|
.ok_or(StatusCode::NOT_FOUND)?;
|
|
|
|
// Return OCR text and metadata
|
|
Ok(Json(serde_json::json!({
|
|
"document_id": document.id,
|
|
"filename": document.filename,
|
|
"has_ocr_text": document.ocr_text.is_some(),
|
|
"ocr_text": document.ocr_text,
|
|
"ocr_confidence": document.ocr_confidence,
|
|
"ocr_word_count": document.ocr_word_count,
|
|
"ocr_processing_time_ms": document.ocr_processing_time_ms,
|
|
"ocr_status": document.ocr_status,
|
|
"ocr_error": document.ocr_error,
|
|
"ocr_completed_at": document.ocr_completed_at
|
|
})))
|
|
} |