feat(server): upgrade all versions and resolve breaking changes

This commit is contained in:
perf3ct 2025-06-15 02:23:35 +00:00
parent d21e51436b
commit cfc6c85261
8 changed files with 1545 additions and 545 deletions

1766
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -13,16 +13,16 @@ path = "src/bin/test_runner.rs"
[dependencies]
tokio = { version = "1", features = ["full"] }
axum = { version = "0.7", features = ["multipart"] }
tower = { version = "0.4", features = ["util"] }
tower-http = { version = "0.5", features = ["cors", "fs"] }
axum = { version = "0.8", features = ["multipart"] }
tower = { version = "0.5", features = ["util"] }
tower-http = { version = "0.6", features = ["cors", "fs"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
sqlx = { version = "0.7", features = ["runtime-tokio-rustls", "postgres", "sqlite", "chrono", "uuid", "migrate"] }
regex = "1.0"
sqlx = { version = "0.8", features = ["runtime-tokio-rustls", "postgres", "sqlite", "chrono", "uuid", "migrate"] }
regex = "1.11"
uuid = { version = "1", features = ["v4", "serde"] }
chrono = { version = "0.4", features = ["serde"] }
bcrypt = "0.15"
bcrypt = "0.17"
base64ct = "=1.6.0"
jsonwebtoken = "9"
anyhow = "1"
@ -31,24 +31,24 @@ tracing-subscriber = "0.3"
tokio-util = { version = "0.7", features = ["io"] }
futures-util = "0.3"
futures = "0.3"
notify = "6"
notify = "8"
mime_guess = "2"
tesseract = { version = "0.15", optional = true }
pdf-extract = { version = "0.7", optional = true }
image = { version = "0.24", features = ["png", "jpeg", "tiff", "bmp"], optional = true }
imageproc = { version = "0.23", optional = true }
thiserror = "1.0"
sysinfo = "0.30"
pdf-extract = { version = "0.9", optional = true }
image = { version = "0.25", features = ["png", "jpeg", "tiff", "bmp"], optional = true }
imageproc = { version = "0.25", optional = true }
thiserror = "2.0"
sysinfo = "0.35"
raw-cpuid = { version = "11", optional = true }
reqwest = { version = "0.11", features = ["json", "multipart"] }
quick-xml = { version = "0.31", features = ["serialize"] }
reqwest = { version = "0.12", features = ["json", "multipart"] }
quick-xml = { version = "0.37", features = ["serialize"] }
urlencoding = "2.1"
dotenvy = "0.15"
hostname = "0.4"
walkdir = "2"
clap = { version = "4", features = ["derive"] }
utoipa = { version = "4", features = ["axum_extras", "chrono", "uuid"] }
utoipa-swagger-ui = { version = "6", features = ["axum"] }
utoipa = { version = "5", features = ["axum_extras", "chrono", "uuid"] }
utoipa-swagger-ui = { version = "9", features = ["axum"] }
[features]
default = ["ocr"]
@ -56,5 +56,5 @@ ocr = ["tesseract", "pdf-extract", "image", "imageproc", "raw-cpuid"]
[dev-dependencies]
tempfile = "3"
testcontainers = "0.15"
testcontainers-modules = { version = "0.3", features = ["postgres"] }
testcontainers = "0.24"
testcontainers-modules = { version = "0.12", features = ["postgres"] }

View File

@ -505,24 +505,43 @@ const Dashboard: React.FC = () => {
searchablePages: 0,
});
const [loading, setLoading] = useState<boolean>(true);
const [metrics, setMetrics] = useState<any>(null);
useEffect(() => {
const fetchDashboardData = async (): Promise<void> => {
try {
const response = await api.get<Document[]>('/documents');
const docs = response.data || [];
// Fetch both documents and metrics
const [docsResponse, metricsResponse] = await Promise.all([
api.get<Document[]>('/documents'),
api.get<any>('/metrics')
]);
const docs = docsResponse.data || [];
setDocuments(docs);
// Calculate stats
const totalSize = docs.reduce((sum, doc) => sum + (doc.file_size || 0), 0);
const ocrProcessed = docs.filter(doc => doc.ocr_text).length;
const metricsData = metricsResponse.data;
setMetrics(metricsData);
setStats({
totalDocuments: docs.length,
totalSize,
ocrProcessed,
searchablePages: docs.length, // Assuming each doc is searchable
});
// Use backend metrics if available, otherwise fall back to client calculation
if (metricsData?.documents) {
setStats({
totalDocuments: metricsData.documents.total_documents || 0,
totalSize: metricsData.documents.total_storage_bytes || 0,
ocrProcessed: metricsData.documents.documents_with_ocr || 0,
searchablePages: metricsData.documents.documents_with_ocr || 0,
});
} else {
// Fallback to client-side calculation
const totalSize = docs.reduce((sum, doc) => sum + (doc.file_size || 0), 0);
const ocrProcessed = docs.filter(doc => doc.ocr_text).length;
setStats({
totalDocuments: docs.length,
totalSize,
ocrProcessed,
searchablePages: docs.length,
});
}
} catch (error) {
console.error('Failed to fetch dashboard data:', error);
} finally {
@ -596,7 +615,7 @@ const Dashboard: React.FC = () => {
subtitle="Text extracted documents"
icon={OcrIcon}
color="#f59e0b"
trend={`${Math.round((stats.ocrProcessed / Math.max(stats.totalDocuments, 1)) * 100)}% completion`}
trend={stats.totalDocuments > 0 ? `${Math.round((stats.ocrProcessed / stats.totalDocuments) * 100)}% completion` : '0% completion'}
/>
</Grid>
<Grid item xs={12} sm={6} lg={3}>

View File

@ -1,6 +1,5 @@
use anyhow::Result;
use axum::{
async_trait,
extract::FromRequestParts,
http::{request::Parts, HeaderMap, StatusCode},
response::{IntoResponse, Response},
@ -24,7 +23,6 @@ pub struct AuthUser {
pub user: User,
}
#[async_trait]
impl FromRequestParts<Arc<AppState>> for AuthUser {
type Rejection = Response;

View File

@ -136,6 +136,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.nest("/api/auth", readur::routes::auth::router())
.nest("/api/documents", readur::routes::documents::router())
.nest("/api/metrics", readur::routes::metrics::router())
.nest("/metrics", readur::routes::prometheus_metrics::router())
.nest("/api/notifications", readur::routes::notifications::router())
.nest("/api/queue", readur::routes::queue::router())
.nest("/api/search", readur::routes::search::router())

View File

@ -2,6 +2,7 @@ pub mod auth;
pub mod documents;
pub mod metrics;
pub mod notifications;
pub mod prometheus_metrics;
pub mod queue;
pub mod search;
pub mod settings;

View File

@ -0,0 +1,239 @@
use axum::{
extract::State,
http::{header, StatusCode},
response::{IntoResponse, Response},
routing::get,
Router,
};
use std::sync::Arc;
use std::fmt::Write;
use crate::AppState;
/// Build the router exposing the Prometheus text-format scrape endpoint at `/`.
pub fn router() -> Router<Arc<AppState>> {
    let scrape_handler = get(get_prometheus_metrics);
    Router::new().route("/", scrape_handler)
}
/// Returns metrics in Prometheus text format (text/plain; version=0.0.4)
///
/// Collects document, OCR-queue, and user gauges concurrently and renders
/// each as a `# HELP` / `# TYPE` / sample triple, every sample stamped with
/// the current Unix time in milliseconds.
///
/// # Errors
/// Returns `StatusCode::INTERNAL_SERVER_ERROR` when any underlying metric
/// collection fails (the collector logs the cause before mapping it).
pub async fn get_prometheus_metrics(
    State(state): State<Arc<AppState>>,
) -> Result<Response, StatusCode> {
    // Emit one gauge in the Prometheus exposition format:
    // HELP line, TYPE line, then the sample itself.
    fn write_gauge(
        out: &mut String,
        name: &str,
        help: &str,
        value: impl std::fmt::Display,
        timestamp: i64,
    ) {
        // Writing into a String cannot fail, so unwrap is safe here.
        writeln!(out, "# HELP {} {}", name, help).unwrap();
        writeln!(out, "# TYPE {} gauge", name).unwrap();
        writeln!(out, "{} {} {}", name, value, timestamp).unwrap();
    }

    let mut output = String::new();

    // Single timestamp so all samples in one scrape share the same instant.
    let timestamp = chrono::Utc::now().timestamp_millis();

    // Collect all metric groups concurrently; first failure aborts the scrape.
    let (document_metrics, ocr_metrics, user_metrics) = tokio::try_join!(
        collect_document_metrics(&state),
        collect_ocr_metrics(&state),
        collect_user_metrics(&state)
    )?;

    // Document metrics
    write_gauge(&mut output, "readur_documents_total", "Total number of documents", document_metrics.total_documents, timestamp);
    write_gauge(&mut output, "readur_documents_uploaded_today", "Documents uploaded today", document_metrics.documents_uploaded_today, timestamp);
    write_gauge(&mut output, "readur_storage_bytes", "Total storage used in bytes", document_metrics.total_storage_bytes, timestamp);
    write_gauge(&mut output, "readur_documents_with_ocr", "Documents with OCR text", document_metrics.documents_with_ocr, timestamp);
    write_gauge(&mut output, "readur_documents_without_ocr", "Documents without OCR text", document_metrics.documents_without_ocr, timestamp);

    // OCR queue metrics
    write_gauge(&mut output, "readur_ocr_queue_pending", "OCR jobs pending", ocr_metrics.pending_jobs, timestamp);
    write_gauge(&mut output, "readur_ocr_queue_processing", "OCR jobs currently processing", ocr_metrics.processing_jobs, timestamp);
    write_gauge(&mut output, "readur_ocr_queue_failed", "OCR jobs failed", ocr_metrics.failed_jobs, timestamp);
    write_gauge(&mut output, "readur_ocr_completed_today", "OCR jobs completed today", ocr_metrics.completed_today, timestamp);

    // Average processing time is only emitted when data exists.
    if let Some(avg_time) = ocr_metrics.avg_processing_time_minutes {
        write_gauge(&mut output, "readur_ocr_avg_processing_minutes", "Average OCR processing time in minutes", avg_time, timestamp);
    }

    // User metrics
    write_gauge(&mut output, "readur_users_total", "Total number of users", user_metrics.total_users, timestamp);
    write_gauge(&mut output, "readur_users_active_today", "Active users today", user_metrics.active_users_today, timestamp);
    write_gauge(&mut output, "readur_users_registered_today", "New user registrations today", user_metrics.new_registrations_today, timestamp);

    // Return the metrics with the Prometheus text exposition content type.
    Ok((
        [(header::CONTENT_TYPE, "text/plain; version=0.0.4")],
        output,
    ).into_response())
}
// Reuse the same metric collection structs from the JSON endpoint
/// Document-level gauges exported by the Prometheus endpoint.
struct DocumentMetrics {
    // Total rows in the `documents` table.
    total_documents: i64,
    // Documents whose `created_at` date is the current date.
    documents_uploaded_today: i64,
    // Sum of `file_size` across all documents, in bytes (0 when table is empty).
    total_storage_bytes: i64,
    // Documents with `has_ocr_text = true`.
    documents_with_ocr: i64,
    // Derived: total minus documents with OCR text.
    documents_without_ocr: i64,
}
/// OCR-queue gauges exported by the Prometheus endpoint,
/// sourced from `OcrQueueService::get_stats`.
struct OcrMetrics {
    // Jobs waiting in the OCR queue.
    pending_jobs: i64,
    // Jobs currently being processed.
    processing_jobs: i64,
    // Jobs that have failed.
    failed_jobs: i64,
    // Jobs completed during the current day.
    completed_today: i64,
    // Average processing time in minutes; `None` when no data is available,
    // in which case the corresponding metric is omitted from the output.
    avg_processing_time_minutes: Option<f64>,
}
/// User-level gauges exported by the Prometheus endpoint.
struct UserMetrics {
    // Total rows in the `users` table.
    total_users: i64,
    // Users who uploaded at least one document today (proxy for activity).
    active_users_today: i64,
    // Users whose `created_at` date is the current date.
    new_registrations_today: i64,
}
/// Gather document gauges (counts, storage, OCR coverage) from the database.
///
/// Each query failure is logged and mapped to
/// `StatusCode::INTERNAL_SERVER_ERROR`.
async fn collect_document_metrics(state: &Arc<AppState>) -> Result<DocumentMetrics, StatusCode> {
    let pool = &state.db.pool;

    // Count every document row.
    let total: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM documents")
        .fetch_one(pool)
        .await
        .map_err(|e| {
            tracing::error!("Failed to get total document count: {}", e);
            StatusCode::INTERNAL_SERVER_ERROR
        })?;

    // Count documents created on the current date.
    let uploaded_today: i64 = sqlx::query_scalar(
        "SELECT COUNT(*) FROM documents WHERE DATE(created_at) = CURRENT_DATE",
    )
    .fetch_one(pool)
    .await
    .map_err(|e| {
        tracing::error!("Failed to get today's document count: {}", e);
        StatusCode::INTERNAL_SERVER_ERROR
    })?;

    // SUM returns NULL on an empty table, hence the Option and the 0 fallback.
    let summed_size: Option<i64> = sqlx::query_scalar("SELECT SUM(file_size) FROM documents")
        .fetch_one(pool)
        .await
        .map_err(|e| {
            tracing::error!("Failed to get total storage size: {}", e);
            StatusCode::INTERNAL_SERVER_ERROR
        })?;

    // Documents that already have extracted OCR text.
    let with_ocr: i64 = sqlx::query_scalar(
        "SELECT COUNT(*) FROM documents WHERE has_ocr_text = true",
    )
    .fetch_one(pool)
    .await
    .map_err(|e| {
        tracing::error!("Failed to get OCR document count: {}", e);
        StatusCode::INTERNAL_SERVER_ERROR
    })?;

    Ok(DocumentMetrics {
        total_documents: total,
        documents_uploaded_today: uploaded_today,
        total_storage_bytes: summed_size.unwrap_or(0),
        documents_with_ocr: with_ocr,
        documents_without_ocr: total - with_ocr,
    })
}
/// Gather OCR-queue gauges by delegating to `OcrQueueService::get_stats`.
///
/// Failures are logged and mapped to `StatusCode::INTERNAL_SERVER_ERROR`.
async fn collect_ocr_metrics(state: &Arc<AppState>) -> Result<OcrMetrics, StatusCode> {
    use crate::ocr_queue::OcrQueueService;

    let queue = OcrQueueService::new(
        state.db.clone(),
        state.db.pool.clone(),
        state.config.concurrent_ocr_jobs,
    );

    let stats = match queue.get_stats().await {
        Ok(s) => s,
        Err(e) => {
            tracing::error!("Failed to get OCR stats: {}", e);
            return Err(StatusCode::INTERNAL_SERVER_ERROR);
        }
    };

    Ok(OcrMetrics {
        pending_jobs: stats.pending_count,
        processing_jobs: stats.processing_count,
        failed_jobs: stats.failed_count,
        completed_today: stats.completed_today,
        avg_processing_time_minutes: stats.avg_wait_time_minutes,
    })
}
/// Gather user gauges: total users, today's registrations, and today's
/// active users (approximated as users who uploaded a document today).
///
/// Each query failure is logged and mapped to
/// `StatusCode::INTERNAL_SERVER_ERROR`.
async fn collect_user_metrics(state: &Arc<AppState>) -> Result<UserMetrics, StatusCode> {
    let pool = &state.db.pool;

    // Count every user row.
    let total: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM users")
        .fetch_one(pool)
        .await
        .map_err(|e| {
            tracing::error!("Failed to get total user count: {}", e);
            StatusCode::INTERNAL_SERVER_ERROR
        })?;

    // Users registered on the current date.
    let registered_today: i64 = sqlx::query_scalar(
        "SELECT COUNT(*) FROM users WHERE DATE(created_at) = CURRENT_DATE",
    )
    .fetch_one(pool)
    .await
    .map_err(|e| {
        tracing::error!("Failed to get new user count: {}", e);
        StatusCode::INTERNAL_SERVER_ERROR
    })?;

    // "Active" is approximated as users who uploaded at least one document today.
    let active_today: i64 = sqlx::query_scalar(
        "SELECT COUNT(DISTINCT user_id) FROM documents WHERE DATE(created_at) = CURRENT_DATE",
    )
    .fetch_one(pool)
    .await
    .map_err(|e| {
        tracing::error!("Failed to get active user count: {}", e);
        StatusCode::INTERNAL_SERVER_ERROR
    })?;

    Ok(UserMetrics {
        total_users: total,
        active_users_today: active_today,
        new_registrations_today: registered_today,
    })
}

View File

@ -19,7 +19,7 @@ struct PropFindResponse {
pub fn parse_propfind_response(xml_text: &str) -> Result<Vec<FileInfo>> {
let mut reader = Reader::from_str(xml_text);
reader.trim_text(true);
reader.config_mut().trim_text(true);
let mut files = Vec::new();
let mut current_response: Option<PropFindResponse> = None;