fix(tests): fix the crazy metrics collection issue

This commit is contained in:
perf3ct 2025-07-08 16:52:23 +00:00
parent 58b8a71404
commit 03555ed756
2 changed files with 72 additions and 2 deletions

View File

@ -0,0 +1,42 @@
-- Fix the get_ocr_queue_stats function so that it matches the structure callers expect.
-- This migration ensures the function reads completed_today from the documents table
-- and covers the case where migration 20250620100019 may have failed silently.
-- CREATE OR REPLACE FUNCTION cannot change the return type (including the
-- column types of a RETURNS TABLE clause) of an already-existing function.
-- If an earlier definition with a different result shape is still installed,
-- the CREATE below would fail, so drop any existing version first.
-- NOTE(review): DROP FUNCTION errors out if other database objects depend on
-- the function; none are visible from this migration -- confirm before deploying.
DROP FUNCTION IF EXISTS get_ocr_queue_stats();

-- get_ocr_queue_stats(): returns a single row summarising the OCR queue.
--
-- Returned columns:
--   pending_count          rows in ocr_queue with status = 'pending'
--   processing_count       rows in ocr_queue with status = 'processing'
--   failed_count           rows in ocr_queue with status = 'failed' whose
--                          attempts have reached max_attempts
--   completed_today        rows in documents with ocr_status = 'completed'
--                          and updated_at within the current date
--   avg_wait_time_minutes  average minutes between created_at and started_at
--                          (NOW() when started_at is NULL) over 'processing'
--                          and 'completed' queue rows; NULL when there are none
--   oldest_pending_minutes minutes since created_at of the oldest 'pending'
--                          row; NULL when nothing is pending
CREATE OR REPLACE FUNCTION get_ocr_queue_stats()
RETURNS TABLE (
    pending_count BIGINT,
    processing_count BIGINT,
    failed_count BIGINT,
    completed_today BIGINT,
    avg_wait_time_minutes DOUBLE PRECISION,
    oldest_pending_minutes DOUBLE PRECISION
) AS $$
BEGIN
    RETURN QUERY
    WITH queue_stats AS (
        -- One aggregate pass over ocr_queue; each FILTER picks the rows for its metric.
        SELECT
            COUNT(*) FILTER (WHERE status = 'pending') as pending_count,
            COUNT(*) FILTER (WHERE status = 'processing') as processing_count,
            COUNT(*) FILTER (WHERE status = 'failed' AND attempts >= max_attempts) as failed_count,
            -- Explicit casts keep the result types aligned with the RETURNS TABLE
            -- declaration above (DOUBLE PRECISION).
            CAST(AVG(EXTRACT(EPOCH FROM (COALESCE(started_at, NOW()) - created_at))/60) FILTER (WHERE status IN ('processing', 'completed')) AS DOUBLE PRECISION) as avg_wait_time_minutes,
            CAST(MAX(EXTRACT(EPOCH FROM (NOW() - created_at))/60) FILTER (WHERE status = 'pending') AS DOUBLE PRECISION) as oldest_pending_minutes
        FROM ocr_queue
    ),
    document_stats AS (
        -- Count documents that completed OCR today (looking at documents table where actual completion is tracked)
        SELECT COUNT(*) as completed_today
        FROM documents
        WHERE ocr_status = 'completed'
          AND updated_at >= CURRENT_DATE
          AND updated_at < CURRENT_DATE + INTERVAL '1 day'
    )
    -- Both CTEs are ungrouped aggregates and therefore yield exactly one row
    -- each, so the cross join below produces exactly one result row.
    SELECT
        queue_stats.pending_count,
        queue_stats.processing_count,
        queue_stats.failed_count,
        document_stats.completed_today,
        queue_stats.avg_wait_time_minutes,
        queue_stats.oldest_pending_minutes
    FROM queue_stats, document_stats;
END;
$$ LANGUAGE plpgsql;

View File

@ -795,6 +795,31 @@ impl OcrQueueService {
pub async fn get_stats(&self) -> Result<QueueStats> { pub async fn get_stats(&self) -> Result<QueueStats> {
tracing::debug!("OCR Queue: Starting get_stats() call"); tracing::debug!("OCR Queue: Starting get_stats() call");
// First, let's check if the function exists and what it returns
let function_exists = sqlx::query_scalar::<_, bool>(
r#"
SELECT EXISTS (
SELECT 1 FROM pg_proc p
JOIN pg_namespace n ON p.pronamespace = n.oid
WHERE n.nspname = 'public' AND p.proname = 'get_ocr_queue_stats'
)
"#
)
.fetch_one(&self.pool)
.await
.map_err(|e| {
tracing::error!("OCR Queue: Failed to check if function exists: {}", e);
e
})?;
if !function_exists {
tracing::error!("OCR Queue: Function get_ocr_queue_stats() does not exist");
return Err(anyhow::anyhow!("Function get_ocr_queue_stats() does not exist"));
}
tracing::debug!("OCR Queue: Function get_ocr_queue_stats() exists, attempting to call it");
// Call the function
let stats = sqlx::query( let stats = sqlx::query(
r#" r#"
SELECT * FROM get_ocr_queue_stats() SELECT * FROM get_ocr_queue_stats()
@ -809,10 +834,13 @@ impl OcrQueueService {
tracing::debug!("OCR Queue: Successfully fetched stats row"); tracing::debug!("OCR Queue: Successfully fetched stats row");
// Debug: Print all column names and their types // Debug: Print all column names, their types, and their values
let columns = stats.columns(); let columns = stats.columns();
for (i, column) in columns.iter().enumerate() { for (i, column) in columns.iter().enumerate() {
tracing::debug!("OCR Queue: Column {}: name='{}', type='{:?}'", i, column.name(), column.type_info()); let column_name = column.name();
let column_type = column.type_info();
tracing::debug!("OCR Queue: Column {}: name='{}', type='{:?}'", i, column_name, column_type);
} }
// Try to extract each field with detailed error handling // Try to extract each field with detailed error handling