From 05a035579657e2c56efd020c83c43053257de8d7 Mon Sep 17 00:00:00 2001 From: perf3ct Date: Tue, 8 Jul 2025 16:52:23 +0000 Subject: [PATCH] fix(tests): fix the crazy metrics collection issue --- ...708000001_fix_ocr_queue_stats_function.sql | 42 +++++++++++++++++++ src/ocr/queue.rs | 32 +++++++++++++- 2 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 migrations/20250708000001_fix_ocr_queue_stats_function.sql diff --git a/migrations/20250708000001_fix_ocr_queue_stats_function.sql b/migrations/20250708000001_fix_ocr_queue_stats_function.sql new file mode 100644 index 0000000..0370004 --- /dev/null +++ b/migrations/20250708000001_fix_ocr_queue_stats_function.sql @@ -0,0 +1,42 @@ +-- Fix the get_ocr_queue_stats function to ensure it matches the expected structure +-- This migration ensures the function correctly gets completed_today from documents table +-- and handles the case where migration 20250620100019 may have failed silently + +CREATE OR REPLACE FUNCTION get_ocr_queue_stats() +RETURNS TABLE ( + pending_count BIGINT, + processing_count BIGINT, + failed_count BIGINT, + completed_today BIGINT, + avg_wait_time_minutes DOUBLE PRECISION, + oldest_pending_minutes DOUBLE PRECISION +) AS $$ +BEGIN + RETURN QUERY + WITH queue_stats AS ( + SELECT + COUNT(*) FILTER (WHERE status = 'pending') as pending_count, + COUNT(*) FILTER (WHERE status = 'processing') as processing_count, + COUNT(*) FILTER (WHERE status = 'failed' AND attempts >= max_attempts) as failed_count, + CAST(AVG(EXTRACT(EPOCH FROM (COALESCE(started_at, NOW()) - created_at))/60) FILTER (WHERE status IN ('processing', 'completed')) AS DOUBLE PRECISION) as avg_wait_time_minutes, + CAST(MAX(EXTRACT(EPOCH FROM (NOW() - created_at))/60) FILTER (WHERE status = 'pending') AS DOUBLE PRECISION) as oldest_pending_minutes + FROM ocr_queue + ), + document_stats AS ( + -- Count documents that completed OCR today (looking at documents table where actual completion is tracked) + SELECT COUNT(*) as 
completed_today + FROM documents + WHERE ocr_status = 'completed' + AND updated_at >= CURRENT_DATE + AND updated_at < CURRENT_DATE + INTERVAL '1 day' + ) + SELECT + queue_stats.pending_count, + queue_stats.processing_count, + queue_stats.failed_count, + document_stats.completed_today, + queue_stats.avg_wait_time_minutes, + queue_stats.oldest_pending_minutes + FROM queue_stats, document_stats; +END; +$$ LANGUAGE plpgsql; \ No newline at end of file diff --git a/src/ocr/queue.rs b/src/ocr/queue.rs index 10e5e8a..1386fe7 100644 --- a/src/ocr/queue.rs +++ b/src/ocr/queue.rs @@ -795,6 +795,31 @@ impl OcrQueueService { pub async fn get_stats(&self) -> Result { tracing::debug!("OCR Queue: Starting get_stats() call"); + // First, verify that the get_ocr_queue_stats() function exists in the public schema before calling it + let function_exists = sqlx::query_scalar::<_, bool>( + r#" + SELECT EXISTS ( + SELECT 1 FROM pg_proc p + JOIN pg_namespace n ON p.pronamespace = n.oid + WHERE n.nspname = 'public' AND p.proname = 'get_ocr_queue_stats' + ) + "# + ) + .fetch_one(&self.pool) + .await + .map_err(|e| { + tracing::error!("OCR Queue: Failed to check if function exists: {}", e); + e + })?; + + if !function_exists { + tracing::error!("OCR Queue: Function get_ocr_queue_stats() does not exist"); + return Err(anyhow::anyhow!("Function get_ocr_queue_stats() does not exist")); + } + + tracing::debug!("OCR Queue: Function get_ocr_queue_stats() exists, attempting to call it"); + + // Call the function let stats = sqlx::query( r#" SELECT * FROM get_ocr_queue_stats() @@ -809,10 +834,13 @@ impl OcrQueueService { tracing::debug!("OCR Queue: Successfully fetched stats row"); - // Debug: Print all column names and their types + // Debug: Log each column's index, name, and type (values are not logged in this loop) let columns = stats.columns(); for (i, column) in columns.iter().enumerate() { - tracing::debug!("OCR Queue: Column {}: name='{}', type='{:?}'", i, column.name(), column.type_info()); + let column_name = column.name(); + let column_type = 
column.type_info(); + + tracing::debug!("OCR Queue: Column {}: name='{}', type='{:?}'", i, column_name, column_type); } // Try to extract each field with detailed error handling