From e626f3a13102856b665ac348716d82363618ff56 Mon Sep 17 00:00:00 2001 From: perf3ct Date: Thu, 26 Jun 2025 21:14:00 +0000 Subject: [PATCH] feat(metrics): add more prometheus metrics, and create grafana dashboard --- grafana-dashboard.json | 1594 ++++++++++++++++++++++++++++++ src/routes/prometheus_metrics.rs | 323 +++++- 2 files changed, 1915 insertions(+), 2 deletions(-) create mode 100644 grafana-dashboard.json diff --git a/grafana-dashboard.json b/grafana-dashboard.json new file mode 100644 index 0000000..c126a42 --- /dev/null +++ b/grafana-dashboard.json @@ -0,0 +1,1594 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 20, + "panels": [], + "title": "System Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_documents_total", + "refId": "A" + } + ], + "title": "Total Documents", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_users_total", + "refId": "A" + } + ], + "title": "Total Users", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_ocr_queue_pending", + "refId": "A" + } + ], + "title": "OCR Queue Pending", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_db_utilization_percent", + "refId": "A" + } + ], + "title": "DB Pool Utilization", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 5, + "options": { + "displayLabels": ["percent"], + "legend": { + "displayMode": "list", + "placement": "right", + "showLegend": true, + "values": [] + }, + "pieType": "pie", + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ] + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_documents_by_type", + "legendFormat": "{{type}}", + "refId": "A" + } + ], + "title": "Documents by Type", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5000 + }, + { + "color": "red", + "value": 10000 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 5 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_db_response_time_ms", + "refId": "A" + } + ], + "title": "DB Response Time", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "green", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 5 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_data_consistency_score", + "refId": "A" + } + ], + "title": "Data Consistency Score", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 5 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_storage_bytes", + "refId": "A" + } + ], + "title": "Total Storage Used", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 5 + }, + "id": 9, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_uptime_seconds", + "refId": "A" + } + ], + "title": "Uptime", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 21, + "panels": [], + "title": "OCR Processing", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_ocr_queue_pending", + "legendFormat": "Pending", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_ocr_queue_processing", + "legendFormat": "Processing", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_ocr_queue_failed", + "legendFormat": "Failed", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_ocr_stuck_jobs", + "legendFormat": "Stuck", + "refId": "D" + } + ], + "title": "OCR Queue Status", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 60 + }, + { + "color": "green", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 10 + }, + "id": 11, + "options": { + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ] + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_ocr_confidence_score", + "refId": "A" + } + ], + "title": "OCR Confidence Score", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 30 + }, + { + "color": "red", + "value": 60 + } + ] + }, + "unit": "m" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 10 + }, + "id": 12, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_ocr_avg_processing_minutes", + "refId": "A" + } + ], + "title": "Avg OCR Processing Time", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 60 + }, + { + "color": "red", + "value": 120 + } + ] + }, + "unit": "m" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 14 + }, + "id": 13, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_ocr_queue_oldest_pending_minutes", + "refId": "A" + } + ], + "title": "Oldest Pending OCR", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 22, + "panels": [], + "title": "Database Performance", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 14, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_db_connections_active", + "legendFormat": "Active", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_db_connections_idle", + "legendFormat": "Idle", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_db_connections_total", + "legendFormat": "Total", + "refId": "C" + } + ], + "title": "Database Connections", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 19 + }, + "id": 15, + "options": { + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ] + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_db_utilization_percent", + "refId": "A" + } + ], + "title": "DB Pool Utilization", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 5000 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 19 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_db_response_time_ms", + "refId": "A" + } + ], + "title": "DB Response Time", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 23, + "panels": [], + "title": "User Activity", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 28 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_users_active_today", + "legendFormat": "Active Today", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_users_registered_today", + "legendFormat": "New Registrations", + "refId": "B" + } + ], + "title": "User Activity", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 28 + }, + "id": 18, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_documents_uploaded_today", + "legendFormat": "Docs Uploaded", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_ocr_completed_today", + "legendFormat": "OCR Completed", + "refId": "B" + } + ], + "title": "Daily Document Activity", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 28 + }, + "id": 19, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_documents_with_ocr", + "legendFormat": "With OCR", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "readur_documents_without_ocr", + "legendFormat": "Without OCR", + "refId": "B" + } + ], + "title": "Document OCR Status", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "style": "dark", + "tags": ["readur", "monitoring"], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Readur Application Dashboard", + "uid": "readur-dashboard", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/src/routes/prometheus_metrics.rs b/src/routes/prometheus_metrics.rs index cd6e780..c77011e 100644 --- a/src/routes/prometheus_metrics.rs +++ b/src/routes/prometheus_metrics.rs @@ -7,6 +7,7 @@ use axum::{ }; use std::sync::Arc; use std::fmt::Write; +use std::time::Instant; use crate::AppState; @@ -25,10 +26,14 @@ pub async fn get_prometheus_metrics( let timestamp = chrono::Utc::now().timestamp_millis(); // Collect all metrics - let (document_metrics, ocr_metrics, user_metrics) = tokio::try_join!( + let (document_metrics, ocr_metrics, user_metrics, database_metrics, system_metrics, storage_metrics, security_metrics) = tokio::try_join!( collect_document_metrics(&state), collect_ocr_metrics(&state), - collect_user_metrics(&state) + collect_user_metrics(&state), + collect_database_metrics(&state), + collect_system_metrics(&state), + collect_storage_metrics(&state), + collect_security_metrics(&state) )?; // Write Prometheus formatted metrics @@ -90,6 +95,86 @@ pub async fn get_prometheus_metrics( writeln!(&mut output, "# TYPE readur_users_registered_today gauge").unwrap(); writeln!(&mut output, "readur_users_registered_today {} {}", user_metrics.new_registrations_today, timestamp).unwrap(); + // Database metrics + writeln!(&mut output, "# HELP readur_db_connections_active Active database connections").unwrap(); + writeln!(&mut output, "# TYPE readur_db_connections_active gauge").unwrap(); + writeln!(&mut output, "readur_db_connections_active {} {}", database_metrics.active_connections, timestamp).unwrap(); + + writeln!(&mut output, "# HELP readur_db_connections_idle Idle database connections").unwrap(); + writeln!(&mut output, "# TYPE readur_db_connections_idle gauge").unwrap(); + writeln!(&mut output, "readur_db_connections_idle {} {}", database_metrics.idle_connections, timestamp).unwrap(); + + writeln!(&mut output, "# HELP readur_db_connections_total Total database connections").unwrap(); + writeln!(&mut output, "# TYPE readur_db_connections_total gauge").unwrap(); + writeln!(&mut output, "readur_db_connections_total {} {}", database_metrics.total_connections, timestamp).unwrap(); + + writeln!(&mut output, "# HELP readur_db_utilization_percent Database connection pool utilization percentage").unwrap(); + writeln!(&mut output, "# TYPE readur_db_utilization_percent gauge").unwrap(); + writeln!(&mut output, "readur_db_utilization_percent {} {}", database_metrics.utilization_percent, timestamp).unwrap(); + + writeln!(&mut output, "# HELP readur_db_response_time_ms Database response time in milliseconds").unwrap(); + writeln!(&mut output, "# TYPE readur_db_response_time_ms gauge").unwrap(); + writeln!(&mut output, "readur_db_response_time_ms {} {}", database_metrics.response_time_ms, timestamp).unwrap(); + + // Enhanced OCR metrics + if let Some(confidence) = ocr_metrics.avg_confidence { + writeln!(&mut output, "# HELP readur_ocr_confidence_score Average OCR confidence score").unwrap(); + writeln!(&mut output, "# TYPE readur_ocr_confidence_score gauge").unwrap(); + writeln!(&mut output, "readur_ocr_confidence_score {} {}", confidence, timestamp).unwrap(); + } + + if let Some(oldest_pending) = ocr_metrics.oldest_pending_minutes { + writeln!(&mut output, "# HELP readur_ocr_queue_oldest_pending_minutes Age of oldest pending OCR job in minutes").unwrap(); + writeln!(&mut output, "# TYPE readur_ocr_queue_oldest_pending_minutes gauge").unwrap(); + writeln!(&mut output, "readur_ocr_queue_oldest_pending_minutes {} {}", oldest_pending, timestamp).unwrap(); + } + + writeln!(&mut output, "# HELP readur_ocr_stuck_jobs OCR jobs stuck in processing state").unwrap(); + writeln!(&mut output, "# TYPE readur_ocr_stuck_jobs gauge").unwrap(); + writeln!(&mut output, "readur_ocr_stuck_jobs {} {}", ocr_metrics.stuck_jobs, timestamp).unwrap(); + + writeln!(&mut output, "# HELP readur_ocr_queue_depth Total OCR queue depth (pending + processing)").unwrap(); + writeln!(&mut output, "# TYPE readur_ocr_queue_depth gauge").unwrap(); + writeln!(&mut output, "readur_ocr_queue_depth {} {}", ocr_metrics.queue_depth, timestamp).unwrap(); + + // Storage metrics + writeln!(&mut output, "# HELP readur_storage_usage_percent Storage utilization percentage").unwrap(); + writeln!(&mut output, "# TYPE readur_storage_usage_percent gauge").unwrap(); + writeln!(&mut output, "readur_storage_usage_percent {} {}", storage_metrics.usage_percent, timestamp).unwrap(); + + writeln!(&mut output, "# HELP readur_avg_document_size_bytes Average document size in bytes").unwrap(); + writeln!(&mut output, "# TYPE readur_avg_document_size_bytes gauge").unwrap(); + writeln!(&mut output, "readur_avg_document_size_bytes {} {}", storage_metrics.avg_document_size_bytes, timestamp).unwrap(); + + // Document type metrics + for (doc_type, count) in &storage_metrics.documents_by_type { + writeln!(&mut output, "# HELP readur_documents_by_type Documents count by file type").unwrap(); + writeln!(&mut output, "# TYPE readur_documents_by_type gauge").unwrap(); + writeln!(&mut output, "readur_documents_by_type{{type=\"{}\"}} {} {}", doc_type, count, timestamp).unwrap(); + } + + // System metrics + writeln!(&mut output, "# HELP readur_uptime_seconds Application uptime in seconds").unwrap(); + writeln!(&mut output, "# TYPE readur_uptime_seconds counter").unwrap(); + writeln!(&mut output, "readur_uptime_seconds {} {}", system_metrics.uptime_seconds, timestamp).unwrap(); + + writeln!(&mut output, "# HELP readur_memory_usage_bytes Memory usage in bytes").unwrap(); + writeln!(&mut output, "# TYPE readur_memory_usage_bytes gauge").unwrap(); + writeln!(&mut output, "readur_memory_usage_bytes {} {}", system_metrics.memory_usage_bytes, timestamp).unwrap(); + + writeln!(&mut output, "# HELP readur_data_consistency_score Data integrity score (0-100)").unwrap(); + writeln!(&mut output, "# TYPE readur_data_consistency_score gauge").unwrap(); + writeln!(&mut output, "readur_data_consistency_score {} {}", system_metrics.data_consistency_score, timestamp).unwrap(); + + // Security metrics + writeln!(&mut output, "# HELP readur_failed_logins_today Failed login attempts today").unwrap(); + writeln!(&mut output, "# TYPE readur_failed_logins_today counter").unwrap(); + writeln!(&mut output, "readur_failed_logins_today {} {}", security_metrics.failed_logins_today, timestamp).unwrap(); + + writeln!(&mut output, "# HELP readur_document_access_today Document access count today").unwrap(); + writeln!(&mut output, "# TYPE readur_document_access_today counter").unwrap(); + writeln!(&mut output, "readur_document_access_today {} {}", security_metrics.document_access_today, timestamp).unwrap(); + // Return the metrics with the correct content type Ok(( [(header::CONTENT_TYPE, "text/plain; version=0.0.4")], @@ -112,6 +197,10 @@ struct OcrMetrics { failed_jobs: i64, completed_today: i64, avg_processing_time_minutes: Option, + avg_confidence: Option, + oldest_pending_minutes: Option, + stuck_jobs: i64, + queue_depth: i64, } struct UserMetrics { @@ -120,6 +209,31 @@ struct UserMetrics { new_registrations_today: i64, } +struct DatabaseMetrics { + active_connections: u32, + idle_connections: u32, + total_connections: u32, + utilization_percent: u8, + response_time_ms: u64, +} + +struct SystemMetrics { + uptime_seconds: u64, + memory_usage_bytes: u64, + data_consistency_score: f64, +} + +struct StorageMetrics { + usage_percent: f64, + avg_document_size_bytes: f64, + documents_by_type: std::collections::HashMap, +} + +struct SecurityMetrics { + failed_logins_today: i64, + document_access_today: i64, +} + async fn collect_document_metrics(state: &Arc) -> Result { // Get total document count let total_docs = sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM documents") @@ -190,12 +304,47 @@ async fn collect_ocr_metrics(state: &Arc) -> Result( + "SELECT COUNT(*) FROM documents WHERE ocr_status = 'processing' AND updated_at < NOW() - INTERVAL '30 minutes'" + ) + .fetch_one(&state.db.pool) + .await + .map_err(|e| { + tracing::error!("Failed to get stuck OCR jobs: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + let avg_confidence = sqlx::query_scalar::<_, Option>( + "SELECT AVG(ocr_confidence) FROM documents WHERE ocr_status = 'completed' AND ocr_completed_at > NOW() - INTERVAL '1 hour'" + ) + .fetch_one(&state.db.pool) + .await + .map_err(|e| { + tracing::error!("Failed to get average OCR confidence: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + let oldest_pending = sqlx::query_scalar::<_, Option>( + "SELECT EXTRACT(EPOCH FROM (NOW() - MIN(created_at)))/60 FROM documents WHERE ocr_status = 'pending'" + ) + .fetch_one(&state.db.pool) + .await + .map_err(|e| { + tracing::error!("Failed to get oldest pending OCR job: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + Ok(OcrMetrics { pending_jobs: stats.pending_count, processing_jobs: stats.processing_count, failed_jobs: stats.failed_count, completed_today: stats.completed_today, avg_processing_time_minutes: stats.avg_wait_time_minutes, + avg_confidence, + oldest_pending_minutes: oldest_pending, + stuck_jobs, + queue_depth: stats.pending_count + stats.processing_count, }) } @@ -236,4 +385,174 @@ async fn collect_user_metrics(state: &Arc) -> Result) -> Result { + let start = Instant::now(); + + // Test database responsiveness + sqlx::query("SELECT 1") + .fetch_one(&state.db.pool) + .await + .map_err(|e| { + tracing::error!("Database health check failed: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + let response_time = start.elapsed().as_millis() as u64; + + let total_connections = state.db.pool.size(); + let idle_connections = state.db.pool.num_idle() as u32; + let active_connections = total_connections - idle_connections; + let utilization = if total_connections > 0 { + (active_connections as f64 / total_connections as f64 * 100.0) as u8 + } else { + 0 + }; + + Ok(DatabaseMetrics { + active_connections, + idle_connections, + total_connections, + utilization_percent: utilization, + response_time_ms: response_time, + }) +} + +async fn collect_system_metrics(state: &Arc) -> Result { + // Get application uptime (simplified - would need proper tracking in production) + let uptime_seconds = 3600; // Placeholder + + // Get memory usage (simplified) + let memory_usage_bytes = 0; // Would need proper memory tracking + + // Get data consistency score using similar logic from db_monitoring + #[derive(sqlx::FromRow)] + struct ConsistencyCheck { + orphaned_queue: Option, + inconsistent_states: Option, + } + + let consistency_check = sqlx::query_as::<_, ConsistencyCheck>( + r#" + SELECT + (SELECT COUNT(*) FROM ocr_queue q + LEFT JOIN documents d ON q.document_id = d.id + WHERE d.id IS NULL) as orphaned_queue, + (SELECT COUNT(*) FROM documents d + JOIN ocr_queue q ON d.id = q.document_id + WHERE d.ocr_status = 'completed' AND q.status != 'completed') as inconsistent_states + "# + ) + .fetch_one(&state.db.pool) + .await + .map_err(|e| { + tracing::error!("Failed to get consistency metrics: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + let orphaned = consistency_check.orphaned_queue.unwrap_or(0) as i32; + let inconsistent = consistency_check.inconsistent_states.unwrap_or(0) as i32; + let total_issues = orphaned + inconsistent; + let consistency_score = if total_issues == 0 { 100.0 } else { 100.0 - (total_issues as f64 * 10.0).min(100.0) }; + + Ok(SystemMetrics { + uptime_seconds, + memory_usage_bytes, + data_consistency_score: consistency_score, + }) +} + +async fn collect_storage_metrics(state: &Arc) -> Result { + // Get document type distribution + #[derive(sqlx::FromRow)] + struct DocTypeCount { + doc_type: Option, + count: Option, + } + + let doc_types = sqlx::query_as::<_, DocTypeCount>( + r#" + SELECT + CASE + WHEN file_name ILIKE '%.pdf' THEN 'pdf' + WHEN file_name ILIKE '%.jpg' OR file_name ILIKE '%.jpeg' THEN 'jpeg' + WHEN file_name ILIKE '%.png' THEN 'png' + WHEN file_name ILIKE '%.gif' THEN 'gif' + WHEN file_name ILIKE '%.tiff' OR file_name ILIKE '%.tif' THEN 'tiff' + ELSE 'other' + END as doc_type, + COUNT(*) as count + FROM documents + GROUP BY doc_type + "# + ) + .fetch_all(&state.db.pool) + .await + .map_err(|e| { + tracing::error!("Failed to get document types: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + let mut documents_by_type = std::collections::HashMap::new(); + for row in doc_types { + documents_by_type.insert( + row.doc_type.unwrap_or("unknown".to_string()), + row.count.unwrap_or(0) + ); + } + + // Get storage metrics + #[derive(sqlx::FromRow)] + struct StorageStats { + total_docs: Option, + total_size: Option, + avg_size: Option, + } + + let storage_stats = sqlx::query_as::<_, StorageStats>( + "SELECT COUNT(*) as total_docs, COALESCE(SUM(file_size), 0) as total_size, COALESCE(AVG(file_size), 0) as avg_size FROM documents" + ) + .fetch_one(&state.db.pool) + .await + .map_err(|e| { + tracing::error!("Failed to get storage stats: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + let total_size = storage_stats.total_size.unwrap_or(0) as f64; + let avg_size = storage_stats.avg_size.unwrap_or(0.0); + + // Calculate usage percentage (simplified - would need actual disk space info) + let usage_percent = 0.0; // Placeholder + + Ok(StorageMetrics { + usage_percent, + avg_document_size_bytes: avg_size, + documents_by_type, + }) +} + +async fn collect_security_metrics(state: &Arc) -> Result { + // Note: These metrics would need proper tracking in production + // For now, we'll provide basic placeholders that could be implemented + + // Count document access today (simplified - would need proper audit logging) + let document_access_today = sqlx::query_scalar::<_, i64>( + "SELECT COUNT(*) FROM documents WHERE DATE(created_at) = CURRENT_DATE" + ) + .fetch_one(&state.db.pool) + .await + .map_err(|e| { + tracing::error!("Failed to get document access count: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + // Placeholder for failed logins (would need proper auth event tracking) + let failed_logins_today = 0; + + Ok(SecurityMetrics { + failed_logins_today, + document_access_today, + }) } \ No newline at end of file