From 59cc85af6eec381f1a6b82761e898b1a34d21d4e Mon Sep 17 00:00:00 2001 From: Jordan TETE Date: Wed, 8 Jan 2025 01:04:01 +0100 Subject: [PATCH] update grafana and promtail default config --- .../grafana/provisioning/datasources.yml | 2 +- monitoring/configs/loki/rules.yaml | 18 ++- monitoring/configs/promtail/promtail.yaml | 47 ++++++- .../grid_trading_bot_dashboard.json | 131 ++++++++---------- 4 files changed, 118 insertions(+), 80 deletions(-) diff --git a/monitoring/configs/grafana/provisioning/datasources.yml b/monitoring/configs/grafana/provisioning/datasources.yml index 1d896e8..03f6c02 100644 --- a/monitoring/configs/grafana/provisioning/datasources.yml +++ b/monitoring/configs/grafana/provisioning/datasources.yml @@ -5,6 +5,6 @@ datasources: type: loki access: proxy url: http://loki:3100 - uid: loki-datasource + uid: Loki editable: false isDefault: true \ No newline at end of file diff --git a/monitoring/configs/loki/rules.yaml b/monitoring/configs/loki/rules.yaml index 0441b6c..f140fad 100644 --- a/monitoring/configs/loki/rules.yaml +++ b/monitoring/configs/loki/rules.yaml @@ -1,12 +1,20 @@ groups: - - name: log-rules + - name: grid_trading_alerts rules: - - alert: HighErrorRate + - alert: HighCPUUsage expr: | - sum(rate({job="grid_trading_bot", level="error"}[5m])) > 0.01 + avg_over_time({job="grid_trading_bot"} | json | unwrap cpu [5m]) > 80 + for: 2m + labels: + severity: warning + annotations: + summary: High CPU usage detected + + - alert: OrderExecutionFailure + expr: | + count_over_time({job="grid_trading_bot"} |= "Failed to execute order" [5m]) > 3 for: 1m labels: severity: critical annotations: - description: High error rate detected in logs for job {{ $labels.job }} is above 0.01. 
- summary: High error rate detected in logs + summary: Multiple order execution failures detected diff --git a/monitoring/configs/promtail/promtail.yaml b/monitoring/configs/promtail/promtail.yaml index cd37a48..7489fe6 100644 --- a/monitoring/configs/promtail/promtail.yaml +++ b/monitoring/configs/promtail/promtail.yaml @@ -1,6 +1,7 @@ server: http_listen_port: 9080 grpc_listen_port: 0 + log_level: debug positions: filename: /tmp/positions.yaml @@ -15,4 +16,48 @@ scrape_configs: - localhost labels: job: grid_trading_bot - __path__: /logs/**/*.log \ No newline at end of file + __path__: /logs/**/*.log + pipeline_stages: + # Extract detailed info from filename + - regex: + source: filename + expression: 'bot_(?P<base>[A-Z]+)_(?P<quote>[A-Z]+)_(?P<mode>[A-Z]+)_strategy(?P<strategy>[A-Z_]+)_spacing(?P<spacing>[A-Z]+)_size(?P<size>\d+)_range(?P<range>\d+-\d+)_.*\.log' + - labels: + trading_pair: '{{ .base }}/{{ .quote }}' + trading_mode: '{{ .mode }}' + strategy_type: '{{ .strategy }}' + spacing_type: '{{ .spacing }}' + grid_size: '{{ .size }}' + grid_range: '{{ .range }}' + + # Extract ticker price from log lines + - regex: + expression: 'Connected to WebSocket for (?P<pair>[A-Z/]+) ticker current price: (?P<current_price>\d+\.\d+)' + - labels: + current_price: '{{ .current_price }}' + + # Extract system metrics from log lines + - regex: + expression: 'System resource usage: \{\'cpu\': (?P<cpu>\d+\.\d+), \'memory\': (?P<memory>\d+\.\d+), \'disk\': (?P<disk>\d+\.\d+), \'bot_cpu\': (?P<bot_cpu>\d+\.\d+), \'bot_memory\': (?P<bot_memory>\d+\.\d+)\}' + - labels: + cpu: '{{ .cpu }}' + memory: '{{ .memory }}' + disk: '{{ .disk }}' + bot_cpu: '{{ .bot_cpu }}' + bot_memory: '{{ .bot_memory }}' + + - regex: + expression: 'Fetched bot health status: .*\'exchange_status\': \'(?P<exchange_status>\w+)\'' + - labels: + exchange_status: '{{ .exchange_status }}' + + # Parse log lines + - regex: + expression: '^(?P<timestamp>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3})\s+-\s+(?P<component>[\w]+)\s+-\s+(?P<level>\w+)\s+-\s+(?P<message>.*)$' + - labels: + component: + level: + + - timestamp: + source: timestamp + format: "2006-01-02 15:04:05,000" \ No
newline at end of file diff --git a/monitoring/dashboards/grid_trading_bot_dashboard.json b/monitoring/dashboards/grid_trading_bot_dashboard.json index e4e78e8..af13d8d 100644 --- a/monitoring/dashboards/grid_trading_bot_dashboard.json +++ b/monitoring/dashboards/grid_trading_bot_dashboard.json @@ -1,118 +1,103 @@ { - "id": null, - "uid": "grid_trading_bot_dashboard", - "title": "Grid Trading Bot Monitoring", - "tags": ["grid_trading_bot", "monitoring"], - "timezone": "browser", - "schemaVersion": 36, - "version": 1, - "refresh": "30s", - "panels": [ + "title": "Grid Trading Bot Dashboard", + "variables": [ { - "id": 1, - "type": "stat", - "title": "Bot Uptime", + "name": "trading_pair", + "type": "query", "datasource": "Loki", - "targets": [ - { - "expr": "{job=\"grid_trading_bot\"} |= \"Bot started successfully\"", - "legendFormat": "Uptime", - "refId": "A" - } - ], - "gridPos": { "x": 0, "y": 0, "w": 4, "h": 3 } + "query": "label_values(trading_pair)" }, { - "id": 2, - "type": "logs", - "title": "Error Logs", + "name": "trading_mode", + "type": "query", + "datasource": "Loki", + "query": "label_values(trading_mode)" + }, + { + "name": "strategy", + "type": "query", + "datasource": "Loki", + "query": "label_values(strategy_type)" + } + ], + "panels": [ + { + "title": "Strategy Overview", + "type": "stat", "datasource": "Loki", "targets": [ { - "expr": "{job=\"grid_trading_bot\"} |= \"ERROR\"", - "refId": "A" + "expr": "{job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\"} | json | line_format \"{{.grid_size}} grids, Range: {{.grid_range}}, Spacing: {{.spacing_type}}\"" } - ], - "gridPos": { "x": 4, "y": 0, "w": 8, "h": 6 } + ] }, { - "id": 3, + "title": "ROI Over Time", "type": "timeseries", - "title": "Total Account Value", "datasource": "Loki", "targets": [ { - "expr": "{job=\"grid_trading_bot\"} |= \"Account value\" | json | __value__=Account value", - "refId": "A" + "expr": 
"{job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\"} | regexp \"ROI\\s+\\|\\s+(?P<roi>[\\-\\d\\.]+)%\" | unwrap roi" } - ], - "gridPos": { "x": 0, "y": 3, "w": 12, "h": 6 } + ] }, { - "id": 4, - "type": "piechart", - "title": "Orders by Status", + "title": "Grid Level States", + "type": "table", "datasource": "Loki", "targets": [ { - "expr": "{job=\"grid_trading_bot\"} |= \"Order status\" | json | order_status", - "refId": "A" + "expr": "{job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\"} | json | grid_price != \"\" and grid_state != \"\" | line_format \"{{.grid_price}} - {{.grid_state}}\"" } - ], - "gridPos": { "x": 12, "y": 0, "w": 6, "h": 6 } + ] }, { - "id": 5, + "title": "Order Flow", "type": "timeseries", - "title": "Error Count Over Time", "datasource": "Loki", "targets": [ { - "expr": "count_over_time({job=\"grid_trading_bot\"} |= \"ERROR\" [1m])", - "refId": "A" + "expr": "sum(count_over_time({job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\", order_side=\"BUY\"}[5m]))", + "legendFormat": "Buy Orders" + }, + { + "expr": "sum(count_over_time({job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\", order_side=\"SELL\"}[5m]))", + "legendFormat": "Sell Orders" } - ], - "gridPos": { "x": 12, "y": 6, "w": 6, "h": 6 } + ] }, { - "id": 6, - "type": "logs", - "title": "Completed Orders", + "title": "Balance History", + "type": "timeseries", "datasource": "Loki", "targets": [ { - "expr": "{job=\"grid_trading_bot\"} |= \"Order status: FILLED\"", - "refId": "A" + "expr": "{job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\"} | regexp \"Balance: (?P<balance>[\\d\\.]+)\" | unwrap balance" } - ], - "gridPos": { "x": 0, "y": 9, "w": 12, "h": 6 } + ] }, { - 
"id": 7, - "type": "logs", - "title": "Open Orders", + "title": "System Health", + "type": "gauge", "datasource": "Loki", "targets": [ { - "expr": "{job=\"grid_trading_bot\"} |= \"Order status: OPEN\"", - "refId": "A" + "expr": "{job=\"grid_trading_bot\", trading_pair=\"$trading_pair\"} | json | unwrap cpu" } ], - "gridPos": { "x": 12, "y": 9, "w": 12, "h": 6 } + "fieldConfig": { + "defaults": { + "thresholds": { + "steps": [ + { "value": 0, "color": "green" }, + { "value": 70, "color": "yellow" }, + { "value": 85, "color": "red" } + ] + } + } + } } ], - "templating": { - "list": [] - }, - "time": { - "from": "now-24h", - "to": "now" - }, - "timepicker": {}, - "annotations": { - "list": [] - }, - "variables": { - "list": [] - } + "refresh": "10s", + "schemaVersion": 36 } \ No newline at end of file