diff --git a/monitoring/configs/grafana/provisioning/datasources.yml b/monitoring/configs/grafana/provisioning/datasources.yml
index 1d896e8..03f6c02 100644
--- a/monitoring/configs/grafana/provisioning/datasources.yml
+++ b/monitoring/configs/grafana/provisioning/datasources.yml
@@ -5,6 +5,6 @@ datasources:
type: loki
access: proxy
url: http://loki:3100
- uid: loki-datasource
+ uid: Loki
editable: false
isDefault: true
\ No newline at end of file
diff --git a/monitoring/configs/loki/rules.yaml b/monitoring/configs/loki/rules.yaml
index 0441b6c..f140fad 100644
--- a/monitoring/configs/loki/rules.yaml
+++ b/monitoring/configs/loki/rules.yaml
@@ -1,12 +1,20 @@
groups:
- - name: log-rules
+ - name: grid_trading_alerts
rules:
- - alert: HighErrorRate
+ - alert: HighCPUUsage
expr: |
- sum(rate({job="grid_trading_bot", level="error"}[5m])) > 0.01
+ avg_over_time({job="grid_trading_bot"} | json | unwrap cpu [5m]) > 80
+ for: 2m
+ labels:
+ severity: warning
+ annotations:
+ summary: High CPU usage detected
+
+ - alert: OrderExecutionFailure
+ expr: |
+ count_over_time({job="grid_trading_bot"} |= "Failed to execute order" [5m]) > 3
for: 1m
labels:
severity: critical
annotations:
- description: High error rate detected in logs for job {{ $labels.job }} is above 0.01.
- summary: High error rate detected in logs
+ summary: Multiple order execution failures detected
diff --git a/monitoring/configs/promtail/promtail.yaml b/monitoring/configs/promtail/promtail.yaml
index cd37a48..7489fe6 100644
--- a/monitoring/configs/promtail/promtail.yaml
+++ b/monitoring/configs/promtail/promtail.yaml
@@ -1,6 +1,7 @@
server:
http_listen_port: 9080
grpc_listen_port: 0
+ log_level: debug
positions:
filename: /tmp/positions.yaml
@@ -15,4 +16,52 @@ scrape_configs:
- localhost
labels:
job: grid_trading_bot
- __path__: /logs/**/*.log
\ No newline at end of file
+ __path__: /logs/**/*.log
+ pipeline_stages:
+ # Extract detailed info from filename
+ - regex:
+ source: filename
+ expression: 'bot_(?P<base>[A-Z]+)_(?P<quote>[A-Z]+)_(?P<mode>[A-Z]+)_strategy(?P<strategy>[A-Z_]+)_spacing(?P<spacing>[A-Z]+)_size(?P<size>\d+)_range(?P<range>\d+-\d+)_.*\.log'
+ # Join base and quote into trading_pair; the labels stage only reads extracted keys by name
+ - template:
+ source: trading_pair
+ template: '{{ .base }}/{{ .quote }}'
+ - labels:
+ trading_pair: trading_pair
+ trading_mode: mode
+ strategy_type: strategy
+ spacing_type: spacing
+ grid_size: size
+ grid_range: range
+
+ # Extract ticker price from log lines
+ - regex:
+ expression: 'Connected to WebSocket for (?P<pair>[A-Z/]+) ticker current price: (?P<current_price>\d+\.\d+)'
+ - labels:
+ current_price: current_price
+
+ # Extract system metrics from log lines
+ - regex:
+ expression: 'System resource usage: \{''cpu'': (?P<cpu>\d+\.\d+), ''memory'': (?P<memory>\d+\.\d+), ''disk'': (?P<disk>\d+\.\d+), ''bot_cpu'': (?P<bot_cpu>\d+\.\d+), ''bot_memory'': (?P<bot_memory>\d+\.\d+)\}'
+ - labels:
+ cpu: cpu
+ memory: memory
+ disk: disk
+ bot_cpu: bot_cpu
+ bot_memory: bot_memory
+
+ - regex:
+ expression: 'Fetched bot health status: .*''exchange_status'': ''(?P<exchange_status>\w+)'''
+ - labels:
+ exchange_status: exchange_status
+
+ # Parse log lines: "<timestamp> - <component> - <level> - <message>"
+ - regex:
+ expression: '^(?P<timestamp>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3})\s+-\s+(?P<component>[\w]+)\s+-\s+(?P<level>\w+)\s+-\s+(?P<message>.*)$'
+ - labels:
+ component:
+ level:
+
+ - timestamp:
+ source: timestamp
+ format: "2006-01-02 15:04:05,000"
\ No newline at end of file
diff --git a/monitoring/dashboards/grid_trading_bot_dashboard.json b/monitoring/dashboards/grid_trading_bot_dashboard.json
index e4e78e8..af13d8d 100644
--- a/monitoring/dashboards/grid_trading_bot_dashboard.json
+++ b/monitoring/dashboards/grid_trading_bot_dashboard.json
@@ -1,118 +1,103 @@
{
- "id": null,
- "uid": "grid_trading_bot_dashboard",
- "title": "Grid Trading Bot Monitoring",
- "tags": ["grid_trading_bot", "monitoring"],
- "timezone": "browser",
- "schemaVersion": 36,
- "version": 1,
- "refresh": "30s",
- "panels": [
+ "title": "Grid Trading Bot Dashboard",
+ "templating": { "list": [
{
- "id": 1,
- "type": "stat",
- "title": "Bot Uptime",
+ "name": "trading_pair",
+ "type": "query",
"datasource": "Loki",
- "targets": [
- {
- "expr": "{job=\"grid_trading_bot\"} |= \"Bot started successfully\"",
- "legendFormat": "Uptime",
- "refId": "A"
- }
- ],
- "gridPos": { "x": 0, "y": 0, "w": 4, "h": 3 }
+ "query": "label_values(trading_pair)"
},
{
- "id": 2,
- "type": "logs",
- "title": "Error Logs",
+ "name": "trading_mode",
+ "type": "query",
+ "datasource": "Loki",
+ "query": "label_values(trading_mode)"
+ },
+ {
+ "name": "strategy",
+ "type": "query",
+ "datasource": "Loki",
+ "query": "label_values(strategy_type)"
+ }
+ ] },
+ "panels": [
+ {
+ "title": "Strategy Overview",
+ "type": "stat",
"datasource": "Loki",
"targets": [
{
- "expr": "{job=\"grid_trading_bot\"} |= \"ERROR\"",
- "refId": "A"
+ "expr": "{job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\"} | json | line_format \"{{.grid_size}} grids, Range: {{.grid_range}}, Spacing: {{.spacing_type}}\""
}
- ],
- "gridPos": { "x": 4, "y": 0, "w": 8, "h": 6 }
+ ]
},
{
- "id": 3,
+ "title": "ROI Over Time",
"type": "timeseries",
- "title": "Total Account Value",
"datasource": "Loki",
"targets": [
{
- "expr": "{job=\"grid_trading_bot\"} |= \"Account value\" | json | __value__=Account value",
- "refId": "A"
+ "expr": "avg_over_time({job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\"} | regexp `ROI\\s+\\|\\s+(?P<roi>[\\-\\d\\.]+)%` | unwrap roi [$__interval])"
}
- ],
- "gridPos": { "x": 0, "y": 3, "w": 12, "h": 6 }
+ ]
},
{
- "id": 4,
- "type": "piechart",
- "title": "Orders by Status",
+ "title": "Grid Level States",
+ "type": "table",
"datasource": "Loki",
"targets": [
{
- "expr": "{job=\"grid_trading_bot\"} |= \"Order status\" | json | order_status",
- "refId": "A"
+ "expr": "{job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\"} | json | grid_price != \"\" and grid_state != \"\" | line_format \"{{.grid_price}} - {{.grid_state}}\""
}
- ],
- "gridPos": { "x": 12, "y": 0, "w": 6, "h": 6 }
+ ]
},
{
- "id": 5,
+ "title": "Order Flow",
"type": "timeseries",
- "title": "Error Count Over Time",
"datasource": "Loki",
"targets": [
{
- "expr": "count_over_time({job=\"grid_trading_bot\"} |= \"ERROR\" [1m])",
- "refId": "A"
+ "expr": "sum(count_over_time({job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\", order_side=\"BUY\"}[5m]))",
+ "legendFormat": "Buy Orders"
+ },
+ {
+ "expr": "sum(count_over_time({job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\", order_side=\"SELL\"}[5m]))",
+ "legendFormat": "Sell Orders"
}
- ],
- "gridPos": { "x": 12, "y": 6, "w": 6, "h": 6 }
+ ]
},
{
- "id": 6,
- "type": "logs",
- "title": "Completed Orders",
+ "title": "Balance History",
+ "type": "timeseries",
"datasource": "Loki",
"targets": [
{
- "expr": "{job=\"grid_trading_bot\"} |= \"Order status: FILLED\"",
- "refId": "A"
+ "expr": "avg_over_time({job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\"} | regexp `Balance: (?P<balance>[\\d\\.]+)` | unwrap balance [$__interval])"
}
- ],
- "gridPos": { "x": 0, "y": 9, "w": 12, "h": 6 }
+ ]
},
{
- "id": 7,
- "type": "logs",
- "title": "Open Orders",
+ "title": "System Health",
+ "type": "gauge",
"datasource": "Loki",
"targets": [
{
- "expr": "{job=\"grid_trading_bot\"} |= \"Order status: OPEN\"",
- "refId": "A"
+ "expr": "avg_over_time({job=\"grid_trading_bot\", trading_pair=\"$trading_pair\"} | json | unwrap cpu [5m])"
}
],
- "gridPos": { "x": 12, "y": 9, "w": 12, "h": 6 }
+ "fieldConfig": {
+ "defaults": {
+ "thresholds": {
+ "steps": [
+ { "value": 0, "color": "green" },
+ { "value": 70, "color": "yellow" },
+ { "value": 85, "color": "red" }
+ ]
+ }
+ }
+ }
}
],
- "templating": {
- "list": []
- },
- "time": {
- "from": "now-24h",
- "to": "now"
- },
- "timepicker": {},
- "annotations": {
- "list": []
- },
- "variables": {
- "list": []
- }
+ "refresh": "10s",
+ "schemaVersion": 36
}
\ No newline at end of file