Skip to content

Commit

Permalink
update grafana and promtail default config
Browse files Browse the repository at this point in the history
  • Loading branch information
jordantete committed Jan 8, 2025
1 parent 8f0f3d2 commit 59cc85a
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 80 deletions.
2 changes: 1 addition & 1 deletion monitoring/configs/grafana/provisioning/datasources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@ datasources:
type: loki
access: proxy
url: http://loki:3100
uid: loki-datasource
uid: Loki
editable: false
isDefault: true
18 changes: 13 additions & 5 deletions monitoring/configs/loki/rules.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
groups:
- name: log-rules
- name: grid_trading_alerts
rules:
- alert: HighErrorRate
- alert: HighCPUUsage
expr: |
sum(rate({job="grid_trading_bot", level="error"}[5m])) > 0.01
avg_over_time({job="grid_trading_bot"} | json | unwrap cpu [5m]) > 80
for: 2m
labels:
severity: warning
annotations:
summary: High CPU usage detected

- alert: OrderExecutionFailure
expr: |
count_over_time({job="grid_trading_bot"} |= "Failed to execute order" [5m]) > 3
for: 1m
labels:
severity: critical
annotations:
description: High error rate detected in logs for job {{ $labels.job }} is above 0.01.
summary: High error rate detected in logs
summary: Multiple order execution failures detected
47 changes: 46 additions & 1 deletion monitoring/configs/promtail/promtail.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
server:
http_listen_port: 9080
grpc_listen_port: 0
log_level: debug

positions:
filename: /tmp/positions.yaml
Expand All @@ -15,4 +16,48 @@ scrape_configs:
- localhost
labels:
job: grid_trading_bot
__path__: /logs/**/*.log
__path__: /logs/**/*.log
pipeline_stages:
# Extract trading parameters from the log file name.
# `source: filename` runs the regex against the `filename` entry of the
# extracted map; each named group becomes an extracted value.
- regex:
    source: filename
    expression: 'bot_(?P<base>[A-Z]+)_(?P<quote>[A-Z]+)_(?P<mode>[A-Z]+)_strategy(?P<strategy>[A-Z_]+)_spacing(?P<spacing>[A-Z]+)_size(?P<size>\d+)_range(?P<range>\d+-\d+)_.*\.log'
# Build "BASE/QUOTE" as its own extracted value. The guard keeps files that
# do not match the pattern from producing a "<no value>/<no value>" pair.
- template:
    source: trading_pair
    template: '{{ if and .base .quote }}{{ .base }}/{{ .quote }}{{ end }}'
# The labels stage does not render templates: each value below is the NAME of
# the extracted entry whose content becomes the label value.
- labels:
    trading_pair: trading_pair
    trading_mode: mode
    strategy_type: strategy
    spacing_type: spacing
    grid_size: size
    grid_range: range

# Extract ticker price from log lines.
# When a line matches, the captured trading_pair replaces any value already
# present under that name in the extracted map.
- regex:
    expression: 'Connected to WebSocket for (?P<trading_pair>[A-Z/]+) ticker current price: (?P<current_price>\d+\.\d+)'
# The labels stage takes the name of an extracted entry, not a template.
# NOTE(review): every distinct price becomes a separate label value; confirm
# this cardinality is acceptable, otherwise parse the price at query time.
- labels:
    current_price: current_price

# Extract system metrics from log lines.
# Inside a single-quoted YAML scalar a literal quote is written as '' (a
# backslash is not an escape character there), so the dict-style quotes in
# the logged message are doubled.
- regex:
    expression: 'System resource usage: \{''cpu'': (?P<cpu>\d+\.\d+), ''memory'': (?P<memory>\d+\.\d+), ''disk'': (?P<disk>\d+\.\d+), ''bot_cpu'': (?P<bot_cpu>\d+\.\d+), ''bot_memory'': (?P<bot_memory>\d+\.\d+)\}'
# The labels stage takes the name of an extracted entry, not a template.
# NOTE(review): numeric readings as labels produce many label values; confirm
# this is intended, otherwise extract them at query time instead.
- labels:
    cpu: cpu
    memory: memory
    disk: disk
    bot_cpu: bot_cpu
    bot_memory: bot_memory

# Extract the exchange status from bot health log lines.
# Literal quotes inside the single-quoted scalar are written as ''; the
# trailing ''' is one escaped quote followed by the closing quote.
- regex:
    expression: 'Fetched bot health status: .*''exchange_status'': ''(?P<exchange_status>\w+)'''
# The labels stage takes the name of an extracted entry, not a template.
- labels:
    exchange_status: exchange_status

# Split each log line into its timestamp, component, level and message parts.
- regex:
    expression: '^(?P<timestamp>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3})\s+-\s+(?P<component>[\w]+)\s+-\s+(?P<level>\w+)\s+-\s+(?P<message>.*)$'
# Promote component and level to labels; each value names the extracted entry
# to read, written out explicitly instead of relying on the empty-value default.
- labels:
    component: component
    level: level

# Set the entry timestamp from the `timestamp` value captured from the line.
# The format is a Go reference-time layout with comma-separated milliseconds.
- timestamp:
    source: timestamp
    format: '2006-01-02 15:04:05,000'
131 changes: 58 additions & 73 deletions monitoring/dashboards/grid_trading_bot_dashboard.json
Original file line number Diff line number Diff line change
@@ -1,118 +1,103 @@
{
"id": null,
"uid": "grid_trading_bot_dashboard",
"title": "Grid Trading Bot Monitoring",
"tags": ["grid_trading_bot", "monitoring"],
"timezone": "browser",
"schemaVersion": 36,
"version": 1,
"refresh": "30s",
"panels": [
"title": "Grid Trading Bot Dashboard",
"variables": [
{
"id": 1,
"type": "stat",
"title": "Bot Uptime",
"name": "trading_pair",
"type": "query",
"datasource": "Loki",
"targets": [
{
"expr": "{job=\"grid_trading_bot\"} |= \"Bot started successfully\"",
"legendFormat": "Uptime",
"refId": "A"
}
],
"gridPos": { "x": 0, "y": 0, "w": 4, "h": 3 }
"query": "label_values(trading_pair)"
},
{
"id": 2,
"type": "logs",
"title": "Error Logs",
"name": "trading_mode",
"type": "query",
"datasource": "Loki",
"query": "label_values(trading_mode)"
},
{
"name": "strategy",
"type": "query",
"datasource": "Loki",
"query": "label_values(strategy_type)"
}
],
"panels": [
{
"title": "Strategy Overview",
"type": "stat",
"datasource": "Loki",
"targets": [
{
"expr": "{job=\"grid_trading_bot\"} |= \"ERROR\"",
"refId": "A"
"expr": "{job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\"} | json | line_format \"{{.grid_size}} grids, Range: {{.grid_range}}, Spacing: {{.spacing_type}}\""
}
],
"gridPos": { "x": 4, "y": 0, "w": 8, "h": 6 }
]
},
{
"id": 3,
"title": "ROI Over Time",
"type": "timeseries",
"title": "Total Account Value",
"datasource": "Loki",
"targets": [
{
"expr": "{job=\"grid_trading_bot\"} |= \"Account value\" | json | __value__=Account value",
"refId": "A"
"expr": "{job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\"} | regexp \"ROI\\s+\\|\\s+(?P<roi>[\\-\\d\\.]+)%\" | unwrap roi"
}
],
"gridPos": { "x": 0, "y": 3, "w": 12, "h": 6 }
]
},
{
"id": 4,
"type": "piechart",
"title": "Orders by Status",
"title": "Grid Level States",
"type": "table",
"datasource": "Loki",
"targets": [
{
"expr": "{job=\"grid_trading_bot\"} |= \"Order status\" | json | order_status",
"refId": "A"
"expr": "{job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\"} | json | grid_price != \"\" and grid_state != \"\" | line_format \"{{.grid_price}} - {{.grid_state}}\""
}
],
"gridPos": { "x": 12, "y": 0, "w": 6, "h": 6 }
]
},
{
"id": 5,
"title": "Order Flow",
"type": "timeseries",
"title": "Error Count Over Time",
"datasource": "Loki",
"targets": [
{
"expr": "count_over_time({job=\"grid_trading_bot\"} |= \"ERROR\" [1m])",
"refId": "A"
"expr": "sum(count_over_time({job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\", order_side=\"BUY\"}[5m]))",
"legendFormat": "Buy Orders"
},
{
"expr": "sum(count_over_time({job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\", order_side=\"SELL\"}[5m]))",
"legendFormat": "Sell Orders"
}
],
"gridPos": { "x": 12, "y": 6, "w": 6, "h": 6 }
]
},
{
"id": 6,
"type": "logs",
"title": "Completed Orders",
"title": "Balance History",
"type": "timeseries",
"datasource": "Loki",
"targets": [
{
"expr": "{job=\"grid_trading_bot\"} |= \"Order status: FILLED\"",
"refId": "A"
"expr": "{job=\"grid_trading_bot\", trading_pair=\"$trading_pair\", trading_mode=\"$trading_mode\", strategy_type=\"$strategy\"} | regexp \"Balance: (?P<balance>[\\d\\.]+)\" | unwrap balance"
}
],
"gridPos": { "x": 0, "y": 9, "w": 12, "h": 6 }
]
},
{
"id": 7,
"type": "logs",
"title": "Open Orders",
"title": "System Health",
"type": "gauge",
"datasource": "Loki",
"targets": [
{
"expr": "{job=\"grid_trading_bot\"} |= \"Order status: OPEN\"",
"refId": "A"
"expr": "{job=\"grid_trading_bot\", trading_pair=\"$trading_pair\"} | json | unwrap cpu"
}
],
"gridPos": { "x": 12, "y": 9, "w": 12, "h": 6 }
"fieldConfig": {
"defaults": {
"thresholds": {
"steps": [
{ "value": 0, "color": "green" },
{ "value": 70, "color": "yellow" },
{ "value": 85, "color": "red" }
]
}
}
}
}
],
"templating": {
"list": []
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {},
"annotations": {
"list": []
},
"variables": {
"list": []
}
"refresh": "10s",
"schemaVersion": 36
}

0 comments on commit 59cc85a

Please sign in to comment.