diff --git a/kubernetes-addons/Observability/README.md b/kubernetes-addons/Observability/README.md index a5cc48cb7..7732b59ca 100644 --- a/kubernetes-addons/Observability/README.md +++ b/kubernetes-addons/Observability/README.md @@ -121,13 +121,19 @@ Here are few Grafana dashboards for monitoring different aspects of OPEA applica You can either: - Import them manually to Grafana, -- Use [`update-dashboards.sh`](./update-dashboards.sh) script to add them to Kubernetes as Grafana dashboard configMaps - - (Script assumes Prometheus / Grafana to be installed according to above instructions) +- Use [`update-dashboards.sh`](./update-dashboards.sh) script to add them to Kubernetes as (more persistent) Grafana dashboard `configMap`s + - Script uses `$USER-<file name>` as dashboard `configMap` names, overwrites any pre-existing `configMap` with the same name, and _restarts Grafana_ so that it notices `configMap` updates - Or create your own dashboards based on them -Note: when dashboard is imported to Grafana, you can directly save changes to it, but those dashboards go away if Grafana is removed / re-installed. +When a dashboard is imported to Grafana, you can directly save changes to it, but such dashboards go away if Grafana is removed / re-installed. When a dashboard is in a `configMap`, Grafana saves its changes to a (selected) file, but you need to re-apply those files to Kubernetes with the script, for your changes to be there when that Grafana dashboard page is reloaded in the browser. -Whereas with dashboard configMaps, Grafana saves changes to a selected file, but you need to remember to re-apply them to Kubernetes / Grafana, for your changes to be there when that dashboard is reloaded. 
+Gotchas for dashboard `configMap` script usage: + +- If you change a dashboard file name, you also need to change its `uid` field (at the end of the file), otherwise Grafana will see multiple `configMap`s for the same dashboard ID + - If there's no `uid` specified for the dashboard, Grafana will generate one on `configMap` load, meaning that the dashboard ID, and the Grafana URL to it, will change on every reload +- Script assumes Prometheus / Grafana to be installed according to the above instructions. If not, the list of `labels` within the script needs to be updated to match the Prometheus / Grafana installation + +NOTE: Services provide metrics only after they have processed at least one query; before that, dashboards can be empty! ![TGI dashboard](./assets/tgi.png) ![Scaling dashboard](./assets/opea-scaling.png) diff --git a/kubernetes-addons/Observability/update-dashboards.sh b/kubernetes-addons/Observability/update-dashboards.sh index 96ba89b42..848b59b17 100755 --- a/kubernetes-addons/Observability/update-dashboards.sh +++ b/kubernetes-addons/Observability/update-dashboards.sh @@ -6,12 +6,18 @@ set -e # Grafana namespace -ns=monitoring +ns="monitoring" + +# Grafana app selector +selector="app.kubernetes.io/name=grafana" + +# command for fetching Grafana pod name +grafana="kubectl -n $ns get pod --selector $selector --field-selector=status.phase=Running -o name" # Labels needed in configMap to get (Helm installed) Grafana to load it as dashboard labels="grafana_dashboard=1 release=prometheus-stack app=kube-prometheus-stack-grafana" -usage () +error_exit () { name=${0##*/} echo @@ -25,15 +31,9 @@ usage () } if [ $# -lt 1 ]; then - usage "no files specified" + error_exit "no files specified" fi -for file in "$@"; do - if [ ! -f "$file" ]; then - usage "JSON file '$file' does not exist" - fi -done - if [ -z "$(which jq)" ]; then echo "ERROR: 'jq' required for dashboard checks, please install it first!" 
exit 1 @@ -41,7 +41,19 @@ fi echo "Creating/updating following Grafana dashboards to '$ns' namespace:" for file in "$@"; do - echo "- $file ($(jq .uid "$file" | tail -1)): $(jq .title "$file" | tail -1)" + if [ ! -f "$file" ]; then + error_exit "JSON file '$file' does not exist" + fi + # Dashboard 'uid' is optional, but it should have a title... + uid=$(jq .uid "$file" | tail -1) + if [ -z "$uid" ]; then + error_exit "'$file' dashboard has invalid JSON" + fi + title=$(jq .title "$file" | tail -1) + if [ "$title" = "null" ]; then + error_exit "'$file' dashboard has no 'title' field" + fi + echo "- $file (uid: $uid): $title" done # use tmp file so user can check what's wrong when there are errors @@ -60,11 +72,25 @@ cleanup () } trap cleanup EXIT +pod=$($grafana) +if [ -z "$pod" ]; then + echo "ERROR: Grafana missing from '$ns' namespace!" + exit 1 +fi + echo for file in "$@"; do base=${file##*/} name=${base%.json} - name="$USER-$name" + # if no "$USER-" prefix, add one + if [ "${name#"$USER-"}" = "$name" ]; then + name="$USER-$name" + fi + # convert to k8s object name ("[a-z0-9][-a-z0-9]*[a-z0-9]"): + # - upper-case -> lowercase, '_' -> '-' + # - drop anything outside [-a-z0-9] + # - drop '-' prefix & suffix and successive '-' chars + name=$(echo "$name" | tr A-Z_ a-z- | tr -d -c a-z0-9- | sed -e 's/^-*//' -e 's/-*$//' -e 's/--*/-/g') echo "*** $ns/$name: $(jq .title "$file" | tail -1) ***" set -x # shellcheck disable=SC2086 @@ -77,4 +103,22 @@ done rm $tmp +echo +echo "Restarting Grafana so that it notices updated dashboards..." +pod=$($grafana) +echo "kubectl -n $ns delete $pod" +kubectl -n "$ns" delete "$pod" + +echo +echo "Waiting until new Grafana instance is running..." +while true; do + sleep 2 + pod=$($grafana) + if [ -n "$pod" ]; then + break + fi +done +echo "kubectl -n $ns wait $pod --for=condition=Ready" +kubectl -n "$ns" wait "$pod" --for=condition=Ready + echo "DONE!"