Skip to content

Commit

Permalink
feat: Export Trino metrics to Prometheus and enable autoscaling with…
Browse files Browse the repository at this point in the history
… KEDA (#712)

Co-authored-by: Ratnopam Charabarti <ratnopamc@yahoo.com>
  • Loading branch information
bbgu1 and ratnopamc authored Dec 23, 2024
1 parent f8dda1a commit eaec192
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 6 deletions.
11 changes: 11 additions & 0 deletions distributed-databases/trino/addons.tf
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,17 @@ module "eks_blueprints_addons" {
],
}

# Additional Helm releases deployed through the eks_blueprints_addons module.
helm_releases = {
  # KEDA (Kubernetes Event-driven Autoscaling); the "keda" namespace is
  # created on install if it does not exist yet.
  keda = {
    description      = "Keda helm Chart deployment"
    repository       = "https://kedacore.github.io/charts"
    chart            = "keda"
    chart_version    = "2.16.0"
    namespace        = "keda"
    create_namespace = true
  }
}

tags = local.tags
}

Expand Down
6 changes: 3 additions & 3 deletions distributed-databases/trino/examples/hive-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ export GLUE_DB_NAME=taxi_hive_database
export CRAWLER_NAME=taxi-data-crawler
echo "The name of your bucket is: ${BUCKET}"

echo "Now copying the 2022 NY Taxi data into the S3 bucket..."
echo "Now copying sample data into the S3 bucket..."

## Copy the 2022 NY Taxi data into the S3 bucket
aws s3 cp "s3://nyc-tlc/trip data/" s3://$BUCKET/hive/ --exclude "*" --include "yellow_tripdata_2022*" --recursive
## Copy sample data into the S3 bucket. Later on, we will replace the sample data with a more robust dataset.
aws s3 cp "s3://aws-data-analytics-workshops/shared_datasets/tripdata/" s3://$BUCKET/hive/ --recursive

sleep 2
echo "Now we create the Glue Database..."
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Prometheus settings for the kube-prometheus-stack chart.
# The ${...} placeholders are substituted before these values reach Helm
# (presumably by Terraform's templatefile() — confirm against the caller).
prometheus:
  serviceAccount:
    create: true
    name: ${amp_sa}
    annotations:
      # IAM role assumed by this service account (IRSA).
      eks.amazonaws.com/role-arn: ${amp_irsa}
  prometheusSpec:
    # Forward samples to the remote-write endpoint using SigV4-signed requests.
    remoteWrite:
      - url: ${amp_remotewrite_url}
        sigv4:
          region: ${region}
        # The Prometheus Operator RemoteWriteSpec uses camelCase field names;
        # the snake_case spelling from a raw prometheus.yml (`queue_config`,
        # `max_samples_per_send`, `max_shards`) is not part of the CRD schema,
        # so the tuning below would not be applied under those names.
        queueConfig:
          maxSamplesPerSend: 1000
          maxShards: 200
          capacity: 2500
    # Short local retention.
    retention: 5h
    scrapeInterval: 30s
    evaluationInterval: 30s
    scrapeTimeout: 10s
    storageSpec:
      volumeClaimTemplate:
        metadata:
          name: data
        spec:
          storageClassName: gp2
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 50Gi
    # Per the chart documentation, `false` stops the chart from restricting
    # ServiceMonitor selection to its own release label, so ServiceMonitors
    # created by other charts can be discovered as well.
    serviceMonitorSelectorNilUsesHelmValues: false
# Alertmanager is not deployed with this stack.
alertmanager:
  enabled: false

# Grafana, bundled with the default dashboards.
grafana:
  enabled: true
  defaultDashboardsEnabled: true
  # Reuse an existing service account (same ${amp_sa} placeholder as the
  # Prometheus section of this file) instead of creating a new one.
  serviceAccount:
    create: false
    name: ${amp_sa}
  # Turn on SigV4 request signing in Grafana.
  grafana.ini:
    auth:
      sigv4_auth_enabled: true
  # Adding AMP datasource to Grafana config
  additionalDataSources:
    - name: AMP
      type: prometheus
      url: ${amp_url}
      isDefault: false
      editable: true
      jsonData:
        sigV4Auth: true
        sigV4Region: ${region}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ prometheus:
resources:
requests:
storage: 50Gi
serviceMonitorSelectorNilUsesHelmValues: false
alertmanager:
enabled: false

Expand Down
55 changes: 53 additions & 2 deletions distributed-databases/trino/helm-values/trino.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,17 @@
# └── Total: 80.6GB < 89GB ✓
---
image:
tag: "427"
repository: trinodb/trino
tag: 447
pullPolicy: IfNotPresent
server:
workers: 3
exchangeManager:
name: filesystem
baseDir: "s3://${exchange_bucket_id}"
autoscaling:
enabled: true
enabled: false
minReplicas: 1
maxReplicas: 20
targetCPUUtilizationPercentage: 75
targetMemoryUtilizationPercentage: 80
Expand Down Expand Up @@ -146,3 +150,50 @@ serviceAccount:
name: ${sa}
ingress:
enabled: false
# JMX for Trino, plus a Prometheus exporter for the JMX metrics on port 5556.
jmx:
  enabled: true
  registryPort: 9080
  serverPort: 9081
  exporter:
    # jmx.exporter.enabled -- export JMX metrics over HTTP for Prometheus
    # (https://github.com/prometheus/jmx_exporter).
    enabled: true
    # NOTE(review): a floating `latest` tag with `pullPolicy: Always` lets the
    # exporter version change between pod restarts — consider pinning a tag.
    image: bitnami/jmx-exporter:latest
    pullPolicy: Always
    port: 5556
    # jmx_exporter configuration. It connects to the registry port defined
    # above (the value contains a Helm template expression), limits collection
    # to the listed MBeans and exports every attribute they expose.
    configProperties: |-
      hostPort: localhost:{{- .Values.jmx.registryPort }}
      startDelaySeconds: 0
      ssl: false
      lowercaseOutputName: false
      lowercaseOutputLabelNames: false
      whitelistObjectNames: ["trino.execution:name=QueryManager","trino.execution:name=SqlTaskManager","trino.execution.executor:name=TaskExecutor","trino.memory:name=ClusterMemoryManager","java.lang:type=Runtime","trino.memory:type=ClusterMemoryPool,name=general","java.lang:type=Memory","trino.memory:type=MemoryPool,name=general"]
      autoExcludeObjectNameAttributes: true
      excludeObjectNameAttributes:
        "java.lang:type=OperatingSystem":
          - "ObjectName"
        "java.lang:type=Runtime":
          - "ClassPath"
          - "SystemProperties"
      rules:
        - pattern: ".*"
    # Requests equal limits for the exporter container.
    resources:
      requests:
        cpu: 200m
        memory: 512Mi
      limits:
        cpu: 200m
        memory: 512Mi

# ServiceMonitor settings so Prometheus scrapes Trino every 30 seconds.
# The same `prometheus: kube-prometheus` label is set at the top level and on
# the coordinator and worker entries.
serviceMonitor:
  enabled: true
  interval: "30s"
  labels:
    prometheus: kube-prometheus
  coordinator:
    enabled: true
    labels:
      prometheus: kube-prometheus
  worker:
    enabled: true
    labels:
      prometheus: kube-prometheus
42 changes: 42 additions & 0 deletions distributed-databases/trino/trino-keda.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# KEDA ScaledObject for the Trino workers. It scales the workload named
# "trino-worker" between 3 and 15 replicas, driven by CPU utilization and by
# the number of queued Trino queries read from Prometheus.
# ${trino_namespace} is substituted by Terraform's templatefile() before apply.
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: keda-scaler-trino-worker
  namespace: ${trino_namespace}
spec:
  scaleTargetRef:
    name: trino-worker
  minReplicaCount: 3
  maxReplicaCount: 15
  pollingInterval: 30 # Seconds
  cooldownPeriod: 600 # Seconds
  # Replica count to fall back to after 3 consecutive failures to read a metric.
  # NOTE(review): the KEDA documentation describes fallback as unsupported for
  # cpu/memory triggers and for triggers whose metric target type is `Value`;
  # both triggers below fall into those categories — confirm that this
  # section actually takes effect on the KEDA version in use.
  fallback:
    failureThreshold: 3
    replicas: 6
  advanced:
    horizontalPodAutoscalerConfig:
      behavior:
        # Scale down conservatively: wait 10 minutes, then remove at most
        # one pod every 5 minutes.
        scaleDown:
          stabilizationWindowSeconds: 600
          policies:
            - type: Pods
              value: 1
              periodSeconds: 300
        # Scale up without a stabilization delay, at most one pod every
        # 2 minutes.
        scaleUp:
          stabilizationWindowSeconds: 0
          policies:
            - type: Pods
              value: 1
              periodSeconds: 120
  triggers:
    - type: cpu
      metricType: Utilization
      metadata:
        value: '80' # Target CPU utilization percentage
    # `metricName` inside `metadata` is deprecated in KEDA and no longer a
    # documented Prometheus scaler field for the pinned chart version (2.16.0),
    # so the identifier is carried by the trigger-level `name` instead.
    - type: prometheus
      name: queued_queries
      metricType: Value
      metadata:
        serverAddress: http://kube-prometheus-stack-prometheus.kube-prometheus-stack.svc.cluster.local:9090
        threshold: '1'
        # One-minute average of queued queries, summed per job.
        query: sum by (job) (avg_over_time(trino_execution_QueryManager_QueuedQueries{job="trino"}[1m]))
22 changes: 21 additions & 1 deletion distributed-databases/trino/trino.tf
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,15 @@ resource "aws_iam_policy" "trino_exchange_bucket_policy" {
# Trino Helm Add-on
#---------------------------------------
module "trino_addon" {
depends_on = [
module.eks_blueprints_addons,
]

source = "aws-ia/eks-blueprints-addon/aws"
version = "~> 1.1.1" #ensure to update this to the latest/desired version

chart = "trino"
chart_version = "0.33.0"
chart_version = "0.34.0"
repository = "https://trinodb.github.io/charts"
description = "Trino Helm Chart deployment"
namespace = local.trino_namespace
Expand Down Expand Up @@ -190,3 +194,19 @@ module "trino_addon" {
}
}
}


#---------------------------------------------------------------
# KEDA ScaledObject - Trino workers (CPU + Prometheus triggers)
#---------------------------------------------------------------
resource "kubectl_manifest" "trino_keda" {
  # Apply only after the add-ons module (which installs KEDA) and the
  # Trino Helm release have been created.
  depends_on = [
    module.eks_blueprints_addons,
    module.trino_addon,
  ]

  # Render trino-keda.yaml, injecting the namespace Trino is deployed into.
  yaml_body = templatefile("${path.module}/trino-keda.yaml", {
    trino_namespace = local.trino_namespace
  })
}

0 comments on commit eaec192

Please sign in to comment.