Skip to content

Commit

Permalink
refactor amw, dce, dcr, and aks-metrics-enable
Browse files Browse the repository at this point in the history
- move metrics modules from ADO -> ARO-HCP
- add default node/k8s recording rule group for all clusters with the
  Azure Monitoring Workspace
- use bicep to deploy data collection endpoints and data collection
  rules
- enable metrics within aks-cluster-base.bicep
- remove unused make targets and pipeline steps
  • Loading branch information
tony-schndr committed Jan 10, 2025
1 parent e6425f1 commit 2f64286
Show file tree
Hide file tree
Showing 13 changed files with 382 additions and 48 deletions.
16 changes: 2 additions & 14 deletions dev-infrastructure/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ svc.kv.permission:
@scripts/kv-permissions.sh $(PRINCIPAL_ID) $(SVC_KV_RESOURCEGROUP) $(SVC_KV_NAME)
.PHONY: svc.kv.permission

svc.init: region svc svc.aks.admin-access svc.aks.kubeconfig svc.istio metrics-infra svc.enable-aks-metrics svc.oidc.storage.permissions
svc.init: region metrics-infra svc svc.aks.admin-access svc.aks.kubeconfig svc.istio svc.oidc.storage.permissions
.PHONY: svc.init

svc.what-if: svc.rg
Expand Down Expand Up @@ -340,18 +340,6 @@ mgmt: mgmt.wait mgmt.rg
persist=${PERSIST}
.PHONY: mgmt

# Enable Azure Monitor metrics on the management AKS cluster.
# The recipe first queries `azureMonitorProfile.metrics.enabled` on the cluster
# and only prints a message when it is already "true". Otherwise it runs
# `az aks update --enable-azure-monitor-metrics`, taking the Azure Monitor
# workspace id and the Grafana id from the outputs (`monitorId`, `grafanaId`)
# of the `metrics-infra` deployment in $(REGIONAL_RESOURCEGROUP).
# `$$( ... )` is escaped so the command substitution is done by the shell, not by make.
mgmt.enable-aks-metrics:
@if [ "$$(az aks show --resource-group $(MGMT_RESOURCEGROUP) --name ${AKS_NAME} --query 'azureMonitorProfile.metrics.enabled' -o tsv)" = "true" ]; then \
echo "Azure Monitor metrics are already enabled."; \
else \
az aks update --enable-azure-monitor-metrics \
--resource-group $(MGMT_RESOURCEGROUP) \
--name ${AKS_NAME} \
--azure-monitor-workspace-resource-id $$(az deployment group show --resource-group $(REGIONAL_RESOURCEGROUP) --name metrics-infra --output tsv --query properties.outputs.monitorId.value) \
--grafana-resource-id $$(az deployment group show --resource-group $(REGIONAL_RESOURCEGROUP) --name metrics-infra --output tsv --query properties.outputs.grafanaId.value); \
fi
.PHONY: mgmt.enable-aks-metrics

mgmt.aks.admin-access:
@scripts/aks-admin-access.sh $(MGMT_RESOURCEGROUP) $(PRINCIPAL_ID)
.PHONY: mgmt.aks.admin-access
Expand All @@ -365,7 +353,7 @@ mgmt.aks.kubeconfigfile:
@echo ${MGMT_KUBECONFIG_FILE}
.PHONY: mgmt.aks.kubeconfigfile

mgmt.init: region mgmt mgmt.aks.admin-access mgmt.aks.kubeconfig metrics-infra mgmt.enable-aks-metrics
mgmt.init: region metrics-infra mgmt mgmt.aks.admin-access mgmt.aks.kubeconfig
.PHONY: mgmt.init

mgmt.what-if: mgmt.rg
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,6 @@ param mgmtKeyVaultName = '{{ .mgmtKeyVault.name }}'

// MI for deployment scripts
param aroDevopsMsiId = '{{ .aroDevopsMsiId }}'

// Azure Monitor Workspace
param azureMonitorWorkspaceName = '{{ .monitoring.workspaceName }}'
3 changes: 3 additions & 0 deletions dev-infrastructure/configurations/svc-cluster.tmpl.bicepparam
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,6 @@ param regionalDNSZoneName = '{{ .regionalDNSSubdomain}}.{{ .baseDnsZoneName }}'
param regionalResourceGroup = '{{ .regionRG }}'

param frontendIngressCertName = '{{ .frontend.cert.name }}'

// Azure Monitor Workspace
param azureMonitorWorkspaceName = '{{ .monitoring.workspaceName }}'
16 changes: 0 additions & 16 deletions dev-infrastructure/mgmt-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,3 @@ resourceGroups:
deploymentLevel: ResourceGroup
dependsOn:
- mgmt-infra
- name: enable-metrics
action: Shell
command: scripts/enable-aks-metrics.sh
variables:
- name: RESOURCEGROUP
configRef: mgmt.rg
- name: AKS_NAME
configRef: aksName
- name: GRAFANA_RESOURCEGROUP
configRef: regionRG
- name: MONITORING_WORKSPACE_NAME
configRef: monitoring.workspaceName
- name: GRAFANA_NAME
configRef: monitoring.grafanaName
dependsOn:
- mgmt-cluster
24 changes: 24 additions & 0 deletions dev-infrastructure/modules/aks-cluster-base.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ param aksClusterName string
param aksNodeResourceGroupName string
param aksEtcdKVEnableSoftDelete bool

// Metrics
param dcrId string
param metricLabelsAllowlist string = ''
param metricAnnotationsAllowList string = ''

// System agentpool spec(Infra)
param systemAgentMinCount int = 2
param systemAgentMaxCount int = 3
Expand Down Expand Up @@ -302,6 +307,15 @@ resource aksCluster 'Microsoft.ContainerService/managedClusters@2024-04-02-previ
nodeOSUpgradeChannel: 'NodeImage'
upgradeChannel: 'patch'
}
azureMonitorProfile: {
metrics: {
enabled: true
kubeStateMetrics: {
metricLabelsAllowlist: metricLabelsAllowlist
metricAnnotationsAllowList: metricAnnotationsAllowList
}
}
}
disableLocalAccounts: true
dnsPrefix: dnsPrefix
enableRBAC: true
Expand Down Expand Up @@ -517,6 +531,16 @@ resource aroDevopsMSIClusterAdmin 'Microsoft.Authorization/roleAssignments@2022-
}
}

// Metrics DCR association: links the data collection rule passed in as `dcrId`
// to the AKS cluster. The association is an extension resource scoped to
// `aksCluster`; its name is built from the resource group name and the cluster name.
resource azuremonitormetrics_dcra_clusterResourceId 'Microsoft.Insights/dataCollectionRuleAssociations@2022-06-01' = {
name: '${resourceGroup().name}-${aksCluster.name}-dcra'
scope: aksCluster
properties: {
description: 'Association of data collection rule. Deleting this association will break the data collection for this AKS Cluster.'
dataCollectionRuleId: dcrId
}
}

// Outputs
output userAssignedIdentities array = [
for i in range(0, length(workloadIdentities)): {
Expand Down
60 changes: 60 additions & 0 deletions dev-infrastructure/modules/metrics/datacollection.bicep
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Creates the data collection endpoint (DCE) and data collection rule (DCR)
// that forward the 'Microsoft-PrometheusMetrics' stream to an existing
// Azure Monitor Workspace, and outputs the DCR id.

@description('Name of the existing Azure Monitor Workspace that receives the metrics')
param azureMonitorWorkspaceName string

@description('Location in which the DCE and DCR are created; also part of their names')
param azureMonitorWorkspaceLocation string

@description('AKS cluster name; used here only to derive the DCE and DCR names')
param aksClusterName string

@description('Resource group that contains the Azure Monitor Workspace')
param regionalResourceGroup string

// DCE and DCR intentionally share one name, truncated to 44 characters.
var dceName = take('MSProm-${azureMonitorWorkspaceLocation}-${aksClusterName}', 44)
var dcrName = dceName

// Key that ties the DCR data flow to its monitoring account destination.
// Both places below must use the same value, so it is defined once.
var monitoringAccountDestination = 'MonitoringAccount1'

// The workspace is not created here; it is looked up in the regional resource group.
resource amw 'microsoft.monitor/accounts@2021-06-03-preview' existing = {
  name: azureMonitorWorkspaceName
  scope: resourceGroup(regionalResourceGroup)
}

resource dce 'Microsoft.Insights/dataCollectionEndpoints@2022-06-01' = {
  name: dceName
  location: azureMonitorWorkspaceLocation
  kind: 'Linux'
  properties: {}
}

resource dcr 'Microsoft.Insights/dataCollectionRules@2022-06-01' = {
  name: dcrName
  location: azureMonitorWorkspaceLocation
  kind: 'Linux'
  properties: {
    description: 'DCR for Azure Monitor Metrics Profile (Managed Prometheus)'
    dataCollectionEndpointId: dce.id
    // Source: the Prometheus forwarder, with an empty label include filter.
    dataSources: {
      prometheusForwarder: [
        {
          name: 'PrometheusDataSource'
          streams: [
            'Microsoft-PrometheusMetrics'
          ]
          labelIncludeFilter: {}
        }
      ]
    }
    // Sink: the Azure Monitor Workspace looked up above.
    destinations: {
      monitoringAccounts: [
        {
          accountResourceId: amw.id
          name: monitoringAccountDestination
        }
      ]
    }
    // Route the Prometheus stream from the source to the sink.
    dataFlows: [
      {
        destinations: [
          monitoringAccountDestination
        ]
        streams: [
          'Microsoft-PrometheusMetrics'
        ]
      }
    ]
  }
}

// Resource id of the data collection rule created above.
output dcrId string = dcr.id
2 changes: 1 addition & 1 deletion dev-infrastructure/modules/metrics/metrics.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ module grafana 'br:arointacr.azurecr.io/grafana.bicep:metrics.20240814.1' = {
}
}

module monitor 'br:arointacr.azurecr.io/monitor.bicep:monitor.20241004.1' = {
module monitor 'monitor.bicep' = {
name: 'monitor'
params: {
globalResourceGroup: globalResourceGroup
Expand Down
58 changes: 58 additions & 0 deletions dev-infrastructure/modules/metrics/monitor.bicep
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
@description('Metrics global resource group name')
param globalResourceGroup string

@description('Metrics global MSI name')
param msiName string

@description('Metrics global Grafana name')
param grafanaName string

@description('Metrics region monitor name')
param monitorName string = 'aro-hcp-monitor'

// Regional Azure Monitor Workspace, created in the deployment's resource group.
resource monitor 'microsoft.monitor/accounts@2021-06-03-preview' = {
  name: monitorName
  location: resourceGroup().location
}

// Default recording rule groups for the workspace.
// The workspace name is taken from the `monitor` resource (not from the
// `monitorName` parameter) so that Bicep records an implicit dependency and
// deploys this module only after the workspace exists.
module defaultRuleGroups 'rules/defaultRecordingRuleGroups.bicep' = {
  name: 'defaultRecordingRuleGroups'
  params: {
    azureMonitorWorkspaceLocation: resourceGroup().location
    azureMonitorWorkspaceName: monitor.name
    regionalResourceGroup: resourceGroup().name
  }
}

// Assign the Monitoring Data Reader role to the Azure Managed Grafana system-assigned managed identity at the workspace scope
var dataReader = 'b0d8363b-8ddd-447d-831f-62ca05bff136'

// Existing identity in the global resource group; only its id is exposed as an output.
resource msi 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-01-31' existing = {
  name: msiName
  scope: resourceGroup(globalResourceGroup)
}

// Existing Grafana instance in the global resource group; its identity receives the role below.
resource grafana 'Microsoft.Dashboard/grafana@2023-09-01' existing = {
  name: grafanaName
  scope: resourceGroup(globalResourceGroup)
}

resource roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
  // Deterministic name derived from workspace, Grafana and role, so redeployments reuse it.
  name: guid(monitor.id, grafana.id, dataReader)
  scope: monitor
  properties: {
    principalId: grafana.identity.principalId
    principalType: 'ServicePrincipal'
    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', dataReader)
  }
}

// Alerting rules; `monitor.id` gives this module an implicit dependency on the workspace.
module prometheus 'rules/prometheusAlertingRules.bicep' = {
  name: 'prometheusAlertingRules'
  params: {
    azureMonitoring: monitor.id
  }
}

output msiId string = msi.id
output grafanaId string = grafana.id
output monitorId string = monitor.id
Loading

0 comments on commit 2f64286

Please sign in to comment.