Skip to content

Commit

Permalink
Added canary url and stepfunction monitoring with sns email and slack…
Browse files Browse the repository at this point in the history
… integration

Signed-off-by: Brandon Shien <bshien@amazon.com>
  • Loading branch information
bshien committed May 22, 2024
1 parent 3e8f954 commit 37a387a
Show file tree
Hide file tree
Showing 11 changed files with 468 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ infrastructure/**/*.js
infrastructure/!jest.config.js
infrastructure/**/*.d.ts
infrastructure/node_modules
!infrastructure/canary/nodejs/node_modules/index.js

# CDK asset staging directory
infrastructure/.cdk.staging
Expand Down
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ dependencies {
implementation 'io.github.acm19:aws-request-signing-apache-interceptor:2.3.1'

implementation 'com.amazonaws:aws-lambda-java-core:1.2.3'
implementation 'com.amazonaws:aws-lambda-java-events:3.7.0'

implementation 'com.google.code.gson:gson:2.10.1'

Expand Down
102 changes: 102 additions & 0 deletions infrastructure/canary/nodejs/node_modules/index.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

116 changes: 116 additions & 0 deletions infrastructure/lib/constructs/snsMonitor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import { Construct } from 'constructs';
import { Alarm } from 'aws-cdk-lib/aws-cloudwatch';
import * as cloudwatch from "aws-cdk-lib/aws-cloudwatch";
import * as sns from "aws-cdk-lib/aws-sns";
import * as subscriptions from "aws-cdk-lib/aws-sns-subscriptions";
import * as actions from "aws-cdk-lib/aws-cloudwatch-actions";
import { Canary } from 'aws-cdk-lib/aws-synthetics';
import {OpenSearchLambda} from "./lambda";

/**
 * Configuration for {@link SnsMonitors}.
 */
interface SnsMonitorsProps {
/** AWS region; interpolated into the state machine ARN used as the step function alarm dimension. */
readonly region: string;
/** AWS account id; interpolated into the state machine ARN used as the step function alarm dimension. */
readonly accountId: string;
/** Optional step function alarms: one `ExecutionsFailed` alarm named `alertName` is created per entry. */
readonly stepFunctionSnsAlarms?: Array<{ alertName: string, stateMachineName: string }>;
/** Optional canary alarms: one alarm on the canary's success-percent metric, named `alertName`, is created per entry. */
readonly canaryAlarms?: Array<{ alertName: string, canary: Canary }>;
/** CloudWatch metric namespace used for the step function alarm metric (the canary alarm takes its metric from the canary itself). */
readonly alarmNameSpace: string;
/** Suffix appended to `OpenSearchMetrics-Alarm-` to form the SNS topic's construct id and display name. */
readonly snsTopic: string;
/** Lambda wrapper whose `lambda` function is subscribed to the SNS topic for Slack notifications. */
readonly slackLambda: OpenSearchLambda;
}

/**
 * Creates CloudWatch alarms for step function failures and/or canary failures
 * and routes them to a single SNS topic.
 *
 * The topic is subscribed to by every non-blank address in the email list and
 * by the Slack notification lambda supplied in the props.
 */
export class SnsMonitors extends Construct {
    private readonly region: string;
    private readonly accountId: string;
    private readonly stepFunctionSnsAlarms?: Array<{ alertName: string, stateMachineName: string }>;
    private readonly canaryAlarms?: Array<{ alertName: string, canary: Canary }>;
    private readonly alarmNameSpace: string;
    private readonly snsTopic: string;
    private readonly slackLambda: OpenSearchLambda;

    /**
     * @param scope parent construct
     * @param id construct id
     * @param props alarm definitions, topic name suffix and the Slack lambda to notify
     */
    constructor(scope: Construct, id: string, props: SnsMonitorsProps) {
        super(scope, id);
        this.region = props.region;
        this.accountId = props.accountId;
        this.stepFunctionSnsAlarms = props.stepFunctionSnsAlarms;
        this.canaryAlarms = props.canaryAlarms;
        this.alarmNameSpace = props.alarmNameSpace;
        this.snsTopic = props.snsTopic;
        this.slackLambda = props.slackLambda;

        // The email_list for receiving alerts. Blank entries (such as the
        // placeholder below) are skipped when subscriptions are created.
        const emailList: string[] = [
            ''
        ];

        // Create alarms, keeping them strongly typed so they can be wired to the topic below.
        const alarms: Alarm[] = [];

        for (const { alertName, stateMachineName } of this.stepFunctionSnsAlarms ?? []) {
            const [alarm] = this.stepFunctionExecutionsFailed(alertName, stateMachineName);
            alarms.push(alarm);
        }

        for (const { alertName, canary } of this.canaryAlarms ?? []) {
            const [alarm] = this.canaryFailed(alertName, canary);
            alarms.push(alarm);
        }

        // Create SNS topic for alarms to be sent to
        const topic = new sns.Topic(this, `OpenSearchMetrics-Alarm-${this.snsTopic}`, {
            displayName: `OpenSearchMetrics-Alarm-${this.snsTopic}`
        });

        // Connect every alarm to the SNS topic
        for (const alarm of alarms) {
            alarm.addAlarmAction(new actions.SnsAction(topic));
        }

        // Send email notification to the recipients. A blank address cannot be a
        // valid email endpoint, so it is ignored instead of being subscribed.
        for (const email of emailList) {
            if (email.trim().length === 0) {
                continue;
            }
            topic.addSubscription(new subscriptions.EmailSubscription(email));
        }

        // Send slack notification
        topic.addSubscription(new subscriptions.LambdaSubscription(this.slackLambda.lambda));
    }

    /**
     * Creates an alarm that fires when the `ExecutionsFailed` sum for the given
     * state machine is at least 1 in a single evaluation period. Missing data is
     * treated as not breaching.
     *
     * @param alertName used as the alarm name and as part of the construct id
     * @param stateMachineName state machine name appended to the ARN built from the configured region and account
     * @returns the created alarm together with its alert name
     */
    private stepFunctionExecutionsFailed(alertName: string, stateMachineName: string): [Alarm, string] {
        const alarmObject = new cloudwatch.Alarm(this, `error_alarm_${alertName}`, {
            metric: new cloudwatch.Metric({
                namespace: this.alarmNameSpace,
                metricName: "ExecutionsFailed",
                statistic: "Sum",
                dimensionsMap: {
                    StateMachineArn: `arn:aws:states:${this.region}:${this.accountId}:stateMachine:${stateMachineName}`
                }
            }),
            threshold: 1,
            evaluationPeriods: 1,
            comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
            datapointsToAlarm: 1,
            treatMissingData: cloudwatch.TreatMissingData.NOT_BREACHING,
            alarmDescription: "Detect SF execution failure",
            alarmName: alertName,
        });
        return [alarmObject, alertName];
    }

    /**
     * Creates an alarm that fires when the canary's success percentage drops
     * below 100 in a single evaluation period. Missing data is treated as not
     * breaching.
     *
     * @param alertName used as the alarm name and as part of the construct id
     * @param canary canary whose success-percent metric is monitored
     * @returns the created alarm together with its alert name
     */
    private canaryFailed(alertName: string, canary: Canary): [Alarm, string] {
        const alarmObject = new cloudwatch.Alarm(this, `error_alarm_${alertName}`, {
            metric: canary.metricSuccessPercent(),
            threshold: 100,
            evaluationPeriods: 1,
            comparisonOperator: cloudwatch.ComparisonOperator.LESS_THAN_THRESHOLD,
            datapointsToAlarm: 1,
            treatMissingData: cloudwatch.TreatMissingData.NOT_BREACHING,
            alarmDescription: "Detect Canary failure",
            alarmName: alertName,
        });
        return [alarmObject, alertName];
    }
}
11 changes: 10 additions & 1 deletion infrastructure/lib/infrastructure-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {OpenSearchMetricsNginxReadonly} from "./stacks/opensearchNginxProxyReado
import {ArnPrincipal} from "aws-cdk-lib/aws-iam";
import {OpenSearchWAF} from "./stacks/waf";
import {OpenSearchMetricsNginxCognito} from "./constructs/opensearchNginxProxyCognito";
import {OpenSearchMetricsMonitoringStack} from "./stacks/monitoringDashboard";

// import * as sqs from 'aws-cdk-lib/aws-sqs';
export class InfrastructureStack extends Stack {
Expand All @@ -34,12 +35,20 @@ export class InfrastructureStack extends Stack {
}
});


// Create OpenSearch Metrics Lambda setup
const openSearchMetricsWorkflowStack = new OpenSearchMetricsWorkflowStack(app, 'OpenSearchMetrics-Workflow', {
opensearchDomainStack: openSearchDomainStack, vpcStack: vpcStack, lambdaPackage: Project.LAMBDA_PACKAGE})
openSearchMetricsWorkflowStack.node.addDependency(vpcStack, openSearchDomainStack);

// Create Monitoring Dashboard

const openSearchMetricsMonitoringStack = new OpenSearchMetricsMonitoringStack(app, "OpenSearchMetrics-Monitoring", {
region: Project.REGION,
account: Project.AWS_ACCOUNT,
workflowComponent: openSearchMetricsWorkflowStack.workflowComponent,
lambdaPackage: Project.LAMBDA_PACKAGE
})
openSearchMetricsMonitoringStack.node.addDependency(openSearchMetricsWorkflowStack);

// Create OpenSearch Metrics Frontend DNS
const metricsHostedZone = new OpenSearchHealthRoute53(app, "OpenSearchMetrics-HostedZone", {
Expand Down
9 changes: 9 additions & 0 deletions infrastructure/lib/stacks/metricsWorkflow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@ export interface OpenSearchMetricsStackProps extends StackProps {
readonly vpcStack: VpcStack;
readonly lambdaPackage: string
}

/**
 * Identifiers of workflow resources exposed by the workflow stack so other
 * stacks can reference them.
 */
export interface WorkflowComponent {
/** Name of the OpenSearch metrics workflow state machine. */
opensearchMetricsWorkflowStateMachineName: string
}
export class OpenSearchMetricsWorkflowStack extends Stack {
public readonly workflowComponent: WorkflowComponent;
constructor(scope: Construct, id: string, props: OpenSearchMetricsStackProps) {
super(scope, id, props);

Expand All @@ -39,6 +44,10 @@ export class OpenSearchMetricsWorkflowStack extends Stack {
schedule: Schedule.expression('cron(0 7 * * ? *)'),
targets: [new SfnStateMachine(opensearchMetricsWorkflow)],
});

this.workflowComponent = {
opensearchMetricsWorkflowStateMachineName: opensearchMetricsWorkflow.stateMachineName
}
}

private createMetricsTask(scope: Construct, opensearchDomainStack: OpenSearchDomainStack,
Expand Down
91 changes: 91 additions & 0 deletions infrastructure/lib/stacks/monitoringDashboard.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import {Duration, Stack, StackProps} from "aws-cdk-lib";
import { Construct } from 'constructs';
import { WorkflowComponent } from "./metricsWorkflow";
import { SnsMonitors } from "../constructs/snsMonitor";
import {OpenSearchLambda} from "../constructs/lambda";
import * as synthetics from "aws-cdk-lib/aws-synthetics";
import * as path from "path";
import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager';
import Project from "../enums/project";

/**
 * Properties for {@link OpenSearchMetricsMonitoringStack}.
 */
interface OpenSearchMetricsMonitoringStackProps extends StackProps {
/** AWS region forwarded to the SNS monitors. */
readonly region: string;
/** AWS account id forwarded to the SNS monitors as `accountId`. */
readonly account: string;
/** Workflow identifiers; supplies the state machine name monitored for failed executions. */
readonly workflowComponent: WorkflowComponent;
/** File name of the lambda package, resolved under `../../../build/distributions/`. */
readonly lambdaPackage: string;
}

/**
 * Monitoring stack: provisions the Slack notification lambda (with its
 * credentials secret), then sets up SNS-backed alarms for failed step function
 * executions and for a heartbeat canary against the metrics site.
 */
export class OpenSearchMetricsMonitoringStack extends Stack {

    private readonly slackLambda: OpenSearchLambda;

    constructor(scope: Construct, id: string, readonly props: OpenSearchMetricsMonitoringStackProps) {
        super(scope, id, props);

        // Secret holding the Slack credentials; the lambda is told its name and region.
        const slackSecretName = 'slack-creds';
        const slackSecret = new secretsmanager.Secret(this, 'SlackApiCreds', {
            secretName: slackSecretName,
        });

        const slackLambdaEnvironment = {
            SLACK_CREDENTIALS_SECRETS: slackSecretName,
            SECRETS_MANAGER_REGION: slackSecret.env.region,
        };

        this.slackLambda = new OpenSearchLambda(this, "OpenSearchMetricsSlackLambdaFunction", {
            lambdaNameBase: "OpenSearchMetricsDashboardsSlackLambda",
            handler: "org.opensearchmetrics.lambda.SlackLambda",
            lambdaZipPath: `../../../build/distributions/${props.lambdaPackage}`,
            environment: slackLambdaEnvironment,
        });

        this.snsMonitorStepFunctionExecutionsFailed();

        const heartbeatUrl = `https://${Project.METRICS_HOSTED_ZONE}`;
        this.snsMonitorCanaryFailed('metrics_heartbeat', heartbeatUrl);
    }

    /**
     * Registers an SNS-backed alarm for failed executions of the metrics
     * workflow state machine.
     */
    private snsMonitorStepFunctionExecutionsFailed(): void {
        const { region, account, workflowComponent } = this.props;

        new SnsMonitors(this, "SnsMonitors-StepFunctionExecutionsFailed", {
            region: region,
            accountId: account,
            stepFunctionSnsAlarms: [
                {
                    alertName: 'StepFunction_execution_errors_MetricsWorkflow',
                    stateMachineName: workflowComponent.opensearchMetricsWorkflowStateMachineName,
                },
            ],
            alarmNameSpace: "AWS/States",
            snsTopic: "StepFunctionExecutionsFailed",
            slackLambda: this.slackLambda,
        });
    }

    /**
     * Creates a canary that runs every minute against the given URL and
     * registers an SNS-backed alarm for its failures.
     *
     * @param canaryName name given to the canary
     * @param canaryUrl URL exposed to the canary script through the `SITE_URL` environment variable
     */
    private snsMonitorCanaryFailed(canaryName: string, canaryUrl: string): void {
        const canaryTest = synthetics.Test.custom({
            code: synthetics.Code.fromAsset(path.join(__dirname, '../../canary')),
            handler: 'index.handler',
        });

        const heartbeatCanary = new synthetics.Canary(this, 'CanaryHeartbeatMonitor', {
            canaryName: canaryName,
            schedule: synthetics.Schedule.rate(Duration.minutes(1)),
            test: canaryTest,
            runtime: synthetics.Runtime.SYNTHETICS_NODEJS_PUPPETEER_6_2,
            environmentVariables: {
                SITE_URL: canaryUrl,
            },
        });

        const { region, account } = this.props;

        new SnsMonitors(this, "SnsMonitors-CanaryFailed", {
            region: region,
            accountId: account,
            canaryAlarms: [
                { alertName: 'Canary_failed_MetricsWorkflow', canary: heartbeatCanary },
            ],
            alarmNameSpace: "CloudWatchSynthetics",
            snsTopic: "CanaryFailed",
            slackLambda: this.slackLambda,
        });
    }
}
Loading

0 comments on commit 37a387a

Please sign in to comment.