Skip to content

Commit 2d17c1e

Browse files
authored
add-vpc and remove-vpc use Events to create/destroy per-ENI mirroring (#42)
* add-vpc uses events to create Traffic Mirroring Sessions Signed-off-by: Chris Helma <chelma+github@amazon.com> * remove-vpc uses events to destroy Traffic Mirroring Sessions * The CreateEniMirroring lambda now emits outcome metrics * Given the move towards an event-based architecture, we needed a way to more easily keep track of what is happening in our system. I added CloudWatch Metrics to the lambda to indicate when each possible outcome occurred. Signed-off-by: Chris Helma <chelma+github@amazon.com> * DestroyEniMirror Lambda now emits outcome metrics * Like the CreateEniMirror Lambda, the Destroy Lambda emits metrics to CloudWatch on each possible outcome to help evaluate what's happening in the system. Signed-off-by: Chris Helma <chelma+github@amazon.com> --------- Signed-off-by: Chris Helma <chelma+github@amazon.com>
1 parent 5221569 commit 2d17c1e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+1737
-468
lines changed

cdk-lib/capture-stacks/capture-nodes-stack.ts

+43-3
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,20 @@ import * as cdk from 'aws-cdk-lib';
22
import * as autoscaling from 'aws-cdk-lib/aws-autoscaling';
33
import * as ec2 from 'aws-cdk-lib/aws-ec2';
44
import * as ecs from 'aws-cdk-lib/aws-ecs';
5+
import * as events from 'aws-cdk-lib/aws-events';
6+
import * as targets from 'aws-cdk-lib/aws-events-targets';
57
import * as elbv2 from 'aws-cdk-lib/aws-elasticloadbalancingv2';
68
import * as iam from 'aws-cdk-lib/aws-iam';
79
import * as kms from 'aws-cdk-lib/aws-kms';
10+
import * as logs from 'aws-cdk-lib/aws-logs';
811
import * as opensearch from 'aws-cdk-lib/aws-opensearchservice';
912
import * as s3 from 'aws-cdk-lib/aws-s3';
1013
import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager';
1114
import * as ssm from 'aws-cdk-lib/aws-ssm';
1215
import * as path from 'path'
1316
import { Construct } from 'constructs';
1417

18+
import * as constants from '../core/constants'
1519
import {ClusterSsmValue} from '../core/ssm-wrangling'
1620

1721
export interface CaptureNodesStackProps extends cdk.StackProps {
@@ -204,15 +208,51 @@ export class CaptureNodesStack extends cdk.Stack {
204208
allowedPrincipals: [`arn:aws:iam::${this.account}:root`],
205209
});
206210

207-
// This SSM parameter will enable us share the details of our Capture setup.
208-
const clusterParamValue: ClusterSsmValue = {clusterName: props.clusterName, vpceServiceId: gwlbEndpointService.ref}
211+
/**
212+
* Set up shared resources for event-based management of mirroring.
213+
*/
214+
const clusterBus = new events.EventBus(this, 'ClusterBus', {})
215+
216+
// Store a copy of the Arkime events that occur for later replay
217+
clusterBus.archive('Archive', {
218+
archiveName: `Arkime-${props.clusterName}`,
219+
description: `Archive of Arkime events for Cluster ${props.clusterName}`,
220+
eventPattern: {
221+
source: [constants.EVENT_SOURCE],
222+
},
223+
retention: cdk.Duration.days(365), // Arbitrarily chosen
224+
});
225+
226+
// Make a human-readable log of the Arkime events that occur on the bus
227+
const clusterLogGroup = new logs.LogGroup(this, 'LogGroup', {
228+
logGroupName: `Arkime-${props.clusterName}`,
229+
removalPolicy: cdk.RemovalPolicy.DESTROY // The archive contains the real events
230+
});
231+
const logClusterEventsRule = new events.Rule(this, 'RuleLogClusterEvents', {
232+
eventBus: clusterBus,
233+
eventPattern: {
234+
source: [constants.EVENT_SOURCE],
235+
},
236+
targets: [new targets.CloudWatchLogGroup(clusterLogGroup)]
237+
});
238+
239+
/**
240+
* This SSM parameter will enable us share the details of our Capture setup.
241+
*/
242+
const clusterParamValue: ClusterSsmValue = {
243+
busArn: clusterBus.eventBusArn,
244+
busName: clusterBus.eventBusName,
245+
clusterName: props.clusterName,
246+
vpceServiceId: gwlbEndpointService.ref
247+
}
209248
const clusterParam = new ssm.StringParameter(this, 'ClusterParam', {
210249
allowedPattern: '.*',
211250
description: 'The Cluster\'s details',
212251
parameterName: props.ssmParamNameCluster,
213252
stringValue: JSON.stringify(clusterParamValue),
214253
tier: ssm.ParameterTier.STANDARD,
215254
});
216-
clusterParam.node.addDependency(gwlbEndpointService);
255+
clusterParam.node.addDependency(gwlbEndpointService);
256+
clusterParam.node.addDependency(clusterBus);
217257
}
218258
}

cdk-lib/cloud-demo.ts

+1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ switch(params.type) {
7272
break;
7373
case 'MirrorMgmtParams':
7474
new VpcMirrorStack(app, params.nameVpcMirrorStack, {
75+
eventBusArn: params.arnEventBus,
7576
subnetIds: params.listSubnetIds,
7677
subnetSsmParamNames: params.listSubnetSsmParams,
7778
vpcId: params.idVpc,

cdk-lib/core/command-params.ts

+2
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ export interface ClusterMgmtParamsRaw extends CommandParamsRaw {
2929
*/
3030
export interface MirrorMgmtParamsRaw extends CommandParamsRaw {
3131
type: 'MirrorMgmtParamsRaw';
32+
arnEventBus: string;
3233
nameVpcMirrorStack: string;
3334
nameVpcSsmParam: string;
3435
idVni: string;
@@ -88,6 +89,7 @@ export interface ClusterMgmtParams extends CommandParams {
8889
*/
8990
export interface MirrorMgmtParams extends CommandParams {
9091
type: 'MirrorMgmtParams';
92+
arnEventBus: string;
9193
nameVpcMirrorStack: string;
9294
nameVpcSsmParam: string;
9395
idVni: string;

cdk-lib/core/constants.ts

+3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
export const CDK_CONTEXT_CMD_VAR: string = 'ARKIME_CMD'
55
export const CDK_CONTEXT_REGION_VAR: string = 'ARKIME_REGION'
66
export const CDK_CONTEXT_PARAMS_VAR: string = 'ARKIME_PARAMS'
7+
export const EVENT_SOURCE: string = "arkime";
8+
export const EVENT_DETAIL_TYPE_CREATE_ENI_MIRROR: string = "CreateEniMirror";
9+
export const EVENT_DETAIL_TYPE_DESTROY_ENI_MIRROR: string = "DestroyEniMirror";
710

811
/**
912
* These map directly to the specific commands executed by the user via the management CLI. Since the strings are

cdk-lib/core/context-wrangling.ts

+1
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ function validateArgs(args: ValidateArgs) : (prms.ClusterMgmtParams | prms.Deplo
110110
const rawMirrorMgmtParamsObj: prms.MirrorMgmtParamsRaw = JSON.parse(args.cmdParamsRaw)
111111
const mirrorMgmtParams: prms.MirrorMgmtParams = {
112112
type: 'MirrorMgmtParams',
113+
arnEventBus: rawMirrorMgmtParamsObj.arnEventBus,
113114
awsAccount: args.awsAccount,
114115
awsRegion: args.awsRegion,
115116
nameVpcMirrorStack: rawMirrorMgmtParamsObj.nameVpcMirrorStack,

cdk-lib/core/ssm-wrangling.ts

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
*/
66

77
export interface ClusterSsmValue {
8+
readonly busArn: string;
9+
readonly busName: string;
810
readonly clusterName: string;
911
readonly vpceServiceId: string;
1012
}

cdk-lib/mirror-stacks/vpc-mirror-stack.ts

+151-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,20 @@
11
import assert = require('assert');
22

33
import { Construct } from 'constructs';
4-
import { Stack, StackProps } from 'aws-cdk-lib';
4+
import { Duration, Stack, StackProps } from 'aws-cdk-lib';
55
import * as ec2 from 'aws-cdk-lib/aws-ec2';
6+
import * as events from 'aws-cdk-lib/aws-events';
7+
import * as targets from 'aws-cdk-lib/aws-events-targets';
8+
import * as iam from 'aws-cdk-lib/aws-iam';
9+
import * as lambda from 'aws-cdk-lib/aws-lambda';
610
import * as ssm from 'aws-cdk-lib/aws-ssm';
11+
import * as path from 'path'
712

813
import {SubnetSsmValue, VpcSsmValue} from '../core/ssm-wrangling'
14+
import * as constants from '../core/constants'
915

1016
export interface VpcMirrorStackProps extends StackProps {
17+
readonly eventBusArn: string;
1118
readonly subnetIds: string[];
1219
readonly subnetSsmParamNames: string[];
1320
readonly vpcId: string;
@@ -68,7 +75,7 @@ export class VpcMirrorStack extends Stack {
6875
// See: https://docs.aws.amazon.com/vpc/latest/mirroring/tm-example-non-vpc.html
6976
const filter = new ec2.CfnTrafficMirrorFilter(this, `Filter`, {
7077
description: 'Mirror non-local VPC traffic',
71-
tags: [{key: "Name", value: props.vpcId}]
78+
tags: [{key: 'Name', value: props.vpcId}]
7279
});
7380
new ec2.CfnTrafficMirrorFilterRule(this, `FRule-RejectLocalOutbound`, {
7481
destinationCidrBlock: '10.0.0.0/16', // TODO: Need to figure this out instead of hardcode
@@ -118,5 +125,147 @@ export class VpcMirrorStack extends Stack {
118125
tier: ssm.ParameterTier.STANDARD,
119126
});
120127
vpcParam.node.addDependency(filter);
128+
129+
/**
130+
* Configure the resources required for event-based mirroring configuration
131+
*/
132+
// Get a handle to the cluster event bus
133+
const clusterBus = events.EventBus.fromEventBusArn(this, 'ClusterBus', props.eventBusArn);
134+
135+
// Archive Arkime events related to this User VPC to enable replay, with a focus on shorter-term debugging
136+
clusterBus.archive('Archive', {
137+
archiveName: `Arkime-${props.vpcId}`,
138+
description: `Archive of Arkime events for VPC ${props.vpcId}`,
139+
eventPattern: {
140+
source: [constants.EVENT_SOURCE],
141+
detail: {
142+
'vpc_id': events.Match.exactString(props.vpcId)
143+
}
144+
},
145+
retention: Duration.days(30),
146+
});
147+
148+
// Create the Lambda that will set up the traffic mirroring for ENIs in our VPC
149+
const createLambda = new lambda.Function(this, 'CreateEniMirrorLambda', {
150+
functionName: `CreateEniMirror-${props.vpcId}`,
151+
runtime: lambda.Runtime.PYTHON_3_9,
152+
code: lambda.Code.fromAsset(path.resolve(__dirname, '..', '..', 'manage_arkime')),
153+
handler: 'lambda_handlers.create_eni_mirror_handler',
154+
timeout: Duration.seconds(30), // Something has gone very wrong if this is exceeded
155+
});
156+
createLambda.addToRolePolicy(
157+
new iam.PolicyStatement({
158+
effect: iam.Effect.ALLOW,
159+
actions: [
160+
// TODO: Should scope this down.
161+
// We need ec2:CreateTrafficMirrorSession in order to set up our session, but whenever I add *just*
162+
// that, I get an UnauthorizedOperation. The docs say that's all that should be required, but the
163+
// documentation appears wrong or there's something extra mysterious going on here. Even CloudTrail
164+
// indicates the only call being made is CreateTrafficMirrorSession, but it's still failing. The
165+
// exception also doesn't indicate otherwise.
166+
// See: https://docs.aws.amazon.com/vpc/latest/mirroring/traffic-mirroring-security.html
167+
'ec2:*',
168+
],
169+
resources: [
170+
`arn:aws:ec2:${this.region}:${this.account}:*`
171+
]
172+
})
173+
);
174+
createLambda.addToRolePolicy(
175+
new iam.PolicyStatement({
176+
effect: iam.Effect.ALLOW,
177+
actions: [
178+
'ssm:GetParameter',
179+
'ssm:PutParameter',
180+
],
181+
resources: [
182+
`arn:aws:ssm:${this.region}:${this.account}:*`
183+
]
184+
})
185+
);
186+
createLambda.addToRolePolicy(
187+
new iam.PolicyStatement({
188+
effect: iam.Effect.ALLOW,
189+
actions: [
190+
'cloudwatch:PutMetricData',
191+
],
192+
resources: [
193+
"*"
194+
]
195+
})
196+
);
197+
198+
// Create a rule to funnel appropriate events to our setup lambda
199+
const createRule = new events.Rule(this, 'RuleCreateEniMirror', {
200+
eventBus: clusterBus,
201+
eventPattern: {
202+
source: [constants.EVENT_SOURCE],
203+
detailType: [constants.EVENT_DETAIL_TYPE_CREATE_ENI_MIRROR],
204+
detail: {
205+
'vpc_id': events.Match.exactString(props.vpcId)
206+
}
207+
},
208+
targets: [new targets.LambdaFunction(createLambda)]
209+
});
210+
createRule.node.addDependency(clusterBus);
211+
212+
// Create the Lambda that will tear down the traffic mirroring for ENIs in our VPC
213+
const destroyLambda = new lambda.Function(this, 'DestroyEniMirrorLambda', {
214+
functionName: `DestroyEniMirror-${props.vpcId}`,
215+
runtime: lambda.Runtime.PYTHON_3_9,
216+
code: lambda.Code.fromAsset(path.resolve(__dirname, '..', '..', 'manage_arkime')),
217+
handler: 'lambda_handlers.destroy_eni_mirror_handler',
218+
timeout: Duration.seconds(30), // Something has gone very wrong if this is exceeded
219+
});
220+
destroyLambda.addToRolePolicy(
221+
new iam.PolicyStatement({
222+
effect: iam.Effect.ALLOW,
223+
actions: [
224+
// TODO: Should scope this down.
225+
// Just need ec2:DeleteTrafficMirroringSession, but failing similar to the Create Lambda
226+
'ec2:*',
227+
],
228+
resources: [
229+
`arn:aws:ec2:${this.region}:${this.account}:*`
230+
]
231+
})
232+
);
233+
destroyLambda.addToRolePolicy(
234+
new iam.PolicyStatement({
235+
effect: iam.Effect.ALLOW,
236+
actions: [
237+
'ssm:GetParameter',
238+
'ssm:DeleteParameter',
239+
],
240+
resources: [
241+
`arn:aws:ssm:${this.region}:${this.account}:*`
242+
]
243+
})
244+
);
245+
destroyLambda.addToRolePolicy(
246+
new iam.PolicyStatement({
247+
effect: iam.Effect.ALLOW,
248+
actions: [
249+
'cloudwatch:PutMetricData',
250+
],
251+
resources: [
252+
"*"
253+
]
254+
})
255+
);
256+
257+
// Create a rule to funnel appropriate events to our teardwon lambda
258+
const destroyRule = new events.Rule(this, 'RuleDestroyEniMirror', {
259+
eventBus: clusterBus,
260+
eventPattern: {
261+
source: [constants.EVENT_SOURCE],
262+
detailType: [constants.EVENT_DETAIL_TYPE_DESTROY_ENI_MIRROR],
263+
detail: {
264+
'vpc_id': events.Match.exactString(props.vpcId)
265+
}
266+
},
267+
targets: [new targets.LambdaFunction(destroyLambda)]
268+
});
269+
destroyRule.node.addDependency(clusterBus);
121270
}
122271
}

manage_arkime.py

+12-11
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,16 @@
33

44
import click
55

6-
from manage_arkime.commands.add_vpc import cmd_add_vpc
7-
from manage_arkime.commands.create_cluster import cmd_create_cluster
8-
from manage_arkime.commands.destroy_cluster import cmd_destroy_cluster
9-
from manage_arkime.commands.deploy_demo_traffic import cmd_deploy_demo_traffic
10-
from manage_arkime.commands.destroy_demo_traffic import cmd_destroy_demo_traffic
11-
from manage_arkime.commands.get_login_details import cmd_get_login_details
12-
from manage_arkime.commands.list_clusters import cmd_list_clusters
13-
from manage_arkime.commands.remove_vpc import cmd_remove_vpc
14-
import manage_arkime.constants as constants
15-
from manage_arkime.logging_wrangler import LoggingWrangler
6+
from commands.add_vpc import cmd_add_vpc
7+
from commands.create_cluster import cmd_create_cluster
8+
from commands.destroy_cluster import cmd_destroy_cluster
9+
from commands.deploy_demo_traffic import cmd_deploy_demo_traffic
10+
from commands.destroy_demo_traffic import cmd_destroy_demo_traffic
11+
from commands.get_login_details import cmd_get_login_details
12+
from commands.list_clusters import cmd_list_clusters
13+
from commands.remove_vpc import cmd_remove_vpc
14+
import constants as constants
15+
from logging_wrangler import LoggingWrangler, set_boto_log_level
1616

1717
logger = logging.getLogger(__name__)
1818

@@ -127,4 +127,5 @@ def main():
127127

128128

129129
if __name__ == "__main__":
130-
main()
130+
with set_boto_log_level("WARNING"): # Prevent overwhelming boto spam in our debug log
131+
main()

0 commit comments

Comments
 (0)