Skip to content

Commit

Permalink
Talos k8s with AWS CCM support
Browse files Browse the repository at this point in the history
  • Loading branch information
erikvveen authored and PhilipSchmid committed Feb 17, 2025
1 parent d399fc8 commit d6741fe
Show file tree
Hide file tree
Showing 7 changed files with 241 additions and 17 deletions.
19 changes: 13 additions & 6 deletions 00-locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,19 @@ locals {
cluster = {
id = var.cluster_id,
clusterName = var.cluster_name,
externalCloudProvider = {
  enabled = var.enable_external_cloud_provider
  # Only hand Talos a manifest URL when external cloud provider support is
  # enabled AND a non-empty URL was supplied. compact() drops null and ""
  # elements, so the list is empty (instead of [null] or [""]) when the
  # feature is disabled or when the manifest is deployed out-of-band
  # (var.external_cloud_provider_manifest = "", e.g. for a Helm install).
  manifests = compact([
    var.enable_external_cloud_provider ? var.external_cloud_provider_manifest : null,
  ])
},
apiServer = {
certSANs = [
module.elb_k8s_elb.elb_dns_name
]
],
extraArgs = {
enable-admission-plugins = var.admission_plugins
}
},
controllerManager = {
extraArgs = {
Expand All @@ -55,16 +64,14 @@ locals {
allowSchedulingOnControlPlanes = var.allow_workload_on_cp_nodes
},
machine = {
kubelet = {
registerWithFQDN = true
},
certSANs = [
module.elb_k8s_elb.elb_dns_name
],
kubelet = {
extraArgs = {
rotate-server-certificates = true
}
},
registerWithFQDN = true
}
}
}
Expand Down Expand Up @@ -94,4 +101,4 @@ locals {
"kubernetes.io/cluster/${var.cluster_name}" = "owned"
}

}
}
54 changes: 52 additions & 2 deletions 00-variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,28 @@ variable "cluster_id" {
type = number
}

# Optional, pre-existing IAM instance profile for the control plane instances.
# Forwarded as `iam_instance_profile` to the control plane EC2 instance module.
# NOTE(review): that module call also sets `create_iam_instance_profile` when
# the CCM IAM policies are auto-deployed; confirm which profile takes
# precedence when both are set.
variable "iam_instance_profile_control_plane" {
  default     = null
  description = "IAM instance profile to attach to the control plane instances to give AWS CCM the sufficient rights to execute."
  type        = string
}

# Optional, pre-existing IAM instance profile for the worker instances.
# Forwarded as `iam_instance_profile` to the worker group EC2 instance module.
# NOTE(review): that module call also sets `create_iam_instance_profile` when
# the CCM IAM policies are auto-deployed; confirm which profile takes
# precedence when both are set.
variable "iam_instance_profile_worker" {
  default     = null
  description = "IAM instance profile to attach to the worker instances to give AWS CCM the sufficient rights to execute."
  type        = string
}

# EC2 instance metadata service (IMDS) options. The map is forwarded unchanged
# as `metadata_options` to both the control plane and the worker EC2 instance
# modules. Because the type is map(string), the numeric hop limit below is
# converted to the string "1".
# NOTE(review): `http_tokens = "optional"` does not enforce IMDSv2; consider
# "required" if every consumer of the metadata endpoint supports session tokens.
variable "metadata_options" {
  default = {
    http_endpoint               = "enabled"
    http_put_response_hop_limit = 1
    http_tokens                 = "optional"
  }
  description = "Metadata to attach to the instances."
  type        = map(string)
}

variable "cluster_architecture" {
default = "amd64"
description = "Cluster architecture. Choose 'arm64' or 'amd64'. If you choose 'arm64', ensure to also override the control_plane.instance_type and worker_groups.instance_type with an ARM64-based instance type like 'm7g.large'."
Expand Down Expand Up @@ -55,7 +77,7 @@ variable "disable_kube_proxy" {

variable "allow_workload_on_cp_nodes" {
default = false
description = "Allow workloads on CP nodes or not. Allowing it means Talos Linux default taints are removed from CP nodes. More details here: https://www.talos.dev/v1.5/talos-guides/howto/workers-on-controlplane/"
description = "Allow workloads on CP nodes or not. Allowing it means Talos Linux default taints are removed from CP nodes which is typically required for single-node clusters. More details here: https://www.talos.dev/v1.5/talos-guides/howto/workers-on-controlplane/"
type = bool
}

Expand Down Expand Up @@ -142,4 +164,32 @@ variable "config_patch_files" {
default = []
description = "Path to talos config path files that applies to all nodes"
type = list(string)
}
}

# Admission plugins for the Kubernetes API server. Despite the word "list" in
# the description, the value is ONE comma-separated string: it is passed
# verbatim as the apiServer `enable-admission-plugins` extra argument.
variable "admission_plugins" {
  default     = "MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ServiceAccount"
  description = "List of admission plugins to enable"
  type        = string
}

# Master switch for external cloud provider (AWS CCM) support. It drives
# `cluster.externalCloudProvider.enabled` in the Talos machine config and is a
# precondition for creating the CCM IAM policies.
variable "enable_external_cloud_provider" {
  default     = false
  description = "Whether to enable or disable externalCloudProvider support. See https://kubernetes.io/docs/tasks/administer-cluster/running-cloud-controller/."
  type        = bool
}

# Opt-in creation of the IAM policies (and per-instance profiles) that the AWS
# cloud controller manager needs. Only meaningful together with
# var.enable_external_cloud_provider, which the validation below enforces.
variable "deploy_external_cloud_provider_iam_policies" {
  default     = false
  description = "Whether to auto-deploy the externalCloudProvider-required IAM policies. See https://cloud-provider-aws.sigs.k8s.io/prerequisites/."
  type        = bool

  validation {
    # "deploy implies enable": passes when deployment is off, or when external
    # cloud provider support is on.
    # NOTE: referencing another variable inside a validation block requires
    # Terraform 1.9 or newer.
    condition     = !var.deploy_external_cloud_provider_iam_policies || var.enable_external_cloud_provider
    error_message = "externalCloudProvider support needs to be enabled when trying to deploy the externalCloudProvider-required IAM policies."
  }
}

# URL of the cloud controller manifest that is placed into
# `cluster.externalCloudProvider.manifests` when
# var.enable_external_cloud_provider is true.
variable "external_cloud_provider_manifest" {
  default     = "https://raw.githubusercontent.com/isovalent/terraform-aws-talos/main/aws-cloud-controller.yaml"
  description = "externalCloudProvider manifest to be applied if var.enable_external_cloud_provider is enabled. If you want to deploy it manually (e.g., via Helm chart), enable var.enable_external_cloud_provider but set this value to an empty string (\"\"). See https://kubernetes.io/docs/tasks/administer-cluster/running-cloud-controller/."
  type        = string
}
139 changes: 134 additions & 5 deletions 03-talos.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,117 @@
# IAM policy for the control plane nodes, created only when external cloud
# provider support is enabled AND auto-deployment of the IAM policies is
# requested. Action list reference:
# https://cloud-provider-aws.sigs.k8s.io/prerequisites/
resource "aws_iam_policy" "control_plane_ccm_policy" {
  count = var.enable_external_cloud_provider && var.deploy_external_cloud_provider_iam_policies ? 1 : 0

  name        = "${var.cluster_name}-control-plane-ccm-policy"
  path        = "/"
  description = "IAM policy for the control plane nodes to allow CCM to manage AWS resources"

  policy = jsonencode(
    {
      "Version" : "2012-10-17",
      "Statement" : [
        {
          "Effect" : "Allow",
          "Action" : [
            "autoscaling:DescribeAutoScalingGroups",
            "autoscaling:DescribeLaunchConfigurations",
            "autoscaling:DescribeTags",
            "ec2:DescribeInstances",
            "ec2:DescribeRegions",
            "ec2:DescribeRouteTables",
            "ec2:DescribeSecurityGroups",
            "ec2:DescribeSubnets",
            "ec2:DescribeVolumes",
            "ec2:DescribeAvailabilityZones",
            "ec2:CreateSecurityGroup",
            "ec2:CreateTags",
            "ec2:CreateVolume",
            "ec2:ModifyInstanceAttribute",
            "ec2:ModifyVolume",
            "ec2:AttachVolume",
            "ec2:AuthorizeSecurityGroupIngress",
            "ec2:CreateRoute",
            "ec2:DeleteRoute",
            "ec2:DeleteSecurityGroup",
            "ec2:DeleteVolume",
            "ec2:DetachVolume",
            "ec2:RevokeSecurityGroupIngress",
            "ec2:DescribeVpcs",
            "ec2:DescribeInstanceTopology",
            "elasticloadbalancing:AddTags",
            "elasticloadbalancing:AttachLoadBalancerToSubnets",
            "elasticloadbalancing:ApplySecurityGroupsToLoadBalancer",
            "elasticloadbalancing:CreateLoadBalancer",
            "elasticloadbalancing:CreateLoadBalancerPolicy",
            "elasticloadbalancing:CreateLoadBalancerListeners",
            "elasticloadbalancing:ConfigureHealthCheck",
            "elasticloadbalancing:DeleteLoadBalancer",
            "elasticloadbalancing:DeleteLoadBalancerListeners",
            "elasticloadbalancing:DescribeLoadBalancers",
            "elasticloadbalancing:DescribeLoadBalancerAttributes",
            "elasticloadbalancing:DetachLoadBalancerFromSubnets",
            "elasticloadbalancing:DeregisterInstancesFromLoadBalancer",
            "elasticloadbalancing:ModifyLoadBalancerAttributes",
            "elasticloadbalancing:RegisterInstancesWithLoadBalancer",
            "elasticloadbalancing:SetLoadBalancerPoliciesForBackendServer",
            # "elasticloadbalancing:AddTags" was listed a second time here;
            # the redundant entry has been dropped (it is granted above).
            "elasticloadbalancing:CreateListener",
            "elasticloadbalancing:CreateTargetGroup",
            "elasticloadbalancing:DeleteListener",
            "elasticloadbalancing:DeleteTargetGroup",
            "elasticloadbalancing:DescribeListeners",
            "elasticloadbalancing:DescribeLoadBalancerPolicies",
            "elasticloadbalancing:DescribeTargetGroups",
            "elasticloadbalancing:DescribeTargetHealth",
            "elasticloadbalancing:ModifyListener",
            "elasticloadbalancing:ModifyTargetGroup",
            "elasticloadbalancing:RegisterTargets",
            "elasticloadbalancing:DeregisterTargets",
            "elasticloadbalancing:SetLoadBalancerPoliciesOfListener",
            "iam:CreateServiceLinkedRole",
            "kms:DescribeKey"
          ],
          "Resource" : [
            "*"
          ]
        }
      ]
    }
  )
}

# IAM policy for the worker nodes, created under the same condition as the
# control plane policy: external cloud provider support enabled AND
# auto-deployment of the IAM policies requested. Action list reference:
# https://cloud-provider-aws.sigs.k8s.io/prerequisites/
resource "aws_iam_policy" "worker_ccm_policy" {
  count = var.enable_external_cloud_provider && var.deploy_external_cloud_provider_iam_policies ? 1 : 0

  name        = "${var.cluster_name}-worker-ccm-policy"
  path        = "/"
  description = "IAM policy for the worker nodes to allow CCM to manage AWS resources"

  # Native HCL object syntax; jsonencode() renders it as the JSON policy
  # document.
  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "ec2:DescribeInstances",
          "ec2:DescribeRegions",
          "ecr:GetAuthorizationToken",
          "ecr:BatchCheckLayerAvailability",
          "ecr:GetDownloadUrlForLayer",
          "ecr:GetRepositoryPolicy",
          "ecr:DescribeRepositories",
          "ecr:ListImages",
          "ecr:BatchGetImage",
        ]
        Resource = "*"
      },
    ]
  })
}

module "talos_control_plane_nodes" {
source = "terraform-aws-modules/ec2-instance/aws"
version = "~> 5.5"
Expand All @@ -10,6 +124,13 @@ module "talos_control_plane_nodes" {
subnet_id = element(data.aws_subnets.public.ids, count.index)
associate_public_ip_address = true
tags = merge(var.tags, local.cluster_required_tags)
metadata_options = var.metadata_options
create_iam_instance_profile = var.enable_external_cloud_provider && var.deploy_external_cloud_provider_iam_policies ? true : false
iam_instance_profile = var.iam_instance_profile_control_plane
iam_role_use_name_prefix = false
iam_role_policies = var.enable_external_cloud_provider && var.deploy_external_cloud_provider_iam_policies ? {
"${var.cluster_name}-control-plane-ccm-policy" : aws_iam_policy.control_plane_ccm_policy[0].arn,
} : {}

vpc_security_group_ids = [module.cluster_sg.security_group_id]

Expand All @@ -32,6 +153,13 @@ module "talos_worker_group" {
subnet_id = element(data.aws_subnets.public.ids, tonumber(trimprefix(each.key, "${each.value.name}.")))
associate_public_ip_address = true
tags = merge(each.value.tags, var.tags, local.cluster_required_tags)
metadata_options = var.metadata_options
create_iam_instance_profile = var.enable_external_cloud_provider && var.deploy_external_cloud_provider_iam_policies ? true : false
iam_instance_profile = var.iam_instance_profile_worker
iam_role_use_name_prefix = false
iam_role_policies = var.enable_external_cloud_provider && var.deploy_external_cloud_provider_iam_policies ? {
"${var.cluster_name}-worker-ccm-policy" : aws_iam_policy.worker_ccm_policy[0].arn,
} : {}

vpc_security_group_ids = [module.cluster_sg.security_group_id]

Expand All @@ -45,6 +173,8 @@ module "talos_worker_group" {
resource "talos_machine_secrets" "this" {}

data "talos_machine_configuration" "controlplane" {
for_each = { for index in range(var.controlplane_count) : index => index }

cluster_name = var.cluster_name
cluster_endpoint = "https://${module.elb_k8s_elb.elb_dns_name}"
machine_type = "controlplane"
Expand Down Expand Up @@ -77,12 +207,11 @@ data "talos_machine_configuration" "worker_group" {
}

resource "talos_machine_configuration_apply" "controlplane" {
count = var.controlplane_count

for_each = { for index, instance in module.talos_control_plane_nodes : index => instance }
client_configuration = talos_machine_secrets.this.client_configuration
machine_configuration_input = data.talos_machine_configuration.controlplane.machine_configuration
endpoint = module.talos_control_plane_nodes[count.index].public_ip
node = module.talos_control_plane_nodes[count.index].private_ip
machine_configuration_input = data.talos_machine_configuration.controlplane[each.key].machine_configuration
endpoint = module.talos_control_plane_nodes[each.key].public_ip
node = module.talos_control_plane_nodes[each.key].private_ip
}

resource "talos_machine_configuration_apply" "worker_group" {
Expand Down
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ A Terraform module to manage a Talos-based Kubernetes on AWS (EC2 instances). Is
- [Talos' KubePrism](https://www.talos.dev/v1.5/kubernetes-guides/configuration/kubeprism/) to get an internal endpoint for the KAPI (used for [Cilium Kube-Proxy replacement](https://docs.cilium.io/en/stable/network/kubernetes/kubeproxy-free/))
- [kubernetes-sigs/metrics-server](https://github.com/kubernetes-sigs/metrics-server/)
- [alex1989hu/kubelet-serving-cert-approver](https://github.com/alex1989hu/kubelet-serving-cert-approver) inspired by [Talos' Deploying Metrics Server](https://www.talos.dev/v1.5/kubernetes-guides/configuration/deploy-metrics-server/) guide.
- [AWS Cloud Provider](https://github.com/kubernetes/cloud-provider-aws/tree/master)
- Cilium features:
- [Kube-Proxy replacement](https://docs.cilium.io/en/stable/network/kubernetes/kubeproxy-free/)
- [IPAM modes](https://docs.cilium.io/en/stable/network/concepts/ipam/): `kubernetes`, `cluster-pool`
Expand Down Expand Up @@ -72,6 +73,8 @@ module "talos" {

| Name | Type |
|------|------|
| [aws_iam_policy.control_plane_ccm_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
| [aws_iam_policy.worker_ccm_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
| [local_file.kubeconfig](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource |
| [local_file.talosconfig](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource |
| [null_resource.wait_for_public_subnets](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
Expand All @@ -93,17 +96,24 @@ module "talos" {

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_admission_plugins"></a> [admission\_plugins](#input\_admission\_plugins) | List of admission plugins to enable | `string` | `"MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ServiceAccount"` | no |
| <a name="input_allocate_node_cidrs"></a> [allocate\_node\_cidrs](#input\_allocate\_node\_cidrs) | Whether to assign PodCIDRs to Node resources or not. Only needed in case Cilium runs in 'kubernetes' IPAM mode. | `bool` | `true` | no |
| <a name="input_allow_workload_on_cp_nodes"></a> [allow\_workload\_on\_cp\_nodes](#input\_allow\_workload\_on\_cp\_nodes) | Allow workloads on CP nodes or not. Allowing it means Talos Linux default taints are removed from CP nodes. More details here: https://www.talos.dev/v1.5/talos-guides/howto/workers-on-controlplane/ | `bool` | `false` | no |
| <a name="input_allow_workload_on_cp_nodes"></a> [allow\_workload\_on\_cp\_nodes](#input\_allow\_workload\_on\_cp\_nodes) | Allow workloads on CP nodes or not. Allowing it means Talos Linux default taints are removed from CP nodes which is typically required for single-node clusters. More details here: https://www.talos.dev/v1.5/talos-guides/howto/workers-on-controlplane/ | `bool` | `false` | no |
| <a name="input_cluster_architecture"></a> [cluster\_architecture](#input\_cluster\_architecture) | Cluster architecture. Choose 'arm64' or 'amd64'. If you choose 'arm64', ensure to also override the control\_plane.instance\_type and worker\_groups.instance\_type with an ARM64-based instance type like 'm7g.large'. | `string` | `"amd64"` | no |
| <a name="input_cluster_id"></a> [cluster\_id](#input\_cluster\_id) | The ID of the cluster. | `number` | `"1"` | no |
| <a name="input_cluster_name"></a> [cluster\_name](#input\_cluster\_name) | Name of cluster | `string` | n/a | yes |
| <a name="input_config_patch_files"></a> [config\_patch\_files](#input\_config\_patch\_files) | Path to talos config path files that applies to all nodes | `list(string)` | `[]` | no |
| <a name="input_control_plane"></a> [control\_plane](#input\_control\_plane) | Info for control plane that will be created | <pre>object({<br/> instance_type = optional(string, "m5.large")<br/> config_patch_files = optional(list(string), [])<br/> tags = optional(map(string), {})<br/> })</pre> | `{}` | no |
| <a name="input_controlplane_count"></a> [controlplane\_count](#input\_controlplane\_count) | Defines how many controlplane nodes are deployed in the cluster. | `number` | `3` | no |
| <a name="input_deploy_external_cloud_provider_iam_policies"></a> [deploy\_external\_cloud\_provider\_iam\_policies](#input\_deploy\_external\_cloud\_provider\_iam\_policies) | Whether to auto-deploy the externalCloudProvider-required IAM policies. See https://cloud-provider-aws.sigs.k8s.io/prerequisites/. | `bool` | `false` | no |
| <a name="input_disable_kube_proxy"></a> [disable\_kube\_proxy](#input\_disable\_kube\_proxy) | Whether to deploy Kube-Proxy or not. By default, KP shouldn't be deployed. | `bool` | `true` | no |
| <a name="input_enable_external_cloud_provider"></a> [enable\_external\_cloud\_provider](#input\_enable\_external\_cloud\_provider) | Whether to enable or disable externalCloudProvider support. See https://kubernetes.io/docs/tasks/administer-cluster/running-cloud-controller/. | `bool` | `false` | no |
| <a name="input_external_cloud_provider_manifest"></a> [external\_cloud\_provider\_manifest](#input\_external\_cloud\_provider\_manifest) | externalCloudProvider manifest to be applied if var.enable\_external\_cloud\_provider is enabled. If you want to deploy it manually (e.g., via Helm chart), enable var.enable\_external\_cloud\_provider but set this value to an empty string (""). See https://kubernetes.io/docs/tasks/administer-cluster/running-cloud-controller/. | `string` | `"https://raw.githubusercontent.com/isovalent/terraform-aws-talos/main/aws-cloud-controller.yaml"` | no |
| <a name="input_iam_instance_profile_control_plane"></a> [iam\_instance\_profile\_control\_plane](#input\_iam\_instance\_profile\_control\_plane) | IAM instance profile to attach to the control plane instances to give AWS CCM the sufficient rights to execute. | `string` | `null` | no |
| <a name="input_iam_instance_profile_worker"></a> [iam\_instance\_profile\_worker](#input\_iam\_instance\_profile\_worker) | IAM instance profile to attach to the worker instances to give AWS CCM the sufficient rights to execute. | `string` | `null` | no |
| <a name="input_kubernetes_api_allowed_cidr"></a> [kubernetes\_api\_allowed\_cidr](#input\_kubernetes\_api\_allowed\_cidr) | The CIDR from which to allow to access the Kubernetes API | `string` | `"0.0.0.0/0"` | no |
| <a name="input_kubernetes_version"></a> [kubernetes\_version](#input\_kubernetes\_version) | Kubernetes version to use for the Talos cluster, if not set, the K8s version shipped with the selected Talos version will be used. Check https://www.talos.dev/latest/introduction/support-matrix/. For example '1.29.3'. | `string` | `""` | no |
| <a name="input_metadata_options"></a> [metadata\_options](#input\_metadata\_options) | Metadata to attach to the instances. | `map(string)` | <pre>{<br/> "http_endpoint": "enabled",<br/> "http_put_response_hop_limit": 1,<br/> "http_tokens": "optional"<br/>}</pre> | no |
| <a name="input_pod_cidr"></a> [pod\_cidr](#input\_pod\_cidr) | The CIDR to use for Pods. Only required in case allocate\_node\_cidrs is set to 'true'. Otherwise, simply configure it inside Cilium's Helm values. | `string` | `"100.64.0.0/14"` | no |
| <a name="input_region"></a> [region](#input\_region) | The region in which to create the Talos Linux cluster. | `string` | n/a | yes |
| <a name="input_service_cidr"></a> [service\_cidr](#input\_service\_cidr) | The CIDR to use for services. | `string` | `"100.68.0.0/16"` | no |
Expand Down
Loading

0 comments on commit d6741fe

Please sign in to comment.