Skip to content

Commit

Permalink
Merge pull request 'Ready for releasing v0.4' (#22) from feat/retry i…
Browse files Browse the repository at this point in the history
  • Loading branch information
Klavs Klavsen committed Jan 14, 2025
2 parents 603c858 + e02f748 commit 693802f
Show file tree
Hide file tree
Showing 49 changed files with 746 additions and 226 deletions.
20 changes: 18 additions & 2 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ jobs:
username: obmondo
password: ${{ secrets.GITHUB_TOKEN }}

- name: Build and push AMD64 and ARM64 container images
- name: Build and push KubeAid Bootstrap Script AMD64 and ARM64 container images
uses: docker/build-push-action@v4
with:
context: .
file: build/Dockerfile
file: build/kubeaid-bootstrap-script/Dockerfile
# NOTE : It takes pretty long to build container images for the ARM64 platform (even when
# using QEMU).
platforms: linux/amd64,linux/arm64
Expand All @@ -43,3 +43,19 @@ jobs:
# builds.
cache-from: type=gha
cache-to: type=gha,mode=max

- name: Build and push Hetzner Failover Script AMD64 and ARM64 container images
uses: docker/build-push-action@v4
with:
context: .
file: build/hetzner-failover-script/Dockerfile
# NOTE : It takes pretty long to build container images for the ARM64 platform (even when
# using QEMU).
platforms: linux/amd64,linux/arm64
tags: ghcr.io/obmondo/hetzner-failover-script:${{ github.event.release.tag_name }}
push: true
# Experimental cache exporter for GitHub Actions provided by buildx and BuildKit.
# It uses the GitHub Cache API to fetch and load the Docker layer cache blobs across
# builds.
cache-from: type=gha
cache-to: type=gha,mode=max
29 changes: 17 additions & 12 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ IMAGE_NAME=kubeaid-bootstrap-script-dev:latest

.PHONY: build-image-dev
build-image-dev:
@docker build -f ./build/Dockerfile.dev --build-arg CPU_ARCHITECTURE=arm64 -t $(IMAGE_NAME) .
@docker build -f ./build/kubeaid-bootstrap-script/Dockerfile.dev --build-arg CPU_ARCHITECTURE=arm64 -t $(IMAGE_NAME) .

.PHONY: remove-image-dev
remove-image-dev:
Expand Down Expand Up @@ -43,22 +43,22 @@ remove-container-dev: stop-container-dev

.PHONY: generate-sample-config-aws-dev
generate-sample-config-aws-dev:
@go run ./cmd config generate aws
@go run ./cmd/kubeaid-bootstrap-script/ config generate aws

.PHONY: bootstrap-cluster-dev-aws
bootstrap-cluster-dev-aws:
@go run ./cmd cluster bootstrap aws \
@go run ./cmd/kubeaid-bootstrap-script/ cluster bootstrap aws \
--debug \
--config /app/outputs/kubeaid-bootstrap-script.config.yaml \
--skip-clusterctl-move
--config /app/outputs/kubeaid-bootstrap-script.aws.config.yaml
# --skip-kubeaid-config-setup
# --skip-clusterctl-move

.PHONY: bootstrap-cluster-dev-hetzner
bootstrap-cluster-dev-hetzner:
@go run ./cmd cluster bootstrap hetzner \
@go run ./cmd/kubeaid-bootstrap-script/ cluster bootstrap hetzner \
--debug \
--config /app/outputs/kubeaid-bootstrap-script.config.yaml \
--skip-clusterctl-move
--config /app/outputs/kubeaid-bootstrap-script.hetzner.config.yaml \
--skip-clusterctl-move
# --skip-kubeaid-config-setup

.PHONY: use-management-cluster
Expand All @@ -69,10 +69,15 @@ use-management-cluster:
use-provisioned-cluster:
export KUBECONFIG=./outputs/provisioned-cluster.kubeconfig.yaml

.PHONY: delete-provisioned-cluster
delete-provisioned-cluster-dev:
@go run ./cmd cluster delete \
--config /app/outputs/kubeaid-bootstrap-script.config.yaml
.PHONY: delete-provisioned-cluster-dev-aws
delete-provisioned-cluster-dev-aws:
@go run ./cmd/kubeaid-bootstrap-script/ cluster delete \
--config /app/outputs/kubeaid-bootstrap-script.aws.config.yaml

.PHONY: delete-provisioned-cluster-dev-hetzner
delete-provisioned-cluster-dev-hetzner:
@go run ./cmd/kubeaid-bootstrap-script/ cluster delete \
--config /app/outputs/kubeaid-bootstrap-script.hetzner.config.yaml

.PHONY: delete-management-cluster
delete-management-cluster:
Expand Down
47 changes: 45 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ The `KubeAid Bootstrap Script` is used to bootstrap Kubernetes clusters using Cl

- [Bootstrapping a self-managed cluster in AWS](https://github.com/Obmondo/KubeAid/blob/master/docs/aws/capi/cluster.md)

## Developer Guide
## Developer Guide (AWS)

> Make sure you have Docker installed on your system.
Expand All @@ -20,7 +20,31 @@ In a separate terminal window, use `make exec-container-dev` to execute into the

Once you're inside the container, use `make generate-sample-config-aws-dev` to generate a sample config file at [./outputs/kubeaid-bootstrap-script.config.yaml](./outputs/kubeaid-bootstrap-script.config.yaml), targeting the AWS cloud provider. Adjust the config file according to your needs.

Then run `make bootstrap-cluster-dev` to bootstrap the cluster!
Export your AWS credentials as environment variables, like so :

```sh
export AWS_REGION=""
export AWS_ACCESS_KEY_ID=""
export AWS_SECRET_ACCESS_KEY=""
export AWS_SESSION_TOKEN=""
```

Then run `make bootstrap-cluster-dev-aws` to bootstrap the cluster!

> [!NOTE]
> If the `clusterawsadm bootstrap iam create-cloudformation-stack` command errors out with this message :
>
> the IAM CloudFormation Stack create / update failed and it's currently in a `ROLLBACK_COMPLETE` state
>
> then there are probably pre-existing IAM resources with overlapping names. Delete them manually from the AWS Console, and then retry running the script. To find them, filter the IAM roles and policies in the corresponding region with the keywords : `cluster` / `clusterapi`.
If cluster provisioning gets stuck, then debug by :

- checking the logs of the ClusterAPI related pods.

- SSHing into the control-plane node. You can view cloud-init output logs stored at `/var/log/cloud-init-output.log`.

If you want to delete the provisioned cluster, then execute : `make delete-provisioned-cluster-dev-aws`.

## TODOs

Expand All @@ -30,6 +54,13 @@ Then run `make bootstrap-cluster-dev` to bootstrap the cluster!
- [ ] Support adding admin SSH keys via config file.
- [ ] Support using HTTPS for ArgoCD apps.
- [ ] Use ArgoCD sync waves so that we don't need to explicitly sync the Infrastructure Provider component first.
- [x] Support enabling `Audit Logging`.
- [x] Switch to IAM Role from (temporary) credentials after cluster bootstrap.
- [x] ETCD metrics enabled.
- [x] Support scale to / from zero for the node-groups.
> Currently, I have added extra ClusterRole and ClusterRoleBinding in the KubeAid [cluster-autoscaler Helm chart](https://github.com/Obmondo/kubeaid/tree/master/argocd-helm-charts/cluster-autoscaler) to support this feature.
> But I have also opened an issue in the kubernetes-sigs/autoscaler repository regarding this : [Allow adding extra rules to the Role / ClusterRole template of the Cluster AutoScaler Helm chart](https://github.com/kubernetes/autoscaler/issues/7680).
- [ ] `recover cluster` command

## REFERENCES

Expand All @@ -46,3 +77,15 @@ Then run `make bootstrap-cluster-dev` to bootstrap the cluster!
- [Secret Rotation](https://github.com/bitnami-labs/sealed-secrets?tab=readme-ov-file#secret-rotation)

- [Kubernetes Backups, Upgrades, Migrations - with Velero](https://youtu.be/zybLTQER0yY?si=qOZcizBqPOeouJ7y)

- [Failover](https://docs.hetzner.com/robot/dedicated-server/ip/failover/)

- [Auditing](https://kubernetes.io/docs/tasks/debug/debug-cluster/audit/)

- [Kube API server args](https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver/)

- [Using IAM roles in management cluster instead of AWS credentials](https://cluster-api-aws.sigs.k8s.io/topics/using-iam-roles-in-mgmt-cluster)

- [KubeadmControlPlane CRD](https://github.com/kubernetes-sigs/cluster-api/blob/main/controlplane/kubeadm/config/crd/bases/controlplane.cluster.x-k8s.io_kubeadmcontrolplanes.yaml)

- [How can you call a helm 'helper' template from a subchart with the correct context?](https://stackoverflow.com/questions/47791971/how-can-you-call-a-helm-helper-template-from-a-subchart-with-the-correct-conte)
28 changes: 28 additions & 0 deletions build/hetzner-failover-script/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# syntax=docker/dockerfile:1

#--- Builder stage ---
# Compiles the hetzner-failover-script binary from ./cmd/hetzner-failover-script.

FROM golang:1.23.0 AS builder

WORKDIR /app

# Copy only the module files first, so the dependency download layer stays cached until go.mod /
# go.sum change.
COPY go.mod go.sum ./
RUN go mod download

COPY . .
RUN go build -o hetzner-failover-script ./cmd/hetzner-failover-script

#--- Packager stage ---
# Final image, which only carries the compiled binary and its runtime tooling.

FROM golang:1.23.0 AS packages

# Set the maintainer label
LABEL org.opencontainers.image.authors="ashish@obmondo.com, archisman@obmondo.com"

# NOTE : The golang:1.23.0 image is Debian based, so the Alpine package manager (apk) isn't
#        available in it. Packages must be installed using apt-get.
RUN apt-get update \
  && apt-get install -y --no-install-recommends procps \
  && rm -rf /var/lib/apt/lists/*

WORKDIR /root/

COPY --from=builder /app/hetzner-failover-script .

CMD ["./hetzner-failover-script"]
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,21 @@ COPY go.mod go.sum ./
RUN go mod download

COPY . .
RUN go build -o kubeaid-bootstrap-script ./cmd
RUN go build -o kubeaid-bootstrap-script ./cmd/kubeaid-bootstrap-script

#--- Packager stage ---

FROM golang:1.23.0 AS packages

# Set the maintainer label
LABEL org.opencontainers.image.authors="ashish@obmondo.com, archisman@obmondo.com"

WORKDIR /

COPY ./scripts/install-prerequisites.sh /install-prerequisites.sh
RUN chmod +x /install-prerequisites.sh
RUN CPU_ARCHITECTURE=$([ "$(uname -m)" = "x86_64" ] && echo "amd64" || echo "arm64") \
/install-prerequisites.sh
/install-prerequisites.sh

COPY --from=builder /app/kubeaid-bootstrap-script /usr/local/bin/kubeaid-bootstrap-script

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ WORKDIR /
COPY ./scripts/install-prerequisites.sh /install-prerequisites.sh
RUN chmod +x /install-prerequisites.sh
RUN CPU_ARCHITECTURE=$([ "$(uname -m)" = "x86_64" ] && echo "amd64" || echo "arm64") \
/install-prerequisites.sh
/install-prerequisites.sh

WORKDIR /app

Expand Down
86 changes: 86 additions & 0 deletions cmd/hetzner-failover-script/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package main

import (
"context"
"log"
"log/slog"
"os"
"time"

"github.com/Obmondo/kubeaid-bootstrap-script/utils"
"github.com/Obmondo/kubeaid-bootstrap-script/utils/assert"
"github.com/floshodan/hrobot-go/hrobot"
)

// main points the Hetzner Failover IP at the node this script is running on.
//
// Configuration is read from environment variables :
//
//   - FAILOVER_IP : the Failover IP that should be switched (required).
//   - NODE_IP : IP of the current node, i.e. the desired target of the Failover IP (required).
//   - API_USERNAME and API_PASSWORD, or API_TOKEN : credentials for the Hetzner Robot API. When
//     both kinds are provided, the username / password pair takes precedence.
//
// If the Failover IP is already routed to NODE_IP, the script exits without doing anything.
// Otherwise it requests the switch and polls the Hetzner Robot API once a minute, until the
// Failover IP reports NODE_IP as its active server.
func main() {
	ctx := context.Background()

	// Read required environment variables.
	var (
		failoverIP = utils.GetEnv("FAILOVER_IP")

		nodeIP = utils.GetEnv("NODE_IP")

		username = os.Getenv("API_USERNAME") // (optional).
		password = os.Getenv("API_PASSWORD") // (optional).

		apiToken = os.Getenv("API_TOKEN") // (optional).
	)

	// Construct Hetzner Robot API client.
	var hetznerRobotClient *hrobot.Client
	switch {
	case len(username) > 0 && len(password) > 0:
		hetznerRobotClient = hrobot.NewClient(hrobot.WithBasicAuth(username, password))

	case len(apiToken) > 0:
		hetznerRobotClient = hrobot.NewClient(hrobot.WithToken(apiToken))

	default:
		log.Fatal("Either provide username and password / api token as credentials, to communicate with the Hetzner Robot API")
	}

	/*
		A Failover IP is an additional IP that you can switch from one server to another. You can order
		it for any Hetzner dedicated root server, and you can switch it to any other Hetzner dedicated
		root server, regardless of location.
		Switching a failover IP takes between 90 and 110 seconds.
		REFERENCE : https://docs.hetzner.com/robot/dedicated-server/ip/failover/.
	*/
	// Hetzner Robot Failover IP API REFERENCE :
	// https://robot.hetzner.com/doc/webservice/en.html#failover.

	// Get the Failover IP's current active server IP.
	failoverIPDetails, _, err := hetznerRobotClient.Failover.GetFailoverIP(ctx, failoverIP)
	assert.AssertErrNil(ctx, err, "Failed getting Failover IP details")

	activeServerIP := failoverIPDetails.ActiveServerIP
	slog.InfoContext(ctx, "Detected active server", slog.String("ip", activeServerIP))

	if activeServerIP == nodeIP {
		slog.InfoContext(ctx, "Active server IP is already same as the current server IP")
		return
	}

	// Update Failover IP to the current node's IP (the current node, on which this script is
	// running).
	// The target must be nodeIP : passing activeServerIP would ask Hetzner to route the Failover IP
	// to the server that already holds it, and the wait loop below would then never terminate.
	// NOTE : Contributed :
	// https://github.com/floshodan/hrobot-go/commit/700f8ef9fdac565129608b3a50583b4b6564ff34.
	_, _, err = hetznerRobotClient.Failover.SwitchFailover(ctx, failoverIP, nodeIP)
	assert.AssertErrNil(ctx, err, "Failed switching Failover IP to the current node IP")

	// Wait for the update to complete.
	for {
		failoverIPDetails, _, err := hetznerRobotClient.Failover.GetFailoverIP(ctx, failoverIP)
		assert.AssertErrNil(ctx, err, "Failed getting Failover IP details")

		if failoverIPDetails.ActiveServerIP == nodeIP {
			slog.InfoContext(ctx, "Successfully updated Failover IP", slog.String("active-server-ip", nodeIP))
			break
		}

		slog.InfoContext(ctx, "Waiting for the Failover IP update to complete. Sleeping for a minute....")
		time.Sleep(time.Minute)
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ import (
)

var AWSCmd = &cobra.Command{
Use: "aws",
Use: "aws",
Short: "Bootstrap a self-managed Kubernetes cluster in AWS",
Run: func(cmd *cobra.Command, args []string) {
core.BootstrapCluster(cmd.Context(), skipKubeAidConfigSetup, skipClusterctlMove, aws.NewAWSCloudProvider(), false)
},
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ import (
)

var HetznerCmd = &cobra.Command{
Use: "hetzner",
Use: "hetzner",
Short: "Bootstrap a self-managed Kubernetes cluster in Hetzner (bare-metal)",
Run: func(cmd *cobra.Command, args []string) {
core.BootstrapCluster(cmd.Context(), skipKubeAidConfigSetup, skipClusterctlMove, hetzner.NewHetznerCloudProvider(), false)
},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package cluster

import (
"github.com/Obmondo/kubeaid-bootstrap-script/cmd/cluster/bootstrap"
delete_ "github.com/Obmondo/kubeaid-bootstrap-script/cmd/cluster/delete"
"github.com/Obmondo/kubeaid-bootstrap-script/cmd/kubeaid-bootstrap-script/cluster/bootstrap"
delete_ "github.com/Obmondo/kubeaid-bootstrap-script/cmd/kubeaid-bootstrap-script/cluster/delete"
"github.com/Obmondo/kubeaid-bootstrap-script/config"
"github.com/Obmondo/kubeaid-bootstrap-script/utils"
"github.com/spf13/cobra"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ import (
)

var DeleteCmd = &cobra.Command{
Use: "delete",
Use: "delete",
Short: "Delete a provisioned cluster",
Run: func(cmd *cobra.Command, args []string) {
core.DeleteCluster(cmd.Context())
},
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package config

import (
"github.com/Obmondo/kubeaid-bootstrap-script/cmd/config/generate"
"github.com/Obmondo/kubeaid-bootstrap-script/cmd/kubeaid-bootstrap-script/config/generate"
"github.com/Obmondo/kubeaid-bootstrap-script/constants"
"github.com/spf13/cobra"
)
Expand All @@ -13,9 +13,7 @@ var ConfigCmd = &cobra.Command{
},
}

var (
ConfigFilePath string
)
var ConfigFilePath string

func init() {
// Subcommands.
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions cmd/main.go → cmd/kubeaid-bootstrap-script/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import (
"log/slog"
"os"

"github.com/Obmondo/kubeaid-bootstrap-script/cmd/cluster"
"github.com/Obmondo/kubeaid-bootstrap-script/cmd/config"
"github.com/Obmondo/kubeaid-bootstrap-script/cmd/kubeaid-bootstrap-script/cluster"
"github.com/Obmondo/kubeaid-bootstrap-script/cmd/kubeaid-bootstrap-script/config"
"github.com/Obmondo/kubeaid-bootstrap-script/constants"
"github.com/Obmondo/kubeaid-bootstrap-script/utils/logger"
"github.com/spf13/cobra"
Expand Down
Loading

0 comments on commit 693802f

Please sign in to comment.