Skip to content

Commit

Permalink
Garbage collect zombie containers
Browse files Browse the repository at this point in the history
Zombie containers are expected when the service restarts while a container
is running, as we don't have graceful cleanup in the service. They can
also be left behind unexpectedly by a crash.

For some reason, this also happens when the service hasn't
restarted (whether naturally or unexpectedly), and I haven't had a chance
to hunt down the cause yet.
  • Loading branch information
shepmaster committed Apr 2, 2024
1 parent 2499fe2 commit ce8b5e4
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 0 deletions.
1 change: 1 addition & 0 deletions ansible/roles/playground/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ vars_playground_repository_url: https://github.com/rust-lang/rust-playground.git

# Locations derived from the playground home directory.
vars_playground_checkout_path: "{{ vars_playground_home_path }}/rust-playground"
vars_playground_update_path: "{{ vars_playground_home_path }}/update.sh"
# Where the container garbage collection script is installed; the
# playground-gc service runs this path.
vars_playground_gc_path: "{{ vars_playground_home_path }}/gc.sh"
vars_playground_artifacts_path: "{{ vars_playground_home_path }}/playground-artifacts"
# Derived from the artifacts directory above.
vars_playground_executable_path: "{{ vars_playground_artifacts_path }}/ui"

Expand Down
12 changes: 12 additions & 0 deletions ansible/roles/playground/handlers/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,18 @@
state: restarted
daemon_reload: true

# Run the garbage collection service once, reloading systemd first so that a
# freshly templated unit file is picked up.
- name: start-playground-gc
  systemd:
    daemon_reload: true
    name: playground-gc
    state: started

# Restart the garbage collection timer, reloading systemd first so that a
# changed timer unit file takes effect.
- name: restart-playground-gc-timer
  systemd:
    daemon_reload: true
    name: playground-gc.timer
    state: restarted

- name: restart-playground
systemd:
name: playground
Expand Down
27 changes: 27 additions & 0 deletions ansible/roles/playground/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,33 @@
state: started
enabled: true

# Install the garbage collection script; a change notifies the handler that
# starts the garbage collection service.
- name: Configure garbage collection script
  template:
    src: gc.sh
    dest: "{{ vars_playground_gc_path }}"
    # Quoted so the mode is always passed as an octal string and never
    # depends on how the YAML parser reads a leading-zero number.
    mode: "0755"
  notify: start-playground-gc

# Install the systemd service unit that runs the garbage collection script;
# a change notifies the handler that starts the service.
- name: Configure garbage collection script service
  template:
    src: playground-gc.service
    dest: /etc/systemd/system/playground-gc.service
    # Quoted so the mode is always passed as an octal string and never
    # depends on how the YAML parser reads a leading-zero number.
    mode: "0644"
  notify: start-playground-gc

# Install the systemd timer unit for the garbage collection service; a change
# notifies the handler that restarts the timer.
- name: Configure garbage collection script service timer
  template:
    src: playground-gc.timer
    dest: /etc/systemd/system/playground-gc.timer
    # Quoted so the mode is always passed as an octal string and never
    # depends on how the YAML parser reads a leading-zero number.
    mode: "0644"
  notify: restart-playground-gc-timer

# Make sure the timer is running now and is enabled so it starts at boot.
- name: Start and enable garbage collection script service timer
  systemd:
    enabled: true
    state: started
    name: playground-gc.timer

- name: Configure playground service
template:
src: playground.service
Expand Down
33 changes: 33 additions & 0 deletions ansible/roles/playground/templates/gc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash

#
# {{ ansible_managed }}
#

# {% raw %}

# Kills every running Docker container that was started more than
# MAX_TIME seconds ago.

set -euv -o pipefail

# How long a container must be running to be killed.
# Number of seconds.
MAX_TIME=3600

now=$(date "+%s")
to_kill=()

readarray -t container_ids < <(docker ps --format '{{ .ID }}' --no-trunc)

# Nothing is running, so there is nothing to collect. `docker inspect`
# rejects an empty argument list, and older versions of bash treat the
# expansion of an empty array as an unbound variable under `set -u`.
if [[ ${#container_ids[@]} -eq 0 ]]; then
    exit 0
fi

while read -r id started_at; do
    # Convert the start timestamp reported by Docker to seconds since the epoch.
    started_at=$(date --date "${started_at}" "+%s")
    running_time=$((now - started_at))

    if [[ "${running_time}" -gt "${MAX_TIME}" ]]; then
        to_kill+=("${id}")
    fi
done < <(docker inspect "${container_ids[@]}" --format '{{ .ID }} {{ .State.StartedAt }}')

# Only call `docker kill` when there is at least one container to kill.
if [[ ${#to_kill[@]} -gt 0 ]]; then
    docker kill "${to_kill[@]}"
fi

# {% endraw %}
10 changes: 10 additions & 0 deletions ansible/roles/playground/templates/playground-gc.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#
# {{ ansible_managed }}
#

# One-shot unit: each activation runs the garbage collection script once.

[Unit]
Description = Garbage collect dead playground containers

[Service]
Type = oneshot
ExecStart = {{ vars_playground_gc_path }}
13 changes: 13 additions & 0 deletions ansible/roles/playground/templates/playground-gc.timer
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#
# {{ ansible_managed }}
#

# No Unit= is given, so this timer activates the service that shares its
# name.

[Unit]
Description = Garbage collect playground containers every 15 minutes

[Timer]
# Run again 15 minutes after the service was last activated.
OnUnitActiveSec = 15min
# First run 15 minutes after the machine boots.
OnBootSec = 15min

[Install]
WantedBy = timers.target

0 comments on commit ce8b5e4

Please sign in to comment.