Skip to content

Commit

Permalink
Separate flaky and nonflaky tests
Browse files Browse the repository at this point in the history
Signed-off-by: apostasie <spam_blackhole@farcloser.world>
  • Loading branch information
apostasie committed Oct 8, 2024
1 parent 1ea9b9c commit a2285c7
Show file tree
Hide file tree
Showing 11 changed files with 116 additions and 58 deletions.
65 changes: 27 additions & 38 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
check-latest: true
cache: true
- name: "Run unit tests"
run: go test -v ./pkg/...
run: make test-unit

test-integration:
runs-on: "${{ matrix.runner }}"
Expand Down Expand Up @@ -99,12 +99,11 @@ jobs:
docker run --privileged --rm tonistiigi/binfmt --install linux/arm64
docker run --privileged --rm tonistiigi/binfmt --install linux/arm/v7
- name: "Run integration tests"
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 2
retry_on: error
command: docker run -t --rm --privileged test-integration
run: |
docker run -t --rm --privileged test-integration ./test-integration.sh
- name: "Run integration tests (flaky)"
run: |
docker run -t --rm --privileged test-integration ./test-integration.sh -test.only-flaky
test-integration-ipv6:
runs-on: "ubuntu-${{ matrix.ubuntu }}"
Expand Down Expand Up @@ -133,7 +132,7 @@ jobs:
echo '{"ipv6": true, "fixed-cidr-v6": "2001:db8:1::/64", "experimental": true, "ip6tables": true}' | sudo tee /etc/docker/daemon.json
sudo systemctl restart docker
- name: "Prepare integration test environment"
run: docker build -t test-integration-ipv6 --target test-integration-ipv6 --build-arg UBUNTU_VERSION=${UBUNTU_VERSION} --build-arg CONTAINERD_VERSION=${CONTAINERD_VERSION} .
run: docker build -t test-integration --target test-integration --build-arg UBUNTU_VERSION=${UBUNTU_VERSION} --build-arg CONTAINERD_VERSION=${CONTAINERD_VERSION} .
- name: "Remove snap loopback devices (conflicts with our loopback devices in TestRunDevice)"
run: |
sudo systemctl disable --now snapd.service snapd.socket
Expand All @@ -151,16 +150,11 @@ jobs:
docker run --privileged --rm tonistiigi/binfmt --install linux/arm/v7
- name: "Run integration tests"
# The nested IPv6 network inside docker and qemu is complex and needs a bunch of sysctl config.
# Therefore it's hard to debug why the IPv6 tests fail in such an isolation layer.
# Therefore, it's hard to debug why the IPv6 tests fail in such an isolation layer.
# On the other hand, using the host network is easier to configure.
# Besides, each job is running on a different instance, which means using host network here
# is safe and has no side effects on others.
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 2
retry_on: error
command: docker run --network host -t --rm --privileged test-integration-ipv6
run: docker run --network host -t --rm --privileged test-integration ./test-integration.sh -test.only-ipv6

test-integration-rootless:
runs-on: "ubuntu-${{ matrix.ubuntu }}"
Expand Down Expand Up @@ -230,12 +224,9 @@ jobs:
fi
echo "WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622}" >> "$GITHUB_ENV"
- name: "Test (network driver=slirp4netns, port driver=builtin)"
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 2
retry_on: error
command: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622} ${TEST_TARGET}
run: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622} ${TEST_TARGET} /test-integration-rootless.sh ./test-integration.sh
- name: "Test (network driver=slirp4netns, port driver=builtin) (flaky)"
run: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622} ${TEST_TARGET} /test-integration-rootless.sh ./test-integration.sh -test.only-flaky

cross:
runs-on: ubuntu-24.04
Expand Down Expand Up @@ -284,22 +275,13 @@ jobs:
- name: "Prepare integration test environment"
run: |
sudo apt-get install -y expect
go install -v gotest.tools/gotestsum@v1.12.0
- name: "Ensure that the integration test suite is compatible with Docker"
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 2
retry_on: error
# See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization
command: go test -p 1 -timeout 20m -v -exec sudo ./cmd/nerdctl/... -args -test.target=docker -test.allow-kill-daemon
run: ./test-integration.sh -test.target=docker
- name: "Ensure that the IPv6 integration test suite is compatible with Docker"
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 2
retry_on: error
# See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization
command: go test -p 1 -timeout 20m -v -exec sudo ./cmd/nerdctl/... -args -test.target=docker -test.allow-kill-daemon -test.only-ipv6
run: ./test-integration.sh -test.target=docker -test.only-ipv6
- name: "Ensure that the integration test suite is compatible with Docker (flaky only)"
run: ./test-integration.sh -test.target=docker -test.only-flaky

test-integration-windows:
runs-on: windows-2022
Expand All @@ -317,6 +299,7 @@ jobs:
cache: true
check-latest: true
- run: go install ./cmd/nerdctl
- run: go install -v gotest.tools/gotestsum@v1.12.0
- uses: actions/checkout@v4.2.1
with:
repository: containerd/containerd
Expand All @@ -330,10 +313,16 @@ jobs:
env:
ctrdVersion: 1.7.22
run: powershell hack/configure-windows-ci.ps1
# TODO: Run unit tests
# FIXME: there is a lot more to fix to run on windows
# - name: "Run unit tests"
# run: |
# go test -v ./pkg/...
- name: "Run integration tests"
# See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization
run: go test -p 1 -v ./cmd/nerdctl/...
run: |
./test-integration.sh
- name: "Run integration tests (flaky)"
run: |
./test-integration.sh -test.only-flaky
test-integration-freebsd:
name: FreeBSD
Expand Down
12 changes: 2 additions & 10 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,8 @@ ARG DEBIAN_FRONTEND=noninteractive
# `expect` package contains `unbuffer(1)`, which is used for emulating TTY for testing
RUN apt-get update -qq && apt-get install -qq --no-install-recommends \
expect \
git
git \
make
COPY --from=goversion /GOVERSION /GOVERSION
ARG TARGETARCH
RUN curl -fsSL --proto '=https' --tlsv1.2 https://golang.org/dl/$(cat /GOVERSION).linux-${TARGETARCH:-amd64}.tar.gz | tar xzvC /usr/local
Expand Down Expand Up @@ -318,8 +319,6 @@ RUN curl -o nydus-static.tgz -fsSL --proto '=https' --tlsv1.2 "https://github.co
tar xzf nydus-static.tgz && \
mv nydus-static/nydus-image nydus-static/nydusd nydus-static/nydusify /usr/bin/ && \
rm nydus-static.tgz
CMD ["gotestsum", "--format=testname", "--rerun-fails=2", "--packages=./cmd/nerdctl/...", \
"--", "-timeout=60m", "-p", "1", "-args", "-test.allow-kill-daemon"]

FROM test-integration AS test-integration-rootless
# Install SSH for creating systemd user session.
Expand All @@ -342,17 +341,10 @@ RUN systemctl disable test-integration-ipfs-offline
VOLUME /home/rootless/.local/share
COPY ./Dockerfile.d/test-integration-rootless.sh /
RUN chmod a+rx /test-integration-rootless.sh
CMD ["/test-integration-rootless.sh", \
"gotestsum", "--format=testname", "--rerun-fails=2", "--packages=./cmd/nerdctl/...", \
"--", "-timeout=60m", "-p", "1", "-args", "-test.allow-kill-daemon"]

# test for CONTAINERD_ROOTLESS_ROOTLESSKIT_PORT_DRIVER=slirp4netns
FROM test-integration-rootless AS test-integration-rootless-port-slirp4netns
COPY ./Dockerfile.d/home_rootless_.config_systemd_user_containerd.service.d_port-slirp4netns.conf /home/rootless/.config/systemd/user/containerd.service.d/port-slirp4netns.conf
RUN chown -R rootless:rootless /home/rootless/.config

FROM test-integration AS test-integration-ipv6
CMD ["gotestsum", "--format=testname", "--rerun-fails=2", "--packages=./cmd/nerdctl/...", \
"--", "-timeout=60m", "-p", "1", "-args", "-test.allow-kill-daemon", "-test.only-ipv6"]

FROM base AS demo
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ lint-yaml:
lint-shell: $(call recursive_wildcard,$(MAKEFILE_DIR)/,*.sh)
shellcheck -a -x $^

# Run the Go unit tests, verbosely, for every package below $(MAKEFILE_DIR)/pkg.
test-unit:
go test -v $(MAKEFILE_DIR)/pkg/...

binaries: nerdctl

install:
Expand Down
6 changes: 5 additions & 1 deletion cmd/nerdctl/container/container_create_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,11 @@ func TestCreateWithTty(t *testing.T) {
func TestIssue2993(t *testing.T) {
testCase := nerdtest.Setup()

testCase.Require = test.Not(nerdtest.Docker)
testCase.Require = test.Require(
test.Not(nerdtest.Docker),
// Maybe the use of a custom data root has an impact?
nerdtest.IsFlaky("https://github.com/containerd/nerdctl/issues/3518"),
)

const (
containersPathKey = "containersPath"
Expand Down
3 changes: 3 additions & 0 deletions cmd/nerdctl/image/image_history_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ func TestImageHistory(t *testing.T) {
test.Not(test.Windows),
// XXX Currently, history does not work on non-native platform, so, we cannot test reliably on other platforms
test.Arm64,
// XXX this here is very likely breaking other tests because of one of the variants of
// https://github.com/containerd/nerdctl/issues/3513 so, making it private to try to avoid that
nerdtest.Private,
),
Setup: func(data test.Data, helpers test.Helpers) {
helpers.Ensure("pull", "--platform", "linux/arm64", testutil.CommonImage)
Expand Down
2 changes: 1 addition & 1 deletion cmd/nerdctl/image/image_inspect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func TestImageInspectSimpleCases(t *testing.T) {
testCase := &test.Case{
Description: "TestImageInspect",
Setup: func(data test.Data, helpers test.Helpers) {
helpers.Ensure("pull", testutil.CommonImage)
helpers.Ensure("pull", "--quiet", testutil.CommonImage)
},
SubTests: []*test.Case{
{
Expand Down
5 changes: 5 additions & 0 deletions pkg/cmd/builder/build_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package builder

import (
"reflect"
"runtime"
"testing"

specs "github.com/opencontainers/image-spec/specs-go/v1"
Expand Down Expand Up @@ -213,6 +214,10 @@ func TestParseBuildctlArgsForOCILayout(t *testing.T) {
},
}

if runtime.GOOS == "windows" {
tests[1].expectedErr = "The system cannot find the path specified."
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
args, err := parseBuildContextFromOCILayout(test.ociLayoutName, test.ociLayoutPath)
Expand Down
22 changes: 22 additions & 0 deletions pkg/ocihook/ocihook.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (
"github.com/containerd/nerdctl/v2/pkg/bypass4netnsutil"
"github.com/containerd/nerdctl/v2/pkg/dnsutil/hostsstore"
"github.com/containerd/nerdctl/v2/pkg/labels"
"github.com/containerd/nerdctl/v2/pkg/lockutil"
"github.com/containerd/nerdctl/v2/pkg/namestore"
"github.com/containerd/nerdctl/v2/pkg/netutil"
"github.com/containerd/nerdctl/v2/pkg/netutil/nettype"
Expand Down Expand Up @@ -92,6 +93,27 @@ func Run(stdin io.Reader, stderr io.Writer, event, dataStore, cniPath, cniNetcon
}
}()

// FIXME: CNI plugins are not safe to use concurrently
// See
// https://github.com/containerd/nerdctl/issues/3518
// https://github.com/containerd/nerdctl/issues/2908
// and likely others
// Fixing these issues would require a lot of work, possibly even stopping using individual cni binaries altogether
// or at least being very mindful in what operation we call inside CNIEnv at what point, with filesystem locking.
// This below is a stopgap solution that just enforces a global lock
// Note this here is probably not enough, as concurrent CNI operations may happen outside of the scope of ocihooks
// through explicit calls to Remove, etc.
lockDir := filepath.Join(dataStore, "cnilock")
err = os.MkdirAll(lockDir, 0o777)
if err != nil {
return err
}
lock, err := lockutil.Lock(lockDir)
if err != nil {
return err
}
defer lockutil.Unlock(lock)

opts, err := newHandlerOpts(&state, dataStore, cniPath, cniNetconfPath)
if err != nil {
return err
Expand Down
17 changes: 9 additions & 8 deletions pkg/store/filestore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,11 @@ func TestFileStoreConcurrent(t *testing.T) {

go func() {
lErr := tempStore.WithLock(func() error {
err := tempStore.Set([]byte("routine 1"), "concurrentkey")
// Windows does not allow files starting with con
err := tempStore.Set([]byte("routine 1"), "c0ncurrentkey")
assert.NilError(t, err, "writing should not error")
time.Sleep(1 * time.Second)
result, err := tempStore.Get("concurrentkey")
result, err := tempStore.Get("c0ncurrentkey")
assert.NilError(t, err, "reading should not error")
assert.Assert(t, string(result) == "routine 1")
return nil
Expand All @@ -183,10 +184,10 @@ func TestFileStoreConcurrent(t *testing.T) {
go func() {
time.Sleep(500 * time.Millisecond)
lErr := tempStore.WithLock(func() error {
err := tempStore.Set([]byte("routine 2"), "concurrentkey")
err := tempStore.Set([]byte("routine 2"), "c0ncurrentkey")
assert.NilError(t, err, "writing should not error")
time.Sleep(1 * time.Second)
result, err := tempStore.Get("concurrentkey")
result, err := tempStore.Get("c0ncurrentkey")
assert.NilError(t, err, "reading should not error")
assert.Assert(t, string(result) == "routine 2")
return nil
Expand All @@ -195,10 +196,10 @@ func TestFileStoreConcurrent(t *testing.T) {
}()

lErr := tempStore.WithLock(func() error {
err := tempStore.Set([]byte("main routine 1"), "concurrentkey")
err := tempStore.Set([]byte("main routine 1"), "c0ncurrentkey")
assert.NilError(t, err, "writing should not error")
time.Sleep(1 * time.Second)
result, err := tempStore.Get("concurrentkey")
result, err := tempStore.Get("c0ncurrentkey")
assert.NilError(t, err, "reading should not error")
assert.Assert(t, string(result) == "main routine 1")
return nil
Expand All @@ -208,10 +209,10 @@ func TestFileStoreConcurrent(t *testing.T) {
time.Sleep(750 * time.Millisecond)

lErr = tempStore.WithLock(func() error {
err := tempStore.Set([]byte("main routine 2"), "concurrentkey")
err := tempStore.Set([]byte("main routine 2"), "c0ncurrentkey")
assert.NilError(t, err, "writing should not error")
time.Sleep(1 * time.Second)
result, err := tempStore.Get("concurrentkey")
result, err := tempStore.Get("c0ncurrentkey")
assert.NilError(t, err, "reading should not error")
assert.Assert(t, string(result) == "main routine 2")
return nil
Expand Down
3 changes: 3 additions & 0 deletions pkg/testutil/testutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,9 @@ func newBase(t *testing.T, ns string, ipv6Compatible bool, kubernetesCompatible
} else if !base.EnableKubernetes && base.KubernetesCompatible {
t.Skip("runner skips Kubernetes compatible tests in the non-Kubernetes environment")
}
if !GetFlakyEnvironment() && !GetEnableKubernetes() && !GetEnableIPv6() {
t.Skip("legacy tests are considered flaky by default and are skipped unless in the flaky environment")
}
var err error
switch base.Target {
case Nerdctl:
Expand Down
36 changes: 36 additions & 0 deletions test-integration.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env bash

# Copyright The containerd Authors.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Runs the integration test suite (every package under cmd/nerdctl) through gotestsum.
#
# Usage: test-integration.sh [test flags...]
#   All arguments are forwarded verbatim to the test binaries (after `-args`).
#   When `-test.only-flaky` is among them, gotestsum is additionally told to
#   re-run failing tests (`--rerun-fails=2`).
#
# Exit status: the status of the test run when it failed; otherwise the status
# of the "slowest tests" report.
# shellcheck disable=SC2034,SC2015
set -o errexit -o errtrace -o functrace -o nounset -o pipefail
root="$(cd "$(dirname "${BASH_SOURCE[0]:-$PWD}")" 2>/dev/null 1>&2 && pwd)"
readonly root
readonly timeout="60m"
# Single definition of the JSON log location: written by the test run, read by the report.
readonly logfile="/tmp/test-integration.log"

args=(--format=testname --jsonfile "$logfile" --packages="$root"/cmd/nerdctl/...)

# Only the flaky run is allowed to retry failing tests.
for arg in "$@"; do
  if [ "$arg" == "-test.only-flaky" ]; then
    args+=("--rerun-fails=2")
    break
  fi
done

# `-p 1`: see https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization
# The status is captured instead of letting errexit abort the script, so that
# the report below is still printed when tests fail.
test_status=0
gotestsum "${args[@]}" -- -timeout="$timeout" -p 1 -args -test.allow-kill-daemon "$@" || test_status=$?

echo "These are the tests that took more than 10 seconds:"
report_status=0
gotestsum tool slowest --threshold 10s --jsonfile "$logfile" || report_status=$?

# A test failure takes precedence over a report failure.
if [ "$test_status" -ne 0 ]; then
  exit "$test_status"
fi
exit "$report_status"

0 comments on commit a2285c7

Please sign in to comment.