diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2fae92dcbfa..49b9f18a5ce 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -52,7 +52,7 @@ jobs: check-latest: true cache: true - name: "Run unit tests" - run: go test -v ./pkg/... + run: make test-unit test-integration: runs-on: "${{ matrix.runner }}" @@ -99,12 +99,15 @@ jobs: docker run --privileged --rm tonistiigi/binfmt --install linux/arm64 docker run --privileged --rm tonistiigi/binfmt --install linux/arm/v7 - name: "Run integration tests" + run: | + docker run -t --rm --privileged test-integration ./test-integration.sh + - name: "Run integration tests (flaky)" uses: nick-fields/retry@v3 with: timeout_minutes: 30 max_attempts: 2 retry_on: error - command: docker run -t --rm --privileged test-integration + command: docker run -t --rm --privileged test-integration ./test-integration.sh -test.only-flaky test-integration-ipv6: runs-on: "ubuntu-${{ matrix.ubuntu }}" @@ -133,7 +136,7 @@ jobs: echo '{"ipv6": true, "fixed-cidr-v6": "2001:db8:1::/64", "experimental": true, "ip6tables": true}' | sudo tee /etc/docker/daemon.json sudo systemctl restart docker - name: "Prepare integration test environment" - run: docker build -t test-integration-ipv6 --target test-integration-ipv6 --build-arg UBUNTU_VERSION=${UBUNTU_VERSION} --build-arg CONTAINERD_VERSION=${CONTAINERD_VERSION} . + run: docker build -t test-integration --target test-integration --build-arg UBUNTU_VERSION=${UBUNTU_VERSION} --build-arg CONTAINERD_VERSION=${CONTAINERD_VERSION} . - name: "Remove snap loopback devices (conflicts with our loopback devices in TestRunDevice)" run: | sudo systemctl disable --now snapd.service snapd.socket @@ -151,16 +154,11 @@ jobs: docker run --privileged --rm tonistiigi/binfmt --install linux/arm/v7 - name: "Run integration tests" # The nested IPv6 network inside docker and qemu is complex and needs a bunch of sysctl config. - # Therefore it's hard to debug why the IPv6 tests fail in such an isolation layer. + # Therefore, it's hard to debug why the IPv6 tests fail in such an isolation layer. # On the other side, using the host network is easier at configuration. # Besides, each job is running on a different instance, which means using host network here # is safe and has no side effects on others. - uses: nick-fields/retry@v3 - with: - timeout_minutes: 30 - max_attempts: 2 - retry_on: error - command: docker run --network host -t --rm --privileged test-integration-ipv6 + run: docker run --network host -t --rm --privileged test-integration ./test-integration.sh -test.only-ipv6 test-integration-rootless: runs-on: "ubuntu-${{ matrix.ubuntu }}" @@ -230,12 +228,14 @@ jobs: fi echo "WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622}" >> "$GITHUB_ENV" - name: "Test (network driver=slirp4netns, port driver=builtin)" + run: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622} ${TEST_TARGET} /test-integration-rootless.sh ./test-integration.sh + - name: "Test (network driver=slirp4netns, port driver=builtin) (flaky)" uses: nick-fields/retry@v3 with: timeout_minutes: 30 max_attempts: 2 retry_on: error - command: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622} ${TEST_TARGET} + command: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622} ${TEST_TARGET} /test-integration-rootless.sh ./test-integration.sh -test.only-flaky cross: runs-on: ubuntu-24.04 @@ -284,22 +284,18 @@ jobs: - name: "Prepare integration test environment" run: | sudo apt-get install -y expect + go install -v gotest.tools/gotestsum@v1.12.0 - name: "Ensure that the integration test suite is compatible with Docker" - uses: nick-fields/retry@v3 - with: - timeout_minutes: 30 - max_attempts: 2 - retry_on: error - # See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization - command: go test -p 1 -timeout 20m -v -exec sudo ./cmd/nerdctl/... -args -test.target=docker -test.allow-kill-daemon + run: ./test-integration.sh -test.target=docker - name: "Ensure that the IPv6 integration test suite is compatible with Docker" + run: ./test-integration.sh -test.target=docker -test.only-ipv6 + - name: "Ensure that the integration test suite is compatible with Docker (flaky only)" uses: nick-fields/retry@v3 with: timeout_minutes: 30 max_attempts: 2 retry_on: error - # See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization - command: go test -p 1 -timeout 20m -v -exec sudo ./cmd/nerdctl/... -args -test.target=docker -test.allow-kill-daemon -test.only-ipv6 + command: ./test-integration.sh -test.target=docker -test.only-flaky test-integration-windows: runs-on: windows-2022 @@ -317,6 +313,7 @@ jobs: cache: true check-latest: true - run: go install ./cmd/nerdctl + - run: go install -v gotest.tools/gotestsum@v1.12.0 - uses: actions/checkout@v4.2.1 with: repository: containerd/containerd @@ -330,10 +327,20 @@ jobs: env: ctrdVersion: 1.7.22 run: powershell hack/configure-windows-ci.ps1 - # TODO: Run unit tests + # FIXME: there is a lot more to fix to run on windows + # - name: "Run unit tests" + # run: | + # go test -v ./pkg/... - name: "Run integration tests" - # See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization - run: go test -p 1 -v ./cmd/nerdctl/... + run: | + ./test-integration.sh + - name: "Run integration tests (flaky)" + uses: nick-fields/retry@v3 + with: + timeout_minutes: 30 + max_attempts: 2 + retry_on: error + command: ./test-integration.sh -test.only-flaky test-integration-freebsd: name: FreeBSD diff --git a/Dockerfile b/Dockerfile index 7e7d8b8554c..d986978c635 100644 --- a/Dockerfile +++ b/Dockerfile @@ -281,7 +281,8 @@ ARG DEBIAN_FRONTEND=noninteractive # `expect` package contains `unbuffer(1)`, which is used for emulating TTY for testing RUN apt-get update -qq && apt-get install -qq --no-install-recommends \ expect \ - git + git \ + make COPY --from=goversion /GOVERSION /GOVERSION ARG TARGETARCH RUN curl -fsSL --proto '=https' --tlsv1.2 https://golang.org/dl/$(cat /GOVERSION).linux-${TARGETARCH:-amd64}.tar.gz | tar xzvC /usr/local @@ -318,8 +319,6 @@ RUN curl -o nydus-static.tgz -fsSL --proto '=https' --tlsv1.2 "https://github.co tar xzf nydus-static.tgz && \ mv nydus-static/nydus-image nydus-static/nydusd nydus-static/nydusify /usr/bin/ && \ rm nydus-static.tgz -CMD ["gotestsum", "--format=testname", "--rerun-fails=2", "--packages=./cmd/nerdctl/...", \ - "--", "-timeout=60m", "-p", "1", "-args", "-test.allow-kill-daemon"] FROM test-integration AS test-integration-rootless # Install SSH for creating systemd user session. @@ -342,17 +341,10 @@ RUN systemctl disable test-integration-ipfs-offline VOLUME /home/rootless/.local/share COPY ./Dockerfile.d/test-integration-rootless.sh / RUN chmod a+rx /test-integration-rootless.sh -CMD ["/test-integration-rootless.sh", \ - "gotestsum", "--format=testname", "--rerun-fails=2", "--packages=./cmd/nerdctl/...", \ - "--", "-timeout=60m", "-p", "1", "-args", "-test.allow-kill-daemon"] # test for CONTAINERD_ROOTLESS_ROOTLESSKIT_PORT_DRIVER=slirp4netns FROM test-integration-rootless AS test-integration-rootless-port-slirp4netns COPY ./Dockerfile.d/home_rootless_.config_systemd_user_containerd.service.d_port-slirp4netns.conf /home/rootless/.config/systemd/user/containerd.service.d/port-slirp4netns.conf RUN chown -R rootless:rootless /home/rootless/.config -FROM test-integration AS test-integration-ipv6 -CMD ["gotestsum", "--format=testname", "--rerun-fails=2", "--packages=./cmd/nerdctl/...", \ - "--", "-timeout=60m", "-p", "1", "-args", "-test.allow-kill-daemon", "-test.only-ipv6"] - FROM base AS demo diff --git a/Makefile b/Makefile index 0831c640047..ae4e18c94f3 100644 --- a/Makefile +++ b/Makefile @@ -86,6 +86,9 @@ lint-yaml: lint-shell: $(call recursive_wildcard,$(MAKEFILE_DIR)/,*.sh) shellcheck -a -x $^ +test-unit: + go test -v $(MAKEFILE_DIR)/pkg/... + binaries: nerdctl install: diff --git a/cmd/nerdctl/container/container_create_linux_test.go b/cmd/nerdctl/container/container_create_linux_test.go index 56b71ce854a..e0823ec3af0 100644 --- a/cmd/nerdctl/container/container_create_linux_test.go +++ b/cmd/nerdctl/container/container_create_linux_test.go @@ -187,7 +187,11 @@ func TestCreateWithTty(t *testing.T) { func TestIssue2993(t *testing.T) { testCase := nerdtest.Setup() - testCase.Require = test.Not(nerdtest.Docker) + testCase.Require = test.Require( + test.Not(nerdtest.Docker), + // Maybe the use of a custom data root has an impact? + nerdtest.IsFlaky("https://github.com/containerd/nerdctl/issues/3518"), + ) const ( containersPathKey = "containersPath" diff --git a/cmd/nerdctl/image/image_history_test.go b/cmd/nerdctl/image/image_history_test.go index 1d2e07e97fc..5086b3edb9f 100644 --- a/cmd/nerdctl/image/image_history_test.go +++ b/cmd/nerdctl/image/image_history_test.go @@ -76,6 +76,9 @@ func TestImageHistory(t *testing.T) { test.Not(test.Windows), // XXX Currently, history does not work on non-native platform, so, we cannot test reliably on other platforms test.Arm64, + // XXX this here is very likely breaking other tests because of one of the variants of + // https://github.com/containerd/nerdctl/issues/3513 so, making it private to try avoid that + nerdtest.Private, ), Setup: func(data test.Data, helpers test.Helpers) { helpers.Ensure("pull", "--platform", "linux/arm64", testutil.CommonImage) diff --git a/cmd/nerdctl/image/image_inspect_test.go b/cmd/nerdctl/image/image_inspect_test.go index 93278de9294..68ee93b0534 100644 --- a/cmd/nerdctl/image/image_inspect_test.go +++ b/cmd/nerdctl/image/image_inspect_test.go @@ -35,7 +35,7 @@ func TestImageInspectSimpleCases(t *testing.T) { testCase := &test.Case{ Description: "TestImageInspect", Setup: func(data test.Data, helpers test.Helpers) { - helpers.Ensure("pull", testutil.CommonImage) + helpers.Ensure("pull", "--quiet", testutil.CommonImage) }, SubTests: []*test.Case{ { diff --git a/pkg/cmd/builder/build_test.go b/pkg/cmd/builder/build_test.go index 954738cdbc6..97c89cd8aeb 100644 --- a/pkg/cmd/builder/build_test.go +++ b/pkg/cmd/builder/build_test.go @@ -18,6 +18,7 @@ package builder import ( "reflect" + "runtime" "testing" specs "github.com/opencontainers/image-spec/specs-go/v1" @@ -213,6 +214,10 @@ func TestParseBuildctlArgsForOCILayout(t *testing.T) { }, } + if runtime.GOOS == "windows" { + tests[1].expectedErr = "The system cannot find the path specified." + } + for _, test := range tests { t.Run(test.name, func(t *testing.T) { args, err := parseBuildContextFromOCILayout(test.ociLayoutName, test.ociLayoutPath) diff --git a/pkg/ocihook/ocihook.go b/pkg/ocihook/ocihook.go index ffa98041611..2ae963c997a 100644 --- a/pkg/ocihook/ocihook.go +++ b/pkg/ocihook/ocihook.go @@ -39,6 +39,7 @@ import ( "github.com/containerd/nerdctl/v2/pkg/bypass4netnsutil" "github.com/containerd/nerdctl/v2/pkg/dnsutil/hostsstore" "github.com/containerd/nerdctl/v2/pkg/labels" + "github.com/containerd/nerdctl/v2/pkg/lockutil" "github.com/containerd/nerdctl/v2/pkg/namestore" "github.com/containerd/nerdctl/v2/pkg/netutil" "github.com/containerd/nerdctl/v2/pkg/netutil/nettype" @@ -92,6 +93,27 @@ func Run(stdin io.Reader, stderr io.Writer, event, dataStore, cniPath, cniNetcon } }() + // FIXME: CNI plugins are not safe to use concurrently + // See + // https://github.com/containerd/nerdctl/issues/3518 + // https://github.com/containerd/nerdctl/issues/2908 + // and likely others + // Fixing these issues would require a lot of work, possibly even stopping using individual cni binaries altogether + // or at least being very mindful in what operation we call inside CNIEnv at what point, with filesystem locking. + // This below is a stopgap solution that just enforces a global lock + // Note this here is probably not enough, as concurrent CNI operations may happen outside of the scope of ocihooks + // through explicit calls to Remove, etc. + lockDir := filepath.Join(dataStore, "cnilock") + err = os.MkdirAll(lockDir, 0o777) + if err != nil { + return err + } + lock, err := lockutil.Lock(lockDir) + if err != nil { + return err + } + defer lockutil.Unlock(lock) + opts, err := newHandlerOpts(&state, dataStore, cniPath, cniNetconfPath) if err != nil { return err diff --git a/pkg/store/filestore_test.go b/pkg/store/filestore_test.go index 2496b96cbb1..c39fa749b44 100644 --- a/pkg/store/filestore_test.go +++ b/pkg/store/filestore_test.go @@ -169,10 +169,11 @@ func TestFileStoreConcurrent(t *testing.T) { go func() { lErr := tempStore.WithLock(func() error { - err := tempStore.Set([]byte("routine 1"), "concurrentkey") + // Windows does not allow files starting with con + err := tempStore.Set([]byte("routine 1"), "c0ncurrentkey") assert.NilError(t, err, "writing should not error") time.Sleep(1 * time.Second) - result, err := tempStore.Get("concurrentkey") + result, err := tempStore.Get("c0ncurrentkey") assert.NilError(t, err, "reading should not error") assert.Assert(t, string(result) == "routine 1") return nil @@ -183,10 +184,10 @@ func TestFileStoreConcurrent(t *testing.T) { go func() { time.Sleep(500 * time.Millisecond) lErr := tempStore.WithLock(func() error { - err := tempStore.Set([]byte("routine 2"), "concurrentkey") + err := tempStore.Set([]byte("routine 2"), "c0ncurrentkey") assert.NilError(t, err, "writing should not error") time.Sleep(1 * time.Second) - result, err := tempStore.Get("concurrentkey") + result, err := tempStore.Get("c0ncurrentkey") assert.NilError(t, err, "reading should not error") assert.Assert(t, string(result) == "routine 2") return nil @@ -195,10 +196,10 @@ func TestFileStoreConcurrent(t *testing.T) { }() lErr := tempStore.WithLock(func() error { - err := tempStore.Set([]byte("main routine 1"), "concurrentkey") + err := tempStore.Set([]byte("main routine 1"), "c0ncurrentkey") assert.NilError(t, err, "writing should not error") time.Sleep(1 * time.Second) - result, err := tempStore.Get("concurrentkey") + result, err := tempStore.Get("c0ncurrentkey") assert.NilError(t, err, "reading should not error") assert.Assert(t, string(result) == "main routine 1") return nil @@ -208,10 +209,10 @@ func TestFileStoreConcurrent(t *testing.T) { time.Sleep(750 * time.Millisecond) lErr = tempStore.WithLock(func() error { - err := tempStore.Set([]byte("main routine 2"), "concurrentkey") + err := tempStore.Set([]byte("main routine 2"), "c0ncurrentkey") assert.NilError(t, err, "writing should not error") time.Sleep(1 * time.Second) - result, err := tempStore.Get("concurrentkey") + result, err := tempStore.Get("c0ncurrentkey") assert.NilError(t, err, "reading should not error") assert.Assert(t, string(result) == "main routine 2") return nil diff --git a/pkg/testutil/testutil.go b/pkg/testutil/testutil.go index 4287db9b305..46f4459934b 100644 --- a/pkg/testutil/testutil.go +++ b/pkg/testutil/testutil.go @@ -800,6 +800,9 @@ func newBase(t *testing.T, ns string, ipv6Compatible bool, kubernetesCompatible } else if !base.EnableKubernetes && base.KubernetesCompatible { t.Skip("runner skips Kubernetes compatible tests in the non-Kubernetes environment") } + if !GetFlakyEnvironment() && !GetEnableKubernetes() && !GetEnableIPv6() { + t.Skip("legacy tests are considered flaky by default and are skipped unless in the flaky environment") + } var err error switch base.Target { case Nerdctl: diff --git a/test-integration.sh b/test-integration.sh new file mode 100755 index 00000000000..2eee4a44443 --- /dev/null +++ b/test-integration.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +# Copyright The containerd Authors. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# shellcheck disable=SC2034,SC2015 +set -o errexit -o errtrace -o functrace -o nounset -o pipefail +root="$(cd "$(dirname "${BASH_SOURCE[0]:-$PWD}")" 2>/dev/null 1>&2 && pwd)" +readonly root + +readonly timeout="60m" + +# See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization +gotestsum --format=testname --jsonfile /tmp/test-integration.log --packages="$root"/cmd/nerdctl/... -- -timeout="$timeout" -p 1 -args -test.allow-kill-daemon "$@" + +echo "These are the tests that took more than 10 seconds:" +gotestsum tool slowest --threshold 10s --jsonfile /tmp/test-integration.log