From f30391c170f120348021d58c1cd6be2eda94158c Mon Sep 17 00:00:00 2001 From: Taishi Kasuga Date: Sat, 28 Sep 2024 15:55:12 +0900 Subject: [PATCH] fix --- .github/workflows/test.yaml | 36 +++++++++++++++---------------- bin/pubsub | 1 + test/test_against_cluster_down.rb | 26 ++++++++++++++++++++-- 3 files changed, 43 insertions(+), 20 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 733a123..d24d439 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -67,9 +67,9 @@ jobs: ruby-version: ${{ matrix.ruby || '3.3' }} bundler-cache: true - name: Pull Docker images - run: docker compose -f $DOCKER_COMPOSE_FILE pull + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE pull - name: Run containers - run: docker compose -f $DOCKER_COMPOSE_FILE up -d + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE up -d - name: Wait for Redis cluster to be ready run: bundle exec rake wait - name: Print containers @@ -77,7 +77,7 @@ jobs: - name: Run minitest run: bundle exec rake ${{ matrix.task || 'test' }} - name: Stop containers - run: docker compose -f $DOCKER_COMPOSE_FILE down || true + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE down || true nat-ted-env: name: NAT-ted Environments timeout-minutes: 5 @@ -98,9 +98,9 @@ jobs: host_ip_addr=$(ip a | grep eth0 | grep inet | awk '{print $2}' | cut -d'/' -f1) echo "HOST_IP_ADDR=$host_ip_addr" >> $GITHUB_ENV - name: Pull Docker images - run: docker compose -f $DOCKER_COMPOSE_FILE pull + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE pull - name: Run containers - run: docker compose -f $DOCKER_COMPOSE_FILE up -d + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE up -d env: HOST_ADDR: ${{ env.HOST_IP_ADDR }} - name: Wait for nodes to be ready @@ -135,7 +135,7 @@ jobs: - name: Run minitest run: bundle exec rake test - name: Stop containers - run: docker compose -f $DOCKER_COMPOSE_FILE down || true + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE down || true lint: name: Lint timeout-minutes: 5 @@ -169,9 +169,9 @@ jobs: ruby-version: '3.3' bundler-cache: true - name: Pull Docker images - run: docker compose -f $DOCKER_COMPOSE_FILE pull + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE pull - name: Run containers - run: docker compose -f $DOCKER_COMPOSE_FILE up -d + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE up -d - name: Wait for Redis cluster to be ready run: bundle exec rake wait - name: Print containers @@ -209,7 +209,7 @@ jobs: docker compose -f $DOCKER_COMPOSE_FILE exec node$i tc qdisc del dev eth0 root netem || true done - name: Stop containers - run: docker compose -f $DOCKER_COMPOSE_FILE down || true + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE down || true ips: name: IPS timeout-minutes: 10 @@ -229,9 +229,9 @@ jobs: ruby-version: '3.3' bundler-cache: true - name: Pull Docker images - run: docker compose -f $DOCKER_COMPOSE_FILE pull + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE pull - name: Run containers - run: docker compose -f $DOCKER_COMPOSE_FILE up -d + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE up -d - name: Wait for Redis cluster to be ready run: bundle exec rake wait - name: Print containers @@ -241,7 +241,7 @@ jobs: - name: Run iteration per second run: bundle exec rake ips - name: Stop containers - run: docker compose -f $DOCKER_COMPOSE_FILE down || true + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE down || true profiling: name: Profiling timeout-minutes: 5 @@ -267,9 +267,9 @@ jobs: ruby-version: '3.3' bundler-cache: true - name: Pull Docker images - run: docker compose -f $DOCKER_COMPOSE_FILE pull + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE pull - name: Run containers - run: docker compose -f $DOCKER_COMPOSE_FILE up -d + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE up -d - name: Wait for Redis cluster to be ready run: bundle exec rake wait - name: Print containers @@ -279,7 +279,7 @@ jobs: env: PROFILE_MODE: ${{ matrix.mode }} - name: Stop containers - run: docker compose -f $DOCKER_COMPOSE_FILE down || true + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE down || true massive: name: Massive Cluster timeout-minutes: 10 @@ -323,9 +323,9 @@ jobs: sudo sysctl -w net.ipv4.tcp_max_syn_backlog=1024 # backlog setting sudo sysctl -w net.core.somaxconn=1024 # up the number of connections per port - name: Pull Docker images - run: docker compose -f $DOCKER_COMPOSE_FILE pull + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE pull - name: Run containers - run: docker compose -f $DOCKER_COMPOSE_FILE up -d + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE up -d - name: Print memory info run: free -w - name: Wait for Redis cluster to be ready @@ -339,4 +339,4 @@ jobs: env: PROFILE_MODE: pipelining_in_moderation - name: Stop containers - run: docker compose -f $DOCKER_COMPOSE_FILE down || true + run: docker compose --progress quiet -f $DOCKER_COMPOSE_FILE down || true diff --git a/bin/pubsub b/bin/pubsub index e19a53a..98edb81 100755 --- a/bin/pubsub +++ b/bin/pubsub @@ -55,6 +55,7 @@ module PubSubDebug end rescue StandardError => e log "#{role}: dead: #{e.class}: #{e.message}" + ps&.close raise end end diff --git a/test/test_against_cluster_down.rb b/test/test_against_cluster_down.rb index b770078..a1bb0f1 100644 --- a/test/test_against_cluster_down.rb +++ b/test/test_against_cluster_down.rb @@ -3,7 +3,7 @@ require 'testing_helper' class TestAgainstClusterDown < TestingWrapper - WAIT_SEC = 1 + WAIT_SEC = 0.1 def setup @captured_commands = ::Middlewares::CommandCapture::CommandBuffer.new @@ -15,6 +15,8 @@ def setup @last_pubsub_message = nil @down_counter_lock = Mutex.new @pubsub_message_lock = Mutex.new + @captured_commands.clear + @redirect_count.clear end def teardown @@ -39,6 +41,11 @@ def test_recoverability_from_cluster_down @controller = build_controller @controller.wait_for_cluster_to_be_ready + wait_for_threads_to_be_stable + + refute(refer_down_count.zero?, 'Case: cluster down count') + refute(@captured_commands.count('cluster', 'nodes').zero?, 'Case: cluster nodes calls') + client = build_client @clients << client @@ -47,7 +54,7 @@ def test_recoverability_from_cluster_down transaction_value1 = client.call('get', 'transaction', &:to_i) pubsub_message1 = refer_pubsub_message.to_i - sleep WAIT_SEC * 3 + sleep WAIT_SEC * 30 single_value2 = client.call('get', 'single', &:to_i) pipeline_value2 = client.call('get', 'pipeline', &:to_i) @@ -194,7 +201,22 @@ def update_pubsub_message(message) @pubsub_message_lock.synchronize { @last_pubsub_message = message } end + def refer_down_count + @down_counter_lock.synchronize { @cluster_down_error_count } + end + def refer_pubsub_message @pubsub_message_lock.synchronize { @last_pubsub_message } end + + def wait_for_threads_to_be_stable(attempts: 30) + loop do + raise MaxRetryExceeded if attempts <= 0 + + attempts -= 1 + before = refer_down_count + sleep WAIT_SEC * (@threads.size * 2) + break if before == refer_down_count + end + end end