From 395dfaecfb42d9f7117f5f633ed3ae4556d67010 Mon Sep 17 00:00:00 2001 From: Taishi Kasuga Date: Wed, 2 Oct 2024 10:38:53 +0900 Subject: [PATCH 1/7] test: move a test case to the right place --- bin/pubsub | 2 +- test/cluster_controller.rb | 4 +++- test/test_against_cluster_broken.rb | 31 ++++++++++++++++++++--------- test/test_against_cluster_state.rb | 8 -------- 4 files changed, 26 insertions(+), 19 deletions(-) diff --git a/bin/pubsub b/bin/pubsub index bd19f38..d043a71 100755 --- a/bin/pubsub +++ b/bin/pubsub @@ -49,7 +49,7 @@ module PubSubDebug handle_errors('Subscriber') do e = ps.next_event(0.01) log "#{role}: recv: #{e.nil? ? 'nil' : e}" - ps.call('ssubscribe', c) if e.first == 'sunsubscribe' + ps.call('ssubscribe', c) if !e.nil? && e.first == 'sunsubscribe' end ensure sleep 1.0 diff --git a/test/cluster_controller.rb b/test/cluster_controller.rb index 0ddb752..69e3697 100644 --- a/test/cluster_controller.rb +++ b/test/cluster_controller.rb @@ -398,9 +398,11 @@ def wait_failover(clients, primary_node_key:, replica_node_key:, max_attempts:) def wait_replication_delay(clients, replica_size:, timeout:) timeout_msec = timeout.to_i * 1000 + server_side_timeout = timeout_msec > 100 ? timeout_msec - 100 : 10 + wait_for_state(clients, max_attempts: clients.size + 1) do |client| swap_timeout(client, timeout: 0.1) do |cli| - cli.blocking_call(timeout, 'WAIT', replica_size, timeout_msec - 100) if primary_client?(cli) + cli.blocking_call(timeout, 'WAIT', replica_size, server_side_timeout) if primary_client?(cli) end true rescue ::RedisClient::ConnectionError diff --git a/test/test_against_cluster_broken.rb b/test/test_against_cluster_broken.rb index 164a7cd..73f32ed 100644 --- a/test/test_against_cluster_broken.rb +++ b/test/test_against_cluster_broken.rb @@ -21,9 +21,7 @@ def setup print "\n" @logger.info('setup: test') prepare_test_data - @clients = Array.new(3) do - build_client.tap { |c| c.call('echo', 'init') } - end + @clients = Array.new(3) { build_client.tap { |c| c.call('echo', 'init') } } end def teardown @@ -32,28 +30,29 @@ def teardown @clients.each(&:close) @controller&.close refute(@captured_commands.count('cluster', 'nodes').zero?, @captured_commands.to_a.map(&:command)) - print "#{@redirect_count.get}, "\ - "ClusterNodesCall: #{@captured_commands.count('cluster', 'nodes')}, "\ - "ClusterDownError: #{@cluster_down_error_count} = " end def test_client_patience + failover_manually + wait_for_cluster_to_be_ready + do_assertions(offset: 0) + # a replica sacrifice_replica = @controller.select_sacrifice_of_replica kill_a_node(sacrifice_replica) wait_for_cluster_to_be_ready(ignore: [sacrifice_replica]) - do_assertions(offset: 0) + do_assertions(offset: 1) # a primary sacrifice_primary = @controller.select_sacrifice_of_primary kill_a_node(sacrifice_primary) wait_for_cluster_to_be_ready(ignore: [sacrifice_replica, sacrifice_primary]) - do_assertions(offset: 1) + do_assertions(offset: 2) # recovery revive_dead_nodes wait_for_cluster_to_be_ready - do_assertions(offset: 2) + do_assertions(offset: 3) end private @@ -122,6 +121,8 @@ def do_assertions(offset:) assert_equal(want, got, 'Case: Transaction: SET') end end + + log_metrics end end @@ -139,6 +140,12 @@ def wait_for_cluster_to_be_ready(ignore: []) end end + def failover_manually + log_info('failover') do + @controller.failover + end + end + def kill_a_node(sacrifice) log_info("kill #{sacrifice.config.host}:#{sacrifice.config.port}") do refute_nil(sacrifice, "#{sacrifice.config.host}:#{sacrifice.config.port}") @@ -171,6 +178,12 @@ def log_info(message) @logger.info(" done: #{message}") end + def log_metrics + print "#{@redirect_count.get}, "\ + "ClusterNodesCall: #{@captured_commands.count('cluster', 'nodes')}, "\ + "ClusterDownError: #{@cluster_down_error_count}\n" + end + def retryable(attempts: MAX_ATTEMPTS, wait_sec: WAIT_SEC) loop do raise MaxRetryExceeded if attempts <= 0 diff --git a/test/test_against_cluster_state.rb b/test/test_against_cluster_state.rb index 5ebdeb2..f7e2c4f 100644 --- a/test/test_against_cluster_state.rb +++ b/test/test_against_cluster_state.rb @@ -29,14 +29,6 @@ def teardown "ClusterNodesCall: #{@captured_commands.count('cluster', 'nodes')} = " end - def test_the_state_of_cluster_failover - @controller.failover - 1000.times { |i| assert_equal('OK', @client.call('SET', "key#{i}", i)) } - wait_for_replication - 1000.times { |i| assert_equal(i.to_s, @client.call('GET', "key#{i}")) } - refute(@redirect_count.zero?, @redirect_count.get) - end - def test_the_state_of_cluster_resharding resharded_keys = nil do_resharding_test do |keys| From 4a7d2bec105f4e3c8f60f60a9c8c61464e8b63c4 Mon Sep 17 00:00:00 2001 From: Taishi Kasuga Date: Wed, 2 Oct 2024 10:47:49 +0900 Subject: [PATCH 2/7] fix --- test/test_against_cluster_broken.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/test_against_cluster_broken.rb b/test/test_against_cluster_broken.rb index 73f32ed..d278ad6 100644 --- a/test/test_against_cluster_broken.rb +++ b/test/test_against_cluster_broken.rb @@ -76,6 +76,9 @@ def prepare_test_data end def do_assertions(offset:) + @captured_commands.clear + @redirect_count.clear + log_info('assertions') do log_info('assertions: single') do NUMBER_OF_KEYS.times do |i| From 1ba6a49dc4137d760d4602449e0e967ef7a4586b Mon Sep 17 00:00:00 2001 From: Taishi Kasuga Date: Wed, 2 Oct 2024 10:50:10 +0900 Subject: [PATCH 3/7] fix --- test/test_against_cluster_broken.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_against_cluster_broken.rb b/test/test_against_cluster_broken.rb index d278ad6..ac4e671 100644 --- a/test/test_against_cluster_broken.rb +++ b/test/test_against_cluster_broken.rb @@ -33,7 +33,7 @@ def teardown end def test_client_patience - failover_manually + do_manual_failover wait_for_cluster_to_be_ready do_assertions(offset: 0) @@ -143,7 +143,7 @@ def wait_for_cluster_to_be_ready(ignore: []) end end - def failover_manually + def do_manual_failover log_info('failover') do @controller.failover end From 1439ba341a516976038c8f315a71b7432fbf6e20 Mon Sep 17 00:00:00 2001 From: Taishi Kasuga Date: Wed, 2 Oct 2024 10:53:56 +0900 Subject: [PATCH 4/7] fix --- test/test_against_cluster_broken.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_against_cluster_broken.rb b/test/test_against_cluster_broken.rb index ac4e671..e97427a 100644 --- a/test/test_against_cluster_broken.rb +++ b/test/test_against_cluster_broken.rb @@ -78,6 +78,7 @@ def prepare_test_data def do_assertions(offset:) @captured_commands.clear @redirect_count.clear + @cluster_down_error_count = 0 log_info('assertions') do log_info('assertions: single') do From 4ca2865ebcdb3f4c017e9638f2c6e570f168b6c6 Mon Sep 17 00:00:00 2001 From: Taishi Kasuga Date: Wed, 2 Oct 2024 10:58:06 +0900 Subject: [PATCH 5/7] fix --- test/test_against_cluster_broken.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_against_cluster_broken.rb b/test/test_against_cluster_broken.rb index e97427a..757dee9 100644 --- a/test/test_against_cluster_broken.rb +++ b/test/test_against_cluster_broken.rb @@ -29,7 +29,6 @@ def teardown revive_dead_nodes @clients.each(&:close) @controller&.close - refute(@captured_commands.count('cluster', 'nodes').zero?, @captured_commands.to_a.map(&:command)) end def test_client_patience From d348e9b4f1e8606faf3fd74f261080954b4064ef Mon Sep 17 00:00:00 2001 From: Taishi Kasuga Date: Wed, 2 Oct 2024 11:06:48 +0900 Subject: [PATCH 6/7] fix --- Gemfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Gemfile b/Gemfile index b872899..75902e5 100644 --- a/Gemfile +++ b/Gemfile @@ -6,6 +6,7 @@ gemspec name: 'redis-cluster-client' gem 'async-redis', platform: :mri gem 'benchmark-ips' gem 'hiredis-client', '~> 0.6' +gem 'logger' gem 'memory_profiler' gem 'minitest' gem 'rake' From 6a417a6d4845f2fc90269d2eeb7c69daedff46fd Mon Sep 17 00:00:00 2001 From: Taishi Kasuga Date: Wed, 2 Oct 2024 11:16:57 +0900 Subject: [PATCH 7/7] fix --- test/redis_client/test_cluster.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/redis_client/test_cluster.rb b/test/redis_client/test_cluster.rb index bfa0c0a..7a787e2 100644 --- a/test/redis_client/test_cluster.rb +++ b/test/redis_client/test_cluster.rb @@ -580,6 +580,7 @@ def test_pubsub_with_wrong_command assert_nil(pubsub.call_v(%w[SUBSCRIBE])) assert_raises(::RedisClient::CommandError, 'unknown command') { pubsub.next_event } assert_raises(::RedisClient::CommandError, 'wrong number of arguments') { pubsub.next_event } + assert_nil(pubsub.next_event(0.01)) pubsub.close end @@ -891,6 +892,8 @@ def wait_for_replication server_side_timeout = (TEST_TIMEOUT_SEC * 1000).to_i swap_timeout(@client, timeout: 0.1) do |client| client&.blocking_call(client_side_timeout, 'WAIT', TEST_REPLICA_SIZE, server_side_timeout) + rescue RedisClient::ConnectionError + # ignore end end