diff --git a/examples/local/scripts/vtgate-up.sh b/examples/local/scripts/vtgate-up.sh index 49671444f55..cb33e27839b 100755 --- a/examples/local/scripts/vtgate-up.sh +++ b/examples/local/scripts/vtgate-up.sh @@ -39,7 +39,6 @@ vtgate \ --tablet_types_to_wait PRIMARY,REPLICA \ --service_map 'grpc-vtgateservice' \ --pid_file $VTDATAROOT/tmp/vtgate.pid \ - --enable_buffer \ --mysql_auth_server_impl none \ > $VTDATAROOT/tmp/vtgate.out 2>&1 & diff --git a/go/vt/discovery/keyspace_events.go b/go/vt/discovery/keyspace_events.go index 2869ebf9e01..0b3fa7e9efe 100644 --- a/go/vt/discovery/keyspace_events.go +++ b/go/vt/discovery/keyspace_events.go @@ -23,14 +23,12 @@ import ( "google.golang.org/protobuf/proto" - "vitess.io/vitess/go/vt/key" "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/proto/query" + topodatapb "vitess.io/vitess/go/vt/proto/topodata" "vitess.io/vitess/go/vt/srvtopo" "vitess.io/vitess/go/vt/topo" "vitess.io/vitess/go/vt/topo/topoproto" - - querypb "vitess.io/vitess/go/vt/proto/query" - topodatapb "vitess.io/vitess/go/vt/proto/topodata" ) // KeyspaceEventWatcher is an auxiliary watcher that watches all availability incidents @@ -67,7 +65,7 @@ type KeyspaceEvent struct { type ShardEvent struct { Tablet *topodatapb.TabletAlias - Target *querypb.Target + Target *query.Target Serving bool } @@ -126,27 +124,17 @@ func (kss *keyspaceState) beingResharded(currentShard string) bool { kss.mu.Lock() defer kss.mu.Unlock() - // If the keyspace is gone, has no known availability events, or is in the middle of a - // MoveTables then the keyspace cannot be in the middle of a resharding operation. + // if the keyspace is gone, or if it has no known availability events, the keyspace + // cannot be in the middle of a resharding operation if kss.deleted || kss.consistent { return false } - // If there are unequal and overlapping shards in the keyspace and any of them are - // currently serving then we assume that we are in the middle of a Reshard. - _, ckr, err := topo.ValidateShardName(currentShard) - if err != nil || ckr == nil { // Assume not and avoid potential panic - return false - } + // for all the known shards, try to find a primary shard besides the one we're trying to access + // and which is currently healthy. if there are other healthy primaries in the keyspace, it means + // we're in the middle of a resharding operation for shard, sstate := range kss.shards { - if !sstate.serving || shard == currentShard { - continue - } - _, skr, err := topo.ValidateShardName(shard) - if err != nil || skr == nil { // Assume not and avoid potential panic - return false - } - if key.KeyRangesIntersect(ckr, skr) { + if shard != currentShard && sstate.serving { return true } } @@ -155,7 +143,7 @@ func (kss *keyspaceState) beingResharded(currentShard string) bool { } type shardState struct { - target *querypb.Target + target *query.Target serving bool externallyReparented int64 currentPrimary *topodatapb.TabletAlias @@ -438,7 +426,7 @@ func (kew *KeyspaceEventWatcher) getKeyspaceStatus(keyspace string) *keyspaceSta // This is not a fully accurate heuristic, but it's good enough that we'd want to buffer the // request for the given target under the assumption that the reason why it cannot be completed // right now is transitory. -func (kew *KeyspaceEventWatcher) TargetIsBeingResharded(target *querypb.Target) bool { +func (kew *KeyspaceEventWatcher) TargetIsBeingResharded(target *query.Target) bool { if target.TabletType != topodatapb.TabletType_PRIMARY { return false } @@ -458,7 +446,7 @@ func (kew *KeyspaceEventWatcher) TargetIsBeingResharded(target *querypb.Target) // The shard state keeps track of the current primary and the last externally reparented time, which we can use // to determine that there was a serving primary which now became non serving. This is only possible in a DemotePrimary // RPC which are only called from ERS and PRS. So buffering will stop when these operations succeed. -func (kew *KeyspaceEventWatcher) PrimaryIsNotServing(target *querypb.Target) bool { +func (kew *KeyspaceEventWatcher) PrimaryIsNotServing(target *query.Target) bool { if target.TabletType != topodatapb.TabletType_PRIMARY { return false } diff --git a/go/vt/discovery/keyspace_events_test.go b/go/vt/discovery/keyspace_events_test.go index 652e4ff7c7b..456d8566e87 100644 --- a/go/vt/discovery/keyspace_events_test.go +++ b/go/vt/discovery/keyspace_events_test.go @@ -24,12 +24,10 @@ import ( "github.com/stretchr/testify/require" - "vitess.io/vitess/go/vt/topo" - "vitess.io/vitess/go/vt/topo/faketopo" - - querypb "vitess.io/vitess/go/vt/proto/query" topodatapb "vitess.io/vitess/go/vt/proto/topodata" vschemapb "vitess.io/vitess/go/vt/proto/vschema" + "vitess.io/vitess/go/vt/topo" + "vitess.io/vitess/go/vt/topo/faketopo" ) func TestSrvKeyspaceWithNilNewKeyspace(t *testing.T) { @@ -40,7 +38,6 @@ func TestSrvKeyspaceWithNilNewKeyspace(t *testing.T) { ts := faketopo.NewFakeTopoServer(factory) ts2 := &fakeTopoServer{} hc := NewHealthCheck(context.Background(), 1*time.Millisecond, time.Hour, ts, cell, "") - defer hc.Close() kew := NewKeyspaceEventWatcher(context.Background(), ts2, hc, cell) kss := &keyspaceState{ kew: kew, @@ -58,220 +55,6 @@ func TestSrvKeyspaceWithNilNewKeyspace(t *testing.T) { require.True(t, kss.onSrvKeyspace(nil, nil)) } -// TestKeyspaceEventTypes confirms that the keyspace event watcher determines -// that the unavailability event is caused by the correct scenario. We should -// consider it to be caused by a resharding operation when the following -// conditions are present: -// 1. The keyspace is inconsistent (in the middle of an availability event) -// 2. The target tablet is a primary -// 3. The keyspace has overlapping shards -// 4. The overlapping shard's tablet is serving -// And we should consider the cause to be a primary not serving when the -// following conditions exist: -// 1. The keyspace is inconsistent (in the middle of an availability event) -// 2. The target tablet is a primary -// 3. The target tablet is not serving -// 4. The shard's externallyReparented time is not 0 -// 5. The shard's currentPrimary state is not nil -// We should never consider both as a possible cause given the same -// keyspace state. -func TestKeyspaceEventTypes(t *testing.T) { - cell := "cell" - keyspace := "testks" - factory := faketopo.NewFakeTopoFactory() - factory.AddCell(cell) - ts := faketopo.NewFakeTopoServer(factory) - ts2 := &fakeTopoServer{} - hc := NewHealthCheck(context.Background(), 1*time.Millisecond, time.Hour, ts, cell, "") - defer hc.Close() - kew := NewKeyspaceEventWatcher(context.Background(), ts2, hc, cell) - - type testCase struct { - name string - kss *keyspaceState - shardToCheck string - expectResharding bool - expectPrimaryNotServing bool - } - - testCases := []testCase{ - { - name: "one to two resharding in progress", - kss: &keyspaceState{ - kew: kew, - keyspace: keyspace, - shards: map[string]*shardState{ - "-": { - target: &querypb.Target{ - Keyspace: keyspace, - Shard: "-", - TabletType: topodatapb.TabletType_PRIMARY, - }, - serving: false, - }, - "-80": { - target: &querypb.Target{ - Keyspace: keyspace, - Shard: "-80", - TabletType: topodatapb.TabletType_PRIMARY, - }, - serving: true, - }, - "80-": { - target: &querypb.Target{ - Keyspace: keyspace, - Shard: "80-", - TabletType: topodatapb.TabletType_PRIMARY, - }, - serving: false, - }, - }, - consistent: false, - }, - shardToCheck: "-", - expectResharding: true, - expectPrimaryNotServing: false, - }, - { - name: "two to four resharding in progress", - kss: &keyspaceState{ - kew: kew, - keyspace: keyspace, - shards: map[string]*shardState{ - "-80": { - target: &querypb.Target{ - Keyspace: keyspace, - Shard: "-80", - TabletType: topodatapb.TabletType_PRIMARY, - }, - serving: false, - }, - "80-": { - target: &querypb.Target{ - Keyspace: keyspace, - Shard: "80-", - TabletType: topodatapb.TabletType_PRIMARY, - }, - serving: true, - }, - "-40": { - target: &querypb.Target{ - Keyspace: keyspace, - Shard: "-40", - TabletType: topodatapb.TabletType_PRIMARY, - }, - serving: true, - }, - "40-80": { - target: &querypb.Target{ - Keyspace: keyspace, - Shard: "40-80", - TabletType: topodatapb.TabletType_PRIMARY, - }, - serving: true, - }, - "80-c0": { - target: &querypb.Target{ - Keyspace: keyspace, - Shard: "80-c0", - TabletType: topodatapb.TabletType_PRIMARY, - }, - serving: false, - }, - "c0-": { - target: &querypb.Target{ - Keyspace: keyspace, - Shard: "c0-", - TabletType: topodatapb.TabletType_PRIMARY, - }, - serving: false, - }, - }, - consistent: false, - }, - shardToCheck: "-80", - expectResharding: true, - expectPrimaryNotServing: false, - }, - { - name: "unsharded primary not serving", - kss: &keyspaceState{ - kew: kew, - keyspace: keyspace, - shards: map[string]*shardState{ - "-": { - target: &querypb.Target{ - Keyspace: keyspace, - Shard: "-", - TabletType: topodatapb.TabletType_PRIMARY, - }, - serving: false, - externallyReparented: time.Now().UnixNano(), - currentPrimary: &topodatapb.TabletAlias{ - Cell: cell, - Uid: 100, - }, - }, - }, - consistent: false, - }, - shardToCheck: "-", - expectResharding: false, - expectPrimaryNotServing: true, - }, - { - name: "sharded primary not serving", - kss: &keyspaceState{ - kew: kew, - keyspace: keyspace, - shards: map[string]*shardState{ - "-80": { - target: &querypb.Target{ - Keyspace: keyspace, - Shard: "-80", - TabletType: topodatapb.TabletType_PRIMARY, - }, - serving: false, - externallyReparented: time.Now().UnixNano(), - currentPrimary: &topodatapb.TabletAlias{ - Cell: cell, - Uid: 100, - }, - }, - "80-": { - target: &querypb.Target{ - Keyspace: keyspace, - Shard: "80-", - TabletType: topodatapb.TabletType_PRIMARY, - }, - serving: true, - }, - }, - consistent: false, - }, - shardToCheck: "-80", - expectResharding: false, - expectPrimaryNotServing: true, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - kew.mu.Lock() - kew.keyspaces[keyspace] = tc.kss - kew.mu.Unlock() - - require.NotNil(t, tc.kss.shards[tc.shardToCheck], "the specified shardToCheck of %q does not exist in the shardState", tc.shardToCheck) - - resharding := kew.TargetIsBeingResharded(tc.kss.shards[tc.shardToCheck].target) - require.Equal(t, resharding, tc.expectResharding, "TargetIsBeingResharded should return %t", tc.expectResharding) - - primaryDown := kew.PrimaryIsNotServing(tc.kss.shards[tc.shardToCheck].target) - require.Equal(t, primaryDown, tc.expectPrimaryNotServing, "PrimaryIsNotServing should return %t", tc.expectPrimaryNotServing) - }) - } -} - type fakeTopoServer struct { }