Skip to content

Commit

Permalink
Add an e2e test and robustness failpoint around recovering from snaps…
Browse files Browse the repository at this point in the history
…hot backend

Signed-off-by: Marek Siarkowicz <siarkowicz@google.com>
  • Loading branch information
serathius committed Jan 4, 2024
1 parent d39d86a commit 3471ef1
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 3 deletions.
2 changes: 1 addition & 1 deletion server/etcdserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1001,7 +1001,7 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, toApply *toApply) {
// wait for raftNode to persist snapshot onto the disk
<-toApply.notifyc

// gofail: var beforeOpenSnapshotBackend struct{}
// gofail: var applyBeforeOpenSnapshot struct{}
newbe, err := serverstorage.OpenSnapshotBackend(s.Cfg, s.snapshotter, toApply.snapshot, s.beHooks)
if err != nil {
lg.Panic("failed to open snapshot backend", zap.Error(err))
Expand Down
98 changes: 98 additions & 0 deletions tests/e2e/leader_snapshot_no_proxy_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !cluster_proxy

package e2e

import (
"context"
"sync"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/zap"

clientv3 "go.etcd.io/etcd/client/v3"
"go.etcd.io/etcd/pkg/v3/expect"
"go.etcd.io/etcd/tests/v3/framework/config"
"go.etcd.io/etcd/tests/v3/framework/e2e"
"go.etcd.io/etcd/tests/v3/robustness/failpoint"
)

func TestRecoverSnapshotBackend(t *testing.T) {
e2e.BeforeTest(t)
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()

epc, err := e2e.NewEtcdProcessCluster(ctx, t,
e2e.WithClusterSize(3),
e2e.WithKeepDataDir(true),
e2e.WithPeerProxy(true),
e2e.WithSnapshotCatchUpEntries(50),
e2e.WithSnapshotCount(50),
e2e.WithGoFailEnabled(true),
e2e.WithIsPeerTLS(true),
)
require.NoError(t, err)

defer epc.Close()

blackholedMember := epc.Procs[0]
otherMember := epc.Procs[1]

wg := sync.WaitGroup{}

trafficCtx, trafficCancel := context.WithCancel(ctx)
c, err := clientv3.New(clientv3.Config{
Endpoints: otherMember.EndpointsGRPC(),
Logger: zap.NewNop(),
DialKeepAliveTime: 10 * time.Second,
DialKeepAliveTimeout: 100 * time.Millisecond,
})
require.NoError(t, err)
defer c.Close()
wg.Add(1)
go func() {
defer wg.Done()
for {
select {
case <-trafficCtx.Done():
return
default:
}
putCtx, putCancel := context.WithTimeout(trafficCtx, 50*time.Millisecond)
c.Put(putCtx, "a", "b")
putCancel()
time.Sleep(10 * time.Millisecond)
}
}()

err = blackholedMember.Failpoints().SetupHTTP(ctx, "applyBeforeOpenSnapshot", "panic")
require.NoError(t, err)
err = failpoint.Blackhole(ctx, t, blackholedMember, epc, true)
require.NoError(t, err)
err = blackholedMember.Wait(ctx)
require.NoError(t, err)
trafficCancel()
wg.Wait()
err = blackholedMember.Start(ctx)
require.NoError(t, err)
_, err = blackholedMember.Logs().ExpectWithContext(ctx, expect.ExpectedResponse{Value: "Recovering from snapshot backend"})
assert.NoError(t, err)
err = blackholedMember.Etcdctl().Put(ctx, "a", "1", config.PutOptions{})
assert.NoError(t, err)
}
1 change: 1 addition & 0 deletions tests/robustness/failpoint/failpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ var (
DropPeerNetwork,
RaftBeforeSaveSleep,
RaftAfterSaveSleep,
ApplyBeforeOpenSnapshot,
}
)

Expand Down
1 change: 1 addition & 0 deletions tests/robustness/failpoint/gofail.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ var (
RaftAfterWALReleasePanic Failpoint = goPanicFailpoint{"raftAfterWALRelease", triggerBlackhole{waitTillSnapshot: true}, Follower}
RaftBeforeSaveSnapPanic Failpoint = goPanicFailpoint{"raftBeforeSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower}
RaftAfterSaveSnapPanic Failpoint = goPanicFailpoint{"raftAfterSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower}
ApplyBeforeOpenSnapshot Failpoint = goPanicFailpoint{"applyBeforeOpenSnapshot", triggerBlackhole{waitTillSnapshot: true}, Follower}
BeforeApplyOneConfChangeSleep Failpoint = killAndGofailSleep{"beforeApplyOneConfChange", time.Second}
RaftBeforeSaveSleep Failpoint = gofailSleepAndDeactivate{"raftBeforeSave", time.Second}
RaftAfterSaveSleep Failpoint = gofailSleepAndDeactivate{"raftAfterSave", time.Second}
Expand Down
4 changes: 2 additions & 2 deletions tests/robustness/failpoint/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ type triggerBlackhole struct {
}

func (tb triggerBlackhole) Trigger(ctx context.Context, t *testing.T, member e2e.EtcdProcess, clus *e2e.EtcdProcessCluster) error {
return blackhole(ctx, t, member, clus, tb.waitTillSnapshot)
return Blackhole(ctx, t, member, clus, tb.waitTillSnapshot)
}

func (tb triggerBlackhole) Available(config e2e.EtcdProcessClusterConfig, process e2e.EtcdProcess) bool {
Expand All @@ -61,7 +61,7 @@ func (tb triggerBlackhole) Available(config e2e.EtcdProcessClusterConfig, proces
return config.ClusterSize > 1 && process.PeerProxy() != nil
}

func blackhole(ctx context.Context, t *testing.T, member e2e.EtcdProcess, clus *e2e.EtcdProcessCluster, shouldWaitTillSnapshot bool) error {
func Blackhole(ctx context.Context, t *testing.T, member e2e.EtcdProcess, clus *e2e.EtcdProcessCluster, shouldWaitTillSnapshot bool) error {
proxy := member.PeerProxy()

// Blackholing will cause peers to not be able to use streamWriters registered with member
Expand Down

0 comments on commit 3471ef1

Please sign in to comment.