Skip to content

Commit

Permalink
Reproduce issue etcd-io#17529
Browse files Browse the repository at this point in the history
Signed-off-by: Marek Siarkowicz <[email protected]>
  • Loading branch information
serathius committed Jun 15, 2024
1 parent 15f9a59 commit 24f0a7f
Show file tree
Hide file tree
Showing 15 changed files with 68 additions and 16 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ require (
github.com/spf13/pflag v1.0.5 // indirect
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
go.etcd.io/gofail v0.1.0 // indirect
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 // indirect
go.opentelemetry.io/otel v1.27.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0 // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.etcd.io/bbolt v1.4.0-alpha.1 h1:3yrqQzbRRPFPdOMWS/QQIVxVnzSkAZQYeWlZFv1kbj4=
go.etcd.io/bbolt v1.4.0-alpha.1/go.mod h1:S/Z/Nm3iuOnyO1W4XuFfPci51Gj6F1Hv0z8hisyYYOw=
go.etcd.io/gofail v0.1.0 h1:XItAMIhOojXFQMgrxjnd2EIIHun/d5qL0Pf7FzVTkFg=
go.etcd.io/gofail v0.1.0/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M=
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 h1:GGzKGOClkyeDNcshzpNHh7hyou+ErMhThPLYZ1qUhFs=
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390/go.mod h1:d0hc7ZE3PPyYmNnpCX+sFVBzMUznSvNkmJmzUNDiDaA=
go.etcd.io/raft/v3 v3.6.0-alpha.0 h1:cMmjAEjCKMGiQPowjSWM43Y5ZnBEeNP8RSYcm3ewtns=
go.etcd.io/raft/v3 v3.6.0-alpha.0/go.mod h1:QpxpKeYmocQQFHP75LxNrdJTukZmqQig9lotwYLsUJY=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 h1:vS1Ao/R55RNV4O7TA2Qopok8yN+X0LIP6RVWLFkprck=
Expand Down
3 changes: 1 addition & 2 deletions tests/framework/e2e/etcd_process.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,7 @@ func (f *BinaryFailpoints) DeactivateHTTP(ctx context.Context, failpoint string)
return err
}
httpClient := http.Client{
// TODO: Decrease after deactivate is not blocked by sleep https://github.com/etcd-io/gofail/issues/64
Timeout: 2 * time.Second,
Timeout: time.Second,
}
if f.clientTimeout != 0 {
httpClient.Timeout = f.clientTimeout
Expand Down
2 changes: 1 addition & 1 deletion tests/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ require (
go.etcd.io/etcd/etcdutl/v3 v3.6.0-alpha.0
go.etcd.io/etcd/pkg/v3 v3.6.0-alpha.0
go.etcd.io/etcd/server/v3 v3.6.0-alpha.0
go.etcd.io/gofail v0.1.0
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390
go.etcd.io/raft/v3 v3.6.0-alpha.0
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0
go.opentelemetry.io/otel v1.27.0
Expand Down
4 changes: 2 additions & 2 deletions tests/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.etcd.io/bbolt v1.4.0-alpha.1 h1:3yrqQzbRRPFPdOMWS/QQIVxVnzSkAZQYeWlZFv1kbj4=
go.etcd.io/bbolt v1.4.0-alpha.1/go.mod h1:S/Z/Nm3iuOnyO1W4XuFfPci51Gj6F1Hv0z8hisyYYOw=
go.etcd.io/gofail v0.1.0 h1:XItAMIhOojXFQMgrxjnd2EIIHun/d5qL0Pf7FzVTkFg=
go.etcd.io/gofail v0.1.0/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M=
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 h1:GGzKGOClkyeDNcshzpNHh7hyou+ErMhThPLYZ1qUhFs=
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390/go.mod h1:d0hc7ZE3PPyYmNnpCX+sFVBzMUznSvNkmJmzUNDiDaA=
go.etcd.io/raft/v3 v3.6.0-alpha.0 h1:cMmjAEjCKMGiQPowjSWM43Y5ZnBEeNP8RSYcm3ewtns=
go.etcd.io/raft/v3 v3.6.0-alpha.0/go.mod h1:QpxpKeYmocQQFHP75LxNrdJTukZmqQig9lotwYLsUJY=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 h1:vS1Ao/R55RNV4O7TA2Qopok8yN+X0LIP6RVWLFkprck=
Expand Down
1 change: 1 addition & 0 deletions tests/robustness/failpoint/failpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ var (
RaftBeforeSaveSleep,
RaftAfterSaveSleep,
ApplyBeforeOpenSnapshot,
SleepBeforeSendWatchResponse,
}
)

Expand Down
4 changes: 1 addition & 3 deletions tests/robustness/failpoint/gofail.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ var (
BeforeApplyOneConfChangeSleep Failpoint = killAndGofailSleep{"beforeApplyOneConfChange", time.Second}
RaftBeforeSaveSleep Failpoint = gofailSleepAndDeactivate{"raftBeforeSave", time.Second}
RaftAfterSaveSleep Failpoint = gofailSleepAndDeactivate{"raftAfterSave", time.Second}
SleepBeforeSendWatchResponse Failpoint = gofailSleepAndDeactivate{"beforeSendWatchResponse", time.Second}
)

type goPanicFailpoint struct {
Expand Down Expand Up @@ -238,9 +239,6 @@ func (f gofailSleepAndDeactivate) Name() string {
}

func (f gofailSleepAndDeactivate) Available(config e2e.EtcdProcessClusterConfig, member e2e.EtcdProcess) bool {
if config.ClusterSize == 1 {
return false
}
memberFailpoints := member.Failpoints()
if memberFailpoints == nil {
return false
Expand Down
20 changes: 20 additions & 0 deletions tests/robustness/makefile.mk
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ test-robustness-issue15271: /tmp/etcd-v3.5.7-failpoints/bin
GO_TEST_FLAGS='-v --run=TestRobustnessRegression/Issue15271 --count 100 --failfast --bin-dir=/tmp/etcd-v3.5.7-failpoints/bin' make test-robustness && \
echo "Failed to reproduce" || echo "Successful reproduction"

.PHONY: test-robustness-issue17529
test-robustness-issue17529: /tmp/etcd-v3.5.12-beforeSendWatchResponse/bin
GO_TEST_FLAGS='-v --run=TestRobustnessRegression/Issue17529 --count 100 --failfast --bin-dir=/tmp/etcd-v3.5.12-beforeSendWatchResponse/bin' make test-robustness && \
echo "Failed to reproduce" || echo "Successful reproduction"

# Failpoints

GOPATH = $(shell go env GOPATH)
Expand Down Expand Up @@ -98,6 +103,21 @@ $(GOPATH)/bin/gofail: tools/mod/go.mod tools/mod/go.sum
(cd etcdutl; go get go.etcd.io/gofail@${GOFAIL_VERSION}); \
FAILPOINTS=true ./build;

/tmp/etcd-v3.5.12-beforeSendWatchResponse/bin: $(GOPATH)/bin/gofail
rm -rf /tmp/etcd-v3.5.12-beforeSendWatchResponse/
mkdir -p /tmp/etcd-v3.5.12-beforeSendWatchResponse/
git clone --depth 1 --branch v3.5.12 https://github.com/etcd-io/etcd.git /tmp/etcd-v3.5.12-beforeSendWatchResponse/
cp -r ./tests/robustness/patches/beforeSendWatchResponse /tmp/etcd-v3.5.12-beforeSendWatchResponse/
cd /tmp/etcd-v3.5.12-beforeSendWatchResponse/; \
patch -l server/etcdserver/api/v3rpc/watch.go ./beforeSendWatchResponse/watch.patch; \
patch -l build.sh ./beforeSendWatchResponse/build.patch; \
go get go.etcd.io/gofail@${GOFAIL_VERSION}; \
(cd server; go get go.etcd.io/gofail@${GOFAIL_VERSION}); \
(cd etcdctl; go get go.etcd.io/gofail@${GOFAIL_VERSION}); \
(cd etcdutl; go get go.etcd.io/gofail@${GOFAIL_VERSION}); \
(cd tools/mod; go get go.etcd.io/gofail@${GOFAIL_VERSION}); \
FAILPOINTS=true ./build;

/tmp/etcd-release-3.5-failpoints/bin: $(GOPATH)/bin/gofail
rm -rf /tmp/etcd-release-3.5-failpoints/
mkdir -p /tmp/etcd-release-3.5-failpoints/
Expand Down
9 changes: 9 additions & 0 deletions tests/robustness/patches/beforeSendWatchResponse/build.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
@@ -25,7 +26,7 @@ GOFAIL_VERSION=$(cd tools/mod && go list -m -f {{.Version}} go.etcd.io/gofail)
toggle_failpoints() {
mode="$1"
if command -v gofail >/dev/null 2>&1; then
- run gofail "$mode" server/etcdserver/ server/mvcc/ server/wal/ server/mvcc/backend/
+ run gofail "$mode" server/etcdserver/ server/mvcc/ server/wal/ server/mvcc/backend/ server/etcdserver/api/v3rpc/
if [[ "$mode" == "enable" ]]; then
go get go.etcd.io/gofail@${GOFAIL_VERSION}
cd ./server && go get go.etcd.io/gofail@${GOFAIL_VERSION}
12 changes: 12 additions & 0 deletions tests/robustness/patches/beforeSendWatchResponse/watch.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
diff --git a/server/etcdserver/api/v3rpc/watch.go b/server/etcdserver/api/v3rpc/watch.go
index cd834aa..e6aaf2b 100644
--- a/server/etcdserver/api/v3rpc/watch.go
+++ b/server/etcdserver/api/v3rpc/watch.go
@@ -460,6 +460,7 @@ func (sws *serverWatchStream) sendLoop() {
sws.mu.RUnlock()

var serr error
+ // gofail: var beforeSendWatchResponse struct{}
if !fragmented && !ok {
serr = sws.gRPCStream.Send(wr)
} else {
11 changes: 11 additions & 0 deletions tests/robustness/scenarios.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,17 @@ func regressionScenarios(t *testing.T) []testScenario {
e2e.WithClusterSize(1),
),
})
scenarios = append(scenarios, testScenario{
name: "Issue17529",
profile: traffic.HighTrafficProfile,
traffic: traffic.Kubernetes,
failpoint: failpoint.SleepBeforeSendWatchResponse,
cluster: *e2e.NewConfig(
e2e.WithClusterSize(1),
e2e.WithGoFailEnabled(true),
options.WithSnapshotCount(100),
),
})
if v.Compare(version.V3_5) >= 0 {
opts := []e2e.EPClusterOption{
e2e.WithSnapshotCount(100),
Expand Down
6 changes: 3 additions & 3 deletions tests/robustness/traffic/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ var (
resource: "pods",
namespace: "default",
writeChoices: []choiceWeight[KubernetesRequestType]{
{choice: KubernetesUpdate, weight: 85},
{choice: KubernetesDelete, weight: 5},
{choice: KubernetesCreate, weight: 5},
{choice: KubernetesUpdate, weight: 75},
{choice: KubernetesDelete, weight: 10},
{choice: KubernetesCreate, weight: 10},
{choice: KubernetesCompact, weight: 5},
},
}
Expand Down
2 changes: 1 addition & 1 deletion tests/robustness/traffic/traffic.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (
var (
DefaultLeaseTTL int64 = 7200
RequestTimeout = 200 * time.Millisecond
WatchTimeout = 400 * time.Millisecond
WatchTimeout = time.Second
MultiOpTxnOpCount = 4

LowTraffic = Profile{
Expand Down
2 changes: 1 addition & 1 deletion tools/mod/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ require (
github.com/google/addlicense v1.1.1
github.com/google/yamlfmt v0.11.0
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0
go.etcd.io/gofail v0.1.0
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390
go.etcd.io/protodoc v0.0.0-20180829002748-484ab544e116
go.etcd.io/raft/v3 v3.6.0-alpha.0
gotest.tools/gotestsum v1.11.0
Expand Down
2 changes: 2 additions & 0 deletions tools/mod/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,8 @@ go-simpler.org/sloglint v0.7.0 h1:rMZRxD9MbaGoRFobIOicMxZzum7AXNFDlez6xxJs5V4=
go-simpler.org/sloglint v0.7.0/go.mod h1:g9SXiSWY0JJh4LS39/Q0GxzP/QX2cVcbTOYhDpXrJEs=
go.etcd.io/gofail v0.1.0 h1:XItAMIhOojXFQMgrxjnd2EIIHun/d5qL0Pf7FzVTkFg=
go.etcd.io/gofail v0.1.0/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M=
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390 h1:GGzKGOClkyeDNcshzpNHh7hyou+ErMhThPLYZ1qUhFs=
go.etcd.io/gofail v0.1.1-0.20240517100945-baefa9867390/go.mod h1:d0hc7ZE3PPyYmNnpCX+sFVBzMUznSvNkmJmzUNDiDaA=
go.etcd.io/protodoc v0.0.0-20180829002748-484ab544e116 h1:QQiUXlqz+d96jyNG71NE+IGTgOK6Xlhdx+PzvfbLHlQ=
go.etcd.io/protodoc v0.0.0-20180829002748-484ab544e116/go.mod h1:F9kog+iVAuvPJucb1dkYcDcbV0g4uyGEHllTP5NrXiw=
go.etcd.io/raft/v3 v3.6.0-alpha.0 h1:cMmjAEjCKMGiQPowjSWM43Y5ZnBEeNP8RSYcm3ewtns=
Expand Down

0 comments on commit 24f0a7f

Please sign in to comment.