diff --git a/tests/framework/e2e/cluster.go b/tests/framework/e2e/cluster.go
index 4aff11b9d6f9..64880385b406 100644
--- a/tests/framework/e2e/cluster.go
+++ b/tests/framework/e2e/cluster.go
@@ -564,6 +564,7 @@ func (cfg *EtcdProcessClusterConfig) EtcdServerProcessConfig(tb testing.TB, i in
 		"--initial-cluster-token=" + cfg.ServerConfig.InitialClusterToken,
 		"--data-dir", dataDirPath,
 		"--snapshot-count=" + fmt.Sprintf("%d", cfg.ServerConfig.SnapshotCount),
+		"--max-wals=1000", "--max-snapshots=1000",
 	}
 	var clientHTTPURL string
 	if cfg.ClientHTTPSeparate {
diff --git a/tests/robustness/failpoint/cluster.go b/tests/robustness/failpoint/cluster.go
index 201bf48a4519..bcb2f4fc8355 100644
--- a/tests/robustness/failpoint/cluster.go
+++ b/tests/robustness/failpoint/cluster.go
@@ -181,23 +181,23 @@ func (f memberDowngrade) Inject(ctx context.Context, t *testing.T, lg *zap.Logge
 		member = clus.Procs[memberID]
 		lg.Info("Downgrading member", zap.String("member", member.Config().Name))
 		for member.IsRunning() {
-			err = member.Kill()
+			err = member.Stop()
 			if err != nil {
-				lg.Info("Sending kill signal failed", zap.Error(err))
+				lg.Info("Stopping server failed", zap.Error(err))
 			}
 			err = member.Wait(ctx)
 			if err != nil && !strings.Contains(err.Error(), "unexpected exit code") {
-				lg.Info("Failed to kill the process", zap.Error(err))
-				return nil, fmt.Errorf("failed to kill the process within %s, err: %w", triggerTimeout, err)
-			}
-		}
-		if lazyfs := member.LazyFS(); lazyfs != nil {
-			lg.Info("Removing data that was not fsynced")
-			err := lazyfs.ClearCache(ctx)
-			if err != nil {
-				return nil, err
+				lg.Info("Failed to stop the process", zap.Error(err))
+				return nil, fmt.Errorf("failed to stop the process within %s, err: %w", triggerTimeout, err)
 			}
 		}
+		// if lazyfs := member.LazyFS(); lazyfs != nil {
+		// 	lg.Info("Removing data that was not fsynced")
+		// 	err := lazyfs.ClearCache(ctx)
+		// 	if err != nil {
+		// 		return nil, err
+		// 	}
+		// }
 		member.Config().ExecPath = e2e.BinPath.EtcdLastRelease
 		err = patchArgs(member.Config().Args, "initial-cluster-state", "existing")
 		if err != nil {
@@ -208,9 +208,11 @@ func (f memberDowngrade) Inject(ctx context.Context, t *testing.T, lg *zap.Logge
 		if err != nil {
 			return nil, err
 		}
+		time.Sleep(etcdserver.HealthInterval)
 		err = verifyVersion(t, clus, member, targetVersion)
 	}
 	time.Sleep(etcdserver.HealthInterval)
+	lg.Info("Finished downgrading members", zap.Any("members", membersToDowngrade))
 	return nil, err
 }
 
diff --git a/tests/robustness/failpoint/failpoint.go b/tests/robustness/failpoint/failpoint.go
index 17c0d11b8e7f..5cdf25c81aab 100644
--- a/tests/robustness/failpoint/failpoint.go
+++ b/tests/robustness/failpoint/failpoint.go
@@ -36,22 +36,22 @@ const (
 )
 
 var allFailpoints = []Failpoint{
-	KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic, RaftAfterSavePanic,
-	DefragBeforeCopyPanic, DefragBeforeRenamePanic, BackendBeforePreCommitHookPanic, BackendAfterPreCommitHookPanic,
-	BackendBeforeStartDBTxnPanic, BackendAfterStartDBTxnPanic, BackendBeforeWritebackBufPanic,
-	BackendAfterWritebackBufPanic, CompactBeforeCommitScheduledCompactPanic, CompactAfterCommitScheduledCompactPanic,
-	CompactBeforeSetFinishedCompactPanic, CompactAfterSetFinishedCompactPanic, CompactBeforeCommitBatchPanic,
-	CompactAfterCommitBatchPanic, RaftBeforeLeaderSendPanic, BlackholePeerNetwork, DelayPeerNetwork,
-	RaftBeforeFollowerSendPanic, RaftBeforeApplySnapPanic, RaftAfterApplySnapPanic, RaftAfterWALReleasePanic,
-	RaftBeforeSaveSnapPanic, RaftAfterSaveSnapPanic, BlackholeUntilSnapshot,
-	BeforeApplyOneConfChangeSleep,
-	MemberReplace,
+	// KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic, RaftAfterSavePanic,
+	// DefragBeforeCopyPanic, DefragBeforeRenamePanic, BackendBeforePreCommitHookPanic, BackendAfterPreCommitHookPanic,
+	// BackendBeforeStartDBTxnPanic, BackendAfterStartDBTxnPanic, BackendBeforeWritebackBufPanic,
+	// BackendAfterWritebackBufPanic, CompactBeforeCommitScheduledCompactPanic, CompactAfterCommitScheduledCompactPanic,
+	// CompactBeforeSetFinishedCompactPanic, CompactAfterSetFinishedCompactPanic, CompactBeforeCommitBatchPanic,
+	// CompactAfterCommitBatchPanic, RaftBeforeLeaderSendPanic, BlackholePeerNetwork, DelayPeerNetwork,
+	// RaftBeforeFollowerSendPanic, RaftBeforeApplySnapPanic, RaftAfterApplySnapPanic, RaftAfterWALReleasePanic,
+	// RaftBeforeSaveSnapPanic, RaftAfterSaveSnapPanic, BlackholeUntilSnapshot,
+	// BeforeApplyOneConfChangeSleep,
+	// MemberReplace,
 	MemberDowngrade,
-	DropPeerNetwork,
-	RaftBeforeSaveSleep,
-	RaftAfterSaveSleep,
-	ApplyBeforeOpenSnapshot,
-	SleepBeforeSendWatchResponse,
+	// DropPeerNetwork,
+	// RaftBeforeSaveSleep,
+	// RaftAfterSaveSleep,
+	// ApplyBeforeOpenSnapshot,
+	// SleepBeforeSendWatchResponse,
 }
 
 func PickRandom(clus *e2e.EtcdProcessCluster, profile traffic.Profile) (Failpoint, error) {
diff --git a/tests/robustness/scenarios/scenarios.go b/tests/robustness/scenarios/scenarios.go
index afad5879feea..99bb5fc08c55 100644
--- a/tests/robustness/scenarios/scenarios.go
+++ b/tests/robustness/scenarios/scenarios.go
@@ -81,22 +81,23 @@ func Exploratory(_ *testing.T) []TestScenario {
 		// 60% with all members of current version
 		{Choice: options.ClusterOptions{options.WithVersion(e2e.CurrentVersion)}, Weight: 60},
 		// 10% with 2 members of current version, 1 member last version, leader is current version
-		{Choice: options.ClusterOptions{options.WithVersion(e2e.MinorityLastVersion), options.WithInitialLeaderIndex(0)}, Weight: 10},
-		// 10% with 2 members of current version, 1 member last version, leader is last version
-		{Choice: options.ClusterOptions{options.WithVersion(e2e.MinorityLastVersion), options.WithInitialLeaderIndex(2)}, Weight: 10},
-		// 10% with 2 members of last version, 1 member current version, leader is last version
-		{Choice: options.ClusterOptions{options.WithVersion(e2e.QuorumLastVersion), options.WithInitialLeaderIndex(0)}, Weight: 10},
-		// 10% with 2 members of last version, 1 member current version, leader is current version
-		{Choice: options.ClusterOptions{options.WithVersion(e2e.QuorumLastVersion), options.WithInitialLeaderIndex(2)}, Weight: 10},
+		// {Choice: options.ClusterOptions{options.WithVersion(e2e.MinorityLastVersion), options.WithInitialLeaderIndex(0)}, Weight: 10},
+		// // 10% with 2 members of current version, 1 member last version, leader is last version
+		// {Choice: options.ClusterOptions{options.WithVersion(e2e.MinorityLastVersion), options.WithInitialLeaderIndex(2)}, Weight: 10},
+		// // 10% with 2 members of last version, 1 member current version, leader is last version
+		// {Choice: options.ClusterOptions{options.WithVersion(e2e.QuorumLastVersion), options.WithInitialLeaderIndex(0)}, Weight: 10},
+		// // 10% with 2 members of last version, 1 member current version, leader is current version
+		// {Choice: options.ClusterOptions{options.WithVersion(e2e.QuorumLastVersion), options.WithInitialLeaderIndex(2)}, Weight: 10},
 	}
 	mixedVersionOption := options.WithClusterOptionGroups(random.PickRandom[options.ClusterOptions](mixedVersionOptionChoices))
 
 	baseOptions := []e2e.EPClusterOption{
-		options.WithSnapshotCount(50, 100, 1000),
+		options.WithSnapshotCount(100000),
 		options.WithSubsetOptions(randomizableOptions...),
 		e2e.WithGoFailEnabled(true),
+		e2e.WithKeepDataDir(true),
 		// Set low minimal compaction batch limit to allow for triggering multi batch compaction failpoints.
-		options.WithCompactionBatchLimit(10, 100, 1000),
+		options.WithCompactionBatchLimit(100000),
 		e2e.WithWatchProcessNotifyInterval(100 * time.Millisecond),
 	}
 
@@ -104,17 +105,17 @@ func Exploratory(_ *testing.T) []TestScenario {
 		baseOptions = append(baseOptions, e2e.WithSnapshotCatchUpEntries(100))
 	}
 	scenarios := []TestScenario{}
-	for _, tp := range trafficProfiles {
-		name := filepath.Join(tp.Name, "ClusterOfSize1")
-		clusterOfSize1Options := baseOptions
-		clusterOfSize1Options = append(clusterOfSize1Options, e2e.WithClusterSize(1))
-		scenarios = append(scenarios, TestScenario{
-			Name:    name,
-			Traffic: tp.Traffic,
-			Profile: tp.Profile,
-			Cluster: *e2e.NewConfig(clusterOfSize1Options...),
-		})
-	}
+	// for _, tp := range trafficProfiles {
+	// 	name := filepath.Join(tp.Name, "ClusterOfSize1")
+	// 	clusterOfSize1Options := baseOptions
+	// 	clusterOfSize1Options = append(clusterOfSize1Options, e2e.WithClusterSize(1))
+	// 	scenarios = append(scenarios, TestScenario{
+	// 		Name:    name,
+	// 		Traffic: tp.Traffic,
+	// 		Profile: tp.Profile,
+	// 		Cluster: *e2e.NewConfig(clusterOfSize1Options...),
+	// 	})
+	// }
 	for _, tp := range trafficProfiles {
 		name := filepath.Join(tp.Name, "ClusterOfSize3")