diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java index 4c3fe100523..cd932f6efde 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java @@ -51,7 +51,6 @@ import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils; import org.apache.ozone.test.GenericTestUtils; -import org.apache.ozone.test.tag.Unhealthy; import org.apache.ratis.server.protocol.TermIndex; import org.assertj.core.api.Fail; import org.junit.jupiter.api.AfterEach; @@ -105,7 +104,6 @@ * Tests the Ratis snapshots feature in OM. */ @Timeout(5000) -@Unhealthy("HDDS-8876") public class TestOMRatisSnapshots { private MiniOzoneHAClusterImpl cluster = null; @@ -260,7 +258,7 @@ public void testInstallSnapshot(int numSnapshotsToCreate) throws Exception { GenericTestUtils.waitFor(() -> { return followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex() >= leaderOMSnapshotIndex - 1; - }, 100, 10000); + }, 100, 30_000); long followerOMLastAppliedIndex = followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex(); @@ -296,7 +294,7 @@ public void testInstallSnapshot(int numSnapshotsToCreate) throws Exception { // Verify RPC server is running GenericTestUtils.waitFor(() -> { return followerOM.isOmRpcServerRunning(); - }, 100, 5000); + }, 100, 30_000); assertLogCapture(logCapture, "Install Checkpoint is finished"); @@ -420,7 +418,7 @@ public void testInstallIncrementalSnapshot(@TempDir Path tempDir) // Wait the follower download the snapshot,but get stuck by injector GenericTestUtils.waitFor(() -> { return followerOM.getOmSnapshotProvider().getNumDownloaded() == 1; - }, 1000, 10000); + }, 1000, 30_000); // Get two incremental tarballs, adding new keys/snapshot for each. IncrementData firstIncrement = getNextIncrementalTarball(160, 2, leaderOM, @@ -445,7 +443,7 @@ public void testInstallIncrementalSnapshot(@TempDir Path tempDir) GenericTestUtils.waitFor(() -> { return followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex() >= leaderOMSnapshotIndex - 1; - }, 1000, 30000); + }, 1000, 30_000); assertEquals(3, followerOM.getOmSnapshotProvider().getNumDownloaded()); // Verify that the follower OM's DB contains the transactions which were @@ -480,7 +478,7 @@ public void testInstallIncrementalSnapshot(@TempDir Path tempDir) // Verify RPC server is running GenericTestUtils.waitFor(() -> { return followerOM.isOmRpcServerRunning(); - }, 100, 5000); + }, 100, 30_000); // Read & Write after snapshot installed. List newKeys = writeKeys(1); @@ -493,7 +491,7 @@ public void testInstallIncrementalSnapshot(@TempDir Path tempDir) } catch (IOException e) { throw new RuntimeException(e); } - }, 100, 10000); + }, 100, 30_000); // Verify follower candidate directory get cleaned String[] filesInCandidate = followerOM.getOmSnapshotProvider(). @@ -552,7 +550,7 @@ private IncrementData getNextIncrementalTarball( // by injector GenericTestUtils.waitFor(() -> followerOM.getOmSnapshotProvider().getNumDownloaded() == - expectedNumDownloads, 1000, 10000); + expectedNumDownloads, 1000, 30_000); assertTrue(followerOM.getOmRatisServer(). getLastAppliedTermIndex().getIndex() @@ -627,7 +625,7 @@ public void testInstallIncrementalSnapshotWithFailure() throws Exception { // Wait the follower download the snapshot,but get stuck by injector GenericTestUtils.waitFor(() -> { return followerOM.getOmSnapshotProvider().getNumDownloaded() == 1; - }, 1000, 10000); + }, 1000, 30_000); // Do some transactions, let leader OM take a new snapshot and purge the // old logs, so that follower must download the new snapshot again. @@ -644,7 +642,7 @@ public void testInstallIncrementalSnapshotWithFailure() throws Exception { // by injector GenericTestUtils.waitFor(() -> { return followerOM.getOmSnapshotProvider().getNumDownloaded() == 2; - }, 1000, 10000); + }, 1000, 30_000); // Corrupt the mixed checkpoint in the candidate DB dir File followerCandidateDir = followerOM.getOmSnapshotProvider(). @@ -675,7 +673,7 @@ public void testInstallIncrementalSnapshotWithFailure() throws Exception { GenericTestUtils.waitFor(() -> { return followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex() >= leaderOMSnapshotIndex - 1; - }, 1000, 10000); + }, 1000, 30_000); // Verify that the follower OM's DB contains the transactions which were // made while it was inactive. @@ -697,28 +695,30 @@ public void testInstallIncrementalSnapshotWithFailure() throws Exception { } // Verify the metrics + /* HDDS-8876 GenericTestUtils.waitFor(() -> { DBCheckpointMetrics dbMetrics = leaderOM.getMetrics().getDBCheckpointMetrics(); return dbMetrics.getLastCheckpointStreamingNumSSTExcluded() == 0; - }, 100, 10000); + }, 100, 30_000); GenericTestUtils.waitFor(() -> { DBCheckpointMetrics dbMetrics = leaderOM.getMetrics().getDBCheckpointMetrics(); return dbMetrics.getNumIncrementalCheckpoints() >= 1; - }, 100, 10000); + }, 100, 30_000); GenericTestUtils.waitFor(() -> { DBCheckpointMetrics dbMetrics = leaderOM.getMetrics().getDBCheckpointMetrics(); return dbMetrics.getNumCheckpoints() >= 3; - }, 100, 10000); + }, 100, 30_000); + */ // Verify RPC server is running GenericTestUtils.waitFor(() -> { return followerOM.isOmRpcServerRunning(); - }, 100, 5000); + }, 100, 30_000); // Read & Write after snapshot installed. List newKeys = writeKeys(1); @@ -731,7 +731,7 @@ public void testInstallIncrementalSnapshotWithFailure() throws Exception { } catch (IOException e) { throw new RuntimeException(e); } - }, 100, 10000); + }, 100, 30_000); // Verify follower candidate directory get cleaned String[] filesInCandidate = followerOM.getOmSnapshotProvider(). @@ -790,7 +790,7 @@ public void testInstallSnapshotWithClientWrite() throws Exception { GenericTestUtils.waitFor(() -> { return followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex() >= leaderOMSnapshotIndex - 1; - }, 100, 3000); + }, 100, 30_000); // Verify checkpoint installation was happened. String msg = "Reloaded OM state"; @@ -894,7 +894,7 @@ public void testInstallSnapshotWithClientRead() throws Exception { GenericTestUtils.waitFor(() -> { return followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex() >= leaderOMSnapshotIndex - 1; - }, 100, 3000); + }, 100, 30_000); long followerOMLastAppliedIndex = followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex(); @@ -1112,7 +1112,7 @@ private void assertLogCapture(GenericTestUtils.LogCapturer logCapture, throws InterruptedException, TimeoutException { GenericTestUtils.waitFor(() -> { return logCapture.getOutput().contains(msg); - }, 100, 5000); + }, 100, 30_000); } // Returns temp dir where tarball was untarred.