From ca68e8552efb7de3cdb997f516ecb0f811e38306 Mon Sep 17 00:00:00 2001 From: chungen0126 Date: Thu, 19 Dec 2024 03:28:11 +0800 Subject: [PATCH 01/16] HDDS-11959. Remove tests for non-Ratis SCM --- .../upgrade/TestDatanodeUpgradeToScmHA.java | 604 ------------------ .../hdds/scm/block/TestDeletedBlockLog.java | 2 - ...uration.java => TestSCMConfiguration.java} | 73 +-- .../TestStatefulServiceStateManagerImpl.java | 2 - .../hdds/scm/node/TestSCMNodeManager.java | 3 + ...SCMHAUnfinalizedStateValidationAction.java | 54 +- .../hdds/scm/TestSCMInstallSnapshot.java | 1 - .../hadoop/hdds/scm/TestSCMSnapshot.java | 1 - .../hdds/scm/TestStorageContainerManager.java | 55 +- .../TestSCMContainerManagerMetrics.java | 1 + .../node/TestDecommissionAndMaintenance.java | 2 + .../scm/storage/TestContainerCommandsEC.java | 2 + .../hadoop/ozone/MiniOzoneClusterImpl.java | 26 +- .../hadoop/ozone/MiniOzoneHAClusterImpl.java | 1 - .../client/rpc/TestContainerStateMachine.java | 2 + .../rpc/TestDeleteWithInAdequateDN.java | 5 + .../commandhandler/TestBlockDeletion.java | 4 + .../TestDeleteContainerHandler.java | 4 + .../ozone/recon/TestReconScmHASnapshot.java | 65 -- .../recon/TestReconScmNonHASnapshot.java | 64 -- .../hadoop/ozone/recon/TestReconTasks.java | 1 + .../shell/TestDeletedBlocksTxnShell.java | 2 - 22 files changed, 53 insertions(+), 921 deletions(-) delete mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToScmHA.java rename hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/{TestSCMHAConfiguration.java => TestSCMConfiguration.java} (80%) delete mode 100644 hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconScmHASnapshot.java delete mode 100644 hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconScmNonHASnapshot.java diff --git 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToScmHA.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToScmHA.java deleted file mode 100644 index d4a27e74cda..00000000000 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/upgrade/TestDatanodeUpgradeToScmHA.java +++ /dev/null @@ -1,604 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.ozone.container.upgrade; - -import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; -import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.hdds.scm.pipeline.MockPipeline; -import org.apache.hadoop.hdds.scm.pipeline.Pipeline; -import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature; -import org.apache.hadoop.ipc.RPC; -import org.apache.hadoop.ozone.container.common.SCMTestUtils; -import org.apache.hadoop.ozone.container.common.ScmTestMock; -import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; -import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; -import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; -import org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil; -import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; -import org.apache.hadoop.ozone.container.replication.ContainerImporter; -import org.apache.hadoop.ozone.container.replication.ContainerReplicationSource; -import org.apache.hadoop.ozone.container.replication.OnDemandContainerReplicationSource; -import org.apache.ozone.test.LambdaTestUtils; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.io.TempDir; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; - -import java.io.File; -import java.io.FileOutputStream; -import java.net.InetSocketAddress; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardCopyOption; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.UUID; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; - -import static 
org.apache.hadoop.ozone.container.replication.CopyContainerCompression.NO_COMPRESSION; -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -/** - * Tests upgrading a single datanode from pre-SCM HA volume format that used - * SCM ID to the post-SCM HA volume format using cluster ID. If SCM HA was - * already being used before the upgrade, there should be no changes. - */ -public class TestDatanodeUpgradeToScmHA { - @TempDir - private Path tempFolder; - - private DatanodeStateMachine dsm; - private ContainerDispatcher dispatcher; - private OzoneConfiguration conf; - private static final String CLUSTER_ID = "clusterID"; - private boolean scmHAAlreadyEnabled; - - private RPC.Server scmRpcServer; - private InetSocketAddress address; - private ScmTestMock scmServerImpl; - - private void setScmHAEnabled(boolean enableSCMHA) - throws Exception { - this.scmHAAlreadyEnabled = enableSCMHA; - conf = new OzoneConfiguration(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, scmHAAlreadyEnabled); - setup(); - } - - private void setup() throws Exception { - address = SCMTestUtils.getReuseableAddress(); - conf.setSocketAddr(ScmConfigKeys.OZONE_SCM_NAMES, address); - } - - @AfterEach - public void teardown() throws Exception { - if (scmRpcServer != null) { - scmRpcServer.stop(); - } - - if (dsm != null) { - dsm.close(); - } - } - - @ParameterizedTest(name = "{index}: scmHAAlreadyEnabled={0}") - @ValueSource(booleans = {true, false}) - public void testReadsDuringFinalization(boolean enableSCMHA) - throws Exception { - setScmHAEnabled(enableSCMHA); - // start DN and SCM - startScmServer(); - UpgradeTestHelper.addHddsVolume(conf, tempFolder); - dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, - HDDSLayoutFeature.INITIAL_VERSION.layoutVersion()); - 
dispatcher = dsm.getContainer().getDispatcher(); - final Pipeline pipeline = MockPipeline.createPipeline( - Collections.singletonList(dsm.getDatanodeDetails())); - - // Add data to read. - final long containerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); - ContainerProtos.WriteChunkRequestProto writeChunk = - UpgradeTestHelper.putBlock(dispatcher, containerID, pipeline); - UpgradeTestHelper.closeContainer(dispatcher, containerID, pipeline); - - // Create thread to keep reading during finalization. - ExecutorService executor = Executors.newFixedThreadPool(1); - Future readFuture = executor.submit(() -> { - // Layout version check should be thread safe. - while (!dsm.getLayoutVersionManager() - .isAllowed(HDDSLayoutFeature.SCM_HA)) { - UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); - } - // Make sure we can read after finalizing too. - UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); - return null; - }); - - dsm.finalizeUpgrade(); - // If there was a failure reading during the upgrade, the exception will - // be thrown here. - readFuture.get(); - } - - @ParameterizedTest(name = "{index}: scmHAAlreadyEnabled={0}") - @ValueSource(booleans = {true, false}) - public void testImportContainer(boolean enableSCMHA) throws Exception { - setScmHAEnabled(enableSCMHA); - // start DN and SCM - startScmServer(); - UpgradeTestHelper.addHddsVolume(conf, tempFolder); - dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, - HDDSLayoutFeature.INITIAL_VERSION.layoutVersion()); - dispatcher = dsm.getContainer().getDispatcher(); - final Pipeline pipeline = MockPipeline.createPipeline( - Collections.singletonList(dsm.getDatanodeDetails())); - - // Pre-export a container to continuously import and delete. 
- final long exportContainerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); - ContainerProtos.WriteChunkRequestProto exportWriteChunk = - UpgradeTestHelper.putBlock(dispatcher, exportContainerID, pipeline); - UpgradeTestHelper.closeContainer(dispatcher, exportContainerID, pipeline); - File exportedContainerFile = exportContainer(exportContainerID); - UpgradeTestHelper.deleteContainer(dispatcher, exportContainerID, pipeline); - - // Export another container to import while pre-finalized and read - // finalized. - final long exportContainerID2 = UpgradeTestHelper.addContainer(dispatcher, pipeline); - ContainerProtos.WriteChunkRequestProto exportWriteChunk2 = - UpgradeTestHelper.putBlock(dispatcher, exportContainerID2, pipeline); - UpgradeTestHelper.closeContainer(dispatcher, exportContainerID2, pipeline); - File exportedContainerFile2 = exportContainer(exportContainerID2); - UpgradeTestHelper.deleteContainer(dispatcher, exportContainerID2, pipeline); - - // Make sure we can import and read a container pre-finalized. - importContainer(exportContainerID2, exportedContainerFile2); - UpgradeTestHelper.readChunk(dispatcher, exportWriteChunk2, pipeline); - - // Now SCM and enough other DNs finalize to enable SCM HA. This DN is - // restarted with SCM HA config and gets a different SCM ID. - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); - changeScmID(); - - dsm = UpgradeTestHelper.restartDatanode(conf, dsm, true, tempFolder, address, - HDDSLayoutFeature.INITIAL_VERSION.layoutVersion(), true); - dispatcher = dsm.getContainer().getDispatcher(); - - // Make sure the existing container can be read. - UpgradeTestHelper.readChunk(dispatcher, exportWriteChunk2, pipeline); - - // Create thread to keep importing containers during the upgrade. - // Since the datanode's MLV is behind SCM's, container creation is not - // allowed. We will keep importing and deleting the same container since - // we cannot create new ones to import here. 
- ExecutorService executor = Executors.newFixedThreadPool(1); - Future importFuture = executor.submit(() -> { - // Layout version check should be thread safe. - while (!dsm.getLayoutVersionManager() - .isAllowed(HDDSLayoutFeature.SCM_HA)) { - importContainer(exportContainerID, exportedContainerFile); - UpgradeTestHelper.readChunk(dispatcher, exportWriteChunk, pipeline); - UpgradeTestHelper.deleteContainer(dispatcher, exportContainerID, pipeline); - } - // Make sure we can import after finalizing too. - importContainer(exportContainerID, exportedContainerFile); - UpgradeTestHelper.readChunk(dispatcher, exportWriteChunk, pipeline); - return null; - }); - - dsm.finalizeUpgrade(); - // If there was a failure importing during the upgrade, the exception will - // be thrown here. - importFuture.get(); - - // Make sure we can read the container that was imported while - // pre-finalized after finalizing. - UpgradeTestHelper.readChunk(dispatcher, exportWriteChunk2, pipeline); - } - - @ParameterizedTest(name = "{index}: scmHAAlreadyEnabled={0}") - @ValueSource(booleans = {true, false}) - public void testFailedVolumeDuringFinalization(boolean enableSCMHA) - throws Exception { - setScmHAEnabled(enableSCMHA); - /// SETUP /// - - startScmServer(); - String originalScmID = scmServerImpl.getScmId(); - File volume = UpgradeTestHelper.addHddsVolume(conf, tempFolder); - dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, - HDDSLayoutFeature.INITIAL_VERSION.layoutVersion()); - dispatcher = dsm.getContainer().getDispatcher(); - final Pipeline pipeline = MockPipeline.createPipeline( - Collections.singletonList(dsm.getDatanodeDetails())); - - /// PRE-FINALIZED: Write and Read from formatted volume /// - - assertEquals(1, - dsm.getContainer().getVolumeSet().getVolumesList().size()); - assertEquals(0, - dsm.getContainer().getVolumeSet().getFailedVolumesList().size()); - - // Add container with data, make sure it can be read and written. 
- final long containerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); - ContainerProtos.WriteChunkRequestProto writeChunk = - UpgradeTestHelper.putBlock(dispatcher, containerID, pipeline); - UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); - - checkPreFinalizedVolumePathID(volume, originalScmID, CLUSTER_ID); - checkContainerPathID(containerID, originalScmID, CLUSTER_ID); - - // FINALIZE: With failed volume /// - - failVolume(volume); - // Since volume is failed, container should be marked unhealthy. - // Finalization should proceed anyways. - UpgradeTestHelper.closeContainer(dispatcher, containerID, pipeline, - ContainerProtos.Result.CONTAINER_FILES_CREATE_ERROR); - State containerState = dsm.getContainer().getContainerSet() - .getContainer(containerID).getContainerState(); - assertEquals(State.UNHEALTHY, containerState); - dsm.finalizeUpgrade(); - LambdaTestUtils.await(2000, 500, - () -> dsm.getLayoutVersionManager() - .isAllowed(HDDSLayoutFeature.SCM_HA)); - - /// FINALIZED: Volume marked failed but gets restored on disk /// - - // Check that volume is marked failed during finalization. - assertEquals(0, - dsm.getContainer().getVolumeSet().getVolumesList().size()); - assertEquals(1, - dsm.getContainer().getVolumeSet().getFailedVolumesList().size()); - - // Since the volume was out during the upgrade, it should maintain its - // original format. - checkPreFinalizedVolumePathID(volume, originalScmID, CLUSTER_ID); - checkContainerPathID(containerID, originalScmID, CLUSTER_ID); - - // Now that we are done finalizing, restore the volume. - restoreVolume(volume); - // After restoring the failed volume, its containers are readable again. - // However, since it is marked as failed no containers can be created or - // imported to it. - // This should log a warning about reading from an unhealthy container - // but otherwise proceed successfully. 
- UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); - - /// FINALIZED: Restart datanode to upgrade the failed volume /// - - dsm = UpgradeTestHelper.restartDatanode(conf, dsm, true, tempFolder, address, - HDDSLayoutFeature.SCM_HA.layoutVersion(), false); - dispatcher = dsm.getContainer().getDispatcher(); - - assertEquals(1, - dsm.getContainer().getVolumeSet().getVolumesList().size()); - assertEquals(0, - dsm.getContainer().getVolumeSet().getFailedVolumesList().size()); - - checkFinalizedVolumePathID(volume, originalScmID, CLUSTER_ID); - checkContainerPathID(containerID, originalScmID, CLUSTER_ID); - - // Read container from before upgrade. The upgrade required it to be closed. - UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); - // Write and read container after upgrade. - long newContainerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); - ContainerProtos.WriteChunkRequestProto newWriteChunk = - UpgradeTestHelper.putBlock(dispatcher, newContainerID, pipeline); - UpgradeTestHelper.readChunk(dispatcher, newWriteChunk, pipeline); - // The new container should use cluster ID in its path. - // The volume it is placed on is up to the implementation. 
- checkContainerPathID(newContainerID, CLUSTER_ID); - } - - @ParameterizedTest(name = "{index}: scmHAAlreadyEnabled={0}") - @ValueSource(booleans = {true, false}) - public void testFormattingNewVolumes(boolean enableSCMHA) throws Exception { - setScmHAEnabled(enableSCMHA); - /// SETUP /// - - startScmServer(); - String originalScmID = scmServerImpl.getScmId(); - File preFinVolume1 = UpgradeTestHelper.addHddsVolume(conf, tempFolder); - dsm = UpgradeTestHelper.startPreFinalizedDatanode(conf, tempFolder, dsm, address, - HDDSLayoutFeature.INITIAL_VERSION.layoutVersion()); - dispatcher = dsm.getContainer().getDispatcher(); - final Pipeline pipeline = MockPipeline.createPipeline( - Collections.singletonList(dsm.getDatanodeDetails())); - - /// PRE-FINALIZED: Write and Read from formatted volume /// - - assertEquals(1, - dsm.getContainer().getVolumeSet().getVolumesList().size()); - assertEquals(0, - dsm.getContainer().getVolumeSet().getFailedVolumesList().size()); - - // Add container with data, make sure it can be read and written. - final long containerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); - ContainerProtos.WriteChunkRequestProto writeChunk = - UpgradeTestHelper.putBlock(dispatcher, containerID, pipeline); - UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); - - checkPreFinalizedVolumePathID(preFinVolume1, originalScmID, CLUSTER_ID); - checkContainerPathID(containerID, originalScmID, CLUSTER_ID); - - /// PRE-FINALIZED: Restart with SCM HA enabled and new SCM ID /// - - // Now SCM and enough other DNs finalize to enable SCM HA. This DN is - // restarted with SCM HA config and gets a different SCM ID. - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); - changeScmID(); - // A new volume is added that must be formatted. 
- File preFinVolume2 = UpgradeTestHelper.addHddsVolume(conf, tempFolder); - - dsm = UpgradeTestHelper.restartDatanode(conf, dsm, true, tempFolder, address, - HDDSLayoutFeature.INITIAL_VERSION.layoutVersion(), true); - dispatcher = dsm.getContainer().getDispatcher(); - - assertEquals(2, - dsm.getContainer().getVolumeSet().getVolumesList().size()); - assertEquals(0, - dsm.getContainer().getVolumeSet().getFailedVolumesList().size()); - - // Because DN mlv would be behind SCM mlv, only reads are allowed. - UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); - - // On restart, there should have been no changes to the paths already used. - checkPreFinalizedVolumePathID(preFinVolume1, originalScmID, CLUSTER_ID); - checkContainerPathID(containerID, originalScmID, CLUSTER_ID); - // No new containers can be created on this volume since SCM MLV is ahead - // of DN MLV at this point. - // cluster ID should always be used for the new volume since SCM HA is now - // enabled. - checkVolumePathID(preFinVolume2, CLUSTER_ID); - - /// FINALIZE /// - - UpgradeTestHelper.closeContainer(dispatcher, containerID, pipeline); - dsm.finalizeUpgrade(); - LambdaTestUtils.await(2000, 500, - () -> dsm.getLayoutVersionManager() - .isAllowed(HDDSLayoutFeature.SCM_HA)); - - /// FINALIZED: Add a new volume and check its formatting /// - - // Add a new volume that should be formatted with cluster ID only, since - // DN has finalized. - File finVolume = UpgradeTestHelper.addHddsVolume(conf, tempFolder); - // Yet another SCM ID is received this time, but it should not matter. 
- changeScmID(); - - dsm = UpgradeTestHelper.restartDatanode(conf, dsm, true, tempFolder, address, - HDDSLayoutFeature.SCM_HA.layoutVersion(), false); - dispatcher = dsm.getContainer().getDispatcher(); - - assertEquals(3, - dsm.getContainer().getVolumeSet().getVolumesList().size()); - assertEquals(0, - dsm.getContainer().getVolumeSet().getFailedVolumesList().size()); - - checkFinalizedVolumePathID(preFinVolume1, originalScmID, CLUSTER_ID); - checkVolumePathID(preFinVolume2, CLUSTER_ID); - checkContainerPathID(containerID, originalScmID, CLUSTER_ID); - // New volume should have been formatted with cluster ID only, since the - // datanode is finalized. - checkVolumePathID(finVolume, CLUSTER_ID); - - /// FINALIZED: Read old data and write + read new data /// - - // Read container from before upgrade. The upgrade required it to be closed. - UpgradeTestHelper.readChunk(dispatcher, writeChunk, pipeline); - // Write and read container after upgrade. - long newContainerID = UpgradeTestHelper.addContainer(dispatcher, pipeline); - ContainerProtos.WriteChunkRequestProto newWriteChunk = - UpgradeTestHelper.putBlock(dispatcher, newContainerID, pipeline); - UpgradeTestHelper.readChunk(dispatcher, newWriteChunk, pipeline); - // The new container should use cluster ID in its path. - // The volume it is placed on is up to the implementation. 
- checkContainerPathID(newContainerID, CLUSTER_ID); - } - - /// CHECKS FOR TESTING /// - - public void checkContainerPathID(long containerID, String scmID, - String clusterID) { - if (scmHAAlreadyEnabled) { - checkContainerPathID(containerID, clusterID); - } else { - checkContainerPathID(containerID, scmID); - } - } - - public void checkContainerPathID(long containerID, String expectedID) { - KeyValueContainerData data = - (KeyValueContainerData) dsm.getContainer().getContainerSet() - .getContainer(containerID).getContainerData(); - assertThat(data.getChunksPath()).contains(expectedID); - assertThat(data.getMetadataPath()).contains(expectedID); - } - - public void checkFinalizedVolumePathID(File volume, String scmID, - String clusterID) throws Exception { - - if (scmHAAlreadyEnabled) { - checkVolumePathID(volume, clusterID); - } else { - List subdirs = getHddsSubdirs(volume); - File hddsRoot = getHddsRoot(volume); - - // Volume should have SCM ID and cluster ID directory, where cluster ID - // is a symlink to SCM ID. - assertEquals(2, subdirs.size()); - - File scmIDDir = new File(hddsRoot, scmID); - assertThat(subdirs).contains(scmIDDir); - - File clusterIDDir = new File(hddsRoot, CLUSTER_ID); - assertThat(subdirs).contains(clusterIDDir); - assertTrue(Files.isSymbolicLink(clusterIDDir.toPath())); - Path symlinkTarget = Files.readSymbolicLink(clusterIDDir.toPath()); - assertEquals(scmID, symlinkTarget.toString()); - } - } - - public void checkPreFinalizedVolumePathID(File volume, String scmID, - String clusterID) { - - if (scmHAAlreadyEnabled) { - checkVolumePathID(volume, clusterID); - } else { - checkVolumePathID(volume, scmID); - } - - } - - public void checkVolumePathID(File volume, String expectedID) { - List subdirs; - File hddsRoot; - if (dnThinksVolumeFailed(volume)) { - // If the volume is failed, read from the failed location it was - // moved to. 
- subdirs = getHddsSubdirs(getFailedVolume(volume)); - hddsRoot = getHddsRoot(getFailedVolume(volume)); - } else { - subdirs = getHddsSubdirs(volume); - hddsRoot = getHddsRoot(volume); - } - - // Volume should only have the specified ID directory. - assertEquals(1, subdirs.size()); - File idDir = new File(hddsRoot, expectedID); - assertThat(subdirs).contains(idDir); - } - - public List getHddsSubdirs(File volume) { - File[] subdirsArray = getHddsRoot(volume).listFiles(File::isDirectory); - assertNotNull(subdirsArray); - return Arrays.asList(subdirsArray); - } - - public File getHddsRoot(File volume) { - return new File(HddsVolumeUtil.getHddsRoot(volume.getAbsolutePath())); - } - - /// CLUSTER OPERATIONS /// - - private void startScmServer() throws Exception { - scmServerImpl = new ScmTestMock(CLUSTER_ID); - scmRpcServer = SCMTestUtils.startScmRpcServer(conf, - scmServerImpl, address, 10); - } - - /** - * Updates the SCM ID on the SCM server. Datanode will not be aware of this - * until {@link UpgradeTestHelper#callVersionEndpointTask} is called. - * @return the new scm ID. - */ - private String changeScmID() { - String scmID = UUID.randomUUID().toString(); - scmServerImpl.setScmId(scmID); - return scmID; - } - - /// CONTAINER OPERATIONS /// - - /** - * Exports the specified container to a temporary file and returns the file. - */ - private File exportContainer(long containerId) throws Exception { - final ContainerReplicationSource replicationSource = - new OnDemandContainerReplicationSource( - dsm.getContainer().getController()); - - replicationSource.prepare(containerId); - - File destination = - Files.createFile(tempFolder.resolve("destFile" + containerId)).toFile(); - try (FileOutputStream fos = new FileOutputStream(destination)) { - replicationSource.copyData(containerId, fos, NO_COMPRESSION); - } - return destination; - } - - /** - * Imports the container found in {@code source} to the datanode with the ID - * {@code containerID}. 
- */ - private void importContainer(long containerID, File source) throws Exception { - ContainerImporter replicator = - new ContainerImporter(dsm.getConf(), - dsm.getContainer().getContainerSet(), - dsm.getContainer().getController(), - dsm.getContainer().getVolumeSet()); - - File tempFile = Files.createFile( - tempFolder.resolve(ContainerUtils.getContainerTarName(containerID))) - .toFile(); - Files.copy(source.toPath(), tempFile.toPath(), - StandardCopyOption.REPLACE_EXISTING); - replicator.importContainer(containerID, tempFile.toPath(), null, - NO_COMPRESSION); - } - - /// VOLUME OPERATIONS /// - - /** - * Renames the specified volume directory so it will appear as failed to - * the datanode. - */ - public void failVolume(File volume) { - File failedVolume = getFailedVolume(volume); - assertTrue(volume.renameTo(failedVolume)); - } - - /** - * Convert the specified volume from its failed name back to its original - * name. The File passed should be the original volume path, not the one it - * was renamed to to fail it. - */ - public void restoreVolume(File volume) { - File failedVolume = getFailedVolume(volume); - assertTrue(failedVolume.renameTo(volume)); - } - - /** - * @return The file name that will be used to rename a volume to fail it. - */ - public File getFailedVolume(File volume) { - return new File(volume.getParent(), volume.getName() + "-failed"); - } - - /** - * Checks whether the datanode thinks the volume has failed. - * This could be outdated information if the volume was restored already - * and the datanode has not been restarted since then. 
- */ - public boolean dnThinksVolumeFailed(File volume) { - return dsm.getContainer().getVolumeSet().getFailedVolumesList().stream() - .anyMatch(v -> - getHddsRoot(v.getStorageDir()).equals(getHddsRoot(volume))); - } -} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java index 2a012cbe180..4fb323d7451 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java @@ -26,7 +26,6 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; -import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.HddsTestUtils; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerManager; @@ -112,7 +111,6 @@ public class TestDeletedBlockLog { @BeforeEach public void setup() throws Exception { conf = new OzoneConfiguration(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.setInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath()); replicationManager = mock(ReplicationManager.class); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAConfiguration.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMConfiguration.java similarity index 80% rename from hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAConfiguration.java rename to hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMConfiguration.java index 75a943ee8da..2d9a18c5a8e 100644 --- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAConfiguration.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMConfiguration.java @@ -18,13 +18,11 @@ package org.apache.hadoop.hdds.scm.ha; import org.apache.hadoop.hdds.HddsConfigKeys; -import org.apache.hadoop.hdds.conf.ConfigurationException; import org.apache.hadoop.hdds.conf.DefaultConfigManager; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.ScmRatisServerConfig; import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; -import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.ozone.common.Storage; @@ -35,13 +33,10 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; import java.io.File; import java.io.IOException; import java.net.InetSocketAddress; -import java.util.UUID; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_ADDRESS_KEY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_CLIENT_ADDRESS_KEY; @@ -63,8 +58,6 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_SECURITY_SERVICE_PORT_KEY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_DIRS; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -72,7 +65,7 @@ /** * Test for SCM HA-related configuration. 
*/ -class TestSCMHAConfiguration { +class TestSCMConfiguration { private OzoneConfiguration conf; @TempDir private File tempDir; @@ -85,7 +78,7 @@ void setup() { } @Test - public void testSCMHAConfig() throws Exception { + public void testSCMConfig() throws Exception { String scmServiceId = "scmserviceId"; conf.set(ScmConfigKeys.OZONE_SCM_SERVICE_IDS_KEY, scmServiceId); @@ -225,7 +218,7 @@ public void testSCMHAConfig() throws Exception { @Test - public void testHAWithSamePortConfig() throws Exception { + public void testSamePortConfig() throws Exception { String scmServiceId = "scmserviceId"; conf.set(ScmConfigKeys.OZONE_SCM_SERVICE_IDS_KEY, scmServiceId); @@ -301,25 +294,7 @@ public void testHAWithSamePortConfig() throws Exception { } @Test - public void testRatisEnabledDefaultConfigWithoutInitializedSCM() - throws IOException { - SCMStorageConfig scmStorageConfig = mock(SCMStorageConfig.class); - when(scmStorageConfig.getState()).thenReturn(Storage.StorageState.NOT_INITIALIZED); - SCMHANodeDetails.loadSCMHAConfig(conf, scmStorageConfig); - assertEquals(SCMHAUtils.isSCMHAEnabled(conf), - ScmConfigKeys.OZONE_SCM_HA_ENABLE_DEFAULT); - DefaultConfigManager.clearDefaultConfigs(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, false); - SCMHANodeDetails.loadSCMHAConfig(conf, scmStorageConfig); - assertFalse(SCMHAUtils.isSCMHAEnabled(conf)); - DefaultConfigManager.clearDefaultConfigs(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); - SCMHANodeDetails.loadSCMHAConfig(conf, scmStorageConfig); - assertTrue(SCMHAUtils.isSCMHAEnabled(conf)); - } - - @Test - public void testRatisEnabledDefaultConfigWithInitializedSCM() + public void testDefaultConfigWithInitializedSCM() throws IOException { SCMStorageConfig scmStorageConfig = mock(SCMStorageConfig.class); when(scmStorageConfig.getState()) @@ -333,44 +308,4 @@ public void testRatisEnabledDefaultConfigWithInitializedSCM() DefaultConfigManager.clearDefaultConfigs(); 
assertTrue(SCMHAUtils.isSCMHAEnabled(conf)); } - - @Test - public void testRatisEnabledDefaultConflictConfigWithInitializedSCM() { - SCMStorageConfig scmStorageConfig = mock(SCMStorageConfig.class); - when(scmStorageConfig.getState()) - .thenReturn(Storage.StorageState.INITIALIZED); - when(scmStorageConfig.isSCMHAEnabled()).thenReturn(true); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, false); - assertThrows(ConfigurationException.class, - () -> SCMHANodeDetails.loadSCMHAConfig(conf, scmStorageConfig)); - } - - @ParameterizedTest - @ValueSource(booleans = {true, false}) - void testHAConfig(boolean ratisEnabled) throws IOException { - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, ratisEnabled); - SCMStorageConfig scmStorageConfig = newStorageConfig(ratisEnabled); - StorageContainerManager.scmInit(conf, scmStorageConfig.getClusterID()); - assertEquals(ratisEnabled, DefaultConfigManager.getValue( - ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, !ratisEnabled)); - } - - @Test - void testInvalidHAConfig() throws IOException { - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, false); - SCMStorageConfig scmStorageConfig = newStorageConfig(true); - String clusterID = scmStorageConfig.getClusterID(); - assertThrows(ConfigurationException.class, - () -> StorageContainerManager.scmInit(conf, clusterID)); - } - - private SCMStorageConfig newStorageConfig( - boolean ratisEnabled) throws IOException { - final SCMStorageConfig scmStorageConfig = new SCMStorageConfig(conf); - scmStorageConfig.setClusterId(UUID.randomUUID().toString()); - scmStorageConfig.setSCMHAFlag(ratisEnabled); - scmStorageConfig.initialize(); - return scmStorageConfig; - } - } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestStatefulServiceStateManagerImpl.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestStatefulServiceStateManagerImpl.java index 4e69f46b6e9..33da298423d 100644 --- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestStatefulServiceStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestStatefulServiceStateManagerImpl.java @@ -20,7 +20,6 @@ import com.google.protobuf.ByteString; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition; import org.apache.hadoop.hdds.utils.db.DBStore; import org.apache.hadoop.hdds.utils.db.DBStoreBuilder; @@ -48,7 +47,6 @@ public class TestStatefulServiceStateManagerImpl { @BeforeEach void setup(@TempDir File testDir) throws IOException { conf = SCMTestUtils.getConf(testDir); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); dbStore = DBStoreBuilder.createDBStore(conf, SCMDBDefinition.get()); statefulServiceConfig = SCMDBDefinition.STATEFUL_SERVICE_CONFIG.getTable(dbStore); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java index 568c11c541c..d92934cde8d 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java @@ -77,6 +77,7 @@ import org.apache.hadoop.util.Time; import org.apache.ozone.test.GenericTestUtils; import org.apache.hadoop.test.PathUtils; +import org.apache.ozone.test.tag.Unhealthy; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -277,6 +278,7 @@ public void testGetLastHeartbeatTimeDiff() throws Exception { * @throws TimeoutException */ @Test + @Unhealthy("HDDS-11986") public void testScmLayoutOnHeartbeat() throws Exception { OzoneConfiguration conf = getConf(); conf.setTimeDuration(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, @@ 
-394,6 +396,7 @@ private void assertPipelineClosedAfterLayoutHeartbeat( * @throws TimeoutException */ @Test + @Unhealthy("HDDS-11986") public void testScmLayoutOnRegister() throws Exception { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestSCMHAUnfinalizedStateValidationAction.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestSCMHAUnfinalizedStateValidationAction.java index 8b4bc906e0d..91dfaa1dafb 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestSCMHAUnfinalizedStateValidationAction.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestSCMHAUnfinalizedStateValidationAction.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hdds.scm.upgrade; -import org.apache.hadoop.hdds.conf.ConfigurationException; import org.apache.hadoop.hdds.conf.DefaultConfigManager; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.scm.HddsTestUtils; @@ -26,19 +25,16 @@ import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature; import org.apache.hadoop.ozone.OzoneConfigKeys; -import org.apache.hadoop.ozone.upgrade.UpgradeException; import org.apache.hadoop.ozone.upgrade.UpgradeFinalizer; import org.apache.ratis.util.ExitUtils; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.CsvSource; import java.nio.file.Path; import java.util.UUID; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; /** @@ -62,20 +58,12 @@ public static void setup() { ExitUtils.disableSystemExit(); } - @ParameterizedTest - @CsvSource({ - "true, true", - "true, false", - "false, true", - 
"false, false", - }) - public void testUpgrade(boolean haEnabledBefore, - boolean haEnabledPreFinalized, @TempDir Path dataPath) throws Exception { + @Test + public void testUpgrade(@TempDir Path dataPath) throws Exception { // Write version file for original version. OzoneConfiguration conf = new OzoneConfiguration(); conf.setInt(ScmConfig.ConfigStrings.HDDS_SCM_INIT_DEFAULT_LAYOUT_VERSION, HDDSLayoutFeature.INITIAL_VERSION.layoutVersion()); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, haEnabledBefore); conf.set(ScmConfigKeys.OZONE_SCM_DB_DIRS, dataPath.toString()); conf.set(OzoneConfigKeys.OZONE_METADATA_DIRS, dataPath.toString()); // This init should always succeed, since SCM is not pre-finalized yet. @@ -83,43 +71,17 @@ public void testUpgrade(boolean haEnabledBefore, boolean initResult1 = StorageContainerManager.scmInit(conf, CLUSTER_ID); assertTrue(initResult1); - // Set up new pre-finalized SCM. - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, - haEnabledPreFinalized); - /* Clusters from Ratis SCM -> Non Ratis SCM - Ratis SCM -> Non Ratis SCM not supported - */ - if (haEnabledPreFinalized != haEnabledBefore) { - if (haEnabledBefore) { - assertThrows(ConfigurationException.class, - () -> StorageContainerManager.scmInit(conf, CLUSTER_ID)); - } else { - assertThrows(UpgradeException.class, - () -> StorageContainerManager.scmInit(conf, CLUSTER_ID)); - } - return; - } StorageContainerManager scm = HddsTestUtils.getScm(conf); assertEquals(UpgradeFinalizer.Status.FINALIZATION_REQUIRED, scm.getFinalizationManager().getUpgradeFinalizer().getStatus()); - final boolean shouldFail = !haEnabledBefore && haEnabledPreFinalized; + DefaultConfigManager.clearDefaultConfigs(); - if (shouldFail) { - // Start on its own should fail. - assertThrows(UpgradeException.class, scm::start); + boolean initResult2 = StorageContainerManager.scmInit(conf, CLUSTER_ID); + assertTrue(initResult2); + scm.start(); + scm.stop(); - // Init followed by start should both fail. 
- // Init is not necessary here, but is allowed to be run. - assertThrows(UpgradeException.class, - () -> StorageContainerManager.scmInit(conf, CLUSTER_ID)); - assertThrows(UpgradeException.class, scm::start); - } else { - boolean initResult2 = StorageContainerManager.scmInit(conf, CLUSTER_ID); - assertTrue(initResult2); - scm.start(); - scm.stop(); - } } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMInstallSnapshot.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMInstallSnapshot.java index e90c576e8dd..5027f7fc37b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMInstallSnapshot.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMInstallSnapshot.java @@ -66,7 +66,6 @@ public class TestSCMInstallSnapshot { @BeforeAll static void setup(@TempDir Path tempDir) throws Exception { conf = new OzoneConfiguration(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, "10s"); conf.setLong(ScmConfigKeys.OZONE_SCM_HA_RATIS_SNAPSHOT_THRESHOLD, 1L); conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_SNAPSHOT_DIR, tempDir.toString()); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMSnapshot.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMSnapshot.java index 0375d83baaf..d0ad8222f60 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMSnapshot.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMSnapshot.java @@ -44,7 +44,6 @@ public class TestSCMSnapshot { @BeforeAll public static void setup() throws Exception { conf = new OzoneConfiguration(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, "10s"); 
conf.setLong(ScmConfigKeys.OZONE_SCM_HA_RATIS_SNAPSHOT_THRESHOLD, 1L); cluster = MiniOzoneCluster diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java index 94c8f914294..a66d6aba30a 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java @@ -92,12 +92,12 @@ import org.apache.hadoop.ozone.protocol.commands.DeleteBlocksCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.Time; import org.apache.log4j.Level; import org.apache.log4j.LogManager; import org.apache.ozone.test.GenericTestUtils; +import org.apache.ozone.test.tag.Unhealthy; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.server.RaftServerConfigKeys; @@ -141,16 +141,14 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION; import static org.apache.hadoop.hdds.scm.HddsTestUtils.mockRemoteUser; import static org.apache.hadoop.hdds.scm.HddsWhiteboxTestUtils.setInternalState; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; -import static org.apache.ozone.test.GenericTestUtils.PortAllocator.getFreePort; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; -import static 
org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.any; @@ -164,6 +162,7 @@ * Test class that exercises the StorageContainerManager. */ @Timeout(900) +@Unhealthy public class TestStorageContainerManager { private static final String LOCALHOST_IP = "127.0.0.1"; private static XceiverClientManager xceiverClientManager; @@ -367,7 +366,9 @@ public void testBlockDeletionTransactions() throws Exception { @Test public void testOldDNRegistersToReInitialisedSCM() throws Exception { + LogManager.getLogger(HeartbeatEndpointTask.class).setLevel(Level.DEBUG); OzoneConfiguration conf = new OzoneConfiguration(); + conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, true); conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1000, TimeUnit.MILLISECONDS); conf.setTimeDuration(ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 3000, TimeUnit.MILLISECONDS); @@ -402,7 +403,6 @@ public void testOldDNRegistersToReInitialisedSCM() throws Exception { GenericTestUtils.LogCapturer scmDnHBDispatcherLog = GenericTestUtils.LogCapturer.captureLogs( SCMDatanodeHeartbeatDispatcher.LOG); - LogManager.getLogger(HeartbeatEndpointTask.class).setLevel(Level.DEBUG); GenericTestUtils.LogCapturer heartbeatEndpointTaskLog = GenericTestUtils.LogCapturer.captureLogs(HeartbeatEndpointTask.LOG); GenericTestUtils.LogCapturer versionEndPointTaskLog = @@ -559,6 +559,7 @@ private Map> createDeleteTXLog( @Test public void testSCMInitialization(@TempDir Path tempDir) throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); + conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, "10s"); Path scmPath = 
tempDir.resolve("scm-meta"); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString()); @@ -574,21 +575,6 @@ public void testSCMInitialization(@TempDir Path tempDir) throws Exception { assertEquals(NodeType.SCM, scmStore.getNodeType()); assertEquals(testClusterId, scmStore.getClusterID()); assertTrue(scmStore.isSCMHAEnabled()); - } - - @Test - public void testSCMInitializationWithHAEnabled(@TempDir Path tempDir) throws Exception { - OzoneConfiguration conf = new OzoneConfiguration(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); - conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, "10s"); - Path scmPath = tempDir.resolve("scm-meta"); - conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString()); - - final UUID clusterId = UUID.randomUUID(); - // This will initialize SCM - StorageContainerManager.scmInit(conf, clusterId.toString()); - SCMStorageConfig scmStore = new SCMStorageConfig(conf); - assertTrue(scmStore.isSCMHAEnabled()); validateRatisGroupExists(conf, clusterId.toString()); } @@ -953,35 +939,6 @@ public void testIncrementalContainerReportQueue() throws Exception { containerReportExecutors.close(); } - @Test - public void testNonRatisToRatis() - throws IOException, AuthenticationException, InterruptedException, - TimeoutException { - final OzoneConfiguration conf = new OzoneConfiguration(); - try (MiniOzoneCluster cluster = MiniOzoneCluster.newBuilder(conf) - .setNumDatanodes(3) - .build()) { - final StorageContainerManager nonRatisSCM = cluster - .getStorageContainerManager(); - assertNull(nonRatisSCM.getScmHAManager().getRatisServer()); - assertFalse(nonRatisSCM.getScmStorageConfig().isSCMHAEnabled()); - nonRatisSCM.stop(); - nonRatisSCM.join(); - - DefaultConfigManager.clearDefaultConfigs(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); - StorageContainerManager.scmInit(conf, cluster.getClusterId()); - conf.setInt(ScmConfigKeys.OZONE_SCM_DATANODE_PORT_KEY, getFreePort()); - 
conf.unset(ScmConfigKeys.OZONE_SCM_DATANODE_ADDRESS_KEY); - cluster.restartStorageContainerManager(false); - - final StorageContainerManager ratisSCM = cluster - .getStorageContainerManager(); - assertNotNull(ratisSCM.getScmHAManager().getRatisServer()); - assertTrue(ratisSCM.getScmStorageConfig().isSCMHAEnabled()); - } - } - private void addTransactions(StorageContainerManager scm, DeletedBlockLog delLog, Map> containerBlocksMap) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/metrics/TestSCMContainerManagerMetrics.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/metrics/TestSCMContainerManagerMetrics.java index 14875781b98..84b1f1610a1 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/metrics/TestSCMContainerManagerMetrics.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/metrics/TestSCMContainerManagerMetrics.java @@ -68,6 +68,7 @@ public void setup() throws Exception { conf.setBoolean(HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false); cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1).build(); cluster.waitForClusterToBeReady(); + cluster.waitForPipelineTobeReady(HddsProtos.ReplicationFactor.ONE, 30000); client = cluster.newClient(); scm = cluster.getStorageContainerManager(); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java index 100ea9394a9..80b3748708e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java @@ -43,6 +43,7 @@ import org.apache.hadoop.ozone.client.OzoneClient; import 
org.apache.ozone.test.GenericTestUtils; import org.apache.ozone.test.tag.Flaky; +import org.apache.ozone.test.tag.Unhealthy; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeEach; @@ -93,6 +94,7 @@ * Test from the scmclient for decommission and maintenance. */ @Flaky({"HDDS-6028", "HDDS-6049"}) +@Unhealthy public class TestDecommissionAndMaintenance { private static final Logger LOG = LoggerFactory.getLogger(TestDecommissionAndMaintenance.class); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java index 1b7eb837cf8..e213af3db16 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java @@ -78,6 +78,7 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.ozone.test.GenericTestUtils; +import org.apache.ozone.test.tag.Unhealthy; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; @@ -125,6 +126,7 @@ /** * This class tests container commands on EC containers. 
*/ +@Unhealthy public class TestContainerCommandsEC { private static final String ANY_USER = "any"; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index 30e41764d3f..0b60708f10b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -40,7 +40,6 @@ import com.amazonaws.regions.Regions; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.annotation.InterfaceAudience; import org.apache.hadoop.hdds.client.RatisReplicationConfig; @@ -50,7 +49,6 @@ import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.HddsTestUtils; import org.apache.hadoop.hdds.scm.ha.SCMHANodeDetails; -import org.apache.hadoop.hdds.scm.ha.SCMHAUtils; import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl; import org.apache.hadoop.hdds.scm.node.NodeStatus; import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; @@ -191,10 +189,8 @@ protected void setConf(OzoneConfiguration newConf) { public void waitForSCMToBeReady() throws TimeoutException, InterruptedException { - if (SCMHAUtils.isSCMHAEnabled(conf)) { - GenericTestUtils.waitFor(scm::checkLeader, - 1000, waitForClusterToBeReadyTimeout); - } + GenericTestUtils.waitFor(scm::checkLeader, + 1000, waitForClusterToBeReadyTimeout); } public StorageContainerManager getActiveSCM() { @@ -755,15 +751,13 @@ protected void initializeScmStorage(SCMStorageConfig scmStore) //TODO: HDDS-6897 //Disabling Ratis for only of MiniOzoneClusterImpl. 
//MiniOzoneClusterImpl doesn't work with Ratis enabled SCM - if (StringUtils.isNotEmpty( - conf.get(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY)) - && SCMHAUtils.isSCMHAEnabled(conf)) { - scmStore.setSCMHAFlag(true); - scmStore.persistCurrentState(); - SCMRatisServerImpl.initialize(clusterId, scmId, - SCMHANodeDetails.loadSCMHAConfig(conf, scmStore) - .getLocalNodeDetails(), conf); - } + + scmStore.setSCMHAFlag(true); + scmStore.persistCurrentState(); + SCMRatisServerImpl.initialize(clusterId, scmId, + SCMHANodeDetails.loadSCMHAConfig(conf, scmStore) + .getLocalNodeDetails(), conf); + } void initializeOmStorage(OMStorage omStorage) throws IOException { @@ -876,6 +870,8 @@ protected void configureSCM() { localhostWithFreePort()); conf.set(HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT, "3s"); + conf.setInt(ScmConfigKeys.OZONE_SCM_RATIS_PORT_KEY, getFreePort()); + conf.setInt(ScmConfigKeys.OZONE_SCM_GRPC_PORT_KEY, getFreePort()); } private void configureOM() { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java index 9df70f1b7c2..15269330282 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java @@ -562,7 +562,6 @@ protected SCMHAService createSCMService() OzoneConfiguration scmConfig = new OzoneConfiguration(conf); scmConfig.set(OZONE_METADATA_DIRS, metaDirPath); scmConfig.set(ScmConfigKeys.OZONE_SCM_NODE_ID_KEY, nodeId); - scmConfig.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); configureSCM(); if (i == 1) { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java index 
dc00b0acc55..7c1c6874c1b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachine.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.OzoneClientConfig; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClientTestImpl; @@ -114,6 +115,7 @@ public void setup() throws Exception { .build(); cluster.setWaitForClusterToBeReadyTimeout(300000); cluster.waitForClusterToBeReady(); + cluster.waitForPipelineTobeReady(HddsProtos.ReplicationFactor.ONE, 30000); cluster.getOzoneManager().startSecretManager(); //the easiest way to create an open container is creating a key client = OzoneClientFactory.getRpcClient(conf); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java index bb42d8a0f57..72b1bff9371 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java @@ -65,6 +65,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY; import static 
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_DESTROY_TIMEOUT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; @@ -73,6 +74,8 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.apache.ozone.test.tag.Unhealthy; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.BeforeAll; @@ -81,6 +84,7 @@ /** * Tests delete key operation with inadequate datanodes. */ +@Unhealthy public class TestDeleteWithInAdequateDN { private static MiniOzoneCluster cluster; @@ -103,6 +107,7 @@ public static void init() throws Exception { conf = new OzoneConfiguration(); + conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, false); conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 100, TimeUnit.MILLISECONDS); conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 200, diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java index cf7d26847bb..c6d7b4ad983 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java @@ -80,6 +80,7 @@ import org.apache.ozone.test.GenericTestUtils; import org.apache.ozone.test.GenericTestUtils.LogCapturer; import org.apache.ozone.test.tag.Flaky; +import org.apache.ozone.test.tag.Unhealthy; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import 
org.junit.jupiter.api.Test; @@ -95,6 +96,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_EXPIRED_CONTAINER_REPLICA_OP_SCRUB_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; @@ -110,6 +112,7 @@ /** * Tests for Block deletion. */ +@Unhealthy public class TestBlockDeletion { public static final Logger LOG = @@ -133,6 +136,7 @@ public void init() throws Exception { GenericTestUtils.setLogLevel(SCMBlockDeletingService.LOG, Level.DEBUG); GenericTestUtils.setLogLevel(ReplicationManager.LOG, Level.DEBUG); + conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, false); conf.set("ozone.replication.allowed-configs", "^(RATIS/THREE)|(EC/2-1-256k)$"); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java index 192c933f53c..dde17404ae1 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java @@ -56,6 +56,7 @@ import org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; 
import org.apache.ozone.test.GenericTestUtils; +import org.apache.ozone.test.tag.Unhealthy; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -75,6 +76,7 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -85,6 +87,7 @@ * Tests DeleteContainerCommand Handler. */ @Timeout(300) +@Unhealthy public class TestDeleteContainerHandler { private static OzoneClient client; @@ -97,6 +100,7 @@ public class TestDeleteContainerHandler { @BeforeAll public static void setup() throws Exception { conf = new OzoneConfiguration(); + conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, false); conf.set(OZONE_SCM_CONTAINER_SIZE, "1GB"); conf.setStorageSize(OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN, 0, StorageUnit.MB); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconScmHASnapshot.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconScmHASnapshot.java deleted file mode 100644 index 6006ce67580..00000000000 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconScmHASnapshot.java +++ /dev/null @@ -1,65 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.ozone.recon; - -import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.ozone.MiniOzoneCluster; - -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Timeout; - -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY; - -/** - * Test Recon SCM HA Snapshot Download implementation. - */ -@Timeout(300) -public class TestReconScmHASnapshot { - private OzoneConfiguration conf; - private MiniOzoneCluster ozoneCluster = null; - - @BeforeEach - public void setup() throws Exception { - conf = new OzoneConfiguration(); - conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, true); - conf.setBoolean( - ReconServerConfigKeys.OZONE_RECON_SCM_SNAPSHOT_ENABLED, true); - conf.setInt(ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_THRESHOLD, 0); - conf.setInt(ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, 5); - ozoneCluster = MiniOzoneCluster.newBuilder(conf) - .setNumDatanodes(4) - .includeRecon(true) - .build(); - ozoneCluster.waitForClusterToBeReady(); - } - - @Test - public void testScmHASnapshot() throws Exception { - TestReconScmSnapshot.testSnapshot(ozoneCluster); - } - - @AfterEach - public void shutdown() throws Exception { - if (ozoneCluster != null) { - ozoneCluster.shutdown(); - } - } -} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconScmNonHASnapshot.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconScmNonHASnapshot.java deleted file mode 100644 index 
ae342e63e8c..00000000000 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconScmNonHASnapshot.java +++ /dev/null @@ -1,64 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.ozone.recon; - -import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.ozone.MiniOzoneCluster; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.Timeout; - -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY; - -/** - * Test Recon SCM HA Snapshot Download implementation. - */ -@Timeout(300) -public class TestReconScmNonHASnapshot { - private OzoneConfiguration conf; - private MiniOzoneCluster ozoneCluster = null; - - @BeforeEach - public void setup() throws Exception { - conf = new OzoneConfiguration(); - conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, false); - conf.setBoolean( - ReconServerConfigKeys.OZONE_RECON_SCM_SNAPSHOT_ENABLED, true); - conf.setInt(ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_THRESHOLD, 0); - conf.setInt(ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, 5); - ozoneCluster = MiniOzoneCluster.newBuilder(conf) - .setNumDatanodes(4) - .includeRecon(true) - .build(); - ozoneCluster.waitForClusterToBeReady(); - } - - @Test - public void testScmNonHASnapshot() throws Exception { - TestReconScmSnapshot.testSnapshot(ozoneCluster); - } - - @AfterEach - public void shutdown() throws Exception { - if (ozoneCluster != null) { - ozoneCluster.shutdown(); - } - } -} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java index 4476cbc3e38..e4b81da0203 100644 --- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java @@ -79,6 +79,7 @@ public void init() throws Exception { cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1) .includeRecon(true).build(); cluster.waitForClusterToBeReady(); + cluster.waitForPipelineTobeReady(ONE, 30000); GenericTestUtils.setLogLevel(SCMDatanodeHeartbeatDispatcher.LOG, Level.DEBUG); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java index 730a2479a51..fd27652791b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java @@ -21,7 +21,6 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; -import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.block.DeletedBlockLog; import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -93,7 +92,6 @@ public void init() throws Exception { conf = new OzoneConfiguration(); scmServiceId = "scm-service-test1"; - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.setInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20); cluster = MiniOzoneCluster.newHABuilder(conf) From 36db01f5f2336f0d8d602ab177f99f8a3493bb94 Mon Sep 17 00:00:00 2001 From: chungen0126 Date: Mon, 6 Jan 2025 00:50:34 +0800 Subject: [PATCH 02/16] unmark TestDecommissionAndMaintenance --- 
.../hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java index 80b3748708e..84b3bf51ece 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java @@ -94,7 +94,6 @@ * Test from the scmclient for decommission and maintenance. */ @Flaky({"HDDS-6028", "HDDS-6049"}) -@Unhealthy public class TestDecommissionAndMaintenance { private static final Logger LOG = LoggerFactory.getLogger(TestDecommissionAndMaintenance.class); From a282a4f7f85f6c3d28f66fc22ed2b8d47b2bf75b Mon Sep 17 00:00:00 2001 From: chungen0126 Date: Mon, 6 Jan 2025 00:51:01 +0800 Subject: [PATCH 03/16] fix checkstyle --- .../hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java index 84b3bf51ece..100ea9394a9 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java @@ -43,7 +43,6 @@ import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.ozone.test.GenericTestUtils; import org.apache.ozone.test.tag.Flaky; -import org.apache.ozone.test.tag.Unhealthy; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeEach; From 
7292bb74da68d888d67792a62af6e43c139f4157 Mon Sep 17 00:00:00 2001 From: chungen0126 Date: Mon, 6 Jan 2025 01:34:26 +0800 Subject: [PATCH 04/16] unmark unhealthy --- .../org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java index d92934cde8d..e4e4a57232f 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java @@ -77,7 +77,6 @@ import org.apache.hadoop.util.Time; import org.apache.ozone.test.GenericTestUtils; import org.apache.hadoop.test.PathUtils; -import org.apache.ozone.test.tag.Unhealthy; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -185,7 +184,6 @@ OzoneConfiguration getConf() { TimeUnit.MILLISECONDS); conf.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false); conf.setInt(OZONE_SCM_RATIS_PIPELINE_LIMIT, 10); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); return conf; } @@ -278,7 +276,6 @@ public void testGetLastHeartbeatTimeDiff() throws Exception { * @throws TimeoutException */ @Test - @Unhealthy("HDDS-11986") public void testScmLayoutOnHeartbeat() throws Exception { OzoneConfiguration conf = getConf(); conf.setTimeDuration(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, @@ -396,7 +393,6 @@ private void assertPipelineClosedAfterLayoutHeartbeat( * @throws TimeoutException */ @Test - @Unhealthy("HDDS-11986") public void testScmLayoutOnRegister() throws Exception { From d1d346a6aeb8b90e82e6bb65cfa8b9429da23e71 Mon Sep 17 00:00:00 2001 From: chungen0126 Date: Mon, 6 Jan 2025 03:17:28 +0800 Subject: [PATCH 05/16] add tag to unhealtht --- 
.../apache/hadoop/hdds/scm/TestStorageContainerManager.java | 4 ++-- .../hadoop/hdds/scm/storage/TestContainerCommandsEC.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java index a66d6aba30a..27f0d9d2a51 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java @@ -162,7 +162,7 @@ * Test class that exercises the StorageContainerManager. */ @Timeout(900) -@Unhealthy +@Unhealthy("HDDS-12022") public class TestStorageContainerManager { private static final String LOCALHOST_IP = "127.0.0.1"; private static XceiverClientManager xceiverClientManager; @@ -366,7 +366,6 @@ public void testBlockDeletionTransactions() throws Exception { @Test public void testOldDNRegistersToReInitialisedSCM() throws Exception { - LogManager.getLogger(HeartbeatEndpointTask.class).setLevel(Level.DEBUG); OzoneConfiguration conf = new OzoneConfiguration(); conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, true); conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1000, TimeUnit.MILLISECONDS); @@ -403,6 +402,7 @@ public void testOldDNRegistersToReInitialisedSCM() throws Exception { GenericTestUtils.LogCapturer scmDnHBDispatcherLog = GenericTestUtils.LogCapturer.captureLogs( SCMDatanodeHeartbeatDispatcher.LOG); + LogManager.getLogger(HeartbeatEndpointTask.class).setLevel(Level.DEBUG); GenericTestUtils.LogCapturer heartbeatEndpointTaskLog = GenericTestUtils.LogCapturer.captureLogs(HeartbeatEndpointTask.LOG); GenericTestUtils.LogCapturer versionEndPointTaskLog = diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java index e213af3db16..cbb5a31f35c 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java @@ -126,7 +126,7 @@ /** * This class tests container commands on EC containers. */ -@Unhealthy +@Unhealthy("HDDS-12023") public class TestContainerCommandsEC { private static final String ANY_USER = "any"; From fd8933f40dd295ae9a6e4d8516aa8f98243d9fe8 Mon Sep 17 00:00:00 2001 From: chungen0126 Date: Mon, 6 Jan 2025 04:33:17 +0800 Subject: [PATCH 06/16] fix TestSCMInstallSnapshot --- .../org/apache/hadoop/hdds/scm/TestSCMInstallSnapshot.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMInstallSnapshot.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMInstallSnapshot.java index 5027f7fc37b..fcfc5ab4be4 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMInstallSnapshot.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMInstallSnapshot.java @@ -66,6 +66,7 @@ public class TestSCMInstallSnapshot { @BeforeAll static void setup(@TempDir Path tempDir) throws Exception { conf = new OzoneConfiguration(); + conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY,true); conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, "10s"); conf.setLong(ScmConfigKeys.OZONE_SCM_HA_RATIS_SNAPSHOT_THRESHOLD, 1L); conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_SNAPSHOT_DIR, tempDir.toString()); @@ -104,7 +105,7 @@ private DBCheckpoint downloadSnapshot() throws Exception { pipelineManager.openPipeline(ratisPipeline2.getId()); SCMNodeDetails scmNodeDetails = new SCMNodeDetails.Builder() .setRpcAddress(new 
InetSocketAddress("0.0.0.0", 0)) - .setGrpcPort(ScmConfigKeys.OZONE_SCM_GRPC_PORT_DEFAULT) + .setGrpcPort(conf.getInt(ScmConfigKeys.OZONE_SCM_GRPC_PORT_KEY, ScmConfigKeys.OZONE_SCM_GRPC_PORT_DEFAULT)) .setSCMNodeId("scm1") .build(); Map peerMap = new HashMap<>(); From e24228e8ab4738674be7943e6f63b594e907e4f6 Mon Sep 17 00:00:00 2001 From: chungen0126 Date: Mon, 6 Jan 2025 05:27:33 +0800 Subject: [PATCH 07/16] fix checkstyle --- .../java/org/apache/hadoop/hdds/scm/TestSCMInstallSnapshot.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMInstallSnapshot.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMInstallSnapshot.java index fcfc5ab4be4..ffdc49fd099 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMInstallSnapshot.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMInstallSnapshot.java @@ -66,7 +66,6 @@ public class TestSCMInstallSnapshot { @BeforeAll static void setup(@TempDir Path tempDir) throws Exception { conf = new OzoneConfiguration(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY,true); conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, "10s"); conf.setLong(ScmConfigKeys.OZONE_SCM_HA_RATIS_SNAPSHOT_THRESHOLD, 1L); conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_SNAPSHOT_DIR, tempDir.toString()); From b1f6bf0c15c78b9057e341e4c40ca93410994478 Mon Sep 17 00:00:00 2001 From: chungen0126 Date: Tue, 7 Jan 2025 00:59:34 +0800 Subject: [PATCH 08/16] mark TestStorageContainerManager --- .../hdds/scm/TestStorageContainerManager.java | 51 +++++++++++++++++-- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java index 27f0d9d2a51..9066030a5b5 100644 
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java @@ -92,6 +92,7 @@ import org.apache.hadoop.ozone.protocol.commands.DeleteBlocksCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.Time; import org.apache.log4j.Level; @@ -141,14 +142,16 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION; import static org.apache.hadoop.hdds.scm.HddsTestUtils.mockRemoteUser; import static org.apache.hadoop.hdds.scm.HddsWhiteboxTestUtils.setInternalState; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; +import static org.apache.ozone.test.GenericTestUtils.PortAllocator.getFreePort; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.any; @@ -367,7 +370,6 @@ public void testBlockDeletionTransactions() throws Exception { @Test public void testOldDNRegistersToReInitialisedSCM() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); 
- conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, true); conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1000, TimeUnit.MILLISECONDS); conf.setTimeDuration(ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 3000, TimeUnit.MILLISECONDS); @@ -559,7 +561,6 @@ private Map> createDeleteTXLog( @Test public void testSCMInitialization(@TempDir Path tempDir) throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); - conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, "10s"); Path scmPath = tempDir.resolve("scm-meta"); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString()); @@ -575,6 +576,21 @@ public void testSCMInitialization(@TempDir Path tempDir) throws Exception { assertEquals(NodeType.SCM, scmStore.getNodeType()); assertEquals(testClusterId, scmStore.getClusterID()); assertTrue(scmStore.isSCMHAEnabled()); + } + + @Test + public void testSCMInitializationWithHAEnabled(@TempDir Path tempDir) throws Exception { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); + conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, "10s"); + Path scmPath = tempDir.resolve("scm-meta"); + conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString()); + + final UUID clusterId = UUID.randomUUID(); + // This will initialize SCM + StorageContainerManager.scmInit(conf, clusterId.toString()); + SCMStorageConfig scmStore = new SCMStorageConfig(conf); + assertTrue(scmStore.isSCMHAEnabled()); validateRatisGroupExists(conf, clusterId.toString()); } @@ -939,6 +955,35 @@ public void testIncrementalContainerReportQueue() throws Exception { containerReportExecutors.close(); } + @Test + public void testNonRatisToRatis() + throws IOException, AuthenticationException, InterruptedException, + TimeoutException { + final OzoneConfiguration conf = new OzoneConfiguration(); + try (MiniOzoneCluster cluster = MiniOzoneCluster.newBuilder(conf) + .setNumDatanodes(3) + .build()) { + final StorageContainerManager 
nonRatisSCM = cluster + .getStorageContainerManager(); + assertNull(nonRatisSCM.getScmHAManager().getRatisServer()); + assertFalse(nonRatisSCM.getScmStorageConfig().isSCMHAEnabled()); + nonRatisSCM.stop(); + nonRatisSCM.join(); + + DefaultConfigManager.clearDefaultConfigs(); + conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); + StorageContainerManager.scmInit(conf, cluster.getClusterId()); + conf.setInt(ScmConfigKeys.OZONE_SCM_DATANODE_PORT_KEY, getFreePort()); + conf.unset(ScmConfigKeys.OZONE_SCM_DATANODE_ADDRESS_KEY); + cluster.restartStorageContainerManager(false); + + final StorageContainerManager ratisSCM = cluster + .getStorageContainerManager(); + assertNotNull(ratisSCM.getScmHAManager().getRatisServer()); + assertTrue(ratisSCM.getScmStorageConfig().isSCMHAEnabled()); + } + } + private void addTransactions(StorageContainerManager scm, DeletedBlockLog delLog, Map> containerBlocksMap) From 365980fa89aecdeabe3139dda2d89c67c9731453 Mon Sep 17 00:00:00 2001 From: Chung En Lee Date: Tue, 7 Jan 2025 15:15:26 +0800 Subject: [PATCH 09/16] HDDS-11989. 
Enable SCM Ratis in tests related to DeletedBlockLog (#7615) (cherry picked from commit 8a774a57df907c1e5c6c274054cfde21f914a33b) --- .../hdds/scm/TestStorageContainerManager.java | 12 +---- .../apache/hadoop/ozone/OzoneTestUtils.java | 33 ++++++++++++ .../rpc/TestDeleteWithInAdequateDN.java | 6 +-- .../commandhandler/TestBlockDeletion.java | 50 +++++++++---------- .../TestDeleteContainerHandler.java | 10 ++-- 5 files changed, 65 insertions(+), 46 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java index 9066030a5b5..8bf353acd04 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java @@ -323,17 +323,7 @@ public void testBlockDeletionTransactions() throws Exception { // after sometime, all the TX should be proceed and by then // the number of containerBlocks of all known containers will be // empty again. - GenericTestUtils.waitFor(() -> { - try { - if (SCMHAUtils.isSCMHAEnabled(cluster.getConf())) { - cluster.getStorageContainerManager().getScmHAManager() - .asSCMHADBTransactionBuffer().flush(); - } - return delLog.getNumOfValidTransactions() == 0; - } catch (IOException e) { - return false; - } - }, 1000, 22000); + OzoneTestUtils.waitBlockDeleted(cluster.getStorageContainerManager()); assertTrue(verifyBlocksWithTxnTable(cluster, conf, containerBlocks)); // Continue the work, add some TXs that with known container names, // but unknown block IDs. 
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/OzoneTestUtils.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/OzoneTestUtils.java index 884e435d25e..0a5f7114c40 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/OzoneTestUtils.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/OzoneTestUtils.java @@ -161,4 +161,37 @@ public static void closeContainer(StorageContainerManager scm, container.getState() == HddsProtos.LifeCycleState.CLOSED, 200, 30000); } + + /** + * Flush deleted block log & wait till something was flushed. + */ + public static void flushAndWaitForDeletedBlockLog(StorageContainerManager scm) + throws InterruptedException, TimeoutException { + GenericTestUtils.waitFor(() -> { + try { + scm.getScmHAManager().asSCMHADBTransactionBuffer().flush(); + if (scm.getScmBlockManager().getDeletedBlockLog().getNumOfValidTransactions() > 0) { + return true; + } + } catch (IOException e) { + } + return false; + }, 100, 3000); + } + + /** + * Wait till all blocks are removed. 
+ */ + public static void waitBlockDeleted(StorageContainerManager scm) + throws InterruptedException, TimeoutException { + GenericTestUtils.waitFor(() -> { + try { + if (scm.getScmBlockManager().getDeletedBlockLog().getNumOfValidTransactions() == 0) { + return true; + } + } catch (IOException e) { + } + return false; + }, 1000, 60000); + } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java index 72b1bff9371..bc7bb36a242 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java @@ -42,6 +42,7 @@ import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.ozone.OzoneConfigKeys; +import org.apache.hadoop.ozone.OzoneTestUtils; import org.apache.hadoop.ozone.RatisTestHelper; import org.apache.hadoop.ozone.client.ObjectStore; import org.apache.hadoop.ozone.client.OzoneClient; @@ -65,7 +66,6 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_DESTROY_TIMEOUT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; @@ -75,7 +75,6 @@ import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; -import 
org.apache.ozone.test.tag.Unhealthy; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.BeforeAll; @@ -84,7 +83,6 @@ /** * Tests delete key operation with inadequate datanodes. */ -@Unhealthy public class TestDeleteWithInAdequateDN { private static MiniOzoneCluster cluster; @@ -107,7 +105,6 @@ public static void init() throws Exception { conf = new OzoneConfiguration(); - conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, false); conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 100, TimeUnit.MILLISECONDS); conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 200, @@ -286,6 +283,7 @@ void testDeleteKeyWithInAdequateDN() throws Exception { //cluster.getOzoneManager().deleteKey(keyArgs); client.getObjectStore().getVolume(volumeName).getBucket(bucketName). deleteKey("ratis"); + OzoneTestUtils.flushAndWaitForDeletedBlockLog(cluster.getStorageContainerManager()); // make sure the chunk was never deleted on the leader even though // deleteBlock handler is invoked diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java index c6d7b4ad983..df5f3ec0d27 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java @@ -47,7 +47,6 @@ import org.apache.hadoop.hdds.scm.block.ScmBlockDeletingServiceMetrics; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; -import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import 
org.apache.hadoop.hdds.scm.container.ContainerStateManager; import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager; @@ -80,7 +79,6 @@ import org.apache.ozone.test.GenericTestUtils; import org.apache.ozone.test.GenericTestUtils.LogCapturer; import org.apache.ozone.test.tag.Flaky; -import org.apache.ozone.test.tag.Unhealthy; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -96,7 +94,6 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_EXPIRED_CONTAINER_REPLICA_OP_SCRUB_INTERVAL; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; @@ -112,7 +109,6 @@ /** * Tests for Block deletion. */ -@Unhealthy public class TestBlockDeletion { public static final Logger LOG = @@ -136,7 +132,6 @@ public void init() throws Exception { GenericTestUtils.setLogLevel(SCMBlockDeletingService.LOG, Level.DEBUG); GenericTestUtils.setLogLevel(ReplicationManager.LOG, Level.DEBUG); - conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, false); conf.set("ozone.replication.allowed-configs", "^(RATIS/THREE)|(EC/2-1-256k)$"); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, @@ -243,6 +238,7 @@ public void testBlockDeletion(ReplicationConfig repConfig) throws Exception { // verify key blocks were created in DN. 
GenericTestUtils.waitFor(() -> { try { + scm.getScmHAManager().asSCMHADBTransactionBuffer().flush(); verifyBlocksCreated(omKeyLocationInfoGroupList); return true; } catch (Throwable t) { @@ -287,6 +283,7 @@ public void testBlockDeletion(ReplicationConfig repConfig) throws Exception { // The blocks should be deleted in the DN. GenericTestUtils.waitFor(() -> { try { + scm.getScmHAManager().asSCMHADBTransactionBuffer().flush(); verifyBlocksDeleted(omKeyLocationInfoGroupList); return true; } catch (Throwable t) { @@ -303,6 +300,7 @@ public void testBlockDeletion(ReplicationConfig repConfig) throws Exception { // Verify transactions committed GenericTestUtils.waitFor(() -> { try { + scm.getScmHAManager().asSCMHADBTransactionBuffer().flush(); verifyTransactionsCommitted(); return true; } catch (Throwable t) { @@ -384,10 +382,16 @@ public void testContainerStatisticsAfterDelete() throws Exception { writeClient.deleteKey(keyArgs); // Wait for blocks to be deleted and container reports to be processed - GenericTestUtils.waitFor(() -> - scm.getContainerManager().getContainers().stream() - .allMatch(c -> c.getUsedBytes() == 0 && - c.getNumberOfKeys() == 0), 500, 20000); + GenericTestUtils.waitFor(() -> { + try { + scm.getScmHAManager().asSCMHADBTransactionBuffer().flush(); + } catch (IOException e) { + throw new RuntimeException(e); + } + return scm.getContainerManager().getContainers().stream() + .allMatch(c -> c.getUsedBytes() == 0 && + c.getNumberOfKeys() == 0); + }, 500, 20000); Thread.sleep(5000); // Verify that pending block delete num are as expected with resent cmds cluster.getHddsDatanodes().forEach(dn -> { @@ -429,6 +433,7 @@ public void testContainerStatisticsAfterDelete() throws Exception { assertEquals(HddsProtos.LifeCycleState.DELETED, container.getState()); try { + scm.getScmHAManager().asSCMHADBTransactionBuffer().flush(); assertEquals(HddsProtos.LifeCycleState.DELETED, scm.getScmMetadataStore().getContainerTable() .get(container.containerID()).getState()); @@ 
-520,14 +525,14 @@ public void testContainerStateAfterDNRestart() throws Exception { GenericTestUtils.waitFor(() -> { try { + scm.getScmHAManager().asSCMHADBTransactionBuffer().flush(); return scm.getContainerManager().getContainerReplicas( containerId).stream(). allMatch(replica -> replica.isEmpty()); - } catch (ContainerNotFoundException e) { + } catch (IOException e) { throw new RuntimeException(e); } - }, - 100, 10 * 1000); + }, 100, 10 * 1000); // Container state should be empty now as key got deleted assertTrue(getContainerFromDN( @@ -550,6 +555,7 @@ public void testContainerStateAfterDNRestart() throws Exception { assertEquals(HddsProtos.LifeCycleState.DELETED, container.getState()); try { + scm.getScmHAManager().asSCMHADBTransactionBuffer().flush(); assertEquals(HddsProtos.LifeCycleState.DELETED, scm.getScmMetadataStore().getContainerTable() .get(container.containerID()).getState()); @@ -564,7 +570,6 @@ public void testContainerStateAfterDNRestart() throws Exception { } return true; }, 500, 30000); - LOG.info(metrics.toString()); } /** @@ -650,14 +655,14 @@ public void testContainerDeleteWithInvalidKeyCount() // Ensure isEmpty are true for all replica after delete key GenericTestUtils.waitFor(() -> { try { + scm.getScmHAManager().asSCMHADBTransactionBuffer().flush(); return scm.getContainerManager().getContainerReplicas( containerId).stream() .allMatch(replica -> replica.isEmpty()); - } catch (ContainerNotFoundException e) { + } catch (IOException e) { throw new RuntimeException(e); } - }, - 500, 5 * 2000); + }, 500, 5 * 2000); // Update container replica by making invalid keyCount in one replica ContainerReplica replicaOne = ContainerReplica.newBuilder() @@ -687,6 +692,7 @@ public void testContainerDeleteWithInvalidKeyCount() assertEquals(HddsProtos.LifeCycleState.DELETED, container.getState()); try { + scm.getScmHAManager().asSCMHADBTransactionBuffer().flush(); assertEquals(HddsProtos.LifeCycleState.DELETED, scm.getScmMetadataStore().getContainerTable() 
.get(container.containerID()).getState()); @@ -816,17 +822,7 @@ public void testBlockDeleteCommandParallelProcess() throws Exception { } // Wait for block delete command sent from OM - GenericTestUtils.waitFor(() -> { - try { - if (scm.getScmBlockManager().getDeletedBlockLog() - .getNumOfValidTransactions() > 0) { - return true; - } - } catch (IOException e) { - } - return false; - }, 100, 5000); - + OzoneTestUtils.flushAndWaitForDeletedBlockLog(scm); long start = System.currentTimeMillis(); // Wait for all blocks been deleted. GenericTestUtils.waitFor(() -> { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java index dde17404ae1..0006feb858a 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java @@ -56,7 +56,6 @@ import org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.apache.ozone.test.GenericTestUtils; -import org.apache.ozone.test.tag.Unhealthy; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -76,7 +75,6 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY; import static 
org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -87,7 +85,6 @@ * Tests DeleteContainerCommand Handler. */ @Timeout(300) -@Unhealthy public class TestDeleteContainerHandler { private static OzoneClient client; @@ -100,7 +97,6 @@ public class TestDeleteContainerHandler { @BeforeAll public static void setup() throws Exception { conf = new OzoneConfiguration(); - conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, false); conf.set(OZONE_SCM_CONTAINER_SIZE, "1GB"); conf.setStorageSize(OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN, 0, StorageUnit.MB); @@ -200,6 +196,8 @@ public void testDeleteNonEmptyContainerOnDirEmptyCheckTrue() // Delete key, which will make isEmpty flag to true in containerData objectStore.getVolume(volumeName) .getBucket(bucketName).deleteKey(keyName); + OzoneTestUtils.flushAndWaitForDeletedBlockLog(cluster.getStorageContainerManager()); + OzoneTestUtils.waitBlockDeleted(cluster.getStorageContainerManager()); // Ensure isEmpty flag is true when key is deleted and container is empty GenericTestUtils.waitFor(() -> getContainerfromDN( @@ -317,6 +315,8 @@ public void testDeleteNonEmptyContainerOnDirEmptyCheckFalse() // Delete key, which will make isEmpty flag to true in containerData objectStore.getVolume(volumeName) .getBucket(bucketName).deleteKey(keyName); + OzoneTestUtils.flushAndWaitForDeletedBlockLog(cluster.getStorageContainerManager()); + OzoneTestUtils.waitBlockDeleted(cluster.getStorageContainerManager()); // Ensure isEmpty flag is true when key is deleted and container is empty GenericTestUtils.waitFor(() -> getContainerfromDN( @@ -656,6 +656,8 @@ public void testDeleteContainerRequestHandlerOnClosedContainer() // Delete key, which will make isEmpty flag to true in containerData objectStore.getVolume(volumeName) .getBucket(bucketName).deleteKey(keyName); + 
OzoneTestUtils.flushAndWaitForDeletedBlockLog(cluster.getStorageContainerManager()); + OzoneTestUtils.waitBlockDeleted(cluster.getStorageContainerManager()); // Ensure isEmpty flag is true when key is deleted GenericTestUtils.waitFor(() -> getContainerfromDN( From 2940f1ea4388741ee14ae3d0f8066653ac8bbbde Mon Sep 17 00:00:00 2001 From: Chung En Lee Date: Tue, 7 Jan 2025 16:14:24 +0800 Subject: [PATCH 10/16] HDDS-12023. Enable SCM Ratis in TestContainerCommandsEC (#7650) (cherry picked from commit 44ba9a3f5d689d003cc8770ad62815d04d2596a2) --- .../hadoop/hdds/scm/storage/TestContainerCommandsEC.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java index cbb5a31f35c..0cc14631f0d 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java @@ -70,6 +70,7 @@ import org.apache.hadoop.ozone.common.utils.BufferUtils; import org.apache.hadoop.ozone.container.ContainerTestHelper; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; +import org.apache.hadoop.ozone.container.common.statemachine.StateContext; import org.apache.hadoop.ozone.container.ec.reconstruction.ECContainerOperationClient; import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionCoordinator; import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionMetrics; @@ -172,6 +173,7 @@ public class TestContainerCommandsEC { @BeforeAll public static void init() throws Exception { config = new OzoneConfiguration(); + config.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); 
config.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, 1); config.setTimeDuration(ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL, 3, TimeUnit.SECONDS); config.setBoolean(OzoneConfigKeys.OZONE_ACL_ENABLED, true); @@ -322,8 +324,10 @@ public void testOrphanBlock() throws Exception { .setTxID(1L) .setCount(10) .build())); - dn2Service.getDatanodeStateMachine().getContext() - .addCommand(deleteBlocksCommand); + StateContext context = dn2Service.getDatanodeStateMachine().getContext(); + deleteBlocksCommand.setTerm(context.getTermOfLeaderSCM().isPresent() ? + context.getTermOfLeaderSCM().getAsLong() : 0); + context.addCommand(deleteBlocksCommand); try (XceiverClientGrpc client = new XceiverClientGrpc( createSingleNodePipeline(orphanPipeline, dn2, 1), cluster.getConf())) { From 5da0c9caa2648b33d2ed91088d92df2a570c990e Mon Sep 17 00:00:00 2001 From: Chung En Lee Date: Tue, 7 Jan 2025 17:26:12 +0800 Subject: [PATCH 11/16] HDDS-12022. Enable SCM Ratis in TestStorageContainerManager (#7651) (cherry picked from commit e8d96f422efe094b9191dc2d65459a29e8a8faac) --- .../hdds/scm/TestStorageContainerManager.java | 71 ++++++------------- 1 file changed, 22 insertions(+), 49 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java index 8bf353acd04..db643157fa8 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.ha.RatisUtil; import org.apache.hadoop.hdds.scm.ha.SCMContext; +import org.apache.hadoop.hdds.scm.ha.SCMHANodeDetails; import org.apache.hadoop.hdds.scm.ha.SCMHAUtils; import 
org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl; import org.apache.hadoop.hdds.scm.node.DatanodeInfo; @@ -92,7 +93,6 @@ import org.apache.hadoop.ozone.protocol.commands.DeleteBlocksCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.Time; import org.apache.log4j.Level; @@ -143,15 +143,12 @@ import static org.apache.hadoop.hdds.scm.HddsTestUtils.mockRemoteUser; import static org.apache.hadoop.hdds.scm.HddsWhiteboxTestUtils.setInternalState; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; -import static org.apache.ozone.test.GenericTestUtils.PortAllocator.getFreePort; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.any; @@ -193,11 +190,13 @@ public void cleanupDefaults() { public void testRpcPermission() throws Exception { // Test with default configuration OzoneConfiguration defaultConf = new OzoneConfiguration(); + defaultConf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); testRpcPermissionWithConf(defaultConf, any -> false, "unknownUser"); // Test with ozone.administrators defined in configuration String admins = "adminUser1, adminUser2"; OzoneConfiguration ozoneConf = new OzoneConfiguration(); 
+ ozoneConf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); ozoneConf.setStrings(OzoneConfigKeys.OZONE_ADMINISTRATORS, admins); // Non-admin user will get permission denied. // Admin user will pass the permission check. @@ -269,6 +268,7 @@ private void verifyPermissionDeniedException(Exception e, String userName) { public void testBlockDeletionTransactions() throws Exception { int numKeys = 5; OzoneConfiguration conf = new OzoneConfiguration(); + conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS); DatanodeConfiguration datanodeConfiguration = conf.getObject( @@ -360,6 +360,7 @@ public void testBlockDeletionTransactions() throws Exception { @Test public void testOldDNRegistersToReInitialisedSCM() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); + conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1000, TimeUnit.MILLISECONDS); conf.setTimeDuration(ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 3000, TimeUnit.MILLISECONDS); @@ -371,10 +372,13 @@ public void testOldDNRegistersToReInitialisedSCM() throws Exception { cluster.waitForClusterToBeReady(); HddsDatanodeService datanode = cluster.getHddsDatanodes().get(0); StorageContainerManager scm = cluster.getStorageContainerManager(); + File dbDir = scm.getScmMetadataStore().getStore().getDbLocation(); scm.stop(); // re-initialise SCM with new clusterID + GenericTestUtils.deleteDirectory(new File(SCMHAUtils.getRatisStorageDir(conf))); + GenericTestUtils.deleteDirectory(dbDir); GenericTestUtils.deleteDirectory( new File(scm.getScmStorageConfig().getStorageDir())); String newClusterId = UUID.randomUUID().toString(); @@ -415,7 +419,7 @@ public void testOldDNRegistersToReInitialisedSCM() throws Exception { datanode.getDatanodeDetails()); GenericTestUtils.waitFor( () -> scmDnHBDispatcherLog.getOutput().contains(expectedLog), 100, - 5000); 
+ 30000); ExitUtil.disableSystemExit(); // As part of processing response for re-register, DN EndpointStateMachine // goes to GET-VERSION state which checks if there is already existing @@ -434,6 +438,7 @@ public void testOldDNRegistersToReInitialisedSCM() throws Exception { assertThat(versionEndPointTaskLog.getOutput()).contains( "org.apache.hadoop.ozone.common" + ".InconsistentStorageStateException: Mismatched ClusterIDs"); + scm.stop(); } } @@ -441,6 +446,7 @@ public void testOldDNRegistersToReInitialisedSCM() throws Exception { public void testBlockDeletingThrottling() throws Exception { int numKeys = 15; OzoneConfiguration conf = new OzoneConfiguration(); + conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 1, TimeUnit.SECONDS); conf.setInt(ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 5); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, @@ -551,6 +557,7 @@ private Map> createDeleteTXLog( @Test public void testSCMInitialization(@TempDir Path tempDir) throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); + conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); Path scmPath = tempDir.resolve("scm-meta"); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString()); @@ -566,27 +573,13 @@ public void testSCMInitialization(@TempDir Path tempDir) throws Exception { assertEquals(NodeType.SCM, scmStore.getNodeType()); assertEquals(testClusterId, scmStore.getClusterID()); assertTrue(scmStore.isSCMHAEnabled()); - } - - @Test - public void testSCMInitializationWithHAEnabled(@TempDir Path tempDir) throws Exception { - OzoneConfiguration conf = new OzoneConfiguration(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); - conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, "10s"); - Path scmPath = tempDir.resolve("scm-meta"); - conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString()); - - final UUID clusterId = UUID.randomUUID(); - // 
This will initialize SCM - StorageContainerManager.scmInit(conf, clusterId.toString()); - SCMStorageConfig scmStore = new SCMStorageConfig(conf); - assertTrue(scmStore.isSCMHAEnabled()); validateRatisGroupExists(conf, clusterId.toString()); } @Test public void testSCMReinitialization(@TempDir Path tempDir) throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); + conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); Path scmPath = tempDir.resolve("scm-meta"); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString()); //This will set the cluster id in the version file @@ -648,6 +641,7 @@ public static void validateRatisGroupExists(OzoneConfiguration conf, @Test void testSCMInitializationFailure(@TempDir Path tempDir) { OzoneConfiguration conf = new OzoneConfiguration(); + conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); Path scmPath = tempDir.resolve("scm-meta"); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString()); @@ -660,15 +654,21 @@ public void testScmInfo(@TempDir Path tempDir) throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); Path scmPath = tempDir.resolve("scm-meta"); + conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString()); SCMStorageConfig scmStore = new SCMStorageConfig(conf); String clusterId = UUID.randomUUID().toString(); String scmId = UUID.randomUUID().toString(); scmStore.setClusterId(clusterId); scmStore.setScmId(scmId); + scmStore.setSCMHAFlag(true); // writes the version file properties scmStore.initialize(); + SCMRatisServerImpl.initialize(clusterId, scmId, + SCMHANodeDetails.loadSCMHAConfig(conf, scmStore) + .getLocalNodeDetails(), conf); StorageContainerManager scm = HddsTestUtils.getScmSimple(conf); + scm.start(); //Reads the SCM Info from SCM instance ScmInfo scmInfo = scm.getClientProtocolServer().getScmInfo(); assertEquals(clusterId, scmInfo.getClusterId()); @@ -686,6 +686,7 @@ public void 
testScmInfo(@TempDir Path tempDir) throws Exception { public void testScmProcessDatanodeHeartbeat() throws Exception { String rackName = "/rack1"; OzoneConfiguration conf = new OzoneConfiguration(); + conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, StaticMapping.class, DNSToSwitchMapping.class); StaticMapping.addNodeToRack(NetUtils.normalizeHostName(HddsUtils.getHostName(conf)), @@ -728,6 +729,7 @@ public void testScmProcessDatanodeHeartbeat() throws Exception { public void testCloseContainerCommandOnRestart() throws Exception { int numKeys = 15; OzoneConfiguration conf = new OzoneConfiguration(); + conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 1, TimeUnit.SECONDS); conf.setInt(ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 5); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, @@ -945,35 +947,6 @@ public void testIncrementalContainerReportQueue() throws Exception { containerReportExecutors.close(); } - @Test - public void testNonRatisToRatis() - throws IOException, AuthenticationException, InterruptedException, - TimeoutException { - final OzoneConfiguration conf = new OzoneConfiguration(); - try (MiniOzoneCluster cluster = MiniOzoneCluster.newBuilder(conf) - .setNumDatanodes(3) - .build()) { - final StorageContainerManager nonRatisSCM = cluster - .getStorageContainerManager(); - assertNull(nonRatisSCM.getScmHAManager().getRatisServer()); - assertFalse(nonRatisSCM.getScmStorageConfig().isSCMHAEnabled()); - nonRatisSCM.stop(); - nonRatisSCM.join(); - - DefaultConfigManager.clearDefaultConfigs(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); - StorageContainerManager.scmInit(conf, cluster.getClusterId()); - conf.setInt(ScmConfigKeys.OZONE_SCM_DATANODE_PORT_KEY, getFreePort()); - conf.unset(ScmConfigKeys.OZONE_SCM_DATANODE_ADDRESS_KEY); - cluster.restartStorageContainerManager(false); - - final 
StorageContainerManager ratisSCM = cluster - .getStorageContainerManager(); - assertNotNull(ratisSCM.getScmHAManager().getRatisServer()); - assertTrue(ratisSCM.getScmStorageConfig().isSCMHAEnabled()); - } - } - private void addTransactions(StorageContainerManager scm, DeletedBlockLog delLog, Map> containerBlocksMap) From caaa0a7eae39d6b2cbc03f24a08a48b03ddbf3af Mon Sep 17 00:00:00 2001 From: chungen0126 Date: Tue, 7 Jan 2025 18:13:20 +0800 Subject: [PATCH 12/16] remove all OZONE_SCM_HA_ENABLE_KEY from tests --- .../hdds/scm/TestStorageContainerManager.java | 106 +++++++++--------- .../scm/storage/TestContainerCommandsEC.java | 3 - .../hadoop/ozone/TestSecureOzoneCluster.java | 39 ++----- 3 files changed, 64 insertions(+), 84 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java index db643157fa8..de512568a12 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java @@ -98,7 +98,6 @@ import org.apache.log4j.Level; import org.apache.log4j.LogManager; import org.apache.ozone.test.GenericTestUtils; -import org.apache.ozone.test.tag.Unhealthy; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.server.RaftServerConfigKeys; @@ -162,7 +161,6 @@ * Test class that exercises the StorageContainerManager. 
*/ @Timeout(900) -@Unhealthy("HDDS-12022") public class TestStorageContainerManager { private static final String LOCALHOST_IP = "127.0.0.1"; private static XceiverClientManager xceiverClientManager; @@ -190,13 +188,11 @@ public void cleanupDefaults() { public void testRpcPermission() throws Exception { // Test with default configuration OzoneConfiguration defaultConf = new OzoneConfiguration(); - defaultConf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); testRpcPermissionWithConf(defaultConf, any -> false, "unknownUser"); // Test with ozone.administrators defined in configuration String admins = "adminUser1, adminUser2"; OzoneConfiguration ozoneConf = new OzoneConfiguration(); - ozoneConf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); ozoneConf.setStrings(OzoneConfigKeys.OZONE_ADMINISTRATORS, admins); // Non-admin user will get permission denied. // Admin user will pass the permission check. @@ -268,7 +264,6 @@ private void verifyPermissionDeniedException(Exception e, String userName) { public void testBlockDeletionTransactions() throws Exception { int numKeys = 5; OzoneConfiguration conf = new OzoneConfiguration(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS); DatanodeConfiguration datanodeConfiguration = conf.getObject( @@ -360,7 +355,6 @@ public void testBlockDeletionTransactions() throws Exception { @Test public void testOldDNRegistersToReInitialisedSCM() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1000, TimeUnit.MILLISECONDS); conf.setTimeDuration(ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 3000, TimeUnit.MILLISECONDS); @@ -407,38 +401,43 @@ public void testOldDNRegistersToReInitialisedSCM() throws Exception { assertThat(scmDnHBDispatcherLog.getOutput()).isEmpty(); 
assertThat(versionEndPointTaskLog.getOutput()).isEmpty(); // start the new SCM - scm.start(); - // Initially DatanodeStateMachine will be in Running state - assertEquals(DatanodeStateMachine.DatanodeStates.RUNNING, - dsm.getContext().getState()); - // DN heartbeats to new SCM, SCM doesn't recognize the node, sends the - // command to DN to re-register. Wait for SCM to send re-register command - String expectedLog = String.format( - "SCM received heartbeat from an unregistered datanode %s. " - + "Asking datanode to re-register.", - datanode.getDatanodeDetails()); - GenericTestUtils.waitFor( - () -> scmDnHBDispatcherLog.getOutput().contains(expectedLog), 100, - 30000); - ExitUtil.disableSystemExit(); - // As part of processing response for re-register, DN EndpointStateMachine - // goes to GET-VERSION state which checks if there is already existing - // version file on the DN & if the clusterID matches with that of the SCM - // In this case, it won't match and gets InconsistentStorageStateException - // and DN shuts down. - String expectedLog2 = "Received SCM notification to register." - + " Interrupt HEARTBEAT and transit to GETVERSION state."; - GenericTestUtils.waitFor( - () -> heartbeatEndpointTaskLog.getOutput().contains(expectedLog2), - 100, 5000); - GenericTestUtils.waitFor(() -> dsm.getContext().getShutdownOnError(), 100, - 5000); - assertEquals(DatanodeStateMachine.DatanodeStates.SHUTDOWN, - dsm.getContext().getState()); - assertThat(versionEndPointTaskLog.getOutput()).contains( - "org.apache.hadoop.ozone.common" + - ".InconsistentStorageStateException: Mismatched ClusterIDs"); - scm.stop(); + try { + scm.start(); + // Initially DatanodeStateMachine will be in Running state + assertEquals(DatanodeStateMachine.DatanodeStates.RUNNING, + dsm.getContext().getState()); + // DN heartbeats to new SCM, SCM doesn't recognize the node, sends the + // command to DN to re-register. 
Wait for SCM to send re-register command + String expectedLog = String.format( + "SCM received heartbeat from an unregistered datanode %s. " + + "Asking datanode to re-register.", + datanode.getDatanodeDetails()); + GenericTestUtils.waitFor( + () -> scmDnHBDispatcherLog.getOutput().contains(expectedLog), 100, + 30000); + ExitUtil.disableSystemExit(); + // As part of processing response for re-register, DN EndpointStateMachine + // goes to GET-VERSION state which checks if there is already existing + // version file on the DN & if the clusterID matches with that of the SCM + // In this case, it won't match and gets InconsistentStorageStateException + // and DN shuts down. + String expectedLog2 = "Received SCM notification to register." + + " Interrupt HEARTBEAT and transit to GETVERSION state."; + GenericTestUtils.waitFor( + () -> heartbeatEndpointTaskLog.getOutput().contains(expectedLog2), + 100, 5000); + GenericTestUtils.waitFor(() -> dsm.getContext().getShutdownOnError(), 100, + 5000); + assertEquals(DatanodeStateMachine.DatanodeStates.SHUTDOWN, + dsm.getContext().getState()); + assertThat(versionEndPointTaskLog.getOutput()).contains( + "org.apache.hadoop.ozone.common" + + ".InconsistentStorageStateException: Mismatched ClusterIDs"); + } finally { + if (scm != null) { + scm.stop(); + } + } } } @@ -446,7 +445,6 @@ public void testOldDNRegistersToReInitialisedSCM() throws Exception { public void testBlockDeletingThrottling() throws Exception { int numKeys = 15; OzoneConfiguration conf = new OzoneConfiguration(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 1, TimeUnit.SECONDS); conf.setInt(ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 5); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, @@ -557,7 +555,6 @@ private Map> createDeleteTXLog( @Test public void testSCMInitialization(@TempDir Path tempDir) throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); - 
conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); Path scmPath = tempDir.resolve("scm-meta"); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString()); @@ -579,7 +576,6 @@ public void testSCMInitialization(@TempDir Path tempDir) throws Exception { @Test public void testSCMReinitialization(@TempDir Path tempDir) throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); Path scmPath = tempDir.resolve("scm-meta"); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString()); //This will set the cluster id in the version file @@ -641,7 +637,6 @@ public static void validateRatisGroupExists(OzoneConfiguration conf, @Test void testSCMInitializationFailure(@TempDir Path tempDir) { OzoneConfiguration conf = new OzoneConfiguration(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); Path scmPath = tempDir.resolve("scm-meta"); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString()); @@ -654,7 +649,6 @@ public void testScmInfo(@TempDir Path tempDir) throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); Path scmPath = tempDir.resolve("scm-meta"); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString()); SCMStorageConfig scmStore = new SCMStorageConfig(conf); String clusterId = UUID.randomUUID().toString(); @@ -668,15 +662,21 @@ public void testScmInfo(@TempDir Path tempDir) throws Exception { SCMHANodeDetails.loadSCMHAConfig(conf, scmStore) .getLocalNodeDetails(), conf); StorageContainerManager scm = HddsTestUtils.getScmSimple(conf); - scm.start(); - //Reads the SCM Info from SCM instance - ScmInfo scmInfo = scm.getClientProtocolServer().getScmInfo(); - assertEquals(clusterId, scmInfo.getClusterId()); - assertEquals(scmId, scmInfo.getScmId()); - - String expectedVersion = HddsVersionInfo.HDDS_VERSION_INFO.getVersion(); - String actualVersion = scm.getSoftwareVersion(); 
- assertEquals(expectedVersion, actualVersion); + try { + scm.start(); + //Reads the SCM Info from SCM instance + ScmInfo scmInfo = scm.getClientProtocolServer().getScmInfo(); + assertEquals(clusterId, scmInfo.getClusterId()); + assertEquals(scmId, scmInfo.getScmId()); + + String expectedVersion = HddsVersionInfo.HDDS_VERSION_INFO.getVersion(); + String actualVersion = scm.getSoftwareVersion(); + assertEquals(expectedVersion, actualVersion); + } finally { + if (scm != null) { + scm.stop(); + } + } } /** @@ -686,7 +686,6 @@ public void testScmInfo(@TempDir Path tempDir) throws Exception { public void testScmProcessDatanodeHeartbeat() throws Exception { String rackName = "/rack1"; OzoneConfiguration conf = new OzoneConfiguration(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, StaticMapping.class, DNSToSwitchMapping.class); StaticMapping.addNodeToRack(NetUtils.normalizeHostName(HddsUtils.getHostName(conf)), @@ -729,7 +728,6 @@ public void testScmProcessDatanodeHeartbeat() throws Exception { public void testCloseContainerCommandOnRestart() throws Exception { int numKeys = 15; OzoneConfiguration conf = new OzoneConfiguration(); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 1, TimeUnit.SECONDS); conf.setInt(ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 5); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java index 0cc14631f0d..ca4e1a896b0 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java @@ -79,7 +79,6
@@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.ozone.test.GenericTestUtils; -import org.apache.ozone.test.tag.Unhealthy; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; @@ -127,7 +126,6 @@ /** * This class tests container commands on EC containers. */ -@Unhealthy("HDDS-12023") public class TestContainerCommandsEC { private static final String ANY_USER = "any"; @@ -173,7 +171,6 @@ public class TestContainerCommandsEC { @BeforeAll public static void init() throws Exception { config = new OzoneConfiguration(); - config.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); config.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, 1); config.setTimeDuration(ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL, 3, TimeUnit.SECONDS); config.setBoolean(OzoneConfigKeys.OZONE_ACL_ENABLED, true); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java index 637e8bd9e4f..4eced770467 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java @@ -232,7 +232,6 @@ void init() { conf.setInt(OZONE_SCM_GRPC_PORT_KEY, getFreePort()); conf.set(OZONE_OM_ADDRESS_KEY, InetAddress.getLocalHost().getCanonicalHostName() + ":" + getFreePort()); - conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, false); DefaultMetricsSystem.setMiniClusterMode(true); ExitUtils.disableSystemExit(); @@ -353,10 +352,17 @@ void testSecureScmStartupSuccess() throws Exception { initSCM(); scm = HddsTestUtils.getScmSimple(conf); //Reads the SCM Info from SCM instance - ScmInfo scmInfo = scm.getClientProtocolServer().getScmInfo(); - assertEquals(clusterId, 
scmInfo.getClusterId()); - assertEquals(scmId, scmInfo.getScmId()); - assertEquals(2, scm.getScmCertificateClient().getTrustChain().size()); + try { + scm.start(); + ScmInfo scmInfo = scm.getClientProtocolServer().getScmInfo(); + assertEquals(clusterId, scmInfo.getClusterId()); + assertEquals(scmId, scmInfo.getScmId()); + assertEquals(2, scm.getScmCertificateClient().getTrustChain().size()); + } finally { + if (scm != null) { + scm.stop(); + } + } } @Test @@ -444,28 +450,6 @@ void testAdminAccessControlException() throws Exception { } } - @Test - void testSecretManagerInitializedNonHASCM() throws Exception { - conf.setBoolean(HDDS_BLOCK_TOKEN_ENABLED, true); - initSCM(); - scm = HddsTestUtils.getScmSimple(conf); - //Reads the SCM Info from SCM instance - try { - scm.start(); - - SecretKeyManager secretKeyManager = scm.getSecretKeyManager(); - boolean inSafeMode = scm.getScmSafeModeManager().getInSafeMode(); - assertFalse(SCMHAUtils.isSCMHAEnabled(conf)); - assertTrue(inSafeMode); - assertNotNull(secretKeyManager); - assertTrue(secretKeyManager.isInitialized()); - } finally { - if (scm != null) { - scm.stop(); - } - } - } - private void initSCM() throws IOException { Path scmPath = new File(tempDir, "scm-meta").toPath(); Files.createDirectories(scmPath); @@ -474,6 +458,7 @@ private void initSCM() throws IOException { SCMStorageConfig scmStore = new SCMStorageConfig(conf); scmStore.setClusterId(clusterId); scmStore.setScmId(scmId); + scmStore.setSCMHAFlag(true); HASecurityUtils.initializeSecurity(scmStore, conf, InetAddress.getLocalHost().getHostName(), true); scmStore.setPrimaryScmNodeId(scmId); From 89eb81fcd77a0cd2ca736fee1f2b12bc121d2655 Mon Sep 17 00:00:00 2001 From: chungen0126 Date: Tue, 7 Jan 2025 18:22:25 +0800 Subject: [PATCH 13/16] remove all OZONE_SCM_HA_ENABLE_KEY in tests --- .../apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java | 1 - .../hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java | 2 -- 
.../common/statemachine/commandhandler/TestBlockDeletion.java | 2 -- .../statemachine/commandhandler/TestDeleteContainerHandler.java | 2 -- 4 files changed, 7 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java index bf40a600e29..ca4e1a896b0 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java @@ -171,7 +171,6 @@ public class TestContainerCommandsEC { @BeforeAll public static void init() throws Exception { config = new OzoneConfiguration(); - config.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true); config.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, 1); config.setTimeDuration(ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL, 3, TimeUnit.SECONDS); config.setBoolean(OzoneConfigKeys.OZONE_ACL_ENABLED, true); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java index 2b199306b76..bc7bb36a242 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java @@ -66,7 +66,6 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY; import static 
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_DESTROY_TIMEOUT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; @@ -106,7 +105,6 @@ public static void init() throws Exception { conf = new OzoneConfiguration(); - conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, true); conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 100, TimeUnit.MILLISECONDS); conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 200, diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java index e38312e02e6..df5f3ec0d27 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java @@ -94,7 +94,6 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_EXPIRED_CONTAINER_REPLICA_OP_SCRUB_INTERVAL; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; @@ -133,7 +132,6 @@ public void init() throws Exception { GenericTestUtils.setLogLevel(SCMBlockDeletingService.LOG, Level.DEBUG); GenericTestUtils.setLogLevel(ReplicationManager.LOG, Level.DEBUG); - 
conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, true); conf.set("ozone.replication.allowed-configs", "^(RATIS/THREE)|(EC/2-1-256k)$"); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java index 705ef1e0d86..0006feb858a 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java @@ -75,7 +75,6 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -98,7 +97,6 @@ public class TestDeleteContainerHandler { @BeforeAll public static void setup() throws Exception { conf = new OzoneConfiguration(); - conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, true); conf.set(OZONE_SCM_CONTAINER_SIZE, "1GB"); conf.setStorageSize(OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN, 0, StorageUnit.MB); From d33ea9a7853c762692f5293a9c23752898416bd0 Mon Sep 17 00:00:00 2001 From: chungen0126 Date: Tue, 7 Jan 2025 18:26:33 +0800 Subject: [PATCH 14/16] fix checkstyle --- .../java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java | 4 ---- 1 file changed, 4 
deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java index 4eced770467..d71a4854c9e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java @@ -49,7 +49,6 @@ import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMGetCertResponseProto; import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.scm.ScmConfig; -import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.HddsTestUtils; import org.apache.hadoop.hdds.scm.client.ScmTopologyClient; @@ -65,7 +64,6 @@ import org.apache.hadoop.hdds.security.exception.SCMSecurityException; import org.apache.hadoop.hdds.security.SecurityConfig; import org.apache.hadoop.hdds.security.symmetric.ManagedSecretKey; -import org.apache.hadoop.hdds.security.symmetric.SecretKeyManager; import org.apache.hadoop.hdds.security.x509.certificate.authority.CAType; import org.apache.hadoop.hdds.security.x509.certificate.authority.DefaultApprover; import org.apache.hadoop.hdds.security.x509.certificate.authority.profile.DefaultProfile; @@ -117,7 +115,6 @@ import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.StringUtils; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION; -import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BLOCK_TOKEN_ENABLED; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_ACK_TIMEOUT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_CHECK_INTERNAL; @@ -169,7 +166,6 @@ 
import static org.apache.ozone.test.GenericTestUtils.PortAllocator.getFreePort; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; From 6ac85a4818959cd670f94546fd28ad5b146e4734 Mon Sep 17 00:00:00 2001 From: chungen0126 Date: Wed, 8 Jan 2025 16:54:46 +0800 Subject: [PATCH 15/16] address comments --- .../hadoop/hdds/scm/TestStorageContainerManager.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java index de512568a12..b00c7f8040b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java @@ -434,9 +434,7 @@ public void testOldDNRegistersToReInitialisedSCM() throws Exception { "org.apache.hadoop.ozone.common" + ".InconsistentStorageStateException: Mismatched ClusterIDs"); } finally { - if (scm != null) { - scm.stop(); - } + scm.stop(); } } } @@ -673,9 +671,7 @@ public void testScmInfo(@TempDir Path tempDir) throws Exception { String actualVersion = scm.getSoftwareVersion(); assertEquals(expectedVersion, actualVersion); } finally { - if (scmStore != null) { - scm.stop(); - } + scm.stop(); } } From 131da4b44a032777c9c06e91546db2fab4ed10bc Mon Sep 17 00:00:00 2001 From: chungen0126 Date: Wed, 8 Jan 2025 16:55:53 +0800 Subject: [PATCH 16/16] address comments --- .../java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java | 4 ---- 1 file changed, 4 deletions(-) diff 
--git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index 0b60708f10b..b3d9f780888 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -748,10 +748,6 @@ protected void initializeScmStorage(SCMStorageConfig scmStore) scmStore.setClusterId(clusterId); scmStore.setScmId(scmId); scmStore.initialize(); - //TODO: HDDS-6897 - //Disabling Ratis for only of MiniOzoneClusterImpl. - //MiniOzoneClusterImpl doesn't work with Ratis enabled SCM - scmStore.setSCMHAFlag(true); scmStore.persistCurrentState(); SCMRatisServerImpl.initialize(clusterId, scmId,