diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java
index bd224e45a79..e0b4c3ce543 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java
@@ -29,6 +29,6 @@ public interface DatanodeAdminMonitor extends Runnable {
 
   void startMonitoring(DatanodeDetails dn);
   void stopMonitoring(DatanodeDetails dn);
-  Set<DatanodeDetails> getTrackedNodes();
+  Set<DatanodeAdminMonitorImpl.TrackedNode> getTrackedNodes();
   void setMetrics(NodeDecommissionMetrics metrics);
 }
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
index 693a3474def..ff038f93e47 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
@@ -78,7 +78,7 @@ public class DatanodeAdminMonitorImpl implements DatanodeAdminMonitor {
   private ReplicationManager replicationManager;
   private Queue pendingNodes = new ArrayDeque();
   private Queue cancelledNodes = new ArrayDeque();
-  private Set<DatanodeDetails> trackedNodes = new HashSet<>();
+  private Set<TrackedNode> trackedNodes = new HashSet<>();
   private NodeDecommissionMetrics metrics;
   private long pipelinesWaitingToClose = 0;
   private long sufficientlyReplicatedContainers = 0;
@@ -87,6 +87,42 @@ public class DatanodeAdminMonitorImpl implements DatanodeAdminMonitor {
   private long unhealthyContainers = 0;
   private long underReplicatedContainers = 0;
 
+  /**
+   * Wrapper around a DatanodeDetails held in the trackedNodes set. Equality
+   * and hashing both delegate to the wrapped DatanodeDetails so that a fresh
+   * wrapper can be used to find or remove an existing entry in the set.
+   */
+  public static final class TrackedNode {
+
+    private final DatanodeDetails datanodeDetails;
+
+    public TrackedNode(DatanodeDetails datanodeDetails) {
+      this.datanodeDetails = datanodeDetails;
+    }
+
+    @Override
+    public int hashCode() {
+      return datanodeDetails.hashCode();
+    }
+
+    // Required alongside hashCode(): HashSet.remove(new TrackedNode(dn))
+    // would otherwise compare by identity and never match the stored entry.
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) {
+        return true;
+      }
+      if (obj == null || getClass() != obj.getClass()) {
+        return false;
+      }
+      return datanodeDetails.equals(((TrackedNode) obj).datanodeDetails);
+    }
+
+    public DatanodeDetails getDatanodeDetails() {
+      return datanodeDetails;
+    }
+  }
+
   private Map containerStateByHost;
 
   private static final Logger LOG =
@@ -145,7 +181,7 @@ public synchronized void setMetrics(NodeDecommissionMetrics metrics) {
    * @return An unmodifiable set of the tracked nodes.
    */
   @Override
-  public synchronized Set<DatanodeDetails> getTrackedNodes() {
+  public synchronized Set<TrackedNode> getTrackedNodes() {
     return Collections.unmodifiableSet(trackedNodes);
   }
 
@@ -220,7 +256,7 @@ private void processCancelledNodes() {
     while (!cancelledNodes.isEmpty()) {
       DatanodeDetails dn = cancelledNodes.poll();
       try {
-        stopTrackingNode(dn);
+        stopTrackingNode(new TrackedNode(dn));
         putNodeBackInService(dn);
         LOG.info("Recommissioned node {}", dn);
       } catch (NodeNotFoundException e) {
@@ -237,10 +273,10 @@ private void processPendingNodes() {
 
   private void processTransitioningNodes() {
     resetContainerMetrics();
-    Iterator<DatanodeDetails> iterator = trackedNodes.iterator();
+    Iterator<TrackedNode> iterator = trackedNodes.iterator();
 
     while (iterator.hasNext()) {
-      DatanodeDetails dn = iterator.next();
+      DatanodeDetails dn = iterator.next().getDatanodeDetails();
       try {
         NodeStatus status = getNodeStatus(dn);
 
@@ -462,10 +498,10 @@ private void completeMaintenance(DatanodeDetails dn)
 
   private void startTrackingNode(DatanodeDetails dn) {
     eventQueue.fireEvent(SCMEvents.START_ADMIN_ON_NODE, dn);
-    trackedNodes.add(dn);
+    trackedNodes.add(new TrackedNode(dn));
   }
 
-  private void stopTrackingNode(DatanodeDetails dn) {
+  private void stopTrackingNode(TrackedNode dn) {
     trackedNodes.remove(dn);
   }
 
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
index 09ff7cb0ff6..6e348f13e05 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
@@ -188,7 +188,6 @@ public void testDecommissionNodeWaitsForContainersToReplicate()
     // REPLICATE_CONTAINERS as there are no pipelines to close.
     monitor.startMonitoring(dn1);
     monitor.run();
-    DatanodeDetails node = getFirstTrackedNode();
     assertEquals(1, monitor.getTrackedNodeCount());
     assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
         nodeManager.getNodeStatus(dn1).getOperationalState());
@@ -414,7 +413,6 @@ public void testDecommissionNodeWithUnrecoverableECContainer()
     // REPLICATE_CONTAINERS as there are no pipelines to close.
     monitor.startMonitoring(dn1);
     monitor.run();
-    DatanodeDetails node = getFirstTrackedNode();
     assertEquals(1, monitor.getTrackedNodeCount());
     assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
         nodeManager.getNodeStatus(dn1).getOperationalState());
@@ -469,7 +467,6 @@ public void testDecommissionAbortedWhenNodeInUnexpectedState()
     monitor.startMonitoring(dn1);
     monitor.run();
     assertEquals(1, monitor.getTrackedNodeCount());
-    DatanodeDetails node = getFirstTrackedNode();
     assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
         nodeManager.getNodeStatus(dn1).getOperationalState());
 
@@ -506,7 +503,6 @@ public void testDecommissionAbortedWhenNodeGoesDead()
     monitor.startMonitoring(dn1);
     monitor.run();
     assertEquals(1, monitor.getTrackedNodeCount());
-    DatanodeDetails node = getFirstTrackedNode();
     assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
         nodeManager.getNodeStatus(dn1).getOperationalState());
 
@@ -625,7 +621,6 @@ public void testDeadMaintenanceNodeDoesNotAbortWorkflow()
     monitor.startMonitoring(dn1);
     monitor.run();
     assertEquals(1, monitor.getTrackedNodeCount());
-    DatanodeDetails node = getFirstTrackedNode();
     assertTrue(nodeManager.getNodeStatus(dn1).isInMaintenance());
 
     // Set the node dead and ensure the workflow does not end
@@ -652,7 +647,6 @@ public void testCancelledNodesMovedToInService()
     monitor.startMonitoring(dn1);
     monitor.run();
     assertEquals(1, monitor.getTrackedNodeCount());
-    DatanodeDetails node = getFirstTrackedNode();
     assertTrue(nodeManager.getNodeStatus(dn1).isInMaintenance());
 
     // Now cancel the node and run the monitor, the node should be IN_SERVICE
@@ -684,6 +678,7 @@ private Set generateContainers(int count) {
    */
   private DatanodeDetails getFirstTrackedNode() {
     return
-        monitor.getTrackedNodes().toArray(new DatanodeDetails[0])[0];
+        monitor.getTrackedNodes().toArray(
+            new DatanodeAdminMonitorImpl.TrackedNode[0])[0].getDatanodeDetails();
   }
 }