From 8ca33c718b2253297f15941758ec4613926a700a Mon Sep 17 00:00:00 2001 From: tanvipenumudy <46785609+tanvipenumudy@users.noreply.github.com> Date: Mon, 9 Sep 2024 13:01:52 +0530 Subject: [PATCH 01/43] HDDS-11414. Key listing for FSO buckets fails with forward client (#7161) --- .../apache/hadoop/ozone/OzoneManagerVersion.java | 2 ++ .../apache/hadoop/ozone/client/rpc/RpcClient.java | 13 ++++++++++--- .../src/main/smoketest/compatibility/read.robot | 4 ++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneManagerVersion.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneManagerVersion.java index eec2ceeb5e8..982b559c7a5 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneManagerVersion.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneManagerVersion.java @@ -44,6 +44,8 @@ public enum OzoneManagerVersion implements ComponentVersion { ATOMIC_REWRITE_KEY(6, "OzoneManager version that supports rewriting key as atomic operation"), HBASE_SUPPORT(7, "OzoneManager version that supports HBase integration"), + LIGHTWEIGHT_LIST_STATUS(8, "OzoneManager version that supports lightweight" + + " listStatus API."), FUTURE_VERSION(-1, "Used internally in the client when the server side is " + " newer and an unknown server version has arrived to the client."); diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java index 15babfde69d..9dc11637f3c 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java @@ -2316,9 +2316,16 @@ public List listStatusLight(String volumeName, String bucketName, String keyName, boolean recursive, String startKey, long numEntries, boolean allowPartialPrefixes) throws IOException { OmKeyArgs keyArgs = prepareOmKeyArgs(volumeName, bucketName, keyName); - return ozoneManagerClient - .listStatusLight(keyArgs, recursive, startKey, numEntries, - allowPartialPrefixes); + if (omVersion.compareTo(OzoneManagerVersion.LIGHTWEIGHT_LIST_STATUS) >= 0) { + return ozoneManagerClient.listStatusLight(keyArgs, recursive, startKey, + numEntries, allowPartialPrefixes); + } else { + return ozoneManagerClient.listStatus(keyArgs, recursive, startKey, + numEntries, allowPartialPrefixes) + .stream() + .map(OzoneFileStatusLight::fromOzoneFileStatus) + .collect(Collectors.toList()); + } } /** diff --git a/hadoop-ozone/dist/src/main/smoketest/compatibility/read.robot b/hadoop-ozone/dist/src/main/smoketest/compatibility/read.robot index 511679c56f4..57715cda95f 100644 --- a/hadoop-ozone/dist/src/main/smoketest/compatibility/read.robot +++ b/hadoop-ozone/dist/src/main/smoketest/compatibility/read.robot @@ -30,6 +30,10 @@ Key Can Be Read Dir Can Be Listed Execute ozone fs -ls o3fs://bucket1.vol1/dir-${SUFFIX} +Dir Can Be Listed Using Shell + ${result} = Execute ozone sh key list /vol1/bucket1 + Should Contain ${result} key-${SUFFIX} + File Can Be Get Execute ozone fs -get o3fs://bucket1.vol1/dir-${SUFFIX}/file-${SUFFIX} /tmp/ Execute diff -q ${TESTFILE} /tmp/file-${SUFFIX} From 9477aa63fd07c6a939b2886d0fb500f07e6f0ea4 Mon Sep 17 00:00:00 2001 From: Arafat2198 Date: Tue, 10 Sep 2024 10:51:08 +0530 Subject: [PATCH 02/43] HDDS-11436. Minor update in Recon API handling. 
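The RpcClient change in HDDS-11414 above follows a common compatibility pattern: ask the server which features it supports and use the newer, cheaper listStatusLight RPC only when the OzoneManager advertises LIGHTWEIGHT_LIST_STATUS, otherwise fall back to the full listStatus call and convert the results on the client. A minimal, self-contained sketch of that pattern (the types below are illustrative stand-ins, not the actual Ozone classes):

import java.util.List;
import java.util.stream.Collectors;

public final class VersionGatedListing {

  // Stand-in for OzoneManagerVersion: enum order doubles as version order.
  enum ServerVersion { LEGACY, LIGHTWEIGHT_LIST_STATUS }

  static final class FullStatus {
    final String key;
    FullStatus(String key) { this.key = key; }
  }

  static final class LightStatus {
    final String key;
    LightStatus(String key) { this.key = key; }
    static LightStatus fromFull(FullStatus full) { return new LightStatus(full.key); }
  }

  interface Client {
    ServerVersion serverVersion();
    List<LightStatus> listStatusLight(String prefix);
    List<FullStatus> listStatus(String prefix);
  }

  static List<LightStatus> list(Client client, String prefix) {
    if (client.serverVersion().compareTo(ServerVersion.LIGHTWEIGHT_LIST_STATUS) >= 0) {
      return client.listStatusLight(prefix);            // server supports the new RPC
    }
    return client.listStatus(prefix).stream()           // older server: adapt client-side
        .map(LightStatus::fromFull)
        .collect(Collectors.toList());
  }
}

In the actual patch the conversion step is OzoneFileStatusLight.fromOzoneFileStatus, so newer clients keep working against OzoneManagers that predate the lightweight API.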
(#7178) --- .../ozone/recon/api/TriggerDBSyncEndpoint.java | 1 + .../ozone/recon/api/filters/TestAdminFilter.java | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/TriggerDBSyncEndpoint.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/TriggerDBSyncEndpoint.java index 070b7e1ccd4..3ce4fc7f837 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/TriggerDBSyncEndpoint.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/TriggerDBSyncEndpoint.java @@ -32,6 +32,7 @@ */ @Path("/triggerdbsync") @Produces(MediaType.APPLICATION_JSON) +@AdminOnly public class TriggerDBSyncEndpoint { private OzoneManagerServiceProvider ozoneManagerServiceProvider; diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/filters/TestAdminFilter.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/filters/TestAdminFilter.java index e30590df55e..7c874a9e299 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/filters/TestAdminFilter.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/filters/TestAdminFilter.java @@ -33,7 +33,6 @@ import org.apache.hadoop.ozone.recon.api.NodeEndpoint; import org.apache.hadoop.ozone.recon.api.PipelineEndpoint; import org.apache.hadoop.ozone.recon.api.TaskStatusService; -import org.apache.hadoop.ozone.recon.api.TriggerDBSyncEndpoint; import org.apache.hadoop.ozone.recon.api.UtilizationEndpoint; import org.apache.hadoop.security.UserGroupInformation; import org.junit.jupiter.api.Test; @@ -70,8 +69,14 @@ public void testAdminOnlyEndpoints() { assertThat(allEndpoints).isNotEmpty(); - // If an endpoint is added, it must be explicitly added to this set or be - // marked with @AdminOnly for this test to pass. + // If an endpoint is added, it must either require admin privileges by being + // marked with the `@AdminOnly` annotation, or be added to this set to exclude it. + // - Any endpoint that displays information related to the filesystem namespace + // (including aggregate counts), user information, or allows modification to the + // cluster's state should be marked as `@AdminOnly`. + // - Read-only endpoints that only return information about node status or + // cluster state do not require the `@AdminOnly` annotation and can be excluded + // from admin requirements by adding them to this set. Set> nonAdminEndpoints = new HashSet<>(); nonAdminEndpoints.add(UtilizationEndpoint.class); nonAdminEndpoints.add(ClusterStateEndpoint.class); @@ -79,7 +84,6 @@ public void testAdminOnlyEndpoints() { nonAdminEndpoints.add(NodeEndpoint.class); nonAdminEndpoints.add(PipelineEndpoint.class); nonAdminEndpoints.add(TaskStatusService.class); - nonAdminEndpoints.add(TriggerDBSyncEndpoint.class); assertThat(allEndpoints).containsAll(nonAdminEndpoints); From 33dbd4a3851fdc467f342f4da35d4eb3495fb330 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Tue, 10 Sep 2024 17:53:59 +0800 Subject: [PATCH 03/43] HDDS-11357. Datanode Usageinfo Support Display Pipeline. 
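The TestAdminFilter change in HDDS-11436 above enforces a simple invariant: every Recon REST endpoint class must either carry the @AdminOnly annotation or be explicitly allow-listed as safe for non-admin users. A stripped-down sketch of that style of guard (the annotation and endpoint classes here are stand-ins for the Recon ones, and the hand-built list replaces whatever endpoint discovery the real test uses):

import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public final class AdminGateCheck {

  @Retention(RetentionPolicy.RUNTIME)
  @interface AdminOnly { }

  // Example endpoints: one privileged, one read-only.
  @AdminOnly static class TriggerSyncEndpoint { }
  static class ClusterStateEndpoint { }

  public static void main(String[] args) {
    List<Class<?>> allEndpoints =
        Arrays.asList(TriggerSyncEndpoint.class, ClusterStateEndpoint.class);
    Set<Class<?>> nonAdminAllowList =
        new HashSet<>(Arrays.asList(ClusterStateEndpoint.class));

    for (Class<?> endpoint : allEndpoints) {
      boolean gated = endpoint.isAnnotationPresent(AdminOnly.class);
      if (!gated && !nonAdminAllowList.contains(endpoint)) {
        throw new AssertionError(endpoint + " must be @AdminOnly or allow-listed");
      }
    }
  }
}

With TriggerDBSyncEndpoint now annotated @AdminOnly, the patch removes it from the allow-list instead of growing the list.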
(#7105) --- .../interface-client/src/main/proto/hdds.proto | 1 + .../hadoop/hdds/scm/node/DatanodeUsageInfo.java | 11 +++++++++++ .../apache/hadoop/hdds/scm/node/SCMNodeManager.java | 6 ++++++ .../hdds/scm/cli/datanode/UsageInfoSubcommand.java | 9 +++++++++ .../scm/cli/datanode/TestUsageInfoSubcommand.java | 3 +++ 5 files changed, 30 insertions(+) diff --git a/hadoop-hdds/interface-client/src/main/proto/hdds.proto b/hadoop-hdds/interface-client/src/main/proto/hdds.proto index 4555d1cf4a3..6cd4f6235ce 100644 --- a/hadoop-hdds/interface-client/src/main/proto/hdds.proto +++ b/hadoop-hdds/interface-client/src/main/proto/hdds.proto @@ -191,6 +191,7 @@ message DatanodeUsageInfoProto { optional int64 containerCount = 5; optional int64 committed = 6; optional int64 freeSpaceToSpare = 7; + optional int64 pipelineCount = 8; } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeUsageInfo.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeUsageInfo.java index 4f7df496906..1cafab3f67c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeUsageInfo.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeUsageInfo.java @@ -32,6 +32,7 @@ public class DatanodeUsageInfo { private DatanodeDetails datanodeDetails; private SCMNodeStat scmNodeStat; private int containerCount; + private int pipelineCount; /** * Constructs a DatanodeUsageInfo with DatanodeDetails and SCMNodeStat. @@ -45,6 +46,7 @@ public DatanodeUsageInfo( this.datanodeDetails = datanodeDetails; this.scmNodeStat = scmNodeStat; this.containerCount = -1; + this.pipelineCount = -1; } /** @@ -145,6 +147,14 @@ public void setContainerCount(int containerCount) { this.containerCount = containerCount; } + public int getPipelineCount() { + return pipelineCount; + } + + public void setPipelineCount(int pipelineCount) { + this.pipelineCount = pipelineCount; + } + /** * Gets Comparator that compares two DatanodeUsageInfo on the basis of * their utilization values. 
Utilization is (capacity - remaining) divided @@ -210,6 +220,7 @@ private DatanodeUsageInfoProto.Builder toProtoBuilder(int clientVersion) { } builder.setContainerCount(containerCount); + builder.setPipelineCount(pipelineCount); return builder; } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index 05a68628852..fa8f316aa42 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -982,6 +982,7 @@ public DatanodeUsageInfo getUsageInfo(DatanodeDetails dn) { DatanodeUsageInfo usageInfo = new DatanodeUsageInfo(dn, stat); try { usageInfo.setContainerCount(getContainerCount(dn)); + usageInfo.setPipelineCount(getPipeLineCount(dn)); } catch (NodeNotFoundException ex) { LOG.error("Unknown datanode {}.", dn, ex); } @@ -1610,6 +1611,11 @@ public int getContainerCount(DatanodeDetails datanodeDetails) return nodeStateManager.getContainerCount(datanodeDetails.getUuid()); } + public int getPipeLineCount(DatanodeDetails datanodeDetails) + throws NodeNotFoundException { + return nodeStateManager.getPipelinesCount(datanodeDetails); + } + @Override public void addDatanodeCommand(UUID dnId, SCMCommand command) { writeLock().lock(); diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/UsageInfoSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/UsageInfoSubcommand.java index b967fa0658c..2c069291a86 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/UsageInfoSubcommand.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/UsageInfoSubcommand.java @@ -155,6 +155,8 @@ private void printInfo(DatanodeUsage info) { + " B", StringUtils.byteDesc(info.getRemaining())); System.out.printf("%-13s: %s %n", "Remaining %", PERCENT_FORMAT.format(info.getRemainingRatio())); + System.out.printf("%-13s: %d %n", "Pipeline(s)", + info.getPipelineCount()); System.out.printf("%-13s: %d %n", "Container(s)", info.getContainerCount()); System.out.printf("%-24s: %s (%s) %n", "Container Pre-allocated", @@ -192,6 +194,7 @@ private static class DatanodeUsage { private long committed = 0; private long freeSpaceToSpare = 0; private long containerCount = 0; + private long pipelineCount = 0; DatanodeUsage(HddsProtos.DatanodeUsageInfoProto proto) { if (proto.hasNode()) { @@ -212,6 +215,9 @@ private static class DatanodeUsage { if (proto.hasContainerCount()) { containerCount = proto.getContainerCount(); } + if (proto.hasPipelineCount()) { + pipelineCount = proto.getPipelineCount(); + } if (proto.hasFreeSpaceToSpare()) { freeSpaceToSpare = proto.getFreeSpaceToSpare(); } @@ -277,5 +283,8 @@ public double getRemainingRatio() { return remaining / (double) capacity; } + public long getPipelineCount() { + return pipelineCount; + } } } diff --git a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestUsageInfoSubcommand.java b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestUsageInfoSubcommand.java index 09f6621735e..a691e754606 100644 --- a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestUsageInfoSubcommand.java +++ b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestUsageInfoSubcommand.java @@ -94,6 +94,7 @@ public void 
testCorrectJsonValuesInReport() throws IOException { assertEquals(80.00, json.get(0).get("remainingPercent").doubleValue(), 0.001); assertEquals(5, json.get(0).get("containerCount").longValue()); + assertEquals(10, json.get(0).get("pipelineCount").longValue()); } @Test @@ -122,6 +123,7 @@ public void testOutputDataFieldsAligning() throws IOException { assertThat(output).contains("Remaining :"); assertThat(output).contains("Remaining % :"); assertThat(output).contains("Container(s) :"); + assertThat(output).contains("Pipeline(s) :"); assertThat(output).contains("Container Pre-allocated :"); assertThat(output).contains("Remaining Allocatable :"); assertThat(output).contains("Free Space To Spare :"); @@ -135,6 +137,7 @@ private List getUsageProto() { .setRemaining(80) .setUsed(10) .setContainerCount(5) + .setPipelineCount(10) .build()); return result; } From 883a63f4727d5fb91797b18bfa1dbb2c38488c1e Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com> Date: Tue, 10 Sep 2024 17:58:13 +0200 Subject: [PATCH 04/43] HDDS-11441. ozone sh key put should only accept positive expectedGeneration (#7180) --- .../src/main/smoketest/ozone-lib/shell_tests.robot | 8 ++++++++ .../hadoop/ozone/shell/keys/PutKeyHandler.java | 13 +++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/ozone-lib/shell_tests.robot b/hadoop-ozone/dist/src/main/smoketest/ozone-lib/shell_tests.robot index 22805efcb1b..651cda016f2 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozone-lib/shell_tests.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozone-lib/shell_tests.robot @@ -56,3 +56,11 @@ Compare Key With Local File with Different File Compare Key With Local File if File Does Not Exist ${matches} = Compare Key With Local File o3://${OM_SERVICE_ID}/vol1/bucket/passwd /no-such-file Should Be Equal ${matches} ${FALSE} + +Rejects Put Key With Zero Expected Generation + ${output} = Execute and checkrc ozone sh key put --expectedGeneration 0 o3://${OM_SERVICE_ID}/vol1/bucket/passwd /etc/passwd 255 + Should Contain ${output} must be positive + +Rejects Put Key With Negative Expected Generation + ${output} = Execute and checkrc ozone sh key put --expectedGeneration -1 o3://${OM_SERVICE_ID}/vol1/bucket/passwd /etc/passwd 255 + Should Contain ${output} must be positive diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/PutKeyHandler.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/PutKeyHandler.java index 833f4f7e779..35095dd7ff2 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/PutKeyHandler.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/keys/PutKeyHandler.java @@ -71,7 +71,7 @@ public class PutKeyHandler extends KeyHandler { @Option(names = "--expectedGeneration", description = "Store key only if it already exists and its generation matches the value provided") - private long expectedGeneration; + private Long expectedGeneration; @Override protected void execute(OzoneClient client, OzoneAddress address) @@ -131,9 +131,14 @@ private void async( private OzoneOutputStream createOrReplaceKey(OzoneBucket bucket, String keyName, long size, Map keyMetadata, ReplicationConfig replicationConfig ) throws IOException { - return expectedGeneration > 0 - ? 
bucket.rewriteKey(keyName, size, expectedGeneration, replicationConfig, keyMetadata) - : bucket.createKey(keyName, size, replicationConfig, keyMetadata); + if (expectedGeneration != null) { + final long existingGeneration = expectedGeneration; + Preconditions.checkArgument(existingGeneration > 0, + "expectedGeneration must be positive, but was %s", existingGeneration); + return bucket.rewriteKey(keyName, size, existingGeneration, replicationConfig, keyMetadata); + } + + return bucket.createKey(keyName, size, replicationConfig, keyMetadata); } private void stream( From 86fe920030eb50266205d08afe797f8fa956de67 Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Wed, 11 Sep 2024 01:45:24 +0800 Subject: [PATCH 05/43] HDDS-11376. Improve ReplicationSupervisor to record replication metrics (#7140) --- .../ECReconstructionCoordinatorTask.java | 10 + .../replication/AbstractReplicationTask.java | 4 + .../replication/ReplicationSupervisor.java | 89 +++++-- .../ReplicationSupervisorMetrics.java | 33 ++- .../replication/ReplicationTask.java | 10 + .../TestReplicationSupervisor.java | 221 ++++++++++++++++-- 6 files changed, 331 insertions(+), 36 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java index 6d32f3a3f3e..a50a125f6d4 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java @@ -46,6 +46,16 @@ public ECReconstructionCoordinatorTask( debugString = reconstructionCommandInfo.toString(); } + @Override + public String getMetricName() { + return "ECReconstructions"; + } + + @Override + public String getMetricDescriptionSegment() { + return "EC reconstructions"; + } + @Override public void runTask() { // Implement the coordinator logic to handle a container group diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/AbstractReplicationTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/AbstractReplicationTask.java index 72fa88b35d9..f4bf54a3d82 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/AbstractReplicationTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/AbstractReplicationTask.java @@ -70,6 +70,10 @@ protected AbstractReplicationTask(long containerID, this.term = term; queued = Instant.now(clock); } + + protected abstract String getMetricName(); + + protected abstract String getMetricDescriptionSegment(); public long getContainerId() { return containerId; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java index 5ceea125e81..92ff4b6d8d6 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java @@ -26,6 +26,7 @@ import 
java.util.Objects; import java.util.OptionalLong; import java.util.Set; +import java.util.Collections; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.PriorityBlockingQueue; @@ -71,11 +72,17 @@ public final class ReplicationSupervisor { private final StateContext context; private final Clock clock; - private final AtomicLong requestCounter = new AtomicLong(); - private final AtomicLong successCounter = new AtomicLong(); - private final AtomicLong failureCounter = new AtomicLong(); - private final AtomicLong timeoutCounter = new AtomicLong(); - private final AtomicLong skippedCounter = new AtomicLong(); + private final Map requestCounter = new ConcurrentHashMap<>(); + private final Map successCounter = new ConcurrentHashMap<>(); + private final Map failureCounter = new ConcurrentHashMap<>(); + private final Map timeoutCounter = new ConcurrentHashMap<>(); + private final Map skippedCounter = new ConcurrentHashMap<>(); + + private static final Map METRICS_MAP; + + static { + METRICS_MAP = new HashMap<>(); + } /** * A set of container IDs that are currently being downloaded @@ -188,6 +195,10 @@ public static Builder newBuilder() { return new Builder(); } + public static Map getMetricsMap() { + return Collections.unmodifiableMap(METRICS_MAP); + } + private ReplicationSupervisor(StateContext context, ExecutorService executor, ReplicationConfig replicationConfig, DatanodeConfiguration datanodeConfig, Clock clock, IntConsumer executorThreadUpdater) { @@ -221,6 +232,19 @@ public void addTask(AbstractReplicationTask task) { return; } + if (requestCounter.get(task.getMetricName()) == null) { + synchronized (this) { + if (requestCounter.get(task.getMetricName()) == null) { + requestCounter.put(task.getMetricName(), new AtomicLong(0)); + successCounter.put(task.getMetricName(), new AtomicLong(0)); + failureCounter.put(task.getMetricName(), new AtomicLong(0)); + timeoutCounter.put(task.getMetricName(), new AtomicLong(0)); + skippedCounter.put(task.getMetricName(), new AtomicLong(0)); + METRICS_MAP.put(task.getMetricName(), task.getMetricDescriptionSegment()); + } + } + } + if (inFlight.add(task)) { if (task.getPriority() != ReplicationCommandPriority.LOW) { // Low priority tasks are not included in the replication queue sizes @@ -330,14 +354,14 @@ public TaskRunner(AbstractReplicationTask task) { @Override public void run() { try { - requestCounter.incrementAndGet(); + requestCounter.get(task.getMetricName()).incrementAndGet(); final long now = clock.millis(); final long deadline = task.getDeadline(); if (deadline > 0 && now > deadline) { LOG.info("Ignoring {} since the deadline has passed ({} < {})", this, Instant.ofEpochMilli(deadline), Instant.ofEpochMilli(now)); - timeoutCounter.incrementAndGet(); + timeoutCounter.get(task.getMetricName()).incrementAndGet(); return; } @@ -364,18 +388,18 @@ public void run() { task.runTask(); if (task.getStatus() == Status.FAILED) { LOG.warn("Failed {}", this); - failureCounter.incrementAndGet(); + failureCounter.get(task.getMetricName()).incrementAndGet(); } else if (task.getStatus() == Status.DONE) { LOG.info("Successful {}", this); - successCounter.incrementAndGet(); + successCounter.get(task.getMetricName()).incrementAndGet(); } else if (task.getStatus() == Status.SKIPPED) { LOG.info("Skipped {}", this); - skippedCounter.incrementAndGet(); + skippedCounter.get(task.getMetricName()).incrementAndGet(); } } catch (Exception e) { task.setStatus(Status.FAILED); LOG.warn("Failed {}", this, e); - 
failureCounter.incrementAndGet(); + failureCounter.get(task.getMetricName()).incrementAndGet(); } finally { inFlight.remove(task); decrementTaskCounter(task); @@ -419,7 +443,12 @@ public boolean equals(Object o) { } public long getReplicationRequestCount() { - return requestCounter.get(); + return getCount(requestCounter); + } + + public long getReplicationRequestCount(String metricsName) { + AtomicLong counter = requestCounter.get(metricsName); + return counter != null ? counter.get() : 0; } public long getQueueSize() { @@ -438,20 +467,48 @@ public long getMaxReplicationStreams() { } } + private long getCount(Map counter) { + long total = 0; + for (Map.Entry entry : counter.entrySet()) { + total += entry.getValue().get(); + } + return total; + } + public long getReplicationSuccessCount() { - return successCounter.get(); + return getCount(successCounter); + } + + public long getReplicationSuccessCount(String metricsName) { + AtomicLong counter = successCounter.get(metricsName); + return counter != null ? counter.get() : 0; } public long getReplicationFailureCount() { - return failureCounter.get(); + return getCount(failureCounter); + } + + public long getReplicationFailureCount(String metricsName) { + AtomicLong counter = failureCounter.get(metricsName); + return counter != null ? counter.get() : 0; } public long getReplicationTimeoutCount() { - return timeoutCounter.get(); + return getCount(timeoutCounter); + } + + public long getReplicationTimeoutCount(String metricsName) { + AtomicLong counter = timeoutCounter.get(metricsName); + return counter != null ? counter.get() : 0; } public long getReplicationSkippedCount() { - return skippedCounter.get(); + return getCount(skippedCounter); + } + + public long getReplicationSkippedCount(String metricsName) { + AtomicLong counter = skippedCounter.get(metricsName); + return counter != null ? 
counter.get() : 0; } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorMetrics.java index 671e985d7ad..a1763976af9 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorMetrics.java @@ -71,16 +71,47 @@ public void getMetrics(MetricsCollector collector, boolean all) { .addGauge(Interns.info("numRequestedReplications", "Number of requested replications"), supervisor.getReplicationRequestCount()) + .addGauge(Interns.info("numSuccessReplications", + "Number of successful replications"), + supervisor.getReplicationSuccessCount()) + .addGauge(Interns.info("numFailureReplications", + "Number of failure replications"), + supervisor.getReplicationFailureCount()) .addGauge(Interns.info("numTimeoutReplications", "Number of replication requests timed out before being processed"), supervisor.getReplicationTimeoutCount()) .addGauge(Interns.info("numSkippedReplications", "Number of replication requests skipped as the container is " - + "already present"), supervisor.getReplicationSkippedCount()) + + "already present"), + supervisor.getReplicationSkippedCount()) .addGauge(Interns.info("maxReplicationStreams", "Maximum number of " + "concurrent replication tasks which can run simultaneously"), supervisor.getMaxReplicationStreams()); + Map metricsMap = ReplicationSupervisor.getMetricsMap(); + if (!metricsMap.isEmpty()) { + metricsMap.forEach((metricsName, descriptionSegment) -> { + if (!metricsName.equals("")) { + builder.addGauge(Interns.info("numRequested" + metricsName, + "Number of requested " + descriptionSegment), + supervisor.getReplicationRequestCount(metricsName)) + .addGauge(Interns.info("numSuccess" + metricsName, + "Number of successful " + descriptionSegment), + supervisor.getReplicationSuccessCount(metricsName)) + .addGauge(Interns.info("numFailure" + metricsName, + "Number of failure " + descriptionSegment), + supervisor.getReplicationFailureCount(metricsName)) + .addGauge(Interns.info("numTimeout" + metricsName, + "Number of " + descriptionSegment + " timed out before being processed"), + supervisor.getReplicationTimeoutCount(metricsName)) + .addGauge(Interns.info("numSkipped" + metricsName, + "Number of " + descriptionSegment + " skipped as the container is " + + "already present"), + supervisor.getReplicationSkippedCount(metricsName)); + } + }); + } + Map tasks = supervisor.getInFlightReplicationSummary(); for (Map.Entry entry : tasks.entrySet()) { builder.addGauge(Interns.info("numInflight" + entry.getKey(), diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationTask.java index ca0ca98906c..2168f324c24 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationTask.java @@ -65,6 +65,16 @@ protected ReplicationTask( replicator); } + @Override + public String getMetricName() { + return "ContainerReplications"; + } + + @Override + public String 
getMetricDescriptionSegment() { + return "container replications"; + } + @Override public boolean equals(Object o) { if (this == o) { diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java index 1f69db78d62..ef37c226653 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java @@ -27,6 +27,7 @@ import java.time.Instant; import java.time.ZoneId; import java.util.List; +import java.util.SortedMap; import java.util.UUID; import java.util.concurrent.AbstractExecutorService; import java.util.concurrent.CountDownLatch; @@ -46,6 +47,8 @@ import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ReplicationCommandPriority; +import org.apache.hadoop.hdds.security.symmetric.SecretKeySignerClient; +import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl; import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion; import org.apache.hadoop.ozone.container.common.impl.ContainerSet; @@ -55,7 +58,9 @@ import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; import org.apache.hadoop.ozone.container.common.statemachine.StateContext; import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionCommandInfo; +import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionCoordinator; import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionCoordinatorTask; +import org.apache.hadoop.ozone.container.ec.reconstruction.ECReconstructionMetrics; import org.apache.hadoop.ozone.container.keyvalue.ContainerLayoutTestInfo; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; @@ -109,6 +114,8 @@ public class TestReplicationSupervisor { }; private final AtomicReference replicatorRef = new AtomicReference<>(); + private final AtomicReference ecReplicatorRef = + new AtomicReference<>(); private ContainerSet set; @@ -135,6 +142,7 @@ public void setUp() throws Exception { @AfterEach public void cleanup() { replicatorRef.set(null); + ecReplicatorRef.set(null); } @ContainerLayoutTestInfo.ContainerTest @@ -394,6 +402,107 @@ public void taskWithObsoleteTermIsDropped(ContainerLayoutVersion layout) { assertEquals(0, supervisor.getReplicationSuccessCount()); } + @ContainerLayoutTestInfo.ContainerTest + public void testMultipleReplication(ContainerLayoutVersion layout, + @TempDir File tempFile) throws IOException { + this.layoutVersion = layout; + OzoneConfiguration conf = new OzoneConfiguration(); + // GIVEN + ReplicationSupervisor replicationSupervisor = + supervisorWithReplicator(FakeReplicator::new); + ReplicationSupervisor ecReconstructionSupervisor = supervisorWithECReconstruction(); + ReplicationSupervisorMetrics replicationMetrics = + ReplicationSupervisorMetrics.create(replicationSupervisor); + ReplicationSupervisorMetrics ecReconstructionMetrics = + ReplicationSupervisorMetrics.create(ecReconstructionSupervisor); + try { + //WHEN 
+ replicationSupervisor.addTask(createTask(1L)); + ecReconstructionSupervisor.addTask(createECTaskWithCoordinator(2L)); + replicationSupervisor.addTask(createTask(1L)); + replicationSupervisor.addTask(createTask(3L)); + ecReconstructionSupervisor.addTask(createECTaskWithCoordinator(4L)); + + SimpleContainerDownloader moc = mock(SimpleContainerDownloader.class); + Path res = Paths.get("file:/tmp/no-such-file"); + when(moc.getContainerDataFromReplicas(anyLong(), anyList(), + any(Path.class), any())).thenReturn(res); + + final String testDir = tempFile.getPath(); + MutableVolumeSet volumeSet = mock(MutableVolumeSet.class); + when(volumeSet.getVolumesList()).thenReturn(singletonList( + new HddsVolume.Builder(testDir).conf(conf).build())); + ContainerController mockedCC = mock(ContainerController.class); + ContainerImporter importer = new ContainerImporter(conf, set, mockedCC, volumeSet); + ContainerReplicator replicator = new DownloadAndImportReplicator( + conf, set, importer, moc); + replicatorRef.set(replicator); + replicationSupervisor.addTask(createTask(5L)); + + ReplicateContainerCommand cmd1 = createCommand(6L); + cmd1.setDeadline(clock.millis() + 10000); + ReplicationTask task1 = new ReplicationTask(cmd1, replicatorRef.get()); + clock.fastForward(15000); + replicationSupervisor.addTask(task1); + + ReconstructECContainersCommand cmd2 = createReconstructionCmd(7L); + cmd2.setDeadline(clock.millis() + 10000); + ECReconstructionCoordinatorTask task2 = new ECReconstructionCoordinatorTask( + ecReplicatorRef.get(), new ECReconstructionCommandInfo(cmd2)); + clock.fastForward(15000); + ecReconstructionSupervisor.addTask(task2); + ecReconstructionSupervisor.addTask(createECTask(8L)); + ecReconstructionSupervisor.addTask(createECTask(9L)); + + //THEN + assertEquals(2, replicationSupervisor.getReplicationSuccessCount()); + assertEquals(2, replicationSupervisor.getReplicationSuccessCount( + task1.getMetricName())); + assertEquals(1, replicationSupervisor.getReplicationFailureCount()); + assertEquals(1, replicationSupervisor.getReplicationFailureCount( + task1.getMetricName())); + assertEquals(1, replicationSupervisor.getReplicationSkippedCount()); + assertEquals(1, replicationSupervisor.getReplicationSkippedCount( + task1.getMetricName())); + assertEquals(1, replicationSupervisor.getReplicationTimeoutCount()); + assertEquals(1, replicationSupervisor.getReplicationTimeoutCount( + task1.getMetricName())); + assertEquals(5, replicationSupervisor.getReplicationRequestCount()); + assertEquals(5, replicationSupervisor.getReplicationRequestCount( + task1.getMetricName())); + assertEquals(0, replicationSupervisor.getReplicationRequestCount( + task2.getMetricName())); + + assertEquals(2, ecReconstructionSupervisor.getReplicationSuccessCount()); + assertEquals(2, ecReconstructionSupervisor.getReplicationSuccessCount( + task2.getMetricName())); + assertEquals(1, ecReconstructionSupervisor.getReplicationTimeoutCount()); + assertEquals(1, ecReconstructionSupervisor.getReplicationTimeoutCount( + task2.getMetricName())); + assertEquals(2, ecReconstructionSupervisor.getReplicationFailureCount()); + assertEquals(2, ecReconstructionSupervisor.getReplicationFailureCount( + task2.getMetricName())); + assertEquals(5, ecReconstructionSupervisor.getReplicationRequestCount()); + assertEquals(5, ecReconstructionSupervisor.getReplicationRequestCount( + task2.getMetricName())); + assertEquals(0, ecReconstructionSupervisor.getReplicationRequestCount( + task1.getMetricName())); + + MetricsCollectorImpl 
replicationMetricsCollector = new MetricsCollectorImpl(); + replicationMetrics.getMetrics(replicationMetricsCollector, true); + assertEquals(1, replicationMetricsCollector.getRecords().size()); + + MetricsCollectorImpl ecReconstructionMetricsCollector = new MetricsCollectorImpl(); + ecReconstructionMetrics.getMetrics(ecReconstructionMetricsCollector, true); + assertEquals(1, ecReconstructionMetricsCollector.getRecords().size()); + } finally { + replicationMetrics.unRegister(); + ecReconstructionMetrics.unRegister(); + replicationSupervisor.stop(); + ecReconstructionSupervisor.stop(); + } + } + @ContainerLayoutTestInfo.ContainerTest public void testPriorityOrdering(ContainerLayoutVersion layout) throws InterruptedException { @@ -476,6 +585,16 @@ private static class BlockingTask extends AbstractReplicationTask { this.waitForCompleteLatch = waitForCompletion; } + @Override + protected String getMetricName() { + return "Blockings"; + } + + @Override + protected String getMetricDescriptionSegment() { + return "blockings"; + } + @Override public void runTask() { runningLatch.countDown(); @@ -502,6 +621,16 @@ private static class OrderedTask extends AbstractReplicationTask { setPriority(priority); } + @Override + protected String getMetricName() { + return "Ordereds"; + } + + @Override + protected String getMetricDescriptionSegment() { + return "ordereds"; + } + @Override public void runTask() { completeList.add(name); @@ -531,6 +660,22 @@ private ReplicationSupervisor supervisorWith( return supervisor; } + private ReplicationSupervisor supervisorWithECReconstruction() throws IOException { + ConfigurationSource conf = new OzoneConfiguration(); + ExecutorService executor = newDirectExecutorService(); + ReplicationServer.ReplicationConfig repConf = + conf.getObject(ReplicationServer.ReplicationConfig.class); + ReplicationSupervisor supervisor = ReplicationSupervisor.newBuilder() + .stateContext(context).replicationConfig(repConf).executor(executor) + .clock(clock).build(); + + FakeECReconstructionCoordinator coordinator = new FakeECReconstructionCoordinator( + new OzoneConfiguration(), null, null, context, + ECReconstructionMetrics.create(), "", supervisor); + ecReplicatorRef.set(coordinator); + return supervisor; + } + private ReplicationTask createTask(long containerId) { ReplicateContainerCommand cmd = createCommand(containerId); return new ReplicationTask(cmd, replicatorRef.get()); @@ -538,7 +683,13 @@ private ReplicationTask createTask(long containerId) { private ECReconstructionCoordinatorTask createECTask(long containerId) { return new ECReconstructionCoordinatorTask(null, - createReconstructionCmd(containerId)); + createReconstructionCmdInfo(containerId)); + } + + private ECReconstructionCoordinatorTask createECTaskWithCoordinator(long containerId) { + ECReconstructionCommandInfo ecReconstructionCommandInfo = createReconstructionCmdInfo(containerId); + return new ECReconstructionCoordinatorTask(ecReplicatorRef.get(), + ecReconstructionCommandInfo); } private static ReplicateContainerCommand createCommand(long containerId) { @@ -548,18 +699,20 @@ private static ReplicateContainerCommand createCommand(long containerId) { return cmd; } - private static ECReconstructionCommandInfo createReconstructionCmd( + private static ECReconstructionCommandInfo createReconstructionCmdInfo( long containerId) { - List sources - = new ArrayList<>(); - sources.add(new ReconstructECContainersCommand - .DatanodeDetailsAndReplicaIndex( - MockDatanodeDetails.randomDatanodeDetails(), 1)); - sources.add(new 
ReconstructECContainersCommand - .DatanodeDetailsAndReplicaIndex( + return new ECReconstructionCommandInfo(createReconstructionCmd(containerId)); + } + + private static ReconstructECContainersCommand createReconstructionCmd( + long containerId) { + List sources = + new ArrayList<>(); + sources.add(new ReconstructECContainersCommand.DatanodeDetailsAndReplicaIndex( + MockDatanodeDetails.randomDatanodeDetails(), 1)); + sources.add(new ReconstructECContainersCommand.DatanodeDetailsAndReplicaIndex( MockDatanodeDetails.randomDatanodeDetails(), 2)); - sources.add(new ReconstructECContainersCommand - .DatanodeDetailsAndReplicaIndex( + sources.add(new ReconstructECContainersCommand.DatanodeDetailsAndReplicaIndex( MockDatanodeDetails.randomDatanodeDetails(), 3)); byte[] missingIndexes = new byte[1]; @@ -567,14 +720,44 @@ private static ECReconstructionCommandInfo createReconstructionCmd( List target = singletonList( MockDatanodeDetails.randomDatanodeDetails()); - ReconstructECContainersCommand cmd = - new ReconstructECContainersCommand(containerId, - sources, - target, - Proto2Utils.unsafeByteString(missingIndexes), - new ECReplicationConfig(3, 2)); - - return new ECReconstructionCommandInfo(cmd); + ReconstructECContainersCommand cmd = new ReconstructECContainersCommand(containerId, sources, target, + Proto2Utils.unsafeByteString(missingIndexes), + new ECReplicationConfig(3, 2)); + cmd.setTerm(CURRENT_TERM); + return cmd; + } + + /** + * A fake coordinator that simulates successful reconstruction of ec containers. + */ + private class FakeECReconstructionCoordinator extends ECReconstructionCoordinator { + + private final OzoneConfiguration conf = new OzoneConfiguration(); + private final ReplicationSupervisor supervisor; + + FakeECReconstructionCoordinator(ConfigurationSource conf, + CertificateClient certificateClient, SecretKeySignerClient secretKeyClient, + StateContext context, ECReconstructionMetrics metrics, String threadNamePrefix, + ReplicationSupervisor supervisor) + throws IOException { + super(conf, certificateClient, secretKeyClient, context, metrics, threadNamePrefix); + this.supervisor = supervisor; + } + + @Override + public void reconstructECContainerGroup(long containerID, + ECReplicationConfig repConfig, SortedMap sourceNodeMap, + SortedMap targetNodeMap) { + assertEquals(1, supervisor.getTotalInFlightReplications()); + + KeyValueContainerData kvcd = new KeyValueContainerData( + containerID, layoutVersion, 100L, + UUID.randomUUID().toString(), UUID.randomUUID().toString()); + KeyValueContainer kvc = new KeyValueContainer(kvcd, conf); + assertDoesNotThrow(() -> { + set.addContainer(kvc); + }); + } } /** From 0f1619573036090b772efa3f6db8fcde7d1ccc5c Mon Sep 17 00:00:00 2001 From: Sumit Agrawal Date: Wed, 11 Sep 2024 11:25:00 +0530 Subject: [PATCH 06/43] HDDS-11416. 
refactor ratis submit request avoid code duplicate (#7166) --- .../om/service/TestRangerBGSyncService.java | 3 +- .../apache/hadoop/ozone/om/OzoneManager.java | 1 + .../hadoop/ozone/om/TrashOzoneFileSystem.java | 31 ++--------------- .../om/ratis/OzoneManagerRatisServer.java | 18 +++++++--- .../ratis/utils/OzoneManagerRatisUtils.java | 12 +++++++ .../service/AbstractKeyDeletingService.java | 33 ++----------------- .../ozone/om/service/KeyDeletingService.java | 24 ++------------ .../MultipartUploadCleanupService.java | 24 ++------------ .../om/service/OMRangerBGSyncService.java | 21 ++---------- .../om/service/OpenKeyCleanupService.java | 24 ++------------ .../ozone/om/service/QuotaRepairTask.java | 22 ++----------- .../om/service/SnapshotDeletingService.java | 23 ++----------- .../SnapshotDirectoryCleaningService.java | 25 ++------------ 13 files changed, 48 insertions(+), 213 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestRangerBGSyncService.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestRangerBGSyncService.java index abc21ed4351..a173bd9222e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestRangerBGSyncService.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestRangerBGSyncService.java @@ -80,6 +80,7 @@ import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.Mockito.any; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doNothing; @@ -233,7 +234,7 @@ public void setUp() throws IOException { ozoneManager.getMetadataManager().getMetaTable().put( OzoneConsts.RANGER_OZONE_SERVICE_VERSION_KEY, String.valueOf(v)); return null; - }).when(omRatisServer).submitRequest(any(), any()); + }).when(omRatisServer).submitRequest(any(), any(), anyLong()); } catch (ServiceException e) { throw new RuntimeException(e); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index b6903ca9e91..0038bca2e32 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -2065,6 +2065,7 @@ private void addOMNodeToPeers(String newOMNodeId) throws IOException { } catch (IOException e) { LOG.error("{}: Couldn't add OM {} to peer list.", getOMNodeId(), newOMNodeId); + return; } if (omRatisSnapshotProvider == null) { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashOzoneFileSystem.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashOzoneFileSystem.java index 6e1c9da34cb..bd462224e9d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashOzoneFileSystem.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/TrashOzoneFileSystem.java @@ -17,7 +17,6 @@ package org.apache.hadoop.ozone.om; import com.google.common.base.Preconditions; -import com.google.protobuf.RpcController; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.ozone.ClientVersion; import 
org.apache.hadoop.ozone.om.exceptions.OMException; @@ -35,15 +34,12 @@ import org.apache.hadoop.ozone.om.helpers.OmKeyArgs; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OzoneFileStatus; -import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.om.request.OMClientRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Progressable; import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.protocol.Message; -import org.apache.ratis.protocol.RaftClientRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -69,8 +65,6 @@ */ public class TrashOzoneFileSystem extends FileSystem { - private static final RpcController NULL_RPC_CONTROLLER = null; - private static final int OZONE_FS_ITERATE_BATCH_SIZE = 100; private static final int OZONE_MAX_LIST_KEYS_SIZE = 10000; @@ -97,34 +91,15 @@ public TrashOzoneFileSystem(OzoneManager ozoneManager) throws IOException { ozoneConfiguration = OzoneConfiguration.of(getConf()); } - private RaftClientRequest getRatisRequest( - OzoneManagerProtocolProtos.OMRequest omRequest) { - return RaftClientRequest.newBuilder() - .setClientId(CLIENT_ID) - .setServerId(ozoneManager.getOmRatisServer().getRaftPeerId()) - .setGroupId(ozoneManager.getOmRatisServer().getRaftGroupId()) - .setCallId(runCount.getAndIncrement()) - .setMessage( - Message.valueOf( - OMRatisHelper.convertRequestToByteString(omRequest))) - .setType(RaftClientRequest.writeRequestType()) - .build(); - - } - private void submitRequest(OzoneManagerProtocolProtos.OMRequest omRequest) throws Exception { ozoneManager.getMetrics().incNumTrashWriteRequests(); if (ozoneManager.isRatisEnabled()) { - OMClientRequest omClientRequest = - OzoneManagerRatisUtils.createClientRequest(omRequest, ozoneManager); + // perform preExecute as ratis submit do no perform preExecute + OMClientRequest omClientRequest = OzoneManagerRatisUtils.createClientRequest(omRequest, ozoneManager); omRequest = omClientRequest.preExecute(ozoneManager); - RaftClientRequest req = getRatisRequest(omRequest); - ozoneManager.getOmRatisServer().submitRequest(omRequest, req); - } else { - ozoneManager.getOmServerProtocol(). - submitRequest(NULL_RPC_CONTROLLER, omRequest); } + OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, CLIENT_ID, runCount.getAndIncrement()); } @Override diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java index 78d6ed89d2d..af4d42ad68a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java @@ -301,15 +301,23 @@ private RaftClientRequest createRaftRequest(OMRequest omRequest) { } /** - * API used internally from OzoneManager Server when requests needs to be - * submitted to ratis, where the crafted RaftClientRequest is passed along. + * API used internally from OzoneManager Server when requests need to be submitted. 
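The shape HDDS-11416 gives this API: callers pass the OMRequest plus a ClientId and call id, the Ratis server builds the RaftClientRequest itself, and a shared OzoneManagerRatisUtils helper (below) decides whether to go through Ratis at all. A minimal sketch of that routing (stand-in types, not the real OM/Ratis signatures):

public final class SubmitHelper {

  interface RatisServer {
    String submit(String request, String clientId, long callId);
  }

  interface LocalHandler {
    String submit(String request);
  }

  interface OmLike {
    boolean isRatisEnabled();
    RatisServer ratisServer();
    LocalHandler localHandler();
  }

  // Background services (key deletion, open-key cleanup, trash, ...) call this
  // instead of each building their own Ratis request.
  static String submitRequest(OmLike om, String request, String clientId, long callId) {
    if (om.isRatisEnabled()) {
      return om.ratisServer().submit(request, clientId, callId);
    }
    return om.localHandler().submit(request);
  }
}

The duplicated RaftClientRequest builders removed from the services below all collapse into this one call.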
* @param omRequest - * @param raftClientRequest + * @param cliId + * @param callId * @return OMResponse * @throws ServiceException */ - public OMResponse submitRequest(OMRequest omRequest, - RaftClientRequest raftClientRequest) throws ServiceException { + public OMResponse submitRequest(OMRequest omRequest, ClientId cliId, long callId) throws ServiceException { + RaftClientRequest raftClientRequest = RaftClientRequest.newBuilder() + .setClientId(cliId) + .setServerId(getRaftPeerId()) + .setGroupId(getRaftGroupId()) + .setCallId(callId) + .setMessage(Message.valueOf( + OMRatisHelper.convertRequestToByteString(omRequest))) + .setType(RaftClientRequest.writeRequestType()) + .build(); RaftClientReply raftClientReply = submitRequestToRatis(raftClientRequest); return createOmResponse(omRequest, raftClientReply); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/utils/OzoneManagerRatisUtils.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/utils/OzoneManagerRatisUtils.java index 5dc640c742c..ffaedaa06a9 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/utils/OzoneManagerRatisUtils.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/utils/OzoneManagerRatisUtils.java @@ -19,6 +19,7 @@ import com.google.common.base.Preconditions; import com.google.common.base.Strings; +import com.google.protobuf.RpcController; import com.google.protobuf.ServiceException; import java.io.File; import java.nio.file.InvalidPathException; @@ -98,6 +99,7 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Status; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type; import org.apache.ratis.grpc.GrpcTlsConfig; +import org.apache.ratis.protocol.ClientId; import org.rocksdb.RocksDBException; import java.io.IOException; @@ -117,6 +119,7 @@ public final class OzoneManagerRatisUtils { private static final Logger LOG = LoggerFactory .getLogger(OzoneManagerRatisUtils.class); + private static final RpcController NULL_RPC_CONTROLLER = null; private OzoneManagerRatisUtils() { } @@ -502,4 +505,13 @@ public static GrpcTlsConfig createServerTlsConfig(SecurityConfig conf, return null; } + + public static OzoneManagerProtocolProtos.OMResponse submitRequest( + OzoneManager om, OMRequest omRequest, ClientId clientId, long callId) throws ServiceException { + if (om.isRatisEnabled()) { + return om.getOmRatisServer().submitRequest(omRequest, clientId, callId); + } else { + return om.getOmServerProtocol().submitRequest(NULL_RPC_CONTROLLER, omRequest); + } + } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java index 154bd474b6d..2c2d16bf14c 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java @@ -33,11 +33,11 @@ import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.SnapshotChainManager; -import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import 
org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.DeletedKeys; @@ -48,8 +48,6 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type; import org.apache.hadoop.util.Time; import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.protocol.Message; -import org.apache.ratis.protocol.RaftClientRequest; import org.apache.ratis.util.Preconditions; import java.io.IOException; @@ -247,10 +245,7 @@ private int submitPurgeKeysRequest(List results, // Submit PurgeKeys request to OM try { - RaftClientRequest raftClientRequest = - createRaftClientRequestForPurge(omRequest); - ozoneManager.getOmRatisServer().submitRequest(omRequest, - raftClientRequest); + OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, clientId, runCount.get()); } catch (ServiceException e) { LOG.error("PurgeKey request failed. Will retry at next run."); return 0; @@ -259,20 +254,6 @@ private int submitPurgeKeysRequest(List results, return deletedCount; } - protected RaftClientRequest createRaftClientRequestForPurge( - OMRequest omRequest) { - return RaftClientRequest.newBuilder() - .setClientId(clientId) - .setServerId(ozoneManager.getOmRatisServer().getRaftPeerId()) - .setGroupId(ozoneManager.getOmRatisServer().getRaftGroupId()) - .setCallId(runCount.get()) - .setMessage( - Message.valueOf( - OMRatisHelper.convertRequestToByteString(omRequest))) - .setType(RaftClientRequest.writeRequestType()) - .build(); - } - /** * Parse Volume and Bucket Name from ObjectKey and add it to given map of * keys to be purged per bucket. @@ -311,15 +292,7 @@ protected void submitPurgePaths(List requests, // Submit Purge paths request to OM try { - if (isRatisEnabled()) { - RaftClientRequest raftClientRequest = - createRaftClientRequestForPurge(omRequest); - ozoneManager.getOmRatisServer().submitRequest(omRequest, - raftClientRequest); - } else { - getOzoneManager().getOmServerProtocol() - .submitRequest(null, omRequest); - } + OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, clientId, runCount.get()); } catch (ServiceException e) { LOG.error("PurgePaths request failed. 
Will retry at next run."); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java index aa2eb6720a3..5e622cb1701 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java @@ -42,8 +42,7 @@ import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; -import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; -import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer; +import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.om.snapshot.ReferenceCounted; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotSize; @@ -67,8 +66,6 @@ import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.protocol.Message; -import org.apache.ratis.protocol.RaftClientRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -481,24 +478,7 @@ private void updateDeepCleanedSnapshots(List deepCleanedSnapshots) { public void submitRequest(OMRequest omRequest, ClientId clientId) { try { - if (isRatisEnabled()) { - OzoneManagerRatisServer server = getOzoneManager().getOmRatisServer(); - - RaftClientRequest raftClientRequest = RaftClientRequest.newBuilder() - .setClientId(clientId) - .setServerId(server.getRaftPeerId()) - .setGroupId(server.getRaftGroupId()) - .setCallId(getRunCount().get()) - .setMessage(Message.valueOf( - OMRatisHelper.convertRequestToByteString(omRequest))) - .setType(RaftClientRequest.writeRequestType()) - .build(); - - server.submitRequest(omRequest, raftClientRequest); - } else { - getOzoneManager().getOmServerProtocol() - .submitRequest(null, omRequest); - } + OzoneManagerRatisUtils.submitRequest(getOzoneManager(), omRequest, clientId, getRunCount().get()); } catch (ServiceException e) { LOG.error("Snapshot deep cleaning request failed. 
" + "Will retry at next run.", e); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/MultipartUploadCleanupService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/MultipartUploadCleanupService.java index 1199a0c6506..f1084155e98 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/MultipartUploadCleanupService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/MultipartUploadCleanupService.java @@ -29,16 +29,13 @@ import org.apache.hadoop.ozone.om.KeyManager; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OzoneManager; -import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; -import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer; +import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.ExpiredMultipartUploadsBucket; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.MultipartUploadsExpiredAbortRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type; import org.apache.hadoop.util.Time; import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.protocol.Message; -import org.apache.ratis.protocol.RaftClientRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -208,24 +205,7 @@ private OMRequest createRequest(List private void submitRequest(OMRequest omRequest) { try { - if (isRatisEnabled()) { - OzoneManagerRatisServer server = ozoneManager.getOmRatisServer(); - - RaftClientRequest raftClientRequest = RaftClientRequest.newBuilder() - .setClientId(clientId) - .setServerId(server.getRaftPeerId()) - .setGroupId(server.getRaftGroupId()) - .setCallId(runCount.get()) - .setMessage(Message.valueOf( - OMRatisHelper.convertRequestToByteString(omRequest))) - .setType(RaftClientRequest.writeRequestType()) - .build(); - - server.submitRequest(omRequest, raftClientRequest); - } else { - ozoneManager.getOmServerProtocol().submitRequest(null, - omRequest); - } + OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, clientId, runCount.get()); } catch (ServiceException e) { LOG.error("Expired multipart info delete request failed. 
" + "Will retry at next run.", e); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OMRangerBGSyncService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OMRangerBGSyncService.java index 45112037c1b..768c77ad16e 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OMRangerBGSyncService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OMRangerBGSyncService.java @@ -47,7 +47,6 @@ import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes; import org.apache.hadoop.ozone.om.exceptions.OMNotLeaderException; -import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; import org.apache.hadoop.ozone.om.helpers.OmDBAccessIdInfo; import org.apache.hadoop.ozone.om.helpers.OmDBTenantState; import org.apache.hadoop.ozone.om.multitenant.AuthorizerLock; @@ -55,12 +54,11 @@ import org.apache.hadoop.ozone.om.multitenant.MultiTenantAccessController; import org.apache.hadoop.ozone.om.multitenant.MultiTenantAccessController.Policy; import org.apache.hadoop.ozone.om.multitenant.MultiTenantAccessController.Role; +import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SetRangerServiceVersionRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type; import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.protocol.Message; -import org.apache.ratis.protocol.RaftClientRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -375,19 +373,6 @@ long getRangerOzoneServicePolicyVersion() throws IOException { return policyVersion; } - private RaftClientRequest newRaftClientRequest(OMRequest omRequest) { - return RaftClientRequest.newBuilder() - .setClientId(CLIENT_ID) - .setServerId(ozoneManager.getOmRatisServer().getRaftPeerId()) - .setGroupId(ozoneManager.getOmRatisServer().getRaftGroupId()) - .setCallId(runCount.get()) - .setMessage( - Message.valueOf( - OMRatisHelper.convertRequestToByteString(omRequest))) - .setType(RaftClientRequest.writeRequestType()) - .build(); - } - public void setOMDBRangerServiceVersion(long version) throws ServiceException { // OM DB update goes through Ratis @@ -402,9 +387,7 @@ public void setOMDBRangerServiceVersion(long version) .build(); try { - RaftClientRequest raftClientRequest = newRaftClientRequest(omRequest); - ozoneManager.getOmRatisServer().submitRequest(omRequest, - raftClientRequest); + OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, CLIENT_ID, runCount.get()); } catch (ServiceException e) { LOG.error("SetRangerServiceVersion request failed. 
" + "Will retry at next run."); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java index ab556230194..c0d958f6121 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/OpenKeyCleanupService.java @@ -31,8 +31,7 @@ import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; -import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; -import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer; +import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.CommitKeyRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.DeleteOpenKeysRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; @@ -41,8 +40,6 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type; import org.apache.hadoop.util.Time; import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.protocol.Message; -import org.apache.ratis.protocol.RaftClientRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -268,24 +265,7 @@ private OMRequest createDeleteOpenKeysRequest( private OMResponse submitRequest(OMRequest omRequest) { try { - if (isRatisEnabled()) { - OzoneManagerRatisServer server = ozoneManager.getOmRatisServer(); - - RaftClientRequest raftClientRequest = RaftClientRequest.newBuilder() - .setClientId(clientId) - .setServerId(server.getRaftPeerId()) - .setGroupId(server.getRaftGroupId()) - .setCallId(runCount.get()) - .setMessage(Message.valueOf( - OMRatisHelper.convertRequestToByteString(omRequest))) - .setType(RaftClientRequest.writeRequestType()) - .build(); - - return server.submitRequest(omRequest, raftClientRequest); - } else { - return ozoneManager.getOmServerProtocol().submitRequest( - null, omRequest); - } + return OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, clientId, runCount.get()); } catch (ServiceException e) { LOG.error("Open key " + omRequest.getCmdType() + " request failed. 
Will retry at next run.", e); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/QuotaRepairTask.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/QuotaRepairTask.java index c043a6a72f2..1a29ee8d96b 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/QuotaRepairTask.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/QuotaRepairTask.java @@ -51,14 +51,11 @@ import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.BucketLayout; -import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; -import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer; +import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.protocol.Message; -import org.apache.ratis.protocol.RaftClientRequest; import org.codehaus.jackson.map.ObjectMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -194,22 +191,7 @@ private void repairActiveDb( private OzoneManagerProtocolProtos.OMResponse submitRequest( OzoneManagerProtocolProtos.OMRequest omRequest, ClientId clientId) throws Exception { try { - if (om.isRatisEnabled()) { - OzoneManagerRatisServer server = om.getOmRatisServer(); - RaftClientRequest raftClientRequest = RaftClientRequest.newBuilder() - .setClientId(clientId) - .setServerId(om.getOmRatisServer().getRaftPeerId()) - .setGroupId(om.getOmRatisServer().getRaftGroupId()) - .setCallId(RUN_CNT.getAndIncrement()) - .setMessage(Message.valueOf(OMRatisHelper.convertRequestToByteString(omRequest))) - .setType(RaftClientRequest.writeRequestType()) - .build(); - return server.submitRequest(omRequest, raftClientRequest); - } else { - RUN_CNT.getAndIncrement(); - return om.getOmServerProtocol().submitRequest( - null, omRequest); - } + return OzoneManagerRatisUtils.submitRequest(om, omRequest, clientId, RUN_CNT.getAndIncrement()); } catch (ServiceException e) { LOG.error("repair quota count " + omRequest.getCmdType() + " request failed.", e); throw e; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java index a98081c63a1..f85bd781b05 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java @@ -40,7 +40,6 @@ import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.SnapshotChainManager; -import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; @@ -48,7 +47,7 @@ import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; -import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer; +import 
org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.om.snapshot.ReferenceCounted; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PurgePathRequest; @@ -58,8 +57,6 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type; import org.apache.hadoop.util.Time; import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.protocol.Message; -import org.apache.ratis.protocol.RaftClientRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -562,23 +559,7 @@ public void submitSnapshotMoveDeletedKeys(SnapshotInfo snapInfo, public void submitRequest(OMRequest omRequest) { try { - if (isRatisEnabled()) { - OzoneManagerRatisServer server = ozoneManager.getOmRatisServer(); - - RaftClientRequest raftClientRequest = RaftClientRequest.newBuilder() - .setClientId(clientId) - .setServerId(server.getRaftPeerId()) - .setGroupId(server.getRaftGroupId()) - .setCallId(getRunCount().get()) - .setMessage(Message.valueOf( - OMRatisHelper.convertRequestToByteString(omRequest))) - .setType(RaftClientRequest.writeRequestType()) - .build(); - - server.submitRequest(omRequest, raftClientRequest); - } else { - ozoneManager.getOmServerProtocol().submitRequest(null, omRequest); - } + OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, clientId, getRunCount().get()); } catch (ServiceException e) { LOG.error("Snapshot Deleting request failed. " + "Will retry at next run.", e); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java index 9746b4421b7..26d5d24a8a0 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java @@ -34,13 +34,12 @@ import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.SnapshotChainManager; -import org.apache.hadoop.ozone.om.helpers.OMRatisHelper; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; -import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer; +import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.om.request.file.OMFileRequest; import org.apache.hadoop.ozone.om.snapshot.ReferenceCounted; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; @@ -48,8 +47,6 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotSize; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type; import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.protocol.Message; -import org.apache.ratis.protocol.RaftClientRequest; import java.io.IOException; import java.util.ArrayList; @@ -436,25 +433,7 @@ private void updateDeepCleanSnapshotDir(String snapshotKeyTable) { public void submitRequest(OMRequest omRequest, ClientId clientId) { try { - if (isRatisEnabled()) { - OzoneManagerRatisServer 
server = - getOzoneManager().getOmRatisServer(); - - RaftClientRequest raftClientRequest = RaftClientRequest.newBuilder() - .setClientId(clientId) - .setServerId(server.getRaftPeerId()) - .setGroupId(server.getRaftGroupId()) - .setCallId(getRunCount().get()) - .setMessage(Message.valueOf( - OMRatisHelper.convertRequestToByteString(omRequest))) - .setType(RaftClientRequest.writeRequestType()) - .build(); - - server.submitRequest(omRequest, raftClientRequest); - } else { - getOzoneManager().getOmServerProtocol() - .submitRequest(null, omRequest); - } + OzoneManagerRatisUtils.submitRequest(getOzoneManager(), omRequest, clientId, getRunCount().get()); } catch (ServiceException e) { LOG.error("Snapshot deep cleaning request failed. " + "Will retry at next run.", e); From 0915f0b1b83c0d354d1844d92861711c62489df5 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Wed, 11 Sep 2024 17:49:10 +0800 Subject: [PATCH 07/43] HDDS-10985. EC Reconstruction failed because the size of currentChunks was not equal to checksumBlockDataChunks. (#7009) --- .../hdds/scm/storage/ECBlockOutputStream.java | 34 +++++- .../container/common/helpers/BlockData.java | 11 ++ .../scm/storage/TestContainerCommandsEC.java | 104 ++++++++++++++---- 3 files changed, 126 insertions(+), 23 deletions(-) diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ECBlockOutputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ECBlockOutputStream.java index 12ca9978c68..7776e245be0 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ECBlockOutputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ECBlockOutputStream.java @@ -38,9 +38,13 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Arrays; import java.util.ArrayList; +import java.util.Comparator; import java.util.List; import java.util.Objects; +import java.util.Optional; +import java.util.Map; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutionException; @@ -142,8 +146,34 @@ ContainerCommandResponseProto> executePutBlock(boolean close, } if (checksumBlockData != null) { - List currentChunks = getContainerBlockData().getChunksList(); + + // For the same BlockGroupLength, we need to find the larger value of Block DataSize. + // This is because we do not send empty chunks to the DataNode, so the larger value is more accurate. + Map> maxDataSizeByGroup = Arrays.stream(blockData) + .filter(Objects::nonNull) + .collect(Collectors.groupingBy(BlockData::getBlockGroupLength, + Collectors.maxBy(Comparator.comparingLong(BlockData::getSize)))); + BlockData maxBlockData = maxDataSizeByGroup.get(blockGroupLength).get(); + + // When calculating the checksum size, + // We need to consider both blockGroupLength and the actual size of blockData. + // + // We use the smaller value to determine the size of the ChunkList. + // + // 1. In most cases, blockGroupLength is equal to the size of blockData. + // 2. Occasionally, blockData is not fully filled; if a chunk is empty, + // it is not sent to the DN, resulting in blockData size being smaller than blockGroupLength. + // 3. In cases with 'dirty data', + // if an error occurs when writing to the EC-Stripe (e.g., DN reports Container Closed), + // and the length confirmed with OM is smaller, blockGroupLength may be smaller than blockData size. 
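      // Worked example with illustrative numbers (not taken from this change): with a 1 MB EC chunk size,
      // blockGroupLength = 2 MB and maxBlockData.getSize() = 3 MB, blockDataSize = min(3 MB, 2 MB) = 2 MB
      // and chunkSize = ceil(2 MB / 1 MB) = 2, so only the first two entries of
      // checksumBlockData.getChunks() are compared against the current chunk list below.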
+ long blockDataSize = Math.min(maxBlockData.getSize(), blockGroupLength); + int chunkSize = (int) Math.ceil(((double) blockDataSize / repConfig.getEcChunkSize())); List checksumBlockDataChunks = checksumBlockData.getChunks(); + if (chunkSize > 0) { + checksumBlockDataChunks = checksumBlockData.getChunks().subList(0, chunkSize); + } + + List currentChunks = getContainerBlockData().getChunksList(); Preconditions.checkArgument( currentChunks.size() == checksumBlockDataChunks.size(), @@ -269,7 +299,7 @@ public CompletableFuture executePutBlock(boolean close, throw ce; }); } catch (IOException | ExecutionException e) { - throw new IOException(EXCEPTION_MSG + e.toString(), e); + throw new IOException(EXCEPTION_MSG + e, e); } catch (InterruptedException ex) { Thread.currentThread().interrupt(); handleInterruptedException(ex, false); diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockData.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockData.java index 4bd170df8e8..ea5c5453f3f 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockData.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockData.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hdds.utils.db.Codec; import org.apache.hadoop.hdds.utils.db.DelegatedCodec; import org.apache.hadoop.hdds.utils.db.Proto3Codec; +import org.apache.hadoop.ozone.OzoneConsts; import java.io.IOException; import java.util.Collections; @@ -280,4 +281,14 @@ public void appendTo(StringBuilder sb) { sb.append(", size=").append(size); sb.append("]"); } + + public long getBlockGroupLength() { + String lenStr = getMetadata() + .get(OzoneConsts.BLOCK_GROUP_LEN_KEY_IN_PUT_BLOCK); + // If we don't have the length, then it indicates a problem with the stripe. + // All replica should carry the length, so if it is not there, we return 0, + // which will cause us to set the length of the block to zero and not + // attempt to reconstruct it. + return (lenStr == null) ? 
0 : Long.parseLong(lenStr); + } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java index c274d8fea30..6f79839cd02 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java @@ -61,6 +61,7 @@ import org.apache.hadoop.ozone.client.OzoneKeyDetails; import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.client.SecretKeyTestClient; +import org.apache.hadoop.ozone.client.io.BlockOutputStreamEntry; import org.apache.hadoop.ozone.client.io.InsufficientLocationsException; import org.apache.hadoop.ozone.client.io.KeyOutputStream; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; @@ -83,6 +84,7 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; import java.io.IOException; @@ -99,6 +101,7 @@ import java.util.SortedMap; import java.util.TreeMap; import java.util.UUID; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; import java.util.function.Function; import java.util.stream.Collectors; @@ -117,6 +120,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.params.provider.Arguments.arguments; /** * This class tests container commands on EC containers. 
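The new BlockData#getBlockGroupLength above treats a missing block-group-length metadata entry as length 0 so that reconstruction is skipped for that block. A minimal, standalone sketch of the same parse-with-default behaviour (the class, method and key names here are illustrative, not part of Ozone):

    import java.util.Collections;
    import java.util.Map;

    // Illustrative only: mirrors the missing-key fallback used by getBlockGroupLength().
    final class BlockGroupLengthSketch {
      static long blockGroupLength(Map<String, String> metadata, String lengthKey) {
        String lenStr = metadata.get(lengthKey);
        // A missing length indicates a problem with the stripe: return 0 so the block is not reconstructed.
        return (lenStr == null) ? 0 : Long.parseLong(lenStr);
      }

      public static void main(String[] args) {
        Map<String, String> withLength = Collections.singletonMap("blockGroupLen", "4194304");
        System.out.println(blockGroupLength(withLength, "blockGroupLen"));             // 4194304
        System.out.println(blockGroupLength(Collections.emptyMap(), "blockGroupLen")); // 0
      }
    }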
@@ -613,30 +617,33 @@ private static byte[] getBytesWith(int singleDigitNumber, int total) { @ParameterizedTest @MethodSource("recoverableMissingIndexes") - void testECReconstructionCoordinatorWith(List missingIndexes) + void testECReconstructionCoordinatorWith(List missingIndexes, boolean triggerRetry) throws Exception { - testECReconstructionCoordinator(missingIndexes, 3); + testECReconstructionCoordinator(missingIndexes, 3, triggerRetry); } @ParameterizedTest @MethodSource("recoverableMissingIndexes") - void testECReconstructionCoordinatorWithPartialStripe(List missingIndexes) - throws Exception { - testECReconstructionCoordinator(missingIndexes, 1); + void testECReconstructionCoordinatorWithPartialStripe(List missingIndexes, + boolean triggerRetry) throws Exception { + testECReconstructionCoordinator(missingIndexes, 1, triggerRetry); } @ParameterizedTest @MethodSource("recoverableMissingIndexes") - void testECReconstructionCoordinatorWithFullAndPartialStripe(List missingIndexes) - throws Exception { - testECReconstructionCoordinator(missingIndexes, 4); + void testECReconstructionCoordinatorWithFullAndPartialStripe(List missingIndexes, + boolean triggerRetry) throws Exception { + testECReconstructionCoordinator(missingIndexes, 4, triggerRetry); } - static Stream> recoverableMissingIndexes() { - return Stream - .concat(IntStream.rangeClosed(1, 5).mapToObj(ImmutableList::of), Stream - .of(ImmutableList.of(2, 3), ImmutableList.of(2, 4), - ImmutableList.of(3, 5), ImmutableList.of(4, 5))); + static Stream recoverableMissingIndexes() { + Stream args = IntStream.rangeClosed(1, 5).mapToObj(i -> arguments(ImmutableList.of(i), true)); + Stream args1 = IntStream.rangeClosed(1, 5).mapToObj(i -> arguments(ImmutableList.of(i), false)); + Stream args2 = Stream.of(arguments(ImmutableList.of(2, 3), true), + arguments(ImmutableList.of(2, 4), true), arguments(ImmutableList.of(3, 5), true)); + Stream args3 = Stream.of(arguments(ImmutableList.of(2, 3), false), + arguments(ImmutableList.of(2, 4), false), arguments(ImmutableList.of(3, 5), false)); + return Stream.concat(Stream.concat(args, args1), Stream.concat(args2, args3)); } /** @@ -647,7 +654,7 @@ static Stream> recoverableMissingIndexes() { public void testECReconstructionCoordinatorWithMissingIndexes135() { InsufficientLocationsException exception = assertThrows(InsufficientLocationsException.class, () -> { - testECReconstructionCoordinator(ImmutableList.of(1, 3, 5), 3); + testECReconstructionCoordinator(ImmutableList.of(1, 3, 5), 3, false); }); String expectedMessage = @@ -658,7 +665,7 @@ public void testECReconstructionCoordinatorWithMissingIndexes135() { } private void testECReconstructionCoordinator(List missingIndexes, - int numInputChunks) throws Exception { + int numInputChunks, boolean triggerRetry) throws Exception { ObjectStore objectStore = rpcClient.getObjectStore(); String keyString = UUID.randomUUID().toString(); String volumeName = UUID.randomUUID().toString(); @@ -667,7 +674,7 @@ private void testECReconstructionCoordinator(List missingIndexes, objectStore.getVolume(volumeName).createBucket(bucketName); OzoneVolume volume = objectStore.getVolume(volumeName); OzoneBucket bucket = volume.getBucket(bucketName); - createKeyAndWriteData(keyString, bucket, numInputChunks); + createKeyAndWriteData(keyString, bucket, numInputChunks, triggerRetry); try ( XceiverClientManager xceiverClientManager = @@ -779,7 +786,7 @@ private void testECReconstructionCoordinator(List missingIndexes, .getReplicationConfig(), cToken); 
assertEquals(blockDataArrList.get(i).length, reconstructedBlockData.length); - checkBlockData(blockDataArrList.get(i), reconstructedBlockData); + checkBlockDataWithRetry(blockDataArrList.get(i), reconstructedBlockData, triggerRetry); XceiverClientSpi client = xceiverClientManager.acquireClient( newTargetPipeline); try { @@ -800,7 +807,7 @@ private void testECReconstructionCoordinator(List missingIndexes, } private void createKeyAndWriteData(String keyString, OzoneBucket bucket, - int numChunks) throws IOException { + int numChunks, boolean triggerRetry) throws IOException { for (int i = 0; i < numChunks; i++) { inputChunks[i] = getBytesWith(i + 1, EC_CHUNK_SIZE); } @@ -809,11 +816,48 @@ private void createKeyAndWriteData(String keyString, OzoneBucket bucket, new HashMap<>())) { assertInstanceOf(KeyOutputStream.class, out.getOutputStream()); for (int i = 0; i < numChunks; i++) { + // We generally wait until the data is written to the last chunk + // before attempting to trigger CloseContainer. + // We use an asynchronous approach for this trigger, + // aiming to ensure that closing the container does not interfere with the write operation. + // However, this process often needs to be executed multiple times before it takes effect. + if (i == numChunks - 1 && triggerRetry) { + triggerRetryByCloseContainer(out); + } out.write(inputChunks[i]); } } } + private void triggerRetryByCloseContainer(OzoneOutputStream out) { + CompletableFuture.runAsync(() -> { + BlockOutputStreamEntry blockOutputStreamEntry = out.getKeyOutputStream().getStreamEntries().get(0); + BlockID entryBlockID = blockOutputStreamEntry.getBlockID(); + long entryContainerID = entryBlockID.getContainerID(); + Pipeline entryPipeline = blockOutputStreamEntry.getPipeline(); + Map replicaIndexes = entryPipeline.getReplicaIndexes(); + try { + for (Map.Entry entry : replicaIndexes.entrySet()) { + DatanodeDetails key = entry.getKey(); + Integer value = entry.getValue(); + XceiverClientManager xceiverClientManager = new XceiverClientManager(config); + Token cToken = containerTokenGenerator + .generateToken(ANY_USER, ContainerID.valueOf(entryContainerID)); + XceiverClientSpi client = xceiverClientManager.acquireClient( + createSingleNodePipeline(entryPipeline, key, value)); + try { + ContainerProtocolCalls.closeContainer(client, entryContainerID, cToken.encodeToUrlString()); + } finally { + xceiverClientManager.releaseClient(client, false); + } + break; + } + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + } + @Test public void testECReconstructionCoordinatorShouldCleanupContainersOnFailure() throws Exception { @@ -826,7 +870,7 @@ public void testECReconstructionCoordinatorShouldCleanupContainersOnFailure() objectStore.getVolume(volumeName).createBucket(bucketName); OzoneVolume volume = objectStore.getVolume(volumeName); OzoneBucket bucket = volume.getBucket(bucketName); - createKeyAndWriteData(keyString, bucket, 3); + createKeyAndWriteData(keyString, bucket, 3, false); OzoneKeyDetails key = bucket.getKey(keyString); long conID = key.getOzoneKeyLocations().get(0).getContainerID(); @@ -900,6 +944,25 @@ private void closeContainer(long conID) HddsProtos.LifeCycleEvent.CLOSE); } + private void checkBlockDataWithRetry( + org.apache.hadoop.ozone.container.common.helpers.BlockData[] blockData, + org.apache.hadoop.ozone.container.common.helpers.BlockData[] + reconstructedBlockData, boolean triggerRetry) { + if (triggerRetry) { + for (int i = 0; i < reconstructedBlockData.length; i++) { + 
assertEquals(blockData[i].getBlockID(), reconstructedBlockData[i].getBlockID()); + List oldBlockDataChunks = blockData[i].getChunks(); + List newBlockDataChunks = reconstructedBlockData[i].getChunks(); + for (int j = 0; j < newBlockDataChunks.size(); j++) { + ContainerProtos.ChunkInfo chunkInfo = oldBlockDataChunks.get(j); + assertEquals(chunkInfo, newBlockDataChunks.get(j)); + } + } + return; + } + checkBlockData(blockData, reconstructedBlockData); + } + private void checkBlockData( org.apache.hadoop.ozone.container.common.helpers.BlockData[] blockData, org.apache.hadoop.ozone.container.common.helpers.BlockData[] @@ -967,8 +1030,7 @@ public static void prepareData(int[][] ranges) throws Exception { out.write(values[i]); } } -// List containerIDs = -// new ArrayList<>(scm.getContainerManager().getContainerIDs()); + List containerIDs = scm.getContainerManager().getContainers() .stream() From 4b47812a2d652f64f1a8e87937555208f526439a Mon Sep 17 00:00:00 2001 From: Arafat2198 Date: Wed, 11 Sep 2024 19:02:37 +0530 Subject: [PATCH 08/43] HDDS-11389. Incorrect number of deleted containers shown in Recon UI. (#7149) --- .../hadoop/ozone/recon/TestReconTasks.java | 44 +++- .../schema/ContainerSchemaDefinition.java | 31 ++- .../ozone/recon/api/ContainerEndpoint.java | 19 +- .../types/UnhealthyContainersResponse.java | 13 -- .../ozone/recon/fsck/ContainerHealthTask.java | 153 ++++++++++---- .../ReconStorageContainerManagerFacade.java | 9 +- .../recon/api/TestContainerEndpoint.java | 34 ++- .../recon/fsck/TestContainerHealthTask.java | 193 +++++++++++------- ...estContainerHealthTaskRecordGenerator.java | 62 +++++- 9 files changed, 395 insertions(+), 163 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java index cba7311b3b4..4476cbc3e38 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java @@ -19,6 +19,7 @@ import java.time.Duration; import java.util.List; +import java.util.Map; import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -51,6 +52,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_PIPELINE_REPORT_INTERVAL; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; import static org.apache.hadoop.ozone.container.ozoneimpl.TestOzoneContainer.runTestOzoneContainerViaDataNode; +import static org.apache.hadoop.ozone.recon.ReconConstants.CONTAINER_COUNT; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; @@ -236,6 +238,8 @@ public void testEmptyMissingContainerDownNode() throws Exception { // Bring down the Datanode that had the container replica. cluster.shutdownHddsDatanode(pipeline.getFirstNode()); + // Since we no longer add EMPTY_MISSING containers to the table, we should + // have zero EMPTY_MISSING containers in the DB but their information will be logged. LambdaTestUtils.await(25000, 1000, () -> { List allEmptyMissingContainers = reconContainerManager.getContainerSchemaManager() @@ -243,9 +247,18 @@ public void testEmptyMissingContainerDownNode() throws Exception { ContainerSchemaDefinition.UnHealthyContainerStates. 
EMPTY_MISSING, 0, 1000); - return (allEmptyMissingContainers.size() == 1); - }); + // Check if EMPTY_MISSING containers are not added to the DB and their count is logged + Map> + unhealthyContainerStateStatsMap = reconScm.getContainerHealthTask() + .getUnhealthyContainerStateStatsMap(); + + // Return true if the size of the fetched containers is 0 and the log shows 1 for EMPTY_MISSING state + return allEmptyMissingContainers.size() == 0 && + unhealthyContainerStateStatsMap.get( + ContainerSchemaDefinition.UnHealthyContainerStates.EMPTY_MISSING) + .getOrDefault(CONTAINER_COUNT, 0L) == 1; + }); // Now add a container to key mapping count as 3. This data is used to // identify if container is empty in terms of keys mapped to container. @@ -272,7 +285,17 @@ public void testEmptyMissingContainerDownNode() throws Exception { ContainerSchemaDefinition.UnHealthyContainerStates. EMPTY_MISSING, 0, 1000); - return (allEmptyMissingContainers.isEmpty()); + + + Map> + unhealthyContainerStateStatsMap = reconScm.getContainerHealthTask() + .getUnhealthyContainerStateStatsMap(); + + // Return true if the size of the fetched containers is 0 and the log shows 0 for EMPTY_MISSING state + return allEmptyMissingContainers.size() == 0 && + unhealthyContainerStateStatsMap.get( + ContainerSchemaDefinition.UnHealthyContainerStates.EMPTY_MISSING) + .getOrDefault(CONTAINER_COUNT, 0L) == 0; }); // Now remove keys from container. This data is used to @@ -283,8 +306,8 @@ public void testEmptyMissingContainerDownNode() throws Exception { reconContainerMetadataManager.commitBatchOperation(rdbBatchOperation); } - // Check existing container state in UNHEALTHY_CONTAINER table - // will be updated as EMPTY_MISSING + // Since we no longer add EMPTY_MISSING containers to the table, we should + // have zero EMPTY_MISSING containers in the DB but their information will be logged. LambdaTestUtils.await(25000, 1000, () -> { List allEmptyMissingContainers = reconContainerManager.getContainerSchemaManager() @@ -292,7 +315,16 @@ public void testEmptyMissingContainerDownNode() throws Exception { ContainerSchemaDefinition.UnHealthyContainerStates. EMPTY_MISSING, 0, 1000); - return (allEmptyMissingContainers.size() == 1); + + Map> + unhealthyContainerStateStatsMap = reconScm.getContainerHealthTask() + .getUnhealthyContainerStateStatsMap(); + + // Return true if the size of the fetched containers is 0 and the log shows 1 for EMPTY_MISSING state + return allEmptyMissingContainers.size() == 0 && + unhealthyContainerStateStatsMap.get( + ContainerSchemaDefinition.UnHealthyContainerStates.EMPTY_MISSING) + .getOrDefault(CONTAINER_COUNT, 0L) == 1; }); // Now restart the cluster and verify the container is no longer missing. diff --git a/hadoop-ozone/recon-codegen/src/main/java/org/hadoop/ozone/recon/schema/ContainerSchemaDefinition.java b/hadoop-ozone/recon-codegen/src/main/java/org/hadoop/ozone/recon/schema/ContainerSchemaDefinition.java index 7c293ff1861..0882de3bf4f 100644 --- a/hadoop-ozone/recon-codegen/src/main/java/org/hadoop/ozone/recon/schema/ContainerSchemaDefinition.java +++ b/hadoop-ozone/recon-codegen/src/main/java/org/hadoop/ozone/recon/schema/ContainerSchemaDefinition.java @@ -31,6 +31,7 @@ import javax.sql.DataSource; import java.sql.Connection; import java.sql.SQLException; +import java.util.Arrays; /** * Class used to create tables that are required for tracking containers. 
@@ -69,11 +70,39 @@ public enum UnHealthyContainerStates { public void initializeSchema() throws SQLException { Connection conn = dataSource.getConnection(); dslContext = DSL.using(conn); - if (!TABLE_EXISTS_CHECK.test(conn, UNHEALTHY_CONTAINERS_TABLE_NAME)) { + + if (TABLE_EXISTS_CHECK.test(conn, UNHEALTHY_CONTAINERS_TABLE_NAME)) { + // Drop the existing constraint if it exists + String constraintName = UNHEALTHY_CONTAINERS_TABLE_NAME + "ck1"; + dslContext.alterTable(UNHEALTHY_CONTAINERS_TABLE_NAME) + .dropConstraint(constraintName) + .execute(); + + // Add the updated constraint with all enum states + addUpdatedConstraint(); + } else { + // Create the table if it does not exist createUnhealthyContainersTable(); } } + /** + * Add the updated constraint to the table. + */ + private void addUpdatedConstraint() { + // Get all enum values as a list of strings + String[] enumStates = Arrays.stream(UnHealthyContainerStates.values()) + .map(Enum::name) + .toArray(String[]::new); + + // Alter the table to add the updated constraint + dslContext.alterTable(UNHEALTHY_CONTAINERS_TABLE_NAME) + .add(DSL.constraint(UNHEALTHY_CONTAINERS_TABLE_NAME + "ck1") + .check(field(name("container_state")) + .in(enumStates))) + .execute(); + } + /** * Create the Missing Containers table. */ diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java index 86ef6c022d5..cbdc198f8aa 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java @@ -408,13 +408,18 @@ public Response getUnhealthyContainers( summary = containerHealthSchemaManager.getUnhealthyContainersSummary(); List containers = containerHealthSchemaManager .getUnhealthyContainers(internalState, offset, limit); - List emptyMissingFiltered = containers.stream() - .filter( - container -> !container.getContainerState() - .equals(UnHealthyContainerStates.EMPTY_MISSING.toString())) - .collect( - Collectors.toList()); - for (UnhealthyContainers c : emptyMissingFiltered) { + + // Filtering out EMPTY_MISSING and NEGATIVE_SIZE containers from the response. + // These container states are not being inserted into the database as they represent + // edge cases that are not critical to track as unhealthy containers. 
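      // Illustration with hypothetical counts: if the UNHEALTHY_CONTAINERS table still held legacy rows such as
      // 5 MISSING, 2 EMPTY_MISSING and 1 NEGATIVE_SIZE, only the 5 MISSING rows would survive the filter below;
      // the EMPTY_MISSING and NEGATIVE_SIZE states surface only in Recon's container-state statistics and logs.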
+ List filteredContainers = containers.stream() + .filter(container -> !container.getContainerState() + .equals(UnHealthyContainerStates.EMPTY_MISSING.toString()) + && !container.getContainerState() + .equals(UnHealthyContainerStates.NEGATIVE_SIZE.toString())) + .collect(Collectors.toList()); + + for (UnhealthyContainers c : filteredContainers) { long containerID = c.getContainerId(); ContainerInfo containerInfo = containerManager.getContainer(ContainerID.valueOf(containerID)); diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java index ba03ec61f14..eaf08d9ca83 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java @@ -50,12 +50,6 @@ public class UnhealthyContainersResponse { @JsonProperty("misReplicatedCount") private long misReplicatedCount = 0; - /** - * Total count of containers with negative size. - */ - @JsonProperty("negativeSizeCount") - private long negativeSizeCount = 0; - /** * A collection of unhealthy containers. */ @@ -83,9 +77,6 @@ public void setSummaryCount(String state, long count) { } else if (state.equals( UnHealthyContainerStates.MIS_REPLICATED.toString())) { this.misReplicatedCount = count; - } else if (state.equals( - UnHealthyContainerStates.NEGATIVE_SIZE.toString())) { - this.negativeSizeCount = count; } } @@ -105,10 +96,6 @@ public long getMisReplicatedCount() { return misReplicatedCount; } - public long getNegativeSizeCount() { - return negativeSizeCount; - } - public Collection getContainers() { return containers; } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ContainerHealthTask.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ContainerHealthTask.java index 639047d37bd..11af6eaff53 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ContainerHealthTask.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ContainerHealthTask.java @@ -29,6 +29,7 @@ import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.PlacementPolicy; @@ -78,6 +79,8 @@ public class ContainerHealthTask extends ReconScmTask { private final ReconContainerMetadataManager reconContainerMetadataManager; private final PlacementPolicy placementPolicy; private final long interval; + private Map> + unhealthyContainerStateStatsMapForTesting; private final Set processedContainers = new HashSet<>(); @@ -185,10 +188,12 @@ private void checkAndProcessContainers( private void logUnhealthyContainerStats( Map> unhealthyContainerStateStatsMap) { + unhealthyContainerStateStatsMapForTesting = + new HashMap<>(unhealthyContainerStateStatsMap); // If any EMPTY_MISSING containers, then it is possible that such // containers got stuck in the closing state which never got // any replicas created on the datanodes. In this case, we log it as - // EMPTY, and insert as EMPTY_MISSING in UNHEALTHY_CONTAINERS table. + // EMPTY_MISSING in unhealthy container statistics but do not add it to the table. 
unhealthyContainerStateStatsMap.entrySet().forEach(stateEntry -> { UnHealthyContainerStates unhealthyContainerState = stateEntry.getKey(); Map containerStateStatsMap = stateEntry.getValue(); @@ -256,6 +261,11 @@ private void completeProcessingContainer( * completeProcessingContainer is called. This will check to see if any * additional records need to be added to the database. * + * If a container is identified as missing, empty-missing, under-replicated, + * over-replicated or mis-replicated, the method checks with SCM to determine + * if it has been deleted, using {@code containerDeletedInSCM}. If the container is + * deleted in SCM, the corresponding record is removed from Recon. + * * @param currentTime Timestamp to place on all records generated by this run * @param unhealthyContainerStateCountMap * @return Count of records processed @@ -273,9 +283,11 @@ private long processExistingDBRecords(long currentTime, recordCount++; UnhealthyContainersRecord rec = cursor.fetchNext(); try { + // Set the current container if it's not already set if (currentContainer == null) { currentContainer = setCurrentContainer(rec.getContainerId()); } + // If the container ID has changed, finish processing the previous one if (currentContainer.getContainerID() != rec.getContainerId()) { completeProcessingContainer( currentContainer, existingRecords, currentTime, @@ -283,24 +295,29 @@ private long processExistingDBRecords(long currentTime, existingRecords.clear(); currentContainer = setCurrentContainer(rec.getContainerId()); } - if (ContainerHealthRecords - .retainOrUpdateRecord(currentContainer, rec - )) { - // Check if the missing container is deleted in SCM - if (currentContainer.isMissing() && - containerDeletedInSCM(currentContainer.getContainer())) { - rec.delete(); - } - existingRecords.add(rec.getContainerState()); - if (rec.changed()) { - rec.update(); - } - } else { + + // Unhealthy Containers such as MISSING, UNDER_REPLICATED, + // OVER_REPLICATED, MIS_REPLICATED can have their unhealthy states changed or retained. + if (!ContainerHealthRecords.retainOrUpdateRecord(currentContainer, rec)) { + rec.delete(); LOG.info("DELETED existing unhealthy container record...for Container: {}", currentContainer.getContainerID()); + } + + // If the container is marked as MISSING and it's deleted in SCM, remove the record + if (currentContainer.isMissing() && containerDeletedInSCM(currentContainer.getContainer())) { rec.delete(); + LOG.info("DELETED existing unhealthy container record...for Container: {}", + currentContainer.getContainerID()); + } + + existingRecords.add(rec.getContainerState()); + // If the record was changed, update it + if (rec.changed()) { + rec.update(); } } catch (ContainerNotFoundException cnf) { + // If the container is not found, delete the record and reset currentContainer rec.delete(); currentContainer = null; } @@ -326,13 +343,6 @@ private void processContainer(ContainerInfo container, long currentTime, containerReplicas, placementPolicy, reconContainerMetadataManager, conf); - // Handle negative sized containers separately - if (h.getContainer().getUsedBytes() < 0) { - handleNegativeSizedContainers(h, currentTime, - unhealthyContainerStateStatsMap); - return; - } - if (h.isHealthilyReplicated() || h.isDeleted()) { return; } @@ -349,6 +359,18 @@ private void processContainer(ContainerInfo container, long currentTime, } } + /** + * Ensures the container's state in Recon is updated to match its state in SCM. 
+ * + * If SCM reports the container as DELETED, this method attempts to transition + * the container's state in Recon from CLOSED to DELETING, or from DELETING to + * DELETED, based on the current state in Recon. It logs each transition attempt + * and handles any exceptions that may occur. + * + * @param containerInfo the container whose state is being checked and potentially updated. + * @return {@code true} if the container was found to be DELETED in SCM and the + * state transition was attempted in Recon; {@code false} otherwise. + */ private boolean containerDeletedInSCM(ContainerInfo containerInfo) { try { ContainerWithPipeline containerWithPipeline = @@ -358,6 +380,8 @@ private boolean containerDeletedInSCM(ContainerInfo containerInfo) { if (containerInfo.getState() == HddsProtos.LifeCycleState.CLOSED) { containerManager.updateContainerState(containerInfo.containerID(), HddsProtos.LifeCycleEvent.DELETE); + LOG.debug("Successfully changed container {} state from CLOSED to DELETING.", + containerInfo.containerID()); } if (containerInfo.getState() == HddsProtos.LifeCycleState.DELETING && containerManager.getContainerReplicas(containerInfo.containerID()) @@ -365,6 +389,7 @@ private boolean containerDeletedInSCM(ContainerInfo containerInfo) { ) { containerManager.updateContainerState(containerInfo.containerID(), HddsProtos.LifeCycleEvent.CLEANUP); + LOG.info("Successfully Deleted container {} from Recon.", containerInfo.containerID()); } return true; } @@ -380,28 +405,50 @@ private boolean containerDeletedInSCM(ContainerInfo containerInfo) { /** * This method is used to handle containers with negative sizes. It logs an - * error message and inserts a record into the UNHEALTHY_CONTAINERS table. + * error message. * @param containerHealthStatus * @param currentTime * @param unhealthyContainerStateStatsMap */ - private void handleNegativeSizedContainers( + private static void handleNegativeSizedContainers( ContainerHealthStatus containerHealthStatus, long currentTime, Map> unhealthyContainerStateStatsMap) { + // NEGATIVE_SIZE containers are also not inserted into the database. + // This condition usually arises due to corrupted or invalid metadata, where + // the container's size is inaccurately recorded as negative. Since this does not + // represent a typical unhealthy scenario and may not have any meaningful + // impact on system health, such containers are logged for investigation but + // excluded from the UNHEALTHY_CONTAINERS table to maintain data integrity. ContainerInfo container = containerHealthStatus.getContainer(); - LOG.error( - "Container {} has negative size. Please visit Recon's unhealthy " + - "container endpoint for more details.", - container.getContainerID()); - UnhealthyContainers record = - ContainerHealthRecords.recordForState(containerHealthStatus, - UnHealthyContainerStates.NEGATIVE_SIZE, currentTime); - List records = Collections.singletonList(record); - populateContainerStats(containerHealthStatus, - UnHealthyContainerStates.NEGATIVE_SIZE, + LOG.error("Container {} has negative size.", container.getContainerID()); + populateContainerStats(containerHealthStatus, UnHealthyContainerStates.NEGATIVE_SIZE, + unhealthyContainerStateStatsMap); + } + + /** + * This method is used to handle containers that are empty and missing. It logs + * a debug message. 
+ * @param containerHealthStatus + * @param currentTime + * @param unhealthyContainerStateStatsMap + */ + private static void handleEmptyMissingContainers( + ContainerHealthStatus containerHealthStatus, long currentTime, + Map> + unhealthyContainerStateStatsMap) { + // EMPTY_MISSING containers are not inserted into the database. + // These containers typically represent those that were never written to + // or remain in an incomplete state. Tracking such containers as unhealthy + // would not provide valuable insights since they don't pose a risk or issue + // to the system. Instead, they are logged for awareness, but not stored in + // the UNHEALTHY_CONTAINERS table to avoid unnecessary entries. + ContainerInfo container = containerHealthStatus.getContainer(); + LOG.debug("Empty container {} is missing. It will be logged in the " + + "unhealthy container statistics, but no record will be created in the " + + "UNHEALTHY_CONTAINERS table.", container.getContainerID()); + populateContainerStats(containerHealthStatus, EMPTY_MISSING, unhealthyContainerStateStatsMap); - containerHealthSchemaManager.insertUnhealthyContainerRecords(records); } /** @@ -492,22 +539,21 @@ public static List generateUnhealthyRecords( populateContainerStats(container, UnHealthyContainerStates.MISSING, unhealthyContainerStateStatsMap); } else { - - LOG.debug("Empty container {} is missing. Kindly check the " + - "consolidated container stats per UNHEALTHY state logged as " + - "starting with **Container State Stats:**"); - - records.add( - recordForState(container, EMPTY_MISSING, - time)); - populateContainerStats(container, - EMPTY_MISSING, + handleEmptyMissingContainers(container, time, unhealthyContainerStateStatsMap); } // A container cannot have any other records if it is missing so return return records; } + // For Negative sized containers we only log but not insert into DB + if (container.getContainer().getUsedBytes() < 0 + && !recordForStateExists.contains( + UnHealthyContainerStates.NEGATIVE_SIZE.toString())) { + handleNegativeSizedContainers(container, time, + unhealthyContainerStateStatsMap); + } + if (container.isUnderReplicated() && !recordForStateExists.contains( UnHealthyContainerStates.UNDER_REPLICATED.toString())) { @@ -650,4 +696,23 @@ private static void populateContainerStats( (value + container.getContainer().getUsedBytes())); } } + + /** + * Expose the logger for testing purposes. + * + * @return the logger instance + */ + @VisibleForTesting + public Logger getLogger() { + return LOG; + } + + /** + * Expose the unhealthyContainerStateStatsMap for testing purposes. 
+ */ + @VisibleForTesting + public Map> getUnhealthyContainerStateStatsMap() { + return unhealthyContainerStateStatsMapForTesting; + } + } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java index a7f486ea5ac..c773187c4b1 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java @@ -155,6 +155,7 @@ public class ReconStorageContainerManagerFacade private final SCMNodeDetails reconNodeDetails; private final SCMHAManager scmhaManager; private final SequenceIdGenerator sequenceIdGen; + private final ContainerHealthTask containerHealthTask; private DBStore dbStore; private ReconNodeManager nodeManager; @@ -272,7 +273,7 @@ public ReconStorageContainerManagerFacade(OzoneConfiguration conf, scmServiceProvider, reconTaskStatusDao, reconTaskConfig); - ContainerHealthTask containerHealthTask = new ContainerHealthTask( + containerHealthTask = new ContainerHealthTask( containerManager, scmServiceProvider, reconTaskStatusDao, containerHealthSchemaManager, containerPlacementPolicy, reconTaskConfig, reconContainerMetadataManager, conf); @@ -741,6 +742,12 @@ public StorageContainerServiceProvider getScmServiceProvider() { public ContainerSizeCountTask getContainerSizeCountTask() { return containerSizeCountTask; } + + @VisibleForTesting + public ContainerHealthTask getContainerHealthTask() { + return containerHealthTask; + } + @VisibleForTesting public ContainerCountBySizeDao getContainerCountBySizeDao() { return containerCountBySizeDao; diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java index 82c7c1b5bef..3c39e4192d2 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java @@ -886,7 +886,9 @@ public void testUnhealthyContainersFilteredResponse() throws IOException, TimeoutException { String missing = UnHealthyContainerStates.MISSING.toString(); String emptyMissing = UnHealthyContainerStates.EMPTY_MISSING.toString(); + String negativeSize = UnHealthyContainerStates.NEGATIVE_SIZE.toString(); // For NEGATIVE_SIZE state + // Initial empty response verification Response response = containerEndpoint .getUnhealthyContainers(missing, 1000, 1); @@ -899,44 +901,55 @@ public void testUnhealthyContainersFilteredResponse() assertEquals(0, responseObject.getMisReplicatedCount()); assertEquals(Collections.EMPTY_LIST, responseObject.getContainers()); + // Add unhealthy records putContainerInfos(5); uuid1 = newDatanode("host1", "127.0.0.1"); uuid2 = newDatanode("host2", "127.0.0.2"); uuid3 = newDatanode("host3", "127.0.0.3"); uuid4 = newDatanode("host4", "127.0.0.4"); createUnhealthyRecords(5, 4, 3, 2); - createEmptyMissingUnhealthyRecords(2); + createEmptyMissingUnhealthyRecords(2); // For EMPTY_MISSING state + createNegativeSizeUnhealthyRecords(2); // For NEGATIVE_SIZE state + // Check for unhealthy containers response = containerEndpoint.getUnhealthyContainers(missing, 1000, 1); responseObject = (UnhealthyContainersResponse) response.getEntity(); + // Summary should have the 
count for all unhealthy: assertEquals(5, responseObject.getMissingCount()); assertEquals(4, responseObject.getOverReplicatedCount()); assertEquals(3, responseObject.getUnderReplicatedCount()); assertEquals(2, responseObject.getMisReplicatedCount()); - Collection records - = responseObject.getContainers(); + Collection records = responseObject.getContainers(); assertTrue(records.stream() .flatMap(containerMetadata -> containerMetadata.getReplicas().stream() .map(ContainerHistory::getState)) .allMatch(s -> s.equals("UNHEALTHY"))); - // There should only be 5 missing containers and no others as we asked for - // only missing. + + // Verify only missing containers are returned assertEquals(5, records.size()); for (UnhealthyContainerMetadata r : records) { assertEquals(missing, r.getContainerState()); } + // Check for empty missing containers, should return zero Response filteredEmptyMissingResponse = containerEndpoint .getUnhealthyContainers(emptyMissing, 1000, 1); responseObject = (UnhealthyContainersResponse) filteredEmptyMissingResponse.getEntity(); records = responseObject.getContainers(); - // Assert for zero empty missing containers. + assertEquals(0, records.size()); + + // Check for negative size containers, should return zero + Response filteredNegativeSizeResponse = containerEndpoint + .getUnhealthyContainers(negativeSize, 1000, 1); + responseObject = (UnhealthyContainersResponse) filteredNegativeSizeResponse.getEntity(); + records = responseObject.getContainers(); assertEquals(0, records.size()); } + @Test public void testUnhealthyContainersInvalidState() { WebApplicationException e = assertThrows(WebApplicationException.class, @@ -1043,6 +1056,15 @@ private void createEmptyMissingUnhealthyRecords(int emptyMissing) { } } + private void createNegativeSizeUnhealthyRecords(int negativeSize) { + int cid = 0; + for (int i = 0; i < negativeSize; i++) { + createUnhealthyRecord(++cid, UnHealthyContainerStates.NEGATIVE_SIZE.toString(), + 3, 3, 0, null); // Added for NEGATIVE_SIZE state + } + } + + private void createUnhealthyRecords(int missing, int overRep, int underRep, int misRep) { int cid = 0; diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java index ae46bd8b5b5..46e4506a5ef 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java @@ -22,15 +22,18 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.hadoop.ozone.recon.schema.ContainerSchemaDefinition.UnHealthyContainerStates.ALL_REPLICAS_BAD; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.fail; -import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.any; import static org.mockito.Mockito.anyInt; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; + import java.io.IOException; import java.time.Duration; import java.util.ArrayList; @@ -105,7 +108,7 @@ public void testRun() throws Exception { // Create 7 containers. 
The first 5 will have various unhealthy states // defined below. The container with ID=6 will be healthy and - // container with ID=7 will be EMPTY_MISSING + // container with ID=7 will be EMPTY_MISSING (but not inserted into DB) List mockContainers = getMockContainers(7); when(scmMock.getScmServiceProvider()).thenReturn(scmClientMock); when(scmMock.getContainerManager()).thenReturn(containerManagerMock); @@ -132,20 +135,20 @@ public void testRun() throws Exception { when(containerManagerMock.getContainerReplicas(containerInfo2.containerID())) .thenReturn(getMockReplicas(2L, State.UNHEALTHY)); - // return 0 replicas for container ID 3 -> Empty Missing + // return 0 replicas for container ID 3 -> EMPTY_MISSING (will not be inserted into DB) ContainerInfo containerInfo3 = TestContainerInfo.newBuilderForTest().setContainerID(3).setReplicationConfig(replicationConfig).build(); when(containerManagerMock.getContainer(ContainerID.valueOf(3L))).thenReturn(containerInfo3); when(containerManagerMock.getContainerReplicas(containerInfo3.containerID())) .thenReturn(Collections.emptySet()); - // Return 5 Healthy -> Over replicated + // Return 5 Healthy Replicas -> Over-replicated ContainerInfo containerInfo4 = TestContainerInfo.newBuilderForTest().setContainerID(4).setReplicationConfig(replicationConfig).build(); when(containerManagerMock.getContainer(ContainerID.valueOf(4L))).thenReturn(containerInfo4); when(containerManagerMock.getContainerReplicas(containerInfo4.containerID())) .thenReturn(getMockReplicas(4L, State.CLOSED, State.CLOSED, - State.CLOSED, State.CLOSED, State.CLOSED)); + State.CLOSED, State.CLOSED, State.CLOSED)); // Mis-replicated ContainerInfo containerInfo5 = @@ -158,7 +161,7 @@ public void testRun() throws Exception { when(containerManagerMock.getContainerReplicas(containerInfo5.containerID())) .thenReturn(misReplicas); - // Return 3 Healthy -> Healthy container + // Return 3 Healthy Replicas -> Healthy container ContainerInfo containerInfo6 = TestContainerInfo.newBuilderForTest().setContainerID(6).setReplicationConfig(replicationConfig).build(); when(containerManagerMock.getContainer(ContainerID.valueOf(6L))).thenReturn(containerInfo6); @@ -166,12 +169,14 @@ public void testRun() throws Exception { .thenReturn(getMockReplicas(6L, State.CLOSED, State.CLOSED, State.CLOSED)); - // return 0 replicas for container ID 7 -> MISSING + // return 0 replicas for container ID 7 -> MISSING (will later transition to EMPTY_MISSING but not inserted into DB) ContainerInfo containerInfo7 = TestContainerInfo.newBuilderForTest().setContainerID(7).setReplicationConfig(replicationConfig).build(); when(containerManagerMock.getContainer(ContainerID.valueOf(7L))).thenReturn(containerInfo7); when(containerManagerMock.getContainerReplicas(containerInfo7.containerID())) .thenReturn(Collections.emptySet()); + when(reconContainerMetadataManager.getKeyCountForContainer( + 7L)).thenReturn(5L); // Indicates non-empty container 7 for now List all = unHealthyContainersTableHandle.findAll(); assertThat(all).isEmpty(); @@ -180,8 +185,8 @@ public void testRun() throws Exception { ReconTaskStatusDao reconTaskStatusDao = getDao(ReconTaskStatusDao.class); ReconTaskConfig reconTaskConfig = new ReconTaskConfig(); reconTaskConfig.setMissingContainerTaskInterval(Duration.ofSeconds(5)); - when(reconContainerMetadataManager.getKeyCountForContainer( - 7L)).thenReturn(5L); + + // Start container health task ContainerHealthTask containerHealthTask = new ContainerHealthTask(scmMock.getContainerManager(), 
scmMock.getScmServiceProvider(), @@ -189,8 +194,12 @@ public void testRun() throws Exception { placementMock, reconTaskConfig, reconContainerMetadataManager, new OzoneConfiguration()); containerHealthTask.start(); + + // Ensure unhealthy container count in DB matches expected LambdaTestUtils.await(60000, 1000, () -> - (unHealthyContainersTableHandle.count() == 6)); + (unHealthyContainersTableHandle.count() == 5)); + + // Check for UNDER_REPLICATED container states UnhealthyContainers rec = unHealthyContainersTableHandle.fetchByContainerId(1L).get(0); assertEquals("UNDER_REPLICATED", rec.getContainerState()); @@ -200,6 +209,10 @@ public void testRun() throws Exception { assertEquals("UNDER_REPLICATED", rec.getContainerState()); assertEquals(3, rec.getReplicaDelta().intValue()); + // Assert that EMPTY_MISSING state containers were never added to DB. + assertEquals(0, + unHealthyContainersTableHandle.fetchByContainerId(3L).size()); + List unhealthyContainers = containerHealthSchemaManager.getUnhealthyContainers( ALL_REPLICAS_BAD, 0, Integer.MAX_VALUE); @@ -209,10 +222,7 @@ public void testRun() throws Exception { assertEquals(0, unhealthyContainers.get(0).getActualReplicaCount().intValue()); - rec = unHealthyContainersTableHandle.fetchByContainerId(3L).get(0); - assertEquals("EMPTY_MISSING", rec.getContainerState()); - assertEquals(3, rec.getReplicaDelta().intValue()); - + // Check for MISSING state in container ID 7 rec = unHealthyContainersTableHandle.fetchByContainerId(7L).get(0); assertEquals("MISSING", rec.getContainerState()); assertEquals(3, rec.getReplicaDelta().intValue()); @@ -233,9 +243,7 @@ public void testRun() throws Exception { assertThat(taskStatus.getLastUpdatedTimestamp()) .isGreaterThan(currentTime); - // Now run the job again, to check that relevant records are updated or - // removed as appropriate. 
Need to adjust the return value for all the mocks - // Under replicated -> Delta goes from 2 to 1 + // Adjust the mock results and rerun to check for updates or removal of records when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(1L))) .thenReturn(getMockReplicas(1L, State.CLOSED, State.CLOSED)); @@ -244,7 +252,7 @@ public void testRun() throws Exception { .thenReturn(getMockReplicas(2L, State.CLOSED, State.CLOSED, State.CLOSED)); - // return 0 replicas for container ID 3 -> Still empty Missing + // Container 3 remains EMPTY_MISSING, but no DB insertion when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(3L))) .thenReturn(Collections.emptySet()); @@ -253,11 +261,16 @@ public void testRun() throws Exception { .thenReturn(getMockReplicas(4L, State.CLOSED, State.CLOSED, State.CLOSED, State.CLOSED)); - // Was mis-replicated - make it healthy now + // Convert container 7 which was MISSING to EMPTY_MISSING (not inserted into DB) + when(reconContainerMetadataManager.getKeyCountForContainer( + 7L)).thenReturn(0L); + placementMock.setMisRepWhenDnPresent(null); + // Ensure count is reduced after EMPTY_MISSING containers are not inserted LambdaTestUtils.await(60000, 1000, () -> - (unHealthyContainersTableHandle.count() == 4)); + (unHealthyContainersTableHandle.count() == 2)); + rec = unHealthyContainersTableHandle.fetchByContainerId(1L).get(0); assertEquals("UNDER_REPLICATED", rec.getContainerState()); assertEquals(1, rec.getReplicaDelta().intValue()); @@ -266,36 +279,21 @@ public void testRun() throws Exception { assertEquals(0, unHealthyContainersTableHandle.fetchByContainerId(2L).size()); - rec = unHealthyContainersTableHandle.fetchByContainerId(3L).get(0); - assertEquals("EMPTY_MISSING", rec.getContainerState()); - assertEquals(3, rec.getReplicaDelta().intValue()); - - rec = unHealthyContainersTableHandle.fetchByContainerId(7L).get(0); - assertEquals("MISSING", rec.getContainerState()); - assertEquals(3, rec.getReplicaDelta().intValue()); + // Assert that for container 7 no records exist in DB because it's now EMPTY_MISSING + assertEquals(0, + unHealthyContainersTableHandle.fetchByContainerId(7L).size()); rec = unHealthyContainersTableHandle.fetchByContainerId(4L).get(0); assertEquals("OVER_REPLICATED", rec.getContainerState()); assertEquals(-1, rec.getReplicaDelta().intValue()); - // This container is now healthy, it should not be in the table any more + // Ensure container 5 is now healthy and not in the table assertEquals(0, unHealthyContainersTableHandle.fetchByContainerId(5L).size()); - // Again make container Id 7 as empty which was missing as well, so in next - // container health task run, this container also should be deleted from - // UNHEALTHY_CONTAINERS table because we want to cleanup any existing - // EMPTY and MISSING containers from UNHEALTHY_CONTAINERS table. 
- when(reconContainerMetadataManager.getKeyCountForContainer(7L)).thenReturn(0L); - LambdaTestUtils.await(6000, 1000, () -> { - UnhealthyContainers emptyMissingContainer = unHealthyContainersTableHandle.fetchByContainerId(7L).get(0); - return ("EMPTY_MISSING".equals(emptyMissingContainer.getContainerState())); - }); - - // Just check once again that count doesn't change, only state of - // container 7 changes from MISSING to EMPTY_MISSING + // Just check once again that count remains consistent LambdaTestUtils.await(60000, 1000, () -> - (unHealthyContainersTableHandle.count() == 4)); + (unHealthyContainersTableHandle.count() == 2)); } @Test @@ -370,17 +368,12 @@ public void testDeletedContainer() throws Exception { reconContainerMetadataManager, new OzoneConfiguration()); containerHealthTask.start(); LambdaTestUtils.await(6000, 1000, () -> - (unHealthyContainersTableHandle.count() == 2)); + (unHealthyContainersTableHandle.count() == 1)); UnhealthyContainers rec = unHealthyContainersTableHandle.fetchByContainerId(1L).get(0); assertEquals("MISSING", rec.getContainerState()); assertEquals(3, rec.getReplicaDelta().intValue()); - rec = - unHealthyContainersTableHandle.fetchByContainerId(3L).get(0); - assertEquals("EMPTY_MISSING", rec.getContainerState()); - assertEquals(3, rec.getReplicaDelta().intValue()); - ReconTaskStatus taskStatus = reconTaskStatusDao.findById(containerHealthTask.getTaskName()); assertThat(taskStatus.getLastUpdatedTimestamp()) @@ -473,64 +466,106 @@ public void testAllContainerStateInsertions() { } @Test - public void testNegativeSizeContainers() throws Exception { - // Setup mock objects and test environment - UnhealthyContainersDao unhealthyContainersDao = + public void testMissingAndEmptyMissingContainerDeletion() throws Exception { + // Setup mock DAOs and managers + UnhealthyContainersDao unHealthyContainersTableHandle = getDao(UnhealthyContainersDao.class); ContainerHealthSchemaManager containerHealthSchemaManager = new ContainerHealthSchemaManager( getSchemaDefinition(ContainerSchemaDefinition.class), - unhealthyContainersDao); + unHealthyContainersTableHandle); ReconStorageContainerManagerFacade scmMock = mock(ReconStorageContainerManagerFacade.class); + MockPlacementPolicy placementMock = new MockPlacementPolicy(); ContainerManager containerManagerMock = mock(ContainerManager.class); StorageContainerServiceProvider scmClientMock = mock(StorageContainerServiceProvider.class); ReconContainerMetadataManager reconContainerMetadataManager = mock(ReconContainerMetadataManager.class); - MockPlacementPolicy placementMock = new MockPlacementPolicy(); + mock(ReconContainerMetadataManager.class); - // Mock container info setup - List mockContainers = getMockContainers(3); - when(scmMock.getContainerManager()).thenReturn(containerManagerMock); + // Create 2 containers. They start in CLOSED state in Recon. 
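+ // Scenario under test: SCM already reports both containers as DELETED, while Recon still
+ // tracks them as CLOSED with no replicas and with stale rows in the UNHEALTHY_CONTAINERS
+ // table (container 1 holds keys, container 2 is empty). The health task is expected to
+ // reconcile this by issuing a DELETE lifecycle event for each container, as verified below.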
+ List mockContainers = getMockContainers(2); when(scmMock.getScmServiceProvider()).thenReturn(scmClientMock); + when(scmMock.getContainerManager()).thenReturn(containerManagerMock); when(containerManagerMock.getContainers(any(ContainerID.class), anyInt())).thenReturn(mockContainers); + + // Mark both containers as initially CLOSED in Recon for (ContainerInfo c : mockContainers) { - when(containerManagerMock.getContainer( - c.containerID())).thenReturn(c); - when(scmClientMock.getContainerWithPipeline( - c.getContainerID())).thenReturn(new ContainerWithPipeline(c, null)); - when(containerManagerMock.getContainer(c.containerID()) - .getUsedBytes()).thenReturn(Long.valueOf(-10)); + when(containerManagerMock.getContainer(c.containerID())).thenReturn(c); } - // Verify the table is initially empty - assertThat(unhealthyContainersDao.findAll()).isEmpty(); + // Simulate SCM reporting the containers as DELETED + ContainerInfo deletedContainer1 = getMockDeletedContainer(1); + ContainerInfo deletedContainer2 = getMockDeletedContainer(2); + + when(scmClientMock.getContainerWithPipeline(1)) + .thenReturn(new ContainerWithPipeline(deletedContainer1, null)); + when(scmClientMock.getContainerWithPipeline(2)) + .thenReturn(new ContainerWithPipeline(deletedContainer2, null)); + + // Both containers start as CLOSED in Recon (MISSING or EMPTY_MISSING) + when(containerManagerMock.getContainer(ContainerID.valueOf(1L)).getState()) + .thenReturn(HddsProtos.LifeCycleState.CLOSED); + when(containerManagerMock.getContainer(ContainerID.valueOf(2L)).getState()) + .thenReturn(HddsProtos.LifeCycleState.CLOSED); - // Setup and start the container health task + // Replicas are empty, so both containers should be considered for deletion + when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(1L))) + .thenReturn(Collections.emptySet()); + when(containerManagerMock.getContainerReplicas(ContainerID.valueOf(2L))) + .thenReturn(Collections.emptySet()); + + // Initialize UnhealthyContainers in DB (MISSING and EMPTY_MISSING) + // Create and set up the first UnhealthyContainer for a MISSING container + UnhealthyContainers container1 = new UnhealthyContainers(); + container1.setContainerId(1L); + container1.setContainerState("MISSING"); + container1.setExpectedReplicaCount(3); + container1.setActualReplicaCount(0); + container1.setReplicaDelta(3); + container1.setInStateSince(System.currentTimeMillis()); + + // Create and set up the second UnhealthyContainer for an EMPTY_MISSING container + UnhealthyContainers container2 = new UnhealthyContainers(); + container2.setContainerId(2L); + container2.setContainerState("MISSING"); + container2.setExpectedReplicaCount(3); + container2.setActualReplicaCount(0); + container2.setReplicaDelta(3); + container2.setInStateSince(System.currentTimeMillis()); + + unHealthyContainersTableHandle.insert(container1); + unHealthyContainersTableHandle.insert(container2); + + when(reconContainerMetadataManager.getKeyCountForContainer(1L)).thenReturn(5L); + when(reconContainerMetadataManager.getKeyCountForContainer(2L)).thenReturn(0L); + + // Start the container health task ReconTaskStatusDao reconTaskStatusDao = getDao(ReconTaskStatusDao.class); ReconTaskConfig reconTaskConfig = new ReconTaskConfig(); reconTaskConfig.setMissingContainerTaskInterval(Duration.ofSeconds(2)); - ContainerHealthTask containerHealthTask = new ContainerHealthTask( - scmMock.getContainerManager(), scmMock.getScmServiceProvider(), - reconTaskStatusDao, - containerHealthSchemaManager, placementMock, reconTaskConfig, - 
reconContainerMetadataManager, - new OzoneConfiguration()); - containerHealthTask.start(); + ContainerHealthTask containerHealthTask = + new ContainerHealthTask(scmMock.getContainerManager(), + scmMock.getScmServiceProvider(), + reconTaskStatusDao, containerHealthSchemaManager, + placementMock, reconTaskConfig, + reconContainerMetadataManager, new OzoneConfiguration()); - // Wait for the task to identify unhealthy containers - LambdaTestUtils.await(6000, 1000, - () -> unhealthyContainersDao.count() == 3); + containerHealthTask.start(); - // Assert that all unhealthy containers have been identified as NEGATIVE_SIZE states - List negativeSizeContainers = - unhealthyContainersDao.fetchByContainerState("NEGATIVE_SIZE"); - assertThat(negativeSizeContainers).hasSize(3); + // Wait for the task to complete and ensure that updateContainerState is invoked for + // container IDs 1 and 2 to mark the containers as DELETED, since they are DELETED in SCM. + LambdaTestUtils.await(60000, 1000, () -> { + verify(containerManagerMock, times(1)) + .updateContainerState(ContainerID.valueOf(1L), HddsProtos.LifeCycleEvent.DELETE); + verify(containerManagerMock, times(1)) + .updateContainerState(ContainerID.valueOf(2L), HddsProtos.LifeCycleEvent.DELETE); + return true; + }); } - private Set getMockReplicas( long containerId, State...states) { Set replicas = new HashSet<>(); diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTaskRecordGenerator.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTaskRecordGenerator.java index 7d55e612bad..4e9965638a1 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTaskRecordGenerator.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTaskRecordGenerator.java @@ -127,6 +127,58 @@ public void testMissingRecordRetained() { )); } + @Test + public void testEmptyMissingRecordNotInsertedButLogged() { + // Create a container that is in EMPTY_MISSING state + Set replicas = new HashSet<>(); + ContainerHealthStatus status = new ContainerHealthStatus(emptyContainer, replicas, placementPolicy, + reconContainerMetadataManager, CONF); + + // Initialize stats map + Map> unhealthyContainerStateStatsMap = new HashMap<>(); + initializeUnhealthyContainerStateStatsMap(unhealthyContainerStateStatsMap); + + // Generate records for EMPTY_MISSING container + List records = ContainerHealthTask.ContainerHealthRecords.generateUnhealthyRecords( + status, (long) 345678, unhealthyContainerStateStatsMap); + + // Assert that no records are created for EMPTY_MISSING state + assertEquals(0, records.size()); + + // Assert that the EMPTY_MISSING state is logged + assertEquals(1, unhealthyContainerStateStatsMap.get(UnHealthyContainerStates.EMPTY_MISSING) + .getOrDefault(CONTAINER_COUNT, 0L)); + } + + @Test + public void testNegativeSizeRecordNotInsertedButLogged() { + // Simulate a container with NEGATIVE_SIZE state + when(container.getUsedBytes()).thenReturn(-10L); // Negative size + Set replicas = generateReplicas(container, CLOSED, CLOSED); + ContainerHealthStatus status = + new ContainerHealthStatus(container, replicas, placementPolicy, reconContainerMetadataManager, CONF); + + // Initialize stats map + Map> + unhealthyContainerStateStatsMap = new HashMap<>(); + initializeUnhealthyContainerStateStatsMap(unhealthyContainerStateStatsMap); + + // Generate records for NEGATIVE_SIZE container + List records = + 
ContainerHealthTask.ContainerHealthRecords.generateUnhealthyRecords( + status, (long) 123456, unhealthyContainerStateStatsMap); + + // Assert that none of the records are for negative. + records.forEach(record -> assertFalse(record.getContainerState() + .equals(UnHealthyContainerStates.NEGATIVE_SIZE.toString()))); + + + // Assert that the NEGATIVE_SIZE state is logged + assertEquals(1, unhealthyContainerStateStatsMap.get( + UnHealthyContainerStates.NEGATIVE_SIZE).getOrDefault(CONTAINER_COUNT, 0L)); + } + + @Test public void testUnderReplicatedRecordRetainedAndUpdated() { // under replicated container @@ -396,13 +448,9 @@ public void testCorrectRecordsGenerated() { status = new ContainerHealthStatus(emptyContainer, replicas, placementPolicy, reconContainerMetadataManager, CONF); - records = ContainerHealthTask.ContainerHealthRecords + ContainerHealthTask.ContainerHealthRecords .generateUnhealthyRecords(status, (long) 345678, unhealthyContainerStateStatsMap); - assertEquals(1, records.size()); - rec = records.get(0); - assertEquals(UnHealthyContainerStates.EMPTY_MISSING.toString(), - rec.getContainerState()); assertEquals(3, rec.getExpectedReplicaCount().intValue()); assertEquals(0, rec.getActualReplicaCount().intValue()); @@ -582,6 +630,8 @@ private void initializeUnhealthyContainerStateStatsMap( UnHealthyContainerStates.OVER_REPLICATED, new HashMap<>()); unhealthyContainerStateStatsMap.put( UnHealthyContainerStates.MIS_REPLICATED, new HashMap<>()); + unhealthyContainerStateStatsMap.put( + UnHealthyContainerStates.NEGATIVE_SIZE, new HashMap<>()); } private void logUnhealthyContainerStats( @@ -590,7 +640,7 @@ private void logUnhealthyContainerStats( // If any EMPTY_MISSING containers, then it is possible that such // containers got stuck in the closing state which never got // any replicas created on the datanodes. In this case, we log it as - // EMPTY, and insert as EMPTY_MISSING in UNHEALTHY_CONTAINERS table. + // EMPTY_MISSING containers, but dont add it to the unhealthy container table. unhealthyContainerStateStatsMap.entrySet().forEach(stateEntry -> { UnHealthyContainerStates unhealthyContainerState = stateEntry.getKey(); Map containerStateStatsMap = stateEntry.getValue(); From cd251f23a9f660ab514973b87668e00c9564faca Mon Sep 17 00:00:00 2001 From: len548 <63490262+len548@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:02:11 +0200 Subject: [PATCH 09/43] HDDS-11438. 
Ensure DataInputBuffer is closed in OMPBHelper#convert (#7182) --- .../apache/hadoop/ozone/protocolPB/OMPBHelper.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/protocolPB/OMPBHelper.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/protocolPB/OMPBHelper.java index ccb2080a875..e28c9477f29 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/protocolPB/OMPBHelper.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/protocolPB/OMPBHelper.java @@ -242,11 +242,13 @@ public static MD5MD5Crc32FileChecksumProto convert( DataOutputBuffer buf = new DataOutputBuffer(); checksum.write(buf); byte[] bytes = buf.getData(); - DataInputBuffer buffer = new DataInputBuffer(); - buffer.reset(bytes, 0, bytes.length); - int bytesPerCRC = buffer.readInt(); - long crcPerBlock = buffer.readLong(); - buffer.close(); + int bytesPerCRC; + long crcPerBlock; + try (DataInputBuffer buffer = new DataInputBuffer()) { + buffer.reset(bytes, 0, bytes.length); + bytesPerCRC = buffer.readInt(); + crcPerBlock = buffer.readLong(); + } int offset = Integer.BYTES + Long.BYTES; ByteString byteString = ByteString.copyFrom( From 0e49f7adcd4eff7b1375bffc445017f02824edd2 Mon Sep 17 00:00:00 2001 From: Ashish Kumar <117710273+ashishkumar50@users.noreply.github.com> Date: Wed, 11 Sep 2024 23:57:46 +0530 Subject: [PATCH 10/43] HDDS-11449. Remove unnecessary log from client console. (#7184) Co-authored-by: ashishk --- .../org/apache/hadoop/ozone/om/helpers/OzoneFSUtils.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OzoneFSUtils.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OzoneFSUtils.java index aa7d06e2a9f..bf4ffa9d8de 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OzoneFSUtils.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OzoneFSUtils.java @@ -317,9 +317,11 @@ public static boolean canEnableHsync(ConfigurationSource conf, boolean isClient) if (confHBaseEnhancementsAllowed) { return confHsyncEnabled; } else { - LOG.warn("Ignoring {} = {} because HBase enhancements are disallowed. To enable it, set {} = true as well.", - OzoneConfigKeys.OZONE_FS_HSYNC_ENABLED, confHsyncEnabled, - confKey); + if (confHsyncEnabled) { + LOG.warn("Ignoring {} = {} because HBase enhancements are disallowed. To enable it, set {} = true as well.", + OzoneConfigKeys.OZONE_FS_HSYNC_ENABLED, true, + confKey); + } return false; } } From e57370124a36315d2be5791753912901f836ccd8 Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Thu, 12 Sep 2024 05:45:13 +0800 Subject: [PATCH 11/43] HDDS-11448. 
Improve documentation in ContainerStateMachine (#7183) --- .../server/ratis/ContainerStateMachine.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index b3398de07ad..9dc6af19353 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -233,7 +233,7 @@ public ContainerStateMachine(HddsDatanodeService hddsDatanodeService, RaftGroupI // cache with FIFO eviction, and if element not found, this needs // to be obtained from disk for slow follower stateMachineDataCache = new ResourceCache<>( - (index, data) -> ((ByteString)data).size(), + (index, data) -> data.size(), pendingRequestsBytesLimit, (p) -> { if (p.wasEvicted()) { @@ -704,9 +704,10 @@ private ExecutorService getChunkExecutor(WriteChunkRequestProto req) { return chunkExecutors.get(i); } - /* - * writeStateMachineData calls are not synchronized with each other - * and also with applyTransaction. + /** + * {@link #writeStateMachineData(ContainerCommandRequestProto, long, long, long)} + * calls are not synchronized with each other + * and also with {@link #applyTransaction(TransactionContext)}. */ @Override public CompletableFuture write(LogEntryProto entry, TransactionContext trx) { @@ -824,7 +825,7 @@ public CompletableFuture flush(long index) { } /** - * This method is used by the Leader to read state machine date for sending appendEntries to followers. + * This method is used by the Leader to read state machine data for sending appendEntries to followers. * It will first get the data from {@link #stateMachineDataCache}. * If the data is not in the cache, it will read from the file by dispatching a command * @@ -1197,7 +1198,7 @@ public void notifyGroupRemove() { try { containerController.markContainerForClose(cid); containerController.quasiCloseContainer(cid, - "Ratis group removed"); + "Ratis group removed. Group id: " + gid); } catch (IOException e) { LOG.debug("Failed to quasi-close container {}", cid); } From d2210657055cd13d81369c749ca73725d3826d74 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran <47532440+swamirishi@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:32:28 -0700 Subject: [PATCH 12/43] HDDS-11440. 
Add a lastTransactionInfo field in SnapshotInfo to check for transactions in flight on the snapshot (#7179) --- .../hadoop/hdds/utils/TransactionInfo.java | 11 ++- .../hadoop/ozone/om/helpers/SnapshotInfo.java | 57 +++++++----- .../src/main/proto/OmClientProtocol.proto | 1 + .../hadoop/ozone/om/OmSnapshotManager.java | 36 ++++++++ .../key/OMDirectoriesPurgeRequestWithFSO.java | 6 ++ .../om/request/key/OMKeyPurgeRequest.java | 29 ++++-- .../snapshot/OMSnapshotCreateRequest.java | 3 +- .../OMSnapshotMoveDeletedKeysRequest.java | 26 ++++-- .../snapshot/OMSnapshotPurgeRequest.java | 10 +- .../OMDirectoriesPurgeResponseWithFSO.java | 4 +- .../om/response/key/OMKeyPurgeResponse.java | 4 +- .../OMSnapshotMoveDeletedKeysResponse.java | 5 + ...tOMDirectoriesPurgeRequestAndResponse.java | 91 +++++++++++++++---- .../key/TestOMKeyPurgeRequestAndResponse.java | 48 +++------- .../om/request/key/TestOMKeyRequest.java | 39 +++++++- .../snapshot/TestOMSnapshotCreateRequest.java | 4 +- ...TestOMSnapshotPurgeRequestAndResponse.java | 36 ++++++-- .../TestOMSnapshotCreateResponse.java | 4 +- .../ozone/om/snapshot/TestSnapshotInfo.java | 47 ++++++++++ 19 files changed, 351 insertions(+), 110 deletions(-) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/TransactionInfo.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/TransactionInfo.java index e7c4ec4ce3d..29531f31518 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/TransactionInfo.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/TransactionInfo.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Objects; +import com.google.protobuf.ByteString; import org.apache.hadoop.hdds.utils.db.Codec; import org.apache.hadoop.hdds.utils.db.DelegatedCodec; import org.apache.hadoop.hdds.utils.db.StringCodec; @@ -162,7 +163,15 @@ public String toString() { */ public static TransactionInfo readTransactionInfo( DBStoreHAManager metadataManager) throws IOException { - return metadataManager.getTransactionInfoTable().get(TRANSACTION_INFO_KEY); + return metadataManager.getTransactionInfoTable().getSkipCache(TRANSACTION_INFO_KEY); + } + + public ByteString toByteString() throws IOException { + return ByteString.copyFrom(getCodec().toPersistedFormat(this)); + } + + public static TransactionInfo fromByteString(ByteString byteString) throws IOException { + return byteString == null ? 
null : getCodec().fromPersistedFormat(byteString.toByteArray()); } public SnapshotInfo toSnapshotInfo() { diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java index 47a48c37e8e..8584796c2e9 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java @@ -19,6 +19,7 @@ */ import com.google.common.annotations.VisibleForTesting; +import com.google.protobuf.ByteString; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hdds.utils.db.Codec; import org.apache.hadoop.hdds.utils.db.CopyObject; @@ -124,6 +125,7 @@ public static SnapshotStatus valueOf(SnapshotStatusProto status) { private long exclusiveSize; private long exclusiveReplicatedSize; private boolean deepCleanedDeletedDir; + private ByteString lastTransactionInfo; private SnapshotInfo(Builder b) { this.snapshotId = b.snapshotId; @@ -145,6 +147,7 @@ private SnapshotInfo(Builder b) { this.exclusiveSize = b.exclusiveSize; this.exclusiveReplicatedSize = b.exclusiveReplicatedSize; this.deepCleanedDeletedDir = b.deepCleanedDeletedDir; + this.lastTransactionInfo = b.lastTransactionInfo; } public void setName(String name) { @@ -261,13 +264,15 @@ public SnapshotInfo.Builder toBuilder() { .setGlobalPreviousSnapshotId(globalPreviousSnapshotId) .setSnapshotPath(snapshotPath) .setCheckpointDir(checkpointDir) + .setDbTxSequenceNumber(dbTxSequenceNumber) .setDeepClean(deepClean) .setSstFiltered(sstFiltered) .setReferencedSize(referencedSize) .setReferencedReplicatedSize(referencedReplicatedSize) .setExclusiveSize(exclusiveSize) .setExclusiveReplicatedSize(exclusiveReplicatedSize) - .setDeepCleanedDeletedDir(deepCleanedDeletedDir); + .setDeepCleanedDeletedDir(deepCleanedDeletedDir) + .setLastTransactionInfo(lastTransactionInfo); } /** @@ -293,6 +298,7 @@ public static class Builder { private long exclusiveSize; private long exclusiveReplicatedSize; private boolean deepCleanedDeletedDir; + private ByteString lastTransactionInfo; public Builder() { // default values @@ -411,6 +417,11 @@ public Builder setDeepCleanedDeletedDir(boolean deepCleanedDeletedDir) { return this; } + public Builder setLastTransactionInfo(ByteString lastTransactionInfo) { + this.lastTransactionInfo = lastTransactionInfo; + return this; + } + public SnapshotInfo build() { Preconditions.checkNotNull(name); return new SnapshotInfo(this); @@ -445,6 +456,10 @@ public OzoneManagerProtocolProtos.SnapshotInfo getProtobuf() { sib.setGlobalPreviousSnapshotID(toProtobuf(globalPreviousSnapshotId)); } + if (lastTransactionInfo != null) { + sib.setLastTransactionInfo(lastTransactionInfo); + } + sib.setSnapshotPath(snapshotPath) .setCheckpointDir(checkpointDir) .setDbTxSequenceNumber(dbTxSequenceNumber) @@ -513,6 +528,10 @@ public static SnapshotInfo getFromProtobuf( snapshotInfoProto.getDeepCleanedDeletedDir()); } + if (snapshotInfoProto.hasLastTransactionInfo()) { + osib.setLastTransactionInfo(snapshotInfoProto.getLastTransactionInfo()); + } + osib.setSnapshotPath(snapshotInfoProto.getSnapshotPath()) .setCheckpointDir(snapshotInfoProto.getCheckpointDir()) .setDbTxSequenceNumber(snapshotInfoProto.getDbTxSequenceNumber()); @@ -605,6 +624,14 @@ public void setDeepCleanedDeletedDir(boolean deepCleanedDeletedDir) { this.deepCleanedDeletedDir = deepCleanedDeletedDir; } + public ByteString getLastTransactionInfo() { + 
return lastTransactionInfo; + } + + public void setLastTransactionInfo(ByteString lastTransactionInfo) { + this.lastTransactionInfo = lastTransactionInfo; + } + /** * Generate default name of snapshot, (used if user doesn't provide one). */ @@ -673,7 +700,8 @@ public boolean equals(Object o) { referencedReplicatedSize == that.referencedReplicatedSize && exclusiveSize == that.exclusiveSize && exclusiveReplicatedSize == that.exclusiveReplicatedSize && - deepCleanedDeletedDir == that.deepCleanedDeletedDir; + deepCleanedDeletedDir == that.deepCleanedDeletedDir && + Objects.equals(lastTransactionInfo, that.lastTransactionInfo); } @Override @@ -684,7 +712,7 @@ public int hashCode() { globalPreviousSnapshotId, snapshotPath, checkpointDir, deepClean, sstFiltered, referencedSize, referencedReplicatedSize, - exclusiveSize, exclusiveReplicatedSize, deepCleanedDeletedDir); + exclusiveSize, exclusiveReplicatedSize, deepCleanedDeletedDir, lastTransactionInfo); } /** @@ -692,27 +720,7 @@ public int hashCode() { */ @Override public SnapshotInfo copyObject() { - return new Builder() - .setSnapshotId(snapshotId) - .setName(name) - .setVolumeName(volumeName) - .setBucketName(bucketName) - .setSnapshotStatus(snapshotStatus) - .setCreationTime(creationTime) - .setDeletionTime(deletionTime) - .setPathPreviousSnapshotId(pathPreviousSnapshotId) - .setGlobalPreviousSnapshotId(globalPreviousSnapshotId) - .setSnapshotPath(snapshotPath) - .setCheckpointDir(checkpointDir) - .setDbTxSequenceNumber(dbTxSequenceNumber) - .setDeepClean(deepClean) - .setSstFiltered(sstFiltered) - .setReferencedSize(referencedSize) - .setReferencedReplicatedSize(referencedReplicatedSize) - .setExclusiveSize(exclusiveSize) - .setExclusiveReplicatedSize(exclusiveReplicatedSize) - .setDeepCleanedDeletedDir(deepCleanedDeletedDir) - .build(); + return this.toBuilder().build(); } @Override @@ -737,6 +745,7 @@ public String toString() { ", exclusiveSize: '" + exclusiveSize + '\'' + ", exclusiveReplicatedSize: '" + exclusiveReplicatedSize + '\'' + ", deepCleanedDeletedDir: '" + deepCleanedDeletedDir + '\'' + + ", lastTransactionInfo: '" + lastTransactionInfo + '\'' + '}'; } } diff --git a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto index e79797993c1..eefcfa7552c 100644 --- a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto +++ b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto @@ -886,6 +886,7 @@ message SnapshotInfo { optional uint64 exclusiveReplicatedSize = 18; // note: shared sizes can be calculated from: referenced - exclusive optional bool deepCleanedDeletedDir = 19; + optional bytes lastTransactionInfo = 20; } message SnapshotDiffJobProto { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java index 18b29118182..dde5b22e793 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hdds.StringUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.server.ServerUtils; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.hdds.utils.db.CodecRegistry; import 
org.apache.hadoop.hdds.utils.db.DBCheckpoint; @@ -674,6 +675,41 @@ private ReferenceCounted getSnapshot(String snapshotTableKey, boolea return snapshotCache.get(snapshotInfo.getSnapshotId()); } + /** + * Checks if the last transaction performed on the snapshot has been flushed to disk. + * @param metadataManager MetadataManager of the active OM. + * @param snapshotTableKey table key corresponding to snapshot in snapshotInfoTable. + * @return True if the changes have been flushed to DB, otherwise false. + * @throws IOException + */ + public static boolean areSnapshotChangesFlushedToDB(OMMetadataManager metadataManager, String snapshotTableKey) + throws IOException { + // Need this info from cache since the snapshot could have been updated only on cache and not on disk. + SnapshotInfo snapshotInfo = metadataManager.getSnapshotInfoTable().get(snapshotTableKey); + return areSnapshotChangesFlushedToDB(metadataManager, snapshotInfo); + } + + /** + * Checks if the last transaction performed on the snapshot has been flushed to disk. + * @param metadataManager MetadataManager of the active OM. + * @param snapshotInfo SnapshotInfo value. + * @return True if the changes have been flushed to DB, otherwise false. It would return true if the snapshot + * provided is null, meaning the snapshot doesn't exist. + * @throws IOException + */ + public static boolean areSnapshotChangesFlushedToDB(OMMetadataManager metadataManager, SnapshotInfo snapshotInfo) + throws IOException { + if (snapshotInfo != null) { + TransactionInfo snapshotTransactionInfo = snapshotInfo.getLastTransactionInfo() != null ? + TransactionInfo.fromByteString(snapshotInfo.getLastTransactionInfo()) : null; + TransactionInfo omTransactionInfo = TransactionInfo.readTransactionInfo(metadataManager); + // If transactionInfo field is null then return true to keep things backward compatible. + return snapshotTransactionInfo == null || omTransactionInfo.compareTo(snapshotTransactionInfo) >= 0; + } + return true; + } + + + /** + * Returns OmSnapshot object and skips active check. + * This should only be used for API calls initiated by background service e.g.
purgeKeys, purgeSnapshot, diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java index cb10c0d2e40..dd08ff17165 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java @@ -25,6 +25,7 @@ import java.util.Map; import java.util.Set; import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.cache.CacheKey; import org.apache.hadoop.hdds.utils.db.cache.CacheValue; import org.apache.hadoop.ozone.OzoneConsts; @@ -149,6 +150,11 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn } } } + if (fromSnapshotInfo != null) { + fromSnapshotInfo.setLastTransactionInfo(TransactionInfo.valueOf(termIndex).toByteString()); + omMetadataManager.getSnapshotInfoTable().addCacheEntry(new CacheKey<>(fromSnapshotInfo.getTableKey()), + CacheValue.get(termIndex.getIndex(), fromSnapshotInfo)); + } } catch (IOException ex) { // Case of IOException for fromProtobuf will not happen // as this is created and send within OM diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java index 5738d7945bf..14c80bb7a93 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java @@ -21,6 +21,10 @@ import java.io.IOException; import java.util.ArrayList; +import org.apache.hadoop.hdds.utils.TransactionInfo; +import org.apache.hadoop.hdds.utils.db.cache.CacheKey; +import org.apache.hadoop.hdds.utils.db.cache.CacheValue; +import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; import org.apache.ratis.server.protocol.TermIndex; import org.apache.hadoop.ozone.om.OzoneManager; @@ -61,6 +65,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn String fromSnapshot = purgeKeysRequest.hasSnapshotTableKey() ? purgeKeysRequest.getSnapshotTableKey() : null; List keysToBePurgedList = new ArrayList<>(); + OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl) ozoneManager.getMetadataManager(); OMResponse.Builder omResponse = OmResponseUtil.getOMResponseBuilder( getOmRequest()); @@ -71,17 +76,27 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn keysToBePurgedList.add(deletedKey); } } + final SnapshotInfo fromSnapshotInfo; try { - SnapshotInfo fromSnapshotInfo = null; - if (fromSnapshot != null) { - fromSnapshotInfo = SnapshotUtils.getSnapshotInfo(ozoneManager, fromSnapshot); - } - omClientResponse = new OMKeyPurgeResponse(omResponse.build(), - keysToBePurgedList, fromSnapshotInfo, keysToUpdateList); + fromSnapshotInfo = fromSnapshot == null ? 
null : SnapshotUtils.getSnapshotInfo(ozoneManager, fromSnapshot); } catch (IOException ex) { - omClientResponse = new OMKeyPurgeResponse(createErrorOMResponse(omResponse, ex)); + return new OMKeyPurgeResponse(createErrorOMResponse(omResponse, ex)); + } + + // Setting transaction info for snapshot, this is to prevent duplicate purge requests to OM from background + // services. + try { + if (fromSnapshotInfo != null) { + fromSnapshotInfo.setLastTransactionInfo(TransactionInfo.valueOf(termIndex).toByteString()); + omMetadataManager.getSnapshotInfoTable().addCacheEntry(new CacheKey<>(fromSnapshotInfo.getTableKey()), + CacheValue.get(termIndex.getIndex(), fromSnapshotInfo)); + } + } catch (IOException e) { + return new OMKeyPurgeResponse(createErrorOMResponse(omResponse, e)); } + omClientResponse = new OMKeyPurgeResponse(omResponse.build(), keysToBePurgedList, fromSnapshotInfo, + keysToUpdateList); return omClientResponse; } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java index 3aa4151cea3..2ded4f6a83e 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.client.DefaultReplicationConfig; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.ratis.server.protocol.TermIndex; import org.apache.hadoop.hdds.utils.db.RDBStore; import org.apache.hadoop.hdds.utils.db.cache.CacheKey; @@ -166,7 +167,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn ((RDBStore) omMetadataManager.getStore()).getDb() .getLatestSequenceNumber(); snapshotInfo.setDbTxSequenceNumber(dbLatestSequenceNumber); - + snapshotInfo.setLastTransactionInfo(TransactionInfo.valueOf(termIndex).toByteString()); // Snapshot referenced size should be bucket's used bytes OmBucketInfo omBucketInfo = getBucketInfo(omMetadataManager, volumeName, bucketName); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotMoveDeletedKeysRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotMoveDeletedKeysRequest.java index df4781bb0ca..58fdb1232d3 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotMoveDeletedKeysRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotMoveDeletedKeysRequest.java @@ -20,6 +20,9 @@ package org.apache.hadoop.ozone.om.request.snapshot; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.utils.TransactionInfo; +import org.apache.hadoop.hdds.utils.db.cache.CacheKey; +import org.apache.hadoop.hdds.utils.db.cache.CacheValue; import org.apache.ratis.server.protocol.TermIndex; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OzoneManager; @@ -82,15 +85,20 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn nextSnapshot = SnapshotUtils.getNextActiveSnapshot(fromSnapshot, snapshotChainManager, ozoneManager); // Get 
next non-deleted snapshot. - List nextDBKeysList = - moveDeletedKeysRequest.getNextDBKeysList(); - List reclaimKeysList = - moveDeletedKeysRequest.getReclaimKeysList(); - List renamedKeysList = - moveDeletedKeysRequest.getRenamedKeysList(); - List movedDirs = - moveDeletedKeysRequest.getDeletedDirsToMoveList(); - + List nextDBKeysList = moveDeletedKeysRequest.getNextDBKeysList(); + List reclaimKeysList = moveDeletedKeysRequest.getReclaimKeysList(); + List renamedKeysList = moveDeletedKeysRequest.getRenamedKeysList(); + List movedDirs = moveDeletedKeysRequest.getDeletedDirsToMoveList(); + + // Update lastTransactionInfo for fromSnapshot and the nextSnapshot. + fromSnapshot.setLastTransactionInfo(TransactionInfo.valueOf(termIndex).toByteString()); + omMetadataManager.getSnapshotInfoTable().addCacheEntry(new CacheKey<>(fromSnapshot.getTableKey()), + CacheValue.get(termIndex.getIndex(), fromSnapshot)); + if (nextSnapshot != null) { + nextSnapshot.setLastTransactionInfo(TransactionInfo.valueOf(termIndex).toByteString()); + omMetadataManager.getSnapshotInfoTable().addCacheEntry(new CacheKey<>(nextSnapshot.getTableKey()), + CacheValue.get(termIndex.getIndex(), nextSnapshot)); + } omClientResponse = new OMSnapshotMoveDeletedKeysResponse( omResponse.build(), fromSnapshot, nextSnapshot, nextDBKeysList, reclaimKeysList, renamedKeysList, movedDirs); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java index 47304b416ae..6602f52514b 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java @@ -19,6 +19,7 @@ package org.apache.hadoop.ozone.om.request.snapshot; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OMMetrics; import org.apache.ratis.server.protocol.TermIndex; @@ -110,9 +111,16 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn updateSnapshotInfoAndCache(nextSnapshot, omMetadataManager, trxnLogIndex); // Step 2: Update the snapshot chain. updateSnapshotChainAndCache(omMetadataManager, fromSnapshot, trxnLogIndex); - // Step 3: Purge the snapshot from SnapshotInfoTable cache. + // Step 3: Purge the snapshot from SnapshotInfoTable cache and also remove from the map. 
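+ // Removing the purged snapshot from updatedSnapshotInfos keeps the loop below from stamping
+ // lastTransactionInfo on a snapshot that is being purged in this transaction; only the
+ // surviving snapshots in the map get their lastTransactionInfo updated.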
omMetadataManager.getSnapshotInfoTable() .addCacheEntry(new CacheKey<>(fromSnapshot.getTableKey()), CacheValue.get(trxnLogIndex)); + updatedSnapshotInfos.remove(fromSnapshot.getTableKey()); + } + + for (SnapshotInfo snapshotInfo : updatedSnapshotInfos.values()) { + snapshotInfo.setLastTransactionInfo(TransactionInfo.valueOf(termIndex).toByteString()); + omMetadataManager.getSnapshotInfoTable().addCacheEntry(new CacheKey<>(snapshotInfo.getTableKey()), + CacheValue.get(termIndex.getIndex(), snapshotInfo)); } omClientResponse = new OMSnapshotPurgeResponse(omResponse.build(), snapshotDbKeys, updatedSnapshotInfos); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java index 138e942e2b6..28c3e3d758e 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java @@ -48,12 +48,13 @@ import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.DELETED_TABLE; import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.DIRECTORY_TABLE; import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.FILE_TABLE; +import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.SNAPSHOT_INFO_TABLE; /** * Response for {@link OMDirectoriesPurgeRequestWithFSO} request. */ @CleanupTableInfo(cleanupTables = {DELETED_TABLE, DELETED_DIR_TABLE, - DIRECTORY_TABLE, FILE_TABLE}) + DIRECTORY_TABLE, FILE_TABLE, SNAPSHOT_INFO_TABLE}) public class OMDirectoriesPurgeResponseWithFSO extends OmKeyResponse { private static final Logger LOG = LoggerFactory.getLogger(OMDirectoriesPurgeResponseWithFSO.class); @@ -97,6 +98,7 @@ public void addToDBBatch(OMMetadataManager metadataManager, fromSnapshotStore.commitBatchOperation(writeBatch); } } + metadataManager.getSnapshotInfoTable().putWithBatch(batchOp, fromSnapshotInfo.getTableKey(), fromSnapshotInfo); } else { processPaths(metadataManager, batchOp); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyPurgeResponse.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyPurgeResponse.java index b59c7d18d40..cd2f7d190f4 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyPurgeResponse.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMKeyPurgeResponse.java @@ -39,12 +39,13 @@ import jakarta.annotation.Nonnull; import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.DELETED_TABLE; +import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.SNAPSHOT_INFO_TABLE; import static org.apache.hadoop.ozone.om.response.snapshot.OMSnapshotMoveDeletedKeysResponse.createRepeatedOmKeyInfo; /** * Response for {@link OMKeyPurgeRequest} request. 
*/ -@CleanupTableInfo(cleanupTables = {DELETED_TABLE}) +@CleanupTableInfo(cleanupTables = {DELETED_TABLE, SNAPSHOT_INFO_TABLE}) public class OMKeyPurgeResponse extends OmKeyResponse { private List purgeKeyList; private SnapshotInfo fromSnapshot; @@ -90,6 +91,7 @@ public void addToDBBatch(OMMetadataManager omMetadataManager, fromSnapshotStore.commitBatchOperation(writeBatch); } } + omMetadataManager.getSnapshotInfoTable().putWithBatch(batchOperation, fromSnapshot.getTableKey(), fromSnapshot); } else { processKeys(batchOperation, omMetadataManager); processKeysToUpdate(batchOperation, omMetadataManager); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotMoveDeletedKeysResponse.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotMoveDeletedKeysResponse.java index 3b5a7454f9d..f39d5827a0c 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotMoveDeletedKeysResponse.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotMoveDeletedKeysResponse.java @@ -133,6 +133,11 @@ protected void addToDBBatch(OMMetadataManager omMetadataManager, } } + // Flush snapshot info to rocksDB. + omMetadataManager.getSnapshotInfoTable().putWithBatch(batchOperation, fromSnapshot.getTableKey(), fromSnapshot); + if (nextSnapshot != null) { + omMetadataManager.getSnapshotInfoTable().putWithBatch(batchOperation, nextSnapshot.getTableKey(), nextSnapshot); + } } private void deleteDirsFromSnapshot(BatchOperation batchOp, diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMDirectoriesPurgeRequestAndResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMDirectoriesPurgeRequestAndResponse.java index cbb782e184f..9eb8738b9d4 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMDirectoriesPurgeRequestAndResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMDirectoriesPurgeRequestAndResponse.java @@ -26,18 +26,23 @@ import java.util.List; import java.util.UUID; import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.hdds.utils.db.cache.CacheKey; import org.apache.hadoop.hdds.utils.db.cache.CacheValue; import org.apache.hadoop.ozone.ClientVersion; +import org.apache.hadoop.ozone.om.OMMetadataManager; +import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; +import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; import org.apache.hadoop.ozone.om.request.OMRequestTestUtils; import org.apache.hadoop.ozone.om.response.key.OMDirectoriesPurgeResponseWithFSO; import org.apache.hadoop.ozone.om.response.key.OMKeyPurgeResponse; +import org.apache.hadoop.ozone.om.snapshot.ReferenceCounted; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import jakarta.annotation.Nonnull; @@ -109,7 +114,7 @@ private void updateBlockInfo(OmKeyInfo 
omKeyInfo) throws IOException { * Create OMRequest which encapsulates DeleteKeyRequest. * @return OMRequest */ - private OMRequest createPurgeKeysRequest(String purgeDeletedDir, + private OMRequest createPurgeKeysRequest(String fromSnapshot, String purgeDeletedDir, List keyList, OmBucketInfo bucketInfo) throws IOException { List purgePathRequestList = new ArrayList<>(); @@ -127,7 +132,9 @@ private OMRequest createPurgeKeysRequest(String purgeDeletedDir, OzoneManagerProtocolProtos.PurgeDirectoriesRequest.Builder purgeDirRequest = OzoneManagerProtocolProtos.PurgeDirectoriesRequest.newBuilder(); purgeDirRequest.addAllDeletedPath(purgePathRequestList); - + if (fromSnapshot != null) { + purgeDirRequest.setSnapshotTableKey(fromSnapshot); + } OzoneManagerProtocolProtos.OMRequest omRequest = OzoneManagerProtocolProtos.OMRequest.newBuilder() .setCmdType(OzoneManagerProtocolProtos.Type.PurgeDirectories) @@ -138,8 +145,7 @@ private OMRequest createPurgeKeysRequest(String purgeDeletedDir, } private OzoneManagerProtocolProtos.PurgePathRequest wrapPurgeRequest( final long volumeId, final long bucketId, final String purgeDeletedDir, - final List purgeDeletedFiles, - final List markDirsAsDeleted) { + final List purgeDeletedFiles, final List markDirsAsDeleted) { // Put all keys to be purged in a list OzoneManagerProtocolProtos.PurgePathRequest.Builder purgePathsRequest = OzoneManagerProtocolProtos.PurgePathRequest.newBuilder(); @@ -182,13 +188,13 @@ public void testValidateAndUpdateCacheCheckQuota() throws Exception { // Create and Delete keys. The keys should be moved to DeletedKeys table List deletedKeyInfos = createAndDeleteKeys(1, null); // The keys should be present in the DeletedKeys table before purging - List deletedKeyNames = validateDeletedKeysTable(deletedKeyInfos); + List deletedKeyNames = validateDeletedKeysTable(omMetadataManager, deletedKeyInfos, true); // Create PurgeKeysRequest to purge the deleted keys String bucketKey = omMetadataManager.getBucketKey(volumeName, bucketName); OmBucketInfo omBucketInfo = omMetadataManager.getBucketTable().get( bucketKey); - OMRequest omRequest = createPurgeKeysRequest( + OMRequest omRequest = createPurgeKeysRequest(null, null, deletedKeyInfos, omBucketInfo); OMRequest preExecutedRequest = preExecute(omRequest); OMDirectoriesPurgeRequestWithFSO omKeyPurgeRequest = @@ -205,7 +211,59 @@ public void testValidateAndUpdateCacheCheckQuota() throws Exception { performBatchOperationCommit(omClientResponse); // The keys should exist in the DeletedKeys table after dir delete - validateDeletedKeys(deletedKeyNames); + validateDeletedKeys(omMetadataManager, deletedKeyNames); + } + + @Test + public void testValidateAndUpdateCacheSnapshotLastTransactionInfoUpdated() throws Exception { + // Create and Delete keys. 
The keys should be moved to DeletedKeys table + List deletedKeyInfos = createAndDeleteKeys(1, null); + // The keys should be present in the DeletedKeys table before purging + List deletedKeyNames = validateDeletedKeysTable(omMetadataManager, deletedKeyInfos, true); + + String snapshotName = "snap1"; + SnapshotInfo snapshotInfo = createSnapshot(snapshotName); + ReferenceCounted rcOmSnapshot = ozoneManager.getOmSnapshotManager() + .getSnapshot(snapshotInfo.getVolumeName(), snapshotInfo.getBucketName(), snapshotInfo.getName()); + // Keys should be present in snapshot + validateDeletedKeysTable(rcOmSnapshot.get().getMetadataManager(), deletedKeyInfos, true); + // keys should have been moved from AOS + validateDeletedKeysTable(omMetadataManager, deletedKeyInfos, false); + + // Create PurgeKeysRequest to purge the deleted keys + assertEquals(snapshotInfo.getLastTransactionInfo(), + TransactionInfo.valueOf(TransactionInfo.getTermIndex(1L)).toByteString()); + String bucketKey = omMetadataManager.getBucketKey(volumeName, bucketName); + OmBucketInfo omBucketInfo = omMetadataManager.getBucketTable().get( + bucketKey); + OMRequest omRequest = createPurgeKeysRequest(snapshotInfo.getTableKey(), + null, deletedKeyInfos, omBucketInfo); + OMRequest preExecutedRequest = preExecute(omRequest); + OMDirectoriesPurgeRequestWithFSO omKeyPurgeRequest = + new OMDirectoriesPurgeRequestWithFSO(preExecutedRequest); + + assertEquals(1000L * deletedKeyNames.size(), omBucketInfo.getUsedBytes()); + OMDirectoriesPurgeResponseWithFSO omClientResponse + = (OMDirectoriesPurgeResponseWithFSO) omKeyPurgeRequest + .validateAndUpdateCache(ozoneManager, 100L); + + SnapshotInfo snapshotInfoOnDisk = omMetadataManager.getSnapshotInfoTable().getSkipCache(snapshotInfo.getTableKey()); + SnapshotInfo updatedSnapshotInfo = omMetadataManager.getSnapshotInfoTable().get(snapshotInfo.getTableKey()); + + assertEquals(snapshotInfoOnDisk, snapshotInfo); + snapshotInfo.setLastTransactionInfo(TransactionInfo.valueOf(TransactionInfo.getTermIndex(100L)) + .toByteString()); + assertEquals(snapshotInfo, updatedSnapshotInfo); + omBucketInfo = omMetadataManager.getBucketTable().get(bucketKey); + assertEquals(0L * deletedKeyNames.size(), omBucketInfo.getUsedBytes()); + + performBatchOperationCommit(omClientResponse); + + // The keys should exist in the DeletedKeys table after dir delete + validateDeletedKeys(rcOmSnapshot.get().getMetadataManager(), deletedKeyNames); + snapshotInfoOnDisk = omMetadataManager.getSnapshotInfoTable().getSkipCache(snapshotInfo.getTableKey()); + assertEquals(snapshotInfo, snapshotInfoOnDisk); + rcOmSnapshot.close(); } @Test @@ -214,13 +272,13 @@ public void testValidateAndUpdateCacheQuotaBucketRecreated() // Create and Delete keys. 
The keys should be moved to DeletedKeys table List deletedKeyInfos = createAndDeleteKeys(1, null); // The keys should be present in the DeletedKeys table before purging - List deletedKeyNames = validateDeletedKeysTable(deletedKeyInfos); + List deletedKeyNames = validateDeletedKeysTable(omMetadataManager, deletedKeyInfos, true); // Create PurgeKeysRequest to purge the deleted keys String bucketKey = omMetadataManager.getBucketKey(volumeName, bucketName); OmBucketInfo omBucketInfo = omMetadataManager.getBucketTable().get( bucketKey); - OMRequest omRequest = createPurgeKeysRequest( + OMRequest omRequest = createPurgeKeysRequest(null, null, deletedKeyInfos, omBucketInfo); OMRequest preExecutedRequest = preExecute(omRequest); OMDirectoriesPurgeRequestWithFSO omKeyPurgeRequest = @@ -258,35 +316,32 @@ public void testValidateAndUpdateCacheQuotaBucketRecreated() performBatchOperationCommit(omClientResponse); // The keys should exist in the DeletedKeys table after dir delete - validateDeletedKeys(deletedKeyNames); + validateDeletedKeys(omMetadataManager, deletedKeyNames); } - private void performBatchOperationCommit( - OMDirectoriesPurgeResponseWithFSO omClientResponse) throws IOException { + private void performBatchOperationCommit(OMDirectoriesPurgeResponseWithFSO omClientResponse) throws IOException { try (BatchOperation batchOperation = omMetadataManager.getStore().initBatchOperation()) { - omClientResponse.addToDBBatch(omMetadataManager, batchOperation); - // Do manual commit and see whether addToBatch is successful or not. omMetadataManager.getStore().commitBatchOperation(batchOperation); } } @Nonnull - private List validateDeletedKeysTable( - List deletedKeyInfos) throws IOException { + private List validateDeletedKeysTable(OMMetadataManager omMetadataManager, + List deletedKeyInfos, boolean keyExists) throws IOException { List deletedKeyNames = new ArrayList<>(); for (OmKeyInfo deletedKey : deletedKeyInfos) { String keyName = omMetadataManager.getOzoneKey(deletedKey.getVolumeName(), deletedKey.getBucketName(), deletedKey.getKeyName()); - assertTrue(omMetadataManager.getDeletedTable().isExist(keyName)); + assertEquals(omMetadataManager.getDeletedTable().isExist(keyName), keyExists); deletedKeyNames.add(keyName); } return deletedKeyNames; } - private void validateDeletedKeys( + private void validateDeletedKeys(OMMetadataManager omMetadataManager, List deletedKeyNames) throws IOException { for (String deletedKey : deletedKeyNames) { assertTrue(omMetadataManager.getDeletedTable().isExist( diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyPurgeRequestAndResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyPurgeRequestAndResponse.java index 2cd0de920be..c323fecd501 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyPurgeRequestAndResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyPurgeRequestAndResponse.java @@ -23,12 +23,10 @@ import java.util.List; import java.util.UUID; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; import org.apache.hadoop.ozone.om.request.OMRequestTestUtils; -import org.apache.hadoop.ozone.om.request.snapshot.OMSnapshotCreateRequest; -import org.apache.hadoop.ozone.om.request.snapshot.TestOMSnapshotCreateRequest; -import 
org.apache.hadoop.ozone.om.response.snapshot.OMSnapshotCreateResponse; import org.apache.hadoop.ozone.om.snapshot.ReferenceCounted; import org.junit.jupiter.api.Test; @@ -42,12 +40,10 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type; import org.apache.hadoop.hdds.utils.db.BatchOperation; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.any; -import static org.mockito.Mockito.when; /** * Tests {@link OMKeyPurgeRequest} and {@link OMKeyPurgeResponse}. @@ -115,35 +111,6 @@ private OMRequest createPurgeKeysRequest(List deletedKeys, .build(); } - /** - * Create snapshot and checkpoint directory. - */ - private SnapshotInfo createSnapshot(String snapshotName) throws Exception { - when(ozoneManager.isAdmin(any())).thenReturn(true); - BatchOperation batchOperation = omMetadataManager.getStore() - .initBatchOperation(); - OMRequest omRequest = OMRequestTestUtils - .createSnapshotRequest(volumeName, bucketName, snapshotName); - // Pre-Execute OMSnapshotCreateRequest. - OMSnapshotCreateRequest omSnapshotCreateRequest = - TestOMSnapshotCreateRequest.doPreExecute(omRequest, ozoneManager); - - // validateAndUpdateCache OMSnapshotCreateResponse. - OMSnapshotCreateResponse omClientResponse = (OMSnapshotCreateResponse) - omSnapshotCreateRequest.validateAndUpdateCache(ozoneManager, 1L); - // Add to batch and commit to DB. - omClientResponse.addToDBBatch(omMetadataManager, batchOperation); - omMetadataManager.getStore().commitBatchOperation(batchOperation); - batchOperation.close(); - - String key = SnapshotInfo.getTableKey(volumeName, - bucketName, snapshotName); - SnapshotInfo snapshotInfo = - omMetadataManager.getSnapshotInfoTable().get(key); - assertNotNull(snapshotInfo); - return snapshotInfo; - } - private OMRequest preExecute(OMRequest originalOmRequest) throws IOException { OMKeyPurgeRequest omKeyPurgeRequest = new OMKeyPurgeRequest(originalOmRequest); @@ -205,6 +172,8 @@ public void testKeyPurgeInSnapshot() throws Exception { List deletedKeyNames = createAndDeleteKeys(1, null); SnapshotInfo snapInfo = createSnapshot("snap1"); + assertEquals(snapInfo.getLastTransactionInfo(), + TransactionInfo.valueOf(TransactionInfo.getTermIndex(1L)).toByteString()); // The keys should be not present in the active Db's deletedTable for (String deletedKey : deletedKeyNames) { assertFalse(omMetadataManager.getDeletedTable().isExist(deletedKey)); @@ -230,6 +199,12 @@ public void testKeyPurgeInSnapshot() throws Exception { omKeyPurgeRequest.validateAndUpdateCache(ozoneManager, 100L); + SnapshotInfo snapshotInfoOnDisk = omMetadataManager.getSnapshotInfoTable().getSkipCache(snapInfo.getTableKey()); + SnapshotInfo updatedSnapshotInfo = omMetadataManager.getSnapshotInfoTable().get(snapInfo.getTableKey()); + assertEquals(snapshotInfoOnDisk, snapInfo); + snapInfo.setLastTransactionInfo(TransactionInfo.valueOf(TransactionInfo.getTermIndex(100L)) + .toByteString()); + assertEquals(snapInfo, updatedSnapshotInfo); OMResponse omResponse = OMResponse.newBuilder() .setPurgeKeysResponse(PurgeKeysResponse.getDefaultInstance()) .setCmdType(Type.PurgeKeys) @@ -245,7 +220,8 @@ public void testKeyPurgeInSnapshot() throws Exception { // Do manual commit and see whether addToBatch is successful or not. 
omMetadataManager.getStore().commitBatchOperation(batchOperation); } - + snapshotInfoOnDisk = omMetadataManager.getSnapshotInfoTable().getSkipCache(snapInfo.getTableKey()); + assertEquals(snapshotInfoOnDisk, snapInfo); // The keys should not exist in the DeletedKeys table for (String deletedKey : deletedKeyNames) { assertFalse(omSnapshot.getMetadataManager() diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java index f636152c35c..e2219d5fcc1 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol; +import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.om.IOmMetadataReader; import org.apache.hadoop.ozone.om.OMPerformanceMetrics; @@ -43,9 +44,15 @@ import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; import org.apache.hadoop.ozone.om.request.OMClientRequest; +import org.apache.hadoop.ozone.om.request.OMRequestTestUtils; +import org.apache.hadoop.ozone.om.request.snapshot.OMSnapshotCreateRequest; +import org.apache.hadoop.ozone.om.request.snapshot.TestOMSnapshotCreateRequest; +import org.apache.hadoop.ozone.om.response.snapshot.OMSnapshotCreateResponse; import org.apache.hadoop.ozone.om.snapshot.ReferenceCounted; import org.apache.hadoop.ozone.om.upgrade.OMLayoutVersionManager; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.KeyArgs; import org.apache.hadoop.ozone.security.acl.OzoneNativeAuthorizer; import org.apache.hadoop.security.UserGroupInformation; @@ -237,7 +244,7 @@ public void setup() throws Exception { .thenReturn(bucket); when(ozoneManager.resolveBucketLink(any(Pair.class))) .thenReturn(bucket); - OmSnapshotManager omSnapshotManager = new OmSnapshotManager(ozoneManager); + OmSnapshotManager omSnapshotManager = Mockito.spy(new OmSnapshotManager(ozoneManager)); when(ozoneManager.getOmSnapshotManager()) .thenReturn(omSnapshotManager); @@ -285,4 +292,34 @@ public void stop() { omMetrics.unRegister(); framework().clearInlineMocks(); } + + /** + * Create snapshot and checkpoint directory. + */ + protected SnapshotInfo createSnapshot(String snapshotName) throws Exception { + when(ozoneManager.isAdmin(any())).thenReturn(true); + BatchOperation batchOperation = omMetadataManager.getStore() + .initBatchOperation(); + OzoneManagerProtocolProtos.OMRequest omRequest = OMRequestTestUtils + .createSnapshotRequest(volumeName, bucketName, snapshotName); + // Pre-Execute OMSnapshotCreateRequest. + OMSnapshotCreateRequest omSnapshotCreateRequest = + TestOMSnapshotCreateRequest.doPreExecute(omRequest, ozoneManager); + + // validateAndUpdateCache OMSnapshotCreateResponse. 
+ OMSnapshotCreateResponse omClientResponse = (OMSnapshotCreateResponse) + omSnapshotCreateRequest.validateAndUpdateCache(ozoneManager, 1L); + // Add to batch and commit to DB. + omClientResponse.addToDBBatch(omMetadataManager, batchOperation); + omMetadataManager.getStore().commitBatchOperation(batchOperation); + batchOperation.close(); + + String key = SnapshotInfo.getTableKey(volumeName, + bucketName, snapshotName); + SnapshotInfo snapshotInfo = + omMetadataManager.getSnapshotInfoTable().get(key); + assertNotNull(snapshotInfo); + return snapshotInfo; + } + } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java index 3997f39d7bd..806c1b90f7f 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.ozone.audit.AuditLogger; @@ -229,7 +230,8 @@ public void testValidateAndUpdateCache() throws Exception { omMetadataManager.getSnapshotInfoTable().get(key); assertNotNull(snapshotInfoInCache); assertEquals(snapshotInfoFromProto, snapshotInfoInCache); - + assertEquals(snapshotInfoInCache.getLastTransactionInfo(), + TransactionInfo.valueOf(TransactionInfo.getTermIndex(1L)).toByteString()); assertEquals(0, omMetrics.getNumSnapshotCreateFails()); assertEquals(1, omMetrics.getNumSnapshotActive()); assertEquals(1, omMetrics.getNumSnapshotCreates()); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotPurgeRequestAndResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotPurgeRequestAndResponse.java index 8edd096e766..d0a5559a87b 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotPurgeRequestAndResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotPurgeRequestAndResponse.java @@ -19,7 +19,9 @@ package org.apache.hadoop.ozone.om.request.snapshot; +import com.google.protobuf.ByteString; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.hdds.utils.db.RDBStore; import org.apache.hadoop.hdds.utils.db.Table; @@ -57,6 +59,7 @@ import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; @@ -450,12 +453,29 @@ public void testSnapshotChainInSnapshotInfoTableAfterSnapshotPurge( .countRowsInTable(omMetadataManager.getSnapshotInfoTable()); assertEquals(totalKeys, numberOfSnapshotBeforePurge); assertEquals(totalKeys, chainManager.getGlobalSnapshotChain().size()); - - validateSnapshotOrderInSnapshotInfoTableAndSnapshotChain(snapshotInfoList); - + Map expectedTransactionInfos = new HashMap<>(); + // Ratis transaction uses 
term index 1 while creating snapshot. + ByteString expectedLastTransactionVal = TransactionInfo.valueOf(TransactionInfo.getTermIndex(1L)) + .toByteString(); + for (SnapshotInfo snapshotInfo : snapshotInfoList) { + expectedTransactionInfos.put(snapshotInfo.getSnapshotId(), expectedLastTransactionVal); + } + validateSnapshotOrderInSnapshotInfoTableAndSnapshotChain(snapshotInfoList, expectedTransactionInfos); + // Ratis transaction uses term index 200 while purging snapshot. + expectedLastTransactionVal = TransactionInfo.valueOf(TransactionInfo.getTermIndex(200L)) + .toByteString(); List purgeSnapshotKeys = new ArrayList<>(); for (int i = fromIndex; i <= toIndex; i++) { SnapshotInfo purgeSnapshotInfo = snapshotInfoList.get(i); + UUID snapId = purgeSnapshotInfo.getSnapshotId(); + // expecting nextPathSnapshot & nextGlobalSnapshot in chain gets updated. + if (chainManager.hasNextGlobalSnapshot(snapId)) { + expectedTransactionInfos.put(chainManager.nextGlobalSnapshot(snapId), expectedLastTransactionVal); + } + if (chainManager.hasNextPathSnapshot(purgeSnapshotInfo.getSnapshotPath(), snapId)) { + expectedTransactionInfos.put(chainManager.nextPathSnapshot(purgeSnapshotInfo.getSnapshotPath(), snapId), + expectedLastTransactionVal); + } String purgeSnapshotKey = SnapshotInfo.getTableKey(volumeName, purgeSnapshotInfo.getBucketName(), purgeSnapshotInfo.getName()); @@ -484,17 +504,17 @@ public void testSnapshotChainInSnapshotInfoTableAfterSnapshotPurge( actualNumberOfSnapshotAfterPurge); assertEquals(expectNumberOfSnapshotAfterPurge, chainManager .getGlobalSnapshotChain().size()); - validateSnapshotOrderInSnapshotInfoTableAndSnapshotChain( - snapshotInfoListAfterPurge); + validateSnapshotOrderInSnapshotInfoTableAndSnapshotChain(snapshotInfoListAfterPurge, expectedTransactionInfos); } private void validateSnapshotOrderInSnapshotInfoTableAndSnapshotChain( - List snapshotInfoList - ) throws IOException { + List snapshotInfoList, Map expectedTransactionInfos) throws IOException { if (snapshotInfoList.isEmpty()) { return; } - + for (SnapshotInfo snapshotInfo : snapshotInfoList) { + assertEquals(snapshotInfo.getLastTransactionInfo(), expectedTransactionInfos.get(snapshotInfo.getSnapshotId())); + } OmMetadataManagerImpl metadataManager = (OmMetadataManagerImpl) omMetadataManager; SnapshotChainManager chainManager = metadataManager diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java index 7f74f3d17ec..a370c20ad1b 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java @@ -27,6 +27,7 @@ import java.util.UUID; import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; @@ -98,7 +99,8 @@ public void testAddToDBBatch(int numberOfKeys) throws Exception { snapshotName, snapshotId, Time.now()); - + snapshotInfo.setLastTransactionInfo( + TransactionInfo.valueOf(TransactionInfo.getTermIndex(1L)).toByteString()); // confirm table is empty assertEquals(0, omMetadataManager 
.countRowsInTable(omMetadataManager.getSnapshotInfoTable())); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotInfo.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotInfo.java index dc00433e179..29e0115861f 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotInfo.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotInfo.java @@ -19,12 +19,18 @@ package org.apache.hadoop.ozone.om.snapshot; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.cache.CacheKey; +import org.apache.hadoop.hdds.utils.db.cache.CacheValue; +import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; +import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo.SnapshotStatus; import org.apache.hadoop.util.Time; +import org.apache.ratis.server.protocol.TermIndex; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -35,6 +41,7 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_DB_DIRS; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; /** @@ -114,4 +121,44 @@ public void testSnapshotSSTFilteredFlag() throws Exception { snapshotInfo.put(EXPECTED_SNAPSHOT_KEY, info); assertTrue(snapshotInfo.get(EXPECTED_SNAPSHOT_KEY).isSstFiltered()); } + + @Test + public void testLastTransactionInfo() throws Exception { + Table snapshotInfo = + omMetadataManager.getSnapshotInfoTable(); + SnapshotInfo info = createSnapshotInfo(); + snapshotInfo.put(EXPECTED_SNAPSHOT_KEY, info); + assertNull(snapshotInfo.get(EXPECTED_SNAPSHOT_KEY).getLastTransactionInfo()); + // checking if true value is returned when snapshot is null. + assertTrue(OmSnapshotManager.areSnapshotChangesFlushedToDB(omMetadataManager, (SnapshotInfo)null)); + omMetadataManager.getTransactionInfoTable().put(OzoneConsts.TRANSACTION_INFO_KEY, TransactionInfo.valueOf(0, 0)); + // Checking if changes have been flushed when lastTransactionInfo is null + assertTrue(OmSnapshotManager.areSnapshotChangesFlushedToDB(omMetadataManager, info)); + TermIndex termIndex = TermIndex.valueOf(1, 1); + info.setLastTransactionInfo(TransactionInfo.valueOf(termIndex).toByteString()); + // Checking if changes to snapshot object has been updated but not updated on cache or disk. + assertTrue(OmSnapshotManager.areSnapshotChangesFlushedToDB(omMetadataManager, EXPECTED_SNAPSHOT_KEY)); + snapshotInfo.addCacheEntry(new CacheKey<>(EXPECTED_SNAPSHOT_KEY), CacheValue.get(termIndex.getIndex(), info)); + + assertEquals(snapshotInfo.get(EXPECTED_SNAPSHOT_KEY).getLastTransactionInfo(), info.getLastTransactionInfo()); + + // Checking if changes have not been flushed when snapshot last transaction info is behind OmTransactionTable value. 
+ assertFalse(OmSnapshotManager.areSnapshotChangesFlushedToDB(omMetadataManager, EXPECTED_SNAPSHOT_KEY)); + omMetadataManager.getTransactionInfoTable().addCacheEntry(new CacheKey<>(OzoneConsts.TRANSACTION_INFO_KEY), + CacheValue.get(termIndex.getIndex(), TransactionInfo.valueOf(1, 1))); + assertFalse(OmSnapshotManager.areSnapshotChangesFlushedToDB(omMetadataManager, EXPECTED_SNAPSHOT_KEY)); + + // Checking changes are flushed when transaction is equal. + omMetadataManager.getTransactionInfoTable().put(OzoneConsts.TRANSACTION_INFO_KEY, + TransactionInfo.valueOf(1, 1)); + + + assertTrue(OmSnapshotManager.areSnapshotChangesFlushedToDB(omMetadataManager, EXPECTED_SNAPSHOT_KEY)); + // Checking changes are flushed when transactionIndex is greater . + omMetadataManager.getTransactionInfoTable().put(OzoneConsts.TRANSACTION_INFO_KEY, TransactionInfo.valueOf(1, 2)); + assertTrue(OmSnapshotManager.areSnapshotChangesFlushedToDB(omMetadataManager, EXPECTED_SNAPSHOT_KEY)); + // Checking changes are flushed when both term & transactionIndex is greater. + omMetadataManager.getTransactionInfoTable().put(OzoneConsts.TRANSACTION_INFO_KEY, TransactionInfo.valueOf(2, 2)); + assertTrue(OmSnapshotManager.areSnapshotChangesFlushedToDB(omMetadataManager, EXPECTED_SNAPSHOT_KEY)); + } } From 703c4d50f8982fba16b618fc8e8bb0cf684e0fa0 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Fri, 13 Sep 2024 12:27:49 +0530 Subject: [PATCH 13/43] HDDS-10984. Tool to restore SCM certificates from RocksDB. (#6781) --- .../main/compose/ozonesecure-ha/docker-config | 1 + .../ozone/repair/RecoverSCMCertificate.java | 261 ++++++++++++++++++ 2 files changed, 262 insertions(+) create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/RecoverSCMCertificate.java diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config index db517a7f7c6..38cc5b71a18 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config @@ -33,6 +33,7 @@ OZONE-SITE.XML_ozone.om.http-address.omservice.om3=om3 OZONE-SITE.XML_ozone.om.ratis.enable=true OZONE-SITE.XML_ozone.scm.service.ids=scmservice +OZONE-SITE.XML_ozone.scm.primordial.node.id=scm1 OZONE-SITE.XML_ozone.scm.nodes.scmservice=scm1,scm2,scm3 OZONE-SITE.XML_ozone.scm.address.scmservice.scm1=scm1.org OZONE-SITE.XML_ozone.scm.address.scmservice.scm2=scm2.org diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/RecoverSCMCertificate.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/RecoverSCMCertificate.java new file mode 100644 index 00000000000..aca41844a18 --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/RecoverSCMCertificate.java @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.repair; + +import org.apache.hadoop.hdds.cli.SubcommandWithParent; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.security.SecurityConfig; +import org.apache.hadoop.hdds.security.x509.certificate.authority.CAType; +import org.apache.hadoop.hdds.security.x509.certificate.client.SCMCertificateClient; +import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; +import org.apache.hadoop.hdds.utils.db.DBColumnFamilyDefinition; +import org.apache.hadoop.hdds.utils.db.DBDefinition; +import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB; +import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksIterator; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.debug.DBDefinitionFactory; +import org.apache.hadoop.ozone.debug.RocksDBUtils; +import java.security.cert.CertificateFactory; +import org.kohsuke.MetaInfServices; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.RocksDBException; +import picocli.CommandLine; + +import java.io.IOException; +import java.io.PrintWriter; +import java.math.BigInteger; +import java.net.InetAddress; +import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; +import java.security.cert.CertPath; +import java.security.cert.CertificateException; +import java.security.cert.X509Certificate; +import java.util.Map; +import java.util.HashMap; +import java.util.List; +import java.util.ArrayList; +import java.util.Optional; +import java.util.Arrays; +import java.util.concurrent.Callable; + +import static org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition.VALID_SCM_CERTS; +import static org.apache.hadoop.hdds.security.x509.certificate.client.DefaultCertificateClient.CERT_FILE_NAME_FORMAT; +import static org.apache.hadoop.ozone.om.helpers.OzoneFSUtils.removeTrailingSlashIfNeeded; + +/** + * In case of accidental deletion of SCM certificates from local storage, + * this tool restores the certs that are persisted into the SCM DB. + * Note that this will only work if the SCM has persisted certs in its RocksDB + * and private keys of the SCM are intact. 
+ */ +@CommandLine.Command( + name = "cert-recover", + description = "Recover Deleted SCM Certificate from RocksDB") +@MetaInfServices(SubcommandWithParent.class) +public class RecoverSCMCertificate implements Callable, SubcommandWithParent { + + @CommandLine.Option(names = {"--db"}, + required = true, + description = "SCM DB Path") + private String dbPath; + + @CommandLine.ParentCommand + private OzoneRepair parent; + + @CommandLine.Spec + private CommandLine.Model.CommandSpec spec; + + @Override + public Class getParentType() { + return OzoneRepair.class; + } + + private PrintWriter err() { + return spec.commandLine().getErr(); + } + + private PrintWriter out() { + return spec.commandLine().getOut(); + } + + @Override + public Void call() throws Exception { + dbPath = removeTrailingSlashIfNeeded(dbPath); + String tableName = VALID_SCM_CERTS.getName(); + DBDefinition dbDefinition = + DBDefinitionFactory.getDefinition(Paths.get(dbPath), new OzoneConfiguration()); + if (dbDefinition == null) { + throw new Exception("Error: Incorrect DB Path"); + } + DBColumnFamilyDefinition columnFamilyDefinition = + getDbColumnFamilyDefinition(tableName, dbDefinition); + + try { + List cfDescList = RocksDBUtils.getColumnFamilyDescriptors(dbPath); + final List cfHandleList = new ArrayList<>(); + byte[] tableNameBytes = tableName.getBytes(StandardCharsets.UTF_8); + ColumnFamilyHandle cfHandle = null; + try (ManagedRocksDB db = ManagedRocksDB.openReadOnly(dbPath, cfDescList, + cfHandleList)) { + cfHandle = getColumnFamilyHandle(cfHandleList, tableNameBytes); + SecurityConfig securityConfig = new SecurityConfig(parent.getOzoneConf()); + + Map allCerts = getAllCerts(columnFamilyDefinition, cfHandle, db); + out().println("All Certs in DB : " + allCerts.keySet()); + String hostName = InetAddress.getLocalHost().getHostName(); + out().println("Host: " + hostName); + + X509Certificate subCertificate = getSubCertificate(allCerts, hostName); + X509Certificate rootCertificate = getRootCertificate(allCerts); + + out().println("Sub cert serialID for this host: " + subCertificate.getSerialNumber().toString()); + out().println("Root cert serialID: " + rootCertificate.getSerialNumber().toString()); + + boolean isRootCA = false; + + String caPrincipal = rootCertificate.getSubjectDN().getName(); + if (caPrincipal.contains(hostName)) { + isRootCA = true; + } + storeCerts(subCertificate, rootCertificate, isRootCA, securityConfig); + } + } catch (RocksDBException | CertificateException exception) { + err().print("Failed to recover scm cert"); + } + return null; + } + + private static ColumnFamilyHandle getColumnFamilyHandle( + List cfHandleList, byte[] tableNameBytes) throws Exception { + ColumnFamilyHandle cfHandle = null; + for (ColumnFamilyHandle cf : cfHandleList) { + if (Arrays.equals(cf.getName(), tableNameBytes)) { + cfHandle = cf; + break; + } + } + if (cfHandle == null) { + throw new Exception("Error: VALID_SCM_CERTS table not found in DB"); + } + return cfHandle; + } + + private static X509Certificate getRootCertificate( + Map allCerts) throws Exception { + Optional cert = allCerts.values().stream().filter( + c -> c.getSubjectDN().getName() + .contains(OzoneConsts.SCM_ROOT_CA_PREFIX)).findFirst(); + if (!cert.isPresent()) { + throw new Exception("Root CA Cert not found in the DB for this host, Certs in the DB : " + allCerts.keySet()); + } + return cert.get(); + } + + + private static X509Certificate getSubCertificate( + Map allCerts, String hostName) throws Exception { + Optional cert = 
allCerts.values().stream().filter( + c -> c.getSubjectDN().getName() + .contains(OzoneConsts.SCM_SUB_CA_PREFIX) && c.getSubjectDN() + .getName().contains(hostName)).findFirst(); + if (!cert.isPresent()) { + throw new Exception("Sub CA Cert not found in the DB for this host, Certs in the DB : " + allCerts.keySet()); + } + return cert.get(); + } + + private static Map getAllCerts( + DBColumnFamilyDefinition columnFamilyDefinition, + ColumnFamilyHandle cfHandle, ManagedRocksDB db) throws IOException, RocksDBException { + Map allCerts = new HashMap<>(); + ManagedRocksIterator rocksIterator = ManagedRocksIterator.managed(db.get().newIterator(cfHandle)); + rocksIterator.get().seekToFirst(); + while (rocksIterator.get().isValid()) { + BigInteger id = (BigInteger) columnFamilyDefinition.getKeyCodec() + .fromPersistedFormat(rocksIterator.get().key()); + X509Certificate certificate = + (X509Certificate) columnFamilyDefinition.getValueCodec() + .fromPersistedFormat(rocksIterator.get().value()); + allCerts.put(id, certificate); + rocksIterator.get().next(); + } + return allCerts; + } + + private static DBColumnFamilyDefinition getDbColumnFamilyDefinition( + String tableName, DBDefinition dbDefinition) throws Exception { + DBColumnFamilyDefinition columnFamilyDefinition = + dbDefinition.getColumnFamily(tableName); + if (columnFamilyDefinition == null) { + throw new Exception( + "Error: VALID_SCM_CERTS table no found in Definition"); + } + return columnFamilyDefinition; + } + + private void storeCerts(X509Certificate scmCertificate, + X509Certificate rootCertificate, boolean isRootCA, SecurityConfig securityConfig) + throws CertificateException, IOException { + CertificateCodec certCodec = + new CertificateCodec(securityConfig, SCMCertificateClient.COMPONENT_NAME); + + out().println("Writing certs to path : " + certCodec.getLocation().toString()); + + CertPath certPath = addRootCertInPath(scmCertificate, rootCertificate); + CertPath rootCertPath = getRootCertPath(rootCertificate); + String encodedCert = CertificateCodec.getPEMEncodedString(certPath); + String certName = String.format(CERT_FILE_NAME_FORMAT, + CAType.NONE.getFileNamePrefix() + scmCertificate.getSerialNumber().toString()); + certCodec.writeCertificate(certName, encodedCert); + + String rootCertName = String.format(CERT_FILE_NAME_FORMAT, + CAType.SUBORDINATE.getFileNamePrefix() + rootCertificate.getSerialNumber().toString()); + String encodedRootCert = CertificateCodec.getPEMEncodedString(rootCertPath); + certCodec.writeCertificate(rootCertName, encodedRootCert); + + certCodec.writeCertificate(certCodec.getLocation().toAbsolutePath(), + securityConfig.getCertificateFileName(), encodedCert); + + if (isRootCA) { + CertificateCodec rootCertCodec = + new CertificateCodec(securityConfig, OzoneConsts.SCM_ROOT_CA_COMPONENT_NAME); + out().println("Writing root certs to path : " + rootCertCodec.getLocation().toString()); + rootCertCodec.writeCertificate(rootCertCodec.getLocation().toAbsolutePath(), + securityConfig.getCertificateFileName(), encodedRootCert); + } + } + + public CertPath addRootCertInPath(X509Certificate scmCert, + X509Certificate rootCert) throws CertificateException { + ArrayList updatedList = new ArrayList<>(); + updatedList.add(scmCert); + updatedList.add(rootCert); + CertificateFactory certFactory = + CertificateCodec.getCertFactory(); + return certFactory.generateCertPath(updatedList); + } + + public CertPath getRootCertPath(X509Certificate rootCert) + throws CertificateException { + ArrayList updatedList = new ArrayList<>(); + 
updatedList.add(rootCert); + CertificateFactory factory = CertificateCodec.getCertFactory(); + return factory.generateCertPath(updatedList); + } +} From 5feb9eacd8318d8cd0106dc78a9fef7ddc4e8303 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran <47532440+swamirishi@users.noreply.github.com> Date: Fri, 13 Sep 2024 05:44:03 -0700 Subject: [PATCH 14/43] HDDS-11453. OmSnapshotPurge should be in a different ozone manager double buffer batch (#7188) --- .../om/ratis/OzoneManagerDoubleBuffer.java | 32 +++++++--- .../ratis/TestOzoneManagerDoubleBuffer.java | 63 +++++++++++++++---- 2 files changed, 72 insertions(+), 23 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java index a6fcc40dda1..8e4cc9fbf4d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java @@ -46,6 +46,7 @@ import org.apache.hadoop.ozone.om.codec.OMDBDefinition; import org.apache.hadoop.ozone.om.response.CleanupTableInfo; import org.apache.hadoop.ozone.om.response.OMClientResponse; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse; import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.Time; @@ -426,8 +427,12 @@ private String addToBatch(Queue buffer, BatchOperation batchOperation) { * in RocksDB callback flush. If multiple operations are flushed in one * specific batch, we are not sure at the flush of which specific operation * the callback is coming. - * There could be a possibility of race condition that is exposed to rocksDB - * behaviour for the batch. + * PurgeSnapshot is also considered a barrier, since purgeSnapshot transaction on a standalone basis is an + * idempotent operation. Once the directory gets deleted the previous transactions that have been performed on the + * snapshotted rocksdb would start failing on replay since those transactions have not been committed but the + * directory could have been partially deleted/ fully deleted. This could also lead to inconsistencies in the DB + * reads from the purged rocksdb if operations are not performed consciously. + * There could be a possibility of race condition that is exposed to rocksDB behaviour for the batch. * Hence, we treat createSnapshot as separate batch flush. *

* e.g. requestBuffer = [request1, request2, snapshotRequest1, @@ -435,19 +440,17 @@ private String addToBatch(Queue buffer, BatchOperation batchOperation) { * response = [[request1, request2], [snapshotRequest1], [request3], * [snapshotRequest2], [request4]] */ - private List> splitReadyBufferAtCreateSnapshot() { + private synchronized List> splitReadyBufferAtCreateSnapshot() { final List> response = new ArrayList<>(); - OMResponse previousOmResponse = null; for (final Entry entry : readyBuffer) { OMResponse omResponse = entry.getResponse().getOMResponse(); // New queue gets created in three conditions: // 1. It is first element in the response, - // 2. Current request is createSnapshot request. - // 3. Previous request was createSnapshot request. - if (response.isEmpty() || omResponse.hasCreateSnapshotResponse() - || (previousOmResponse != null && - previousOmResponse.hasCreateSnapshotResponse())) { + // 2. Current request is createSnapshot/purgeSnapshot request. + // 3. Previous request was createSnapshot/purgeSnapshot request. + if (response.isEmpty() || isStandaloneBatchCmdTypes(omResponse) + || isStandaloneBatchCmdTypes(previousOmResponse)) { response.add(new LinkedList<>()); } @@ -458,6 +461,15 @@ private List> splitReadyBufferAtCreateSnapshot() { return response; } + private static boolean isStandaloneBatchCmdTypes(OMResponse response) { + if (response == null) { + return false; + } + final OzoneManagerProtocolProtos.Type type = response.getCmdType(); + return type == OzoneManagerProtocolProtos.Type.SnapshotPurge + || type == OzoneManagerProtocolProtos.Type.CreateSnapshot; + } + private void addCleanupEntry(Entry entry, Map> cleanupEpochs) { Class responseClass = entry.getResponse().getClass(); @@ -612,7 +624,7 @@ int getCurrentBufferSize() { return currentBuffer.size(); } - int getReadyBufferSize() { + synchronized int getReadyBufferSize() { return readyBuffer.size(); } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBuffer.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBuffer.java index 125c9efcaf2..6e24c9ff93f 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBuffer.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBuffer.java @@ -44,9 +44,9 @@ import org.apache.hadoop.ozone.om.response.bucket.OMBucketCreateResponse; import org.apache.hadoop.ozone.om.response.key.OMKeyCreateResponse; import org.apache.hadoop.ozone.om.response.snapshot.OMSnapshotCreateResponse; +import org.apache.hadoop.ozone.om.response.snapshot.OMSnapshotPurgeResponse; import org.apache.hadoop.ozone.om.s3.S3SecretCacheProvider; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; -import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.CreateSnapshotResponse; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authentication.util.KerberosName; @@ -81,12 +81,12 @@ class TestOzoneManagerDoubleBuffer { private OzoneManagerDoubleBuffer doubleBuffer; private OzoneManager ozoneManager; private S3SecretLockedManager secretManager; - private final CreateSnapshotResponse snapshotResponse1 = mock(CreateSnapshotResponse.class); - private final CreateSnapshotResponse snapshotResponse2 = mock(CreateSnapshotResponse.class); private final 
OMResponse omKeyResponse = mock(OMResponse.class); private final OMResponse omBucketResponse = mock(OMResponse.class); private final OMResponse omSnapshotResponse1 = mock(OMResponse.class); private final OMResponse omSnapshotResponse2 = mock(OMResponse.class); + private final OMResponse omSnapshotPurgeResponseProto1 = mock(OMResponse.class); + private final OMResponse omSnapshotPurgeResponseProto2 = mock(OMResponse.class); private static OMClientResponse omKeyCreateResponse = mock(OMKeyCreateResponse.class); private static OMClientResponse omBucketCreateResponse = @@ -95,6 +95,9 @@ class TestOzoneManagerDoubleBuffer { mock(OMSnapshotCreateResponse.class); private static OMClientResponse omSnapshotCreateResponse2 = mock(OMSnapshotCreateResponse.class); + private static OMClientResponse omSnapshotPurgeResponse1 = mock(OMSnapshotPurgeResponse.class); + private static OMClientResponse omSnapshotPurgeResponse2 = mock(OMSnapshotPurgeResponse.class); + @TempDir private File tempDir; private OzoneManagerDoubleBuffer.FlushNotifier flushNotifier; @@ -143,19 +146,22 @@ public void setup() throws IOException { doNothing().when(omBucketCreateResponse).checkAndUpdateDB(any(), any()); doNothing().when(omSnapshotCreateResponse1).checkAndUpdateDB(any(), any()); doNothing().when(omSnapshotCreateResponse2).checkAndUpdateDB(any(), any()); + doNothing().when(omSnapshotPurgeResponse1).checkAndUpdateDB(any(), any()); + doNothing().when(omSnapshotPurgeResponse2).checkAndUpdateDB(any(), any()); when(omKeyResponse.getTraceID()).thenReturn("keyTraceId"); when(omBucketResponse.getTraceID()).thenReturn("bucketTraceId"); when(omSnapshotResponse1.getTraceID()).thenReturn("snapshotTraceId-1"); when(omSnapshotResponse2.getTraceID()).thenReturn("snapshotTraceId-2"); - when(omSnapshotResponse1.hasCreateSnapshotResponse()) - .thenReturn(true); - when(omSnapshotResponse2.hasCreateSnapshotResponse()) - .thenReturn(true); - when(omSnapshotResponse1.getCreateSnapshotResponse()) - .thenReturn(snapshotResponse1); - when(omSnapshotResponse2.getCreateSnapshotResponse()) - .thenReturn(snapshotResponse2); + when(omSnapshotPurgeResponseProto1.getTraceID()).thenReturn("snapshotPurgeTraceId-1"); + when(omSnapshotPurgeResponseProto2.getTraceID()).thenReturn("snapshotPurgeTraceId-2"); + + when(omKeyResponse.getCmdType()).thenReturn(OzoneManagerProtocolProtos.Type.CreateKey); + when(omBucketResponse.getCmdType()).thenReturn(OzoneManagerProtocolProtos.Type.CreateBucket); + when(omSnapshotPurgeResponseProto1.getCmdType()).thenReturn(OzoneManagerProtocolProtos.Type.SnapshotPurge); + when(omSnapshotPurgeResponseProto2.getCmdType()).thenReturn(OzoneManagerProtocolProtos.Type.SnapshotPurge); + when(omSnapshotResponse1.getCmdType()).thenReturn(OzoneManagerProtocolProtos.Type.SnapshotPurge); + when(omSnapshotResponse2.getCmdType()).thenReturn(OzoneManagerProtocolProtos.Type.SnapshotPurge); when(omKeyCreateResponse.getOMResponse()).thenReturn(omKeyResponse); when(omBucketCreateResponse.getOMResponse()).thenReturn(omBucketResponse); @@ -163,6 +169,10 @@ public void setup() throws IOException { .thenReturn(omSnapshotResponse1); when(omSnapshotCreateResponse2.getOMResponse()) .thenReturn(omSnapshotResponse2); + when(omSnapshotPurgeResponse1.getOMResponse()) + .thenReturn(omSnapshotPurgeResponseProto1); + when(omSnapshotPurgeResponse2.getOMResponse()) + .thenReturn(omSnapshotPurgeResponseProto2); } @AfterEach @@ -194,8 +204,35 @@ private static Stream doubleBufferFlushCases() { omSnapshotCreateResponse1, omSnapshotCreateResponse2, 
omBucketCreateResponse), - 4L, 4L, 14L, 16L, 1L, 1.142F) - ); + 4L, 4L, 14L, 16L, 1L, 1.142F), + Arguments.of(Arrays.asList(omSnapshotPurgeResponse1, + omSnapshotPurgeResponse2), + 2L, 2L, 16L, 18L, 1L, 1.125F), + Arguments.of(Arrays.asList(omKeyCreateResponse, + omBucketCreateResponse, + omSnapshotPurgeResponse1, + omSnapshotPurgeResponse2), + 3L, 4L, 19L, 22L, 2L, 1.157F), + Arguments.of(Arrays.asList(omKeyCreateResponse, + omSnapshotPurgeResponse1, + omBucketCreateResponse, + omSnapshotPurgeResponse2), + 4L, 4L, 23L, 26L, 1L, 1.1300F), + Arguments.of(Arrays.asList(omKeyCreateResponse, + omSnapshotPurgeResponse1, + omSnapshotPurgeResponse2, + omBucketCreateResponse), + 4L, 4L, 27L, 30L, 1L, 1.111F), + Arguments.of(Arrays.asList(omKeyCreateResponse, + omBucketCreateResponse, + omSnapshotPurgeResponse1, + omSnapshotCreateResponse1, + omSnapshotPurgeResponse2, + omBucketCreateResponse, + omSnapshotCreateResponse2), + 6L, 7L, 33L, 37L, 2L, 1.121F) + + ); } /** From a7d7e37fe18a70064e6f50f1152cdabfd0ab3932 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com> Date: Fri, 13 Sep 2024 16:30:05 +0200 Subject: [PATCH 15/43] HDDS-11456. Require successful dependency/licence checks for acceptance/compile/kubernetes (#7192) --- .github/workflows/ci.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bd0a12edd93..49034e7fe68 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -171,6 +171,8 @@ jobs: - build-info - build - basic + - dependency + - license timeout-minutes: 45 if: needs.build-info.outputs.needs-compile == 'true' strategy: @@ -407,6 +409,8 @@ jobs: - build-info - build - basic + - dependency + - license runs-on: ubuntu-20.04 timeout-minutes: 150 if: needs.build-info.outputs.needs-compose-tests == 'true' @@ -454,6 +458,8 @@ jobs: - build-info - build - basic + - dependency + - license runs-on: ubuntu-20.04 timeout-minutes: 60 if: needs.build-info.outputs.needs-kubernetes-tests == 'true' From 50f256300e96e4b86d6b9c693280887d2b243f98 Mon Sep 17 00:00:00 2001 From: Abhishek Pal <43001336+devabhishekpal@users.noreply.github.com> Date: Sat, 14 Sep 2024 21:00:38 +0530 Subject: [PATCH 16/43] HDDS-11419. Fix waitForCheckpointDirectoryExist log message (#7199) --- .../org/apache/hadoop/hdds/utils/db/RDBCheckpointUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBCheckpointUtils.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBCheckpointUtils.java index c47b176e93b..015cd10b8b9 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBCheckpointUtils.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBCheckpointUtils.java @@ -49,7 +49,7 @@ public static boolean waitForCheckpointDirectoryExist(File file, final boolean success = RatisHelper.attemptUntilTrue(file::exists, POLL_INTERVAL_DURATION, maxWaitTimeout); if (!success) { LOG.info("Checkpoint directory: {} didn't get created in {} secs.", - maxWaitTimeout.getSeconds(), file.getAbsolutePath()); + file.getAbsolutePath(), maxWaitTimeout.getSeconds()); } return success; } From 10c47a16e43974a54c0bd52c81206e1c25d53443 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 15 Sep 2024 18:16:19 +0200 Subject: [PATCH 17/43] HDDS-11459. 
Bump develocity-maven-extension to 1.22.1 (#7201) --- .mvn/extensions.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.mvn/extensions.xml b/.mvn/extensions.xml index 4e1cfae4489..20f1c034c58 100644 --- a/.mvn/extensions.xml +++ b/.mvn/extensions.xml @@ -24,7 +24,7 @@ com.gradle develocity-maven-extension - 1.22 + 1.22.1 com.gradle From 9f5bf439107155f291bb4fb78e115ef3a05be6f5 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com> Date: Mon, 16 Sep 2024 09:02:24 +0200 Subject: [PATCH 18/43] HDDS-11457. Internal error on S3 CompleteMultipartUpload if parts are not specified (#7195) --- .../dist/src/main/smoketest/s3/MultipartUpload.robot | 5 +++++ .../apache/hadoop/ozone/s3/AuthorizationFilter.java | 9 +-------- .../CompleteMultipartUploadRequestUnmarshaller.java | 11 ++++++++++- .../s3/endpoint/MultiDeleteRequestUnmarshaller.java | 6 ++++-- .../s3/endpoint/PutBucketAclRequestUnmarshaller.java | 4 +++- .../hadoop/ozone/s3/exception/OS3Exception.java | 5 +++++ .../java/org/apache/hadoop/ozone/s3/util/S3Utils.java | 9 +++++++++ 7 files changed, 37 insertions(+), 12 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/MultipartUpload.robot b/hadoop-ozone/dist/src/main/smoketest/s3/MultipartUpload.robot index dd06d55f75f..d62a217e606 100644 --- a/hadoop-ozone/dist/src/main/smoketest/s3/MultipartUpload.robot +++ b/hadoop-ozone/dist/src/main/smoketest/s3/MultipartUpload.robot @@ -107,6 +107,11 @@ Test Multipart Upload Complete ${part2Md5Sum} = Execute md5sum /tmp/part2 | awk '{print $1}' Should Be Equal As Strings ${eTag2} ${part2Md5Sum} +#complete multipart upload without any parts + ${result} = Execute AWSS3APICli and checkrc complete-multipart-upload --upload-id ${uploadID} --bucket ${BUCKET} --key ${PREFIX}/multipartKey1 255 + Should contain ${result} InvalidRequest + Should contain ${result} must specify at least one part + #complete multipart upload ${result} = Execute AWSS3APICli complete-multipart-upload --upload-id ${uploadID} --bucket ${BUCKET} --key ${PREFIX}/multipartKey1 --multipart-upload 'Parts=[{ETag=${eTag1},PartNumber=1},{ETag=${eTag2},PartNumber=2}]' Should contain ${result} ${BUCKET} diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/AuthorizationFilter.java b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/AuthorizationFilter.java index d49ff17f3bf..cc63663bf22 100644 --- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/AuthorizationFilter.java +++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/AuthorizationFilter.java @@ -19,11 +19,9 @@ import javax.annotation.Priority; import javax.inject.Inject; -import javax.ws.rs.WebApplicationException; import javax.ws.rs.container.ContainerRequestContext; import javax.ws.rs.container.ContainerRequestFilter; import javax.ws.rs.container.PreMatching; -import javax.ws.rs.core.Response; import javax.ws.rs.ext.Provider; import com.google.common.annotations.VisibleForTesting; @@ -41,6 +39,7 @@ import static org.apache.hadoop.ozone.s3.exception.S3ErrorTable.ACCESS_DENIED; import static org.apache.hadoop.ozone.s3.exception.S3ErrorTable.INTERNAL_ERROR; import static org.apache.hadoop.ozone.s3.exception.S3ErrorTable.S3_AUTHINFO_CREATION_ERROR; +import static org.apache.hadoop.ozone.s3.util.S3Utils.wrapOS3Exception; /** * Filter used to construct string to sign from unfiltered request. 
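A minimal sketch of the empty-body guard this patch adds to CompleteMultipartUploadRequestUnmarshaller, written as a JUnit-style check. Only the class, helper, and error-code names come from the patch; the test wiring, the nulls for the unused readFrom arguments, and the package placement are assumptions for illustration.

    // Assumed to live in org.apache.hadoop.ozone.s3.endpoint so the unmarshaller is visible.
    import java.io.ByteArrayInputStream;
    import javax.ws.rs.WebApplicationException;
    import org.junit.jupiter.api.Test;
    import static org.junit.jupiter.api.Assertions.assertEquals;
    import static org.junit.jupiter.api.Assertions.assertThrows;

    class CompleteMultipartUploadEmptyBodySketch {
      @Test
      void emptyBodyIsRejectedAsInvalidRequest() {
        CompleteMultipartUploadRequestUnmarshaller unmarshaller =
            new CompleteMultipartUploadRequestUnmarshaller();
        // None of the other readFrom arguments are consulted before the
        // inputStream.available() == 0 check, so nulls are passed here.
        WebApplicationException e = assertThrows(WebApplicationException.class,
            () -> unmarshaller.readFrom(null, null, null, null, null,
                new ByteArrayInputStream(new byte[0])));
        // INVALID_REQUEST is surfaced through wrapOS3Exception as HTTP 400
        // with the "must specify at least one part" message.
        assertEquals(400, e.getResponse().getStatus());
      }
    }

This mirrors what the MultipartUpload.robot case above asserts through the AWS CLI: completing a multipart upload with no parts now fails fast with InvalidRequest instead of an internal error, and the unmarshaller change further below is where that guard lives.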
@@ -116,10 +115,4 @@ public SignatureInfo getSignatureInfo() { return signatureInfo; } - private WebApplicationException wrapOS3Exception(OS3Exception os3Exception) { - return new WebApplicationException(os3Exception.getErrorMessage(), - os3Exception, - Response.status(os3Exception.getHttpCode()) - .entity(os3Exception.toXml()).build()); - } } diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/CompleteMultipartUploadRequestUnmarshaller.java b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/CompleteMultipartUploadRequestUnmarshaller.java index cdaaa228ecd..5881baa174b 100644 --- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/CompleteMultipartUploadRequestUnmarshaller.java +++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/CompleteMultipartUploadRequestUnmarshaller.java @@ -34,7 +34,9 @@ import java.lang.reflect.Type; import javax.ws.rs.ext.Provider; +import static org.apache.hadoop.ozone.s3.exception.S3ErrorTable.INVALID_REQUEST; import static org.apache.hadoop.ozone.s3.util.S3Consts.S3_XML_NAMESPACE; +import static org.apache.hadoop.ozone.s3.util.S3Utils.wrapOS3Exception; /** * Custom unmarshaller to read CompleteMultipartUploadRequest wo namespace. @@ -69,6 +71,10 @@ public CompleteMultipartUploadRequest readFrom( MultivaluedMap multivaluedMap, InputStream inputStream) throws IOException, WebApplicationException { try { + if (inputStream.available() == 0) { + throw wrapOS3Exception(INVALID_REQUEST.withMessage("You must specify at least one part")); + } + XMLReader xmlReader = saxParserFactory.newSAXParser().getXMLReader(); UnmarshallerHandler unmarshallerHandler = context.createUnmarshaller().getUnmarshallerHandler(); @@ -78,8 +84,11 @@ public CompleteMultipartUploadRequest readFrom( filter.setParent(xmlReader); filter.parse(new InputSource(inputStream)); return (CompleteMultipartUploadRequest) unmarshallerHandler.getResult(); + } catch (WebApplicationException e) { + throw e; } catch (Exception e) { - throw new WebApplicationException("Can't parse request body to XML.", e); + throw wrapOS3Exception(INVALID_REQUEST.withMessage(e.getMessage())); } } + } diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/MultiDeleteRequestUnmarshaller.java b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/MultiDeleteRequestUnmarshaller.java index 0c34c08091a..775ec789f38 100644 --- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/MultiDeleteRequestUnmarshaller.java +++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/MultiDeleteRequestUnmarshaller.java @@ -18,7 +18,6 @@ package org.apache.hadoop.ozone.s3.endpoint; import javax.ws.rs.Produces; -import javax.ws.rs.WebApplicationException; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.MultivaluedMap; import javax.ws.rs.ext.MessageBodyReader; @@ -34,6 +33,9 @@ import org.xml.sax.InputSource; import org.xml.sax.XMLReader; +import static org.apache.hadoop.ozone.s3.exception.S3ErrorTable.INVALID_REQUEST; +import static org.apache.hadoop.ozone.s3.util.S3Utils.wrapOS3Exception; + /** * Custom unmarshaller to read MultiDeleteRequest w/wo namespace. 
*/ @@ -78,7 +80,7 @@ public MultiDeleteRequest readFrom(Class type, filter.parse(new InputSource(entityStream)); return (MultiDeleteRequest) unmarshallerHandler.getResult(); } catch (Exception e) { - throw new WebApplicationException("Can't parse request body to XML.", e); + throw wrapOS3Exception(INVALID_REQUEST.withMessage(e.getMessage())); } } } diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/PutBucketAclRequestUnmarshaller.java b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/PutBucketAclRequestUnmarshaller.java index 3fa6149815e..c832915176b 100644 --- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/PutBucketAclRequestUnmarshaller.java +++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/endpoint/PutBucketAclRequestUnmarshaller.java @@ -34,7 +34,9 @@ import java.lang.annotation.Annotation; import java.lang.reflect.Type; +import static org.apache.hadoop.ozone.s3.exception.S3ErrorTable.INVALID_REQUEST; import static org.apache.hadoop.ozone.s3.util.S3Consts.S3_XML_NAMESPACE; +import static org.apache.hadoop.ozone.s3.util.S3Utils.wrapOS3Exception; /** * Custom unmarshaller to read PutBucketAclRequest wo namespace. @@ -79,7 +81,7 @@ public S3BucketAcl readFrom( filter.parse(new InputSource(inputStream)); return (S3BucketAcl)(unmarshallerHandler.getResult()); } catch (Exception e) { - throw new WebApplicationException("Can't parse request body to XML.", e); + throw wrapOS3Exception(INVALID_REQUEST.withMessage(e.getMessage())); } } } diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/exception/OS3Exception.java b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/exception/OS3Exception.java index 810aa2085f4..3660457146f 100644 --- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/exception/OS3Exception.java +++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/exception/OS3Exception.java @@ -158,4 +158,9 @@ public String toXml() { this.getErrorMessage(), this.getResource(), this.getRequestId()); } + + /** Create a copy with specific message. */ + public OS3Exception withMessage(String message) { + return new OS3Exception(code, message, httpCode); + } } diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/util/S3Utils.java b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/util/S3Utils.java index d644162a8ec..fda298f27dc 100644 --- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/util/S3Utils.java +++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/util/S3Utils.java @@ -23,6 +23,8 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.ozone.s3.exception.OS3Exception; +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.Response; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.net.URLEncoder; @@ -116,4 +118,11 @@ public static S3StorageType toS3StorageType(String storageType) throw newError(INVALID_ARGUMENT, storageType, ex); } } + + public static WebApplicationException wrapOS3Exception(OS3Exception ex) { + return new WebApplicationException(ex.getErrorMessage(), ex, + Response.status(ex.getHttpCode()) + .entity(ex.toXml()) + .build()); + } } From 22ddfb951a20d5b05e60bdeb805f455952709ac6 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Mon, 16 Sep 2024 16:36:30 +0200 Subject: [PATCH 19/43] Revert "HDDS-11456. 
Require successful dependency/licence checks for acceptance/compile/kubernetes (#7192)" This reverts commit a7d7e37fe18a70064e6f50f1152cdabfd0ab3932. --- .github/workflows/ci.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 49034e7fe68..bd0a12edd93 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -171,8 +171,6 @@ jobs: - build-info - build - basic - - dependency - - license timeout-minutes: 45 if: needs.build-info.outputs.needs-compile == 'true' strategy: @@ -409,8 +407,6 @@ jobs: - build-info - build - basic - - dependency - - license runs-on: ubuntu-20.04 timeout-minutes: 150 if: needs.build-info.outputs.needs-compose-tests == 'true' @@ -458,8 +454,6 @@ jobs: - build-info - build - basic - - dependency - - license runs-on: ubuntu-20.04 timeout-minutes: 60 if: needs.build-info.outputs.needs-kubernetes-tests == 'true' From e0060a8f50fc8c919e08dbba2294b29d9fb5c84a Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Tue, 17 Sep 2024 18:07:44 +0800 Subject: [PATCH 20/43] HDDS-11196. Improve SCM WebUI Display (#6960) --- .../main/resources/webapps/static/ozone.css | 4 ++ .../webapps/static/templates/jvm.html | 2 +- .../hadoop/hdds/scm/node/SCMNodeManager.java | 21 ++++++++ .../hadoop/hdds/scm/server/SCMMXBean.java | 3 +- .../scm/server/StorageContainerManager.java | 51 +++++++++++++++++-- .../resources/webapps/scm/scm-overview.html | 39 ++++++++++++-- .../src/main/resources/webapps/scm/scm.js | 7 +++ 7 files changed, 118 insertions(+), 9 deletions(-) diff --git a/hadoop-hdds/framework/src/main/resources/webapps/static/ozone.css b/hadoop-hdds/framework/src/main/resources/webapps/static/ozone.css index e08e9c52060..389d9d78f21 100644 --- a/hadoop-hdds/framework/src/main/resources/webapps/static/ozone.css +++ b/hadoop-hdds/framework/src/main/resources/webapps/static/ozone.css @@ -91,3 +91,7 @@ body { .om-roles-background { background-color: #dcfbcd!important; } + +.scm-roles-background { + background-color: #dcfbcd!important; +} \ No newline at end of file diff --git a/hadoop-hdds/framework/src/main/resources/webapps/static/templates/jvm.html b/hadoop-hdds/framework/src/main/resources/webapps/static/templates/jvm.html index c1f7d16aefa..9706ebdf6b3 100644 --- a/hadoop-hdds/framework/src/main/resources/webapps/static/templates/jvm.html +++ b/hadoop-hdds/framework/src/main/resources/webapps/static/templates/jvm.html @@ -21,6 +21,6 @@ Input arguments: - {{$ctrl.jmx.InputArguments}} +

{{$ctrl.jmx.InputArguments.join('\n')}}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index fa8f316aa42..7121d8f7a9d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -146,6 +146,8 @@ public class SCMNodeManager implements NodeManager { private static final String LASTHEARTBEAT = "LASTHEARTBEAT"; private static final String USEDSPACEPERCENT = "USEDSPACEPERCENT"; private static final String TOTALCAPACITY = "CAPACITY"; + private static final String DNUUID = "UUID"; + private static final String VERSION = "VERSION"; /** * Constructs SCM machine Manager. */ @@ -447,6 +449,11 @@ public RegisteredCommand register( processNodeReport(datanodeDetails, nodeReport); LOG.info("Updated datanode to: {}", dn); scmNodeEventPublisher.fireEvent(SCMEvents.NODE_ADDRESS_UPDATE, dn); + } else if (isVersionChange(oldNode.getVersion(), datanodeDetails.getVersion())) { + LOG.info("Update the version for registered datanode = {}, " + + "oldVersion = {}, newVersion = {}.", + datanodeDetails.getUuid(), oldNode.getVersion(), datanodeDetails.getVersion()); + nodeStateManager.updateNode(datanodeDetails, layoutInfo); } } catch (NodeNotFoundException e) { LOG.error("Cannot find datanode {} from nodeStateManager", @@ -508,6 +515,18 @@ private boolean updateDnsToUuidMap( return ipChanged || hostNameChanged; } + /** + * Check if the version has been updated. + * + * @param oldVersion datanode oldVersion + * @param newVersion datanode newVersion + * @return true means replacement is needed, while false means replacement is not needed. + */ + private boolean isVersionChange(String oldVersion, String newVersion) { + final boolean versionChanged = !Objects.equals(oldVersion, newVersion); + return versionChanged; + } + /** * Send heartbeat to indicate the datanode is alive and doing well. * @@ -1136,6 +1155,8 @@ public Map> getNodeStatusInfo() { String nonScmUsedPerc = storagePercentage[1]; map.put(USEDSPACEPERCENT, "Ozone: " + scmUsedPerc + "%, other: " + nonScmUsedPerc + "%"); + map.put(DNUUID, dni.getUuidString()); + map.put(VERSION, dni.getVersion()); nodes.put(hostName, map); } return nodes; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMMXBean.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMMXBean.java index de609356b22..75a5193116c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMMXBean.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMMXBean.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.scm.server; +import java.util.List; import java.util.Map; import org.apache.hadoop.hdds.annotation.InterfaceAudience; @@ -72,7 +73,7 @@ public interface SCMMXBean extends ServiceRuntimeInfo { String getClusterId(); - String getScmRatisRoles(); + List> getScmRatisRoles(); /** * Primordial node is the node on which scm init operation is performed. 
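A minimal sketch of how the reworked getScmRatisRoles() value feeds the new SCM Roles (HA) table in scm-overview.html. The colon-separated layout of each raw role string (host, Ratis port, role, node id) is inferred from the index mapping in the StorageContainerManager change below, and the sample value is hypothetical.

    import java.util.Arrays;
    import java.util.List;

    public class ScmRatisRoleRowSketch {
      public static void main(String[] args) {
        // Hypothetical entry of the form assumed from SCMRatisServer#getRatisRoles().
        String raw = "scm1.org:9894:LEADER:scm1";
        String[] f = raw.split(":");
        // Reordered the same way as getScmRatisRoles():
        // [host name, node id, ratis port, role]
        List<String> row = Arrays.asList(f[0], f[3], f[1], f[2]);
        System.out.println(row);  // [scm1.org, scm1, 9894, LEADER]
      }
    }

Each inner list becomes one row of the Host Name / Node ID / Ratis Port / Role table, while the error cases (Ratis disabled, no leader, server stopping) come back as a single-element list so the UI can show the message in place of the table.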
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 868e54f1935..5f69d9fee2b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -171,6 +171,7 @@ import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.hadoop.util.ReflectionUtils; import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.server.RaftServer; import org.apache.ratis.util.ExitUtils; import org.apache.ratis.util.JvmPauseMonitor; import org.slf4j.Logger; @@ -2131,10 +2132,54 @@ public ContainerTokenGenerator getContainerTokenGenerator() { } @Override - public String getScmRatisRoles() { + public List> getScmRatisRoles() { final SCMRatisServer server = getScmHAManager().getRatisServer(); - return server != null ? - HddsUtils.format(server.getRatisRoles()) : "STANDALONE"; + + // If Ratis is disabled + if (server == null) { + return getRatisRolesException("Ratis is disabled"); + } + + // To attempt to find the SCM Leader, + // and if the Leader is not found + // return Leader is not found message. + RaftServer.Division division = server.getDivision(); + RaftPeerId leaderId = division.getInfo().getLeaderId(); + if (leaderId == null) { + return getRatisRolesException("No leader found"); + } + + // If the SCMRatisServer is stopped, return a service stopped message. + if (server.isStopped()) { + return getRatisRolesException("Server is shutting down"); + } + + // Attempt to retrieve role information. + try { + List ratisRoles = server.getRatisRoles(); + List> result = new ArrayList<>(); + for (String role : ratisRoles) { + String[] roleArr = role.split(":"); + List scmInfo = new ArrayList<>(); + // Host Name + scmInfo.add(roleArr[0]); + // Node ID + scmInfo.add(roleArr[3]); + // Ratis Port + scmInfo.add(roleArr[1]); + // Role + scmInfo.add(roleArr[2]); + result.add(scmInfo); + } + return result; + } catch (Exception e) { + LOG.error("Failed to getRatisRoles.", e); + return getRatisRolesException("Exception Occurred, " + e.getMessage()); + } + } + + private static List> getRatisRolesException(String exceptionString) { + return Collections.singletonList(Collections.singletonList(exceptionString)); } /** diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html index 3f825d4e25f..0f233bf4ea1 100644 --- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html +++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html @@ -140,6 +140,10 @@

Node Status

'sortdesc':(columnName == 'comstate' && !reverse)}">Commisioned State Last Heartbeat + UUID + Version @@ -157,6 +161,8 @@

Node Status

{{typestat.opstate}} {{typestat.comstate}} {{typestat.lastheartbeat}} + {{typestat.uuid}} + {{typestat.version}} @@ -210,10 +216,6 @@

Status

Force Exit Safe Mode {{$ctrl.overview.jmx.SafeModeExitForceful}} - - SCM Roles (HA) - {{$ctrl.overview.jmx.ScmRatisRoles}} - Primordial Node (HA) {{$ctrl.overview.jmx.PrimordialNode}} @@ -235,6 +237,35 @@

Meta-Data Volume Information

+

SCM Roles (HA)

+

{{$ctrl.overview.jmx.ScmRatisRoles[0][0]}}

+
+ + + + + + + + + + + + + + + + + + + + + + + +
Host Name   Node ID   Ratis Port   Role
{{roles[0]}}   {{roles[1]}}   {{roles[2]}}   {{roles[3]}}
{{roles[0]}}   {{roles[1]}}   {{roles[2]}}   {{roles[3]}}
+
+

Safemode rules statuses

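
To tie the "SCM Roles (HA)" table above back to the server side: getScmRatisRoles() in StorageContainerManager now returns one row per SCM peer instead of a single formatted string, splitting each Ratis role string on ':' and reordering the pieces into the Host Name, Node ID, Ratis Port and Role columns. A standalone sketch of that mapping is below; it assumes the "<hostname>:<ratisPort>:<role>:<nodeId>" layout implied by the array indices used in the patch, and the sample values are made up:

import java.util.Arrays;
import java.util.List;

// Illustrative sketch, not the patched class itself: mirrors the parsing in
// StorageContainerManager#getScmRatisRoles() and produces the column order
// rendered by the new SCM Roles (HA) table.
final class ScmRoleRowExample {
  static List<String> toRow(String ratisRole) {
    String[] parts = ratisRole.split(":");
    // parts[0] = host name, parts[1] = Ratis port, parts[2] = role, parts[3] = node id
    return Arrays.asList(parts[0], parts[3], parts[1], parts[2]);
  }

  public static void main(String[] args) {
    // Sample input is hypothetical.
    System.out.println(toRow("scm1.example.com:9894:LEADER:scm-node-1"));
    // -> [scm1.example.com, scm-node-1, 9894, LEADER]
  }
}
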
diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js index 6fac6849530..e00f8b8ede8 100644 --- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js +++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js @@ -56,6 +56,11 @@ } } + $http.get("jmx?qry=Ratis:service=RaftServer,group=*,id=*") + .then(function (result) { + ctrl.role = result.data.beans[0]; + }); + function get_protocol(URLScheme, value, baseProto, fallbackProto) { let protocol = "unknown" let port = -1; @@ -95,6 +100,8 @@ capacity: value && value.find((element) => element.key === "CAPACITY").value, comstate: value && value.find((element) => element.key === "COMSTATE").value, lastheartbeat: value && value.find((element) => element.key === "LASTHEARTBEAT").value, + uuid: value && value.find((element) => element.key === "UUID").value, + version: value && value.find((element) => element.key === "VERSION").value, port: portSpec.port, protocol: portSpec.proto } From 88dd4369e2336b9a8a5b07ecf53afbf9af3ec820 Mon Sep 17 00:00:00 2001 From: Tejaskriya <87555809+Tejaskriya@users.noreply.github.com> Date: Tue, 17 Sep 2024 15:40:19 +0530 Subject: [PATCH 21/43] HDDS-11423. Implement equals operation for --filter option to ozone ldb scan (#7167) --- .../smoketest/debug/ozone-debug-ldb.robot | 93 ++++++++++ .../apache/hadoop/ozone/debug/TestLDBCli.java | 12 ++ .../apache/hadoop/ozone/debug/DBScanner.java | 169 ++++++++++++++++-- .../org/apache/hadoop/ozone/utils/Filter.java | 107 +++++++++++ 4 files changed, 370 insertions(+), 11 deletions(-) create mode 100644 hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-ldb.robot create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/utils/Filter.java diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-ldb.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-ldb.robot new file mode 100644 index 00000000000..e006e154af1 --- /dev/null +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-ldb.robot @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +*** Settings *** +Documentation Test ozone debug ldb CLI +Library OperatingSystem +Resource ../lib/os.robot +Test Timeout 5 minute +Suite Setup Write keys + +*** Variables *** +${PREFIX} ${EMPTY} +${VOLUME} cli-debug-volume${PREFIX} +${BUCKET} cli-debug-bucket +${DEBUGKEY} debugKey +${TESTFILE} testfile + +*** Keywords *** +Write keys + Run Keyword if '${SECURITY_ENABLED}' == 'true' Kinit test user testuser testuser.keytab + Execute ozone sh volume create ${VOLUME} + Execute ozone sh bucket create ${VOLUME}/${BUCKET} -l OBJECT_STORE + Execute dd if=/dev/urandom of=${TEMP_DIR}/${TESTFILE} bs=100000 count=15 + Execute ozone sh key put ${VOLUME}/${BUCKET}/${TESTFILE}1 ${TEMP_DIR}/${TESTFILE} + Execute ozone sh key put ${VOLUME}/${BUCKET}/${TESTFILE}2 ${TEMP_DIR}/${TESTFILE} + Execute ozone sh key put ${VOLUME}/${BUCKET}/${TESTFILE}3 ${TEMP_DIR}/${TESTFILE} + Execute ozone sh key addacl -a user:systest:a ${VOLUME}/${BUCKET}/${TESTFILE}3 + +*** Test Cases *** +Test ozone debug ldb ls + ${output} = Execute ozone debug ldb --db=/data/metadata/om.db ls + Should contain ${output} keyTable + +Test ozone debug ldb scan + # test count option + ${output} = Execute ozone debug ldb --db=/data/metadata/om.db scan --cf=keyTable --count + Should Not Be Equal ${output} 0 + # test valid json for scan command + ${output} = Execute ozone debug ldb --db=/data/metadata/om.db scan --cf=keyTable | jq -r '.' + Should contain ${output} keyName + Should contain ${output} testfile1 + Should contain ${output} testfile2 + Should contain ${output} testfile3 + # test startkey option + ${output} = Execute ozone debug ldb --db=/data/metadata/om.db scan --cf=keyTable --startkey="/cli-debug-volume/cli-debug-bucket/testfile2" + Should not contain ${output} testfile1 + Should contain ${output} testfile2 + Should contain ${output} testfile3 + # test endkey option + ${output} = Execute ozone debug ldb --db=/data/metadata/om.db scan --cf=keyTable --endkey="/cli-debug-volume/cli-debug-bucket/testfile2" + Should contain ${output} testfile1 + Should contain ${output} testfile2 + Should not contain ${output} testfile3 + # test fields option + ${output} = Execute ozone debug ldb --db=/data/metadata/om.db scan --cf=keyTable --fields="volumeName,bucketName,keyName" + Should contain ${output} volumeName + Should contain ${output} bucketName + Should contain ${output} keyName + Should not contain ${output} objectID + Should not contain ${output} dataSize + Should not contain ${output} keyLocationVersions + # test filter option with one filter + ${output} = Execute ozone debug ldb --db=/data/metadata/om.db scan --cf=keyTable --filter="keyName:equals:testfile2" + Should not contain ${output} testfile1 + Should contain ${output} testfile2 + Should not contain ${output} testfile3 + # test filter option with one multi-level filter + ${output} = Execute ozone debug ldb --db=/data/metadata/om.db scan --cf=keyTable --filter="acls.name:equals:systest" + Should not contain ${output} testfile1 + Should not contain ${output} testfile2 + Should contain ${output} testfile3 + # test filter option with multiple filter + ${output} = Execute ozone debug ldb --db=/data/metadata/om.db scan --cf=keyTable --filter="keyName:equals:testfile3,acls.name:equals:systest" + Should not contain ${output} testfile1 + Should not contain ${output} testfile2 + Should contain ${output} testfile3 + # test filter option with no records match both filters + ${output} = Execute ozone debug ldb --db=/data/metadata/om.db scan --cf=keyTable 
--filter="acls.name:equals:systest,keyName:equals:testfile2" + Should not contain ${output} testfile1 + Should not contain ${output} testfile2 + Should not contain ${output} testfile3 diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/debug/TestLDBCli.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/debug/TestLDBCli.java index 7af0b5f9aa1..a4327a49bfa 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/debug/TestLDBCli.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/debug/TestLDBCli.java @@ -170,6 +170,18 @@ private static Stream scanTestCases() { Named.of("Invalid EndKey key9", Arrays.asList("--endkey", "key9")), Named.of("Expect key1-key5", Pair.of("key1", "key6")) ), + Arguments.of( + Named.of(KEY_TABLE, Pair.of(KEY_TABLE, false)), + Named.of("Default", Pair.of(0, "")), + Named.of("Filter key3", Arrays.asList("--filter", "keyName:equals:key3")), + Named.of("Expect key3", Pair.of("key3", "key4")) + ), + Arguments.of( + Named.of(KEY_TABLE, Pair.of(KEY_TABLE, false)), + Named.of("Default", Pair.of(0, "")), + Named.of("Filter invalid key", Arrays.asList("--filter", "keyName:equals:key9")), + Named.of("Expect key1-key3", null) + ), Arguments.of( Named.of(BLOCK_DATA + " V3", Pair.of(BLOCK_DATA, true)), Named.of("Default", Pair.of(0, "")), diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java index 4653aa3eeb3..5e1207519ab 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java @@ -44,6 +44,7 @@ import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.metadata.DatanodeSchemaThreeDBDefinition; +import org.apache.hadoop.ozone.utils.Filter; import org.kohsuke.MetaInfServices; import org.rocksdb.ColumnFamilyDescriptor; import org.rocksdb.ColumnFamilyHandle; @@ -128,6 +129,14 @@ public class DBScanner implements Callable, SubcommandWithParent { "eg.) \"name,acls.type\" for showing name and type under acls.") private String fieldsFilter; + @CommandLine.Option(names = {"--filter"}, + description = "Comma-separated list of \"::\" where " + + " is any valid field of the record, " + + " is (EQUALS,MAX or MIN) and " + + " is the value of the field. " + + "eg.) 
\"dataSize:equals:1000\" for showing records having the value 1000 for dataSize") + private String filter; + @CommandLine.Option(names = {"--dnSchema", "--dn-schema", "-d"}, description = "Datanode DB Schema Version: V1/V2/V3", defaultValue = "V3") @@ -298,7 +307,7 @@ private void processRecords(ManagedRocksIterator iterator, } Future future = threadPool.submit( new Task(dbColumnFamilyDef, batch, logWriter, sequenceId, - withKey, schemaV3, fieldsFilter)); + withKey, schemaV3, fieldsFilter, filter)); futures.add(future); batch = new ArrayList<>(batchSize); sequenceId++; @@ -306,7 +315,7 @@ private void processRecords(ManagedRocksIterator iterator, } if (!batch.isEmpty()) { Future future = threadPool.submit(new Task(dbColumnFamilyDef, - batch, logWriter, sequenceId, withKey, schemaV3, fieldsFilter)); + batch, logWriter, sequenceId, withKey, schemaV3, fieldsFilter, filter)); futures.add(future); } @@ -473,10 +482,12 @@ private static class Task implements Callable { private final boolean withKey; private final boolean schemaV3; private String valueFields; + private String valueFilter; + @SuppressWarnings("checkstyle:parameternumber") Task(DBColumnFamilyDefinition dbColumnFamilyDefinition, ArrayList batch, LogWriter logWriter, - long sequenceId, boolean withKey, boolean schemaV3, String valueFields) { + long sequenceId, boolean withKey, boolean schemaV3, String valueFields, String filter) { this.dbColumnFamilyDefinition = dbColumnFamilyDefinition; this.batch = batch; this.logWriter = logWriter; @@ -484,6 +495,7 @@ private static class Task implements Callable { this.withKey = withKey; this.schemaV3 = schemaV3; this.valueFields = valueFields; + this.valueFilter = filter; } Map getFieldSplit(List fields, Map fieldMap) { @@ -504,6 +516,31 @@ Map getFieldSplit(List fields, Map field return fieldMap; } + void getFilterSplit(List fields, Map fieldMap, Filter leafValue) throws IOException { + int len = fields.size(); + if (len == 1) { + Filter currentValue = fieldMap.get(fields.get(0)); + if (currentValue != null) { + err().println("Cannot pass multiple values for the same field and " + + "cannot have filter for both parent and child"); + throw new IOException("Invalid filter passed"); + } + fieldMap.put(fields.get(0), leafValue); + } else { + Filter fieldMapGet = fieldMap.computeIfAbsent(fields.get(0), k -> new Filter()); + if (fieldMapGet.getValue() != null) { + err().println("Cannot pass multiple values for the same field and " + + "cannot have filter for both parent and child"); + throw new IOException("Invalid filter passed"); + } + Map nextLevel = fieldMapGet.getNextLevel(); + if (nextLevel == null) { + fieldMapGet.setNextLevel(new HashMap<>()); + } + getFilterSplit(fields.subList(1, len), fieldMapGet.getNextLevel(), leafValue); + } + } + @Override public Void call() { try { @@ -517,6 +554,26 @@ public Void call() { } } + Map fieldsFilterSplitMap = new HashMap<>(); + if (valueFilter != null) { + for (String field : valueFilter.split(",")) { + String[] fieldValue = field.split(":"); + if (fieldValue.length != 3) { + err().println("Error: Invalid format for filter \"" + field + + "\". Usage: ::. Ignoring filter passed"); + } else { + Filter filter = new Filter(fieldValue[1], fieldValue[2]); + if (filter.getOperator() == null) { + err().println("Error: Invalid format for filter \"" + filter + + "\". can be one of [EQUALS,MIN,MAX]. 
Ignoring filter passed"); + } else { + String[] subfields = fieldValue[0].split("\\."); + getFilterSplit(Arrays.asList(subfields), fieldsFilterSplitMap, filter); + } + } + } + } + for (ByteArrayKeyValue byteArrayKeyValue : batch) { StringBuilder sb = new StringBuilder(); if (!(sequenceId == FIRST_SEQUENCE_ID && results.isEmpty())) { @@ -552,9 +609,14 @@ public Void call() { Object o = dbColumnFamilyDefinition.getValueCodec() .fromPersistedFormat(byteArrayKeyValue.getValue()); + if (valueFilter != null && + !checkFilteredObject(o, dbColumnFamilyDefinition.getValueType(), fieldsFilterSplitMap)) { + // the record doesn't pass the filter + continue; + } if (valueFields != null) { Map filteredValue = new HashMap<>(); - filteredValue.putAll(getFilteredObject(o, dbColumnFamilyDefinition.getValueType(), fieldsSplitMap)); + filteredValue.putAll(getFieldsFilteredObject(o, dbColumnFamilyDefinition.getValueType(), fieldsSplitMap)); sb.append(WRITER.writeValueAsString(filteredValue)); } else { sb.append(WRITER.writeValueAsString(o)); @@ -570,7 +632,92 @@ public Void call() { return null; } - Map getFilteredObject(Object obj, Class clazz, Map fieldsSplitMap) { + boolean checkFilteredObject(Object obj, Class clazz, Map fieldsSplitMap) + throws IOException { + for (Map.Entry field : fieldsSplitMap.entrySet()) { + try { + Field valueClassField = getRequiredFieldFromAllFields(clazz, field.getKey()); + Object valueObject = valueClassField.get(obj); + Filter fieldValue = field.getValue(); + + if (valueObject == null) { + // there is no such field in the record. This filter will be ignored for the current record. + continue; + } + if (fieldValue == null) { + err().println("Malformed filter. Check input"); + throw new IOException("Invalid filter passed"); + } else if (fieldValue.getNextLevel() == null) { + // reached the end of fields hierarchy, check if they match the filter + // Currently, only equals operation is supported + if (Filter.FilterOperator.EQUALS.equals(fieldValue.getOperator()) && + !String.valueOf(valueObject).equals(fieldValue.getValue())) { + return false; + } else if (!Filter.FilterOperator.EQUALS.equals(fieldValue.getOperator())) { + err().println("Only EQUALS operator is supported currently."); + throw new IOException("Invalid filter passed"); + } + } else { + Map subfields = fieldValue.getNextLevel(); + if (Collection.class.isAssignableFrom(valueObject.getClass())) { + if (!checkFilteredObjectCollection((Collection) valueObject, subfields)) { + return false; + } + } else if (Map.class.isAssignableFrom(valueObject.getClass())) { + Map valueObjectMap = (Map) valueObject; + boolean flag = false; + for (Map.Entry ob : valueObjectMap.entrySet()) { + boolean subflag; + if (Collection.class.isAssignableFrom(ob.getValue().getClass())) { + subflag = checkFilteredObjectCollection((Collection)ob.getValue(), subfields); + } else { + subflag = checkFilteredObject(ob.getValue(), ob.getValue().getClass(), subfields); + } + if (subflag) { + // atleast one item in the map/list of the record has matched the filter, + // so record passes the filter. 
+ flag = true; + break; + } + } + if (!flag) { + // none of the items in the map/list passed the filter => record doesn't pass the filter + return false; + } + } else { + if (!checkFilteredObject(valueObject, valueClassField.getType(), subfields)) { + return false; + } + } + } + } catch (NoSuchFieldException ex) { + err().println("ERROR: no such field: " + field); + exception = true; + return false; + } catch (IllegalAccessException e) { + err().println("ERROR: Cannot get field from object: " + field); + exception = true; + return false; + } catch (Exception ex) { + err().println("ERROR: field: " + field + ", ex: " + ex); + exception = true; + return false; + } + } + return true; + } + + boolean checkFilteredObjectCollection(Collection valueObject, Map fields) + throws NoSuchFieldException, IllegalAccessException, IOException { + for (Object ob : valueObject) { + if (checkFilteredObject(ob, ob.getClass(), fields)) { + return true; + } + } + return false; + } + + Map getFieldsFilteredObject(Object obj, Class clazz, Map fieldsSplitMap) { Map valueMap = new HashMap<>(); for (Map.Entry field : fieldsSplitMap.entrySet()) { try { @@ -583,7 +730,7 @@ Map getFilteredObject(Object obj, Class clazz, Map subfieldObjectsList = - getFilteredObjectCollection((Collection) valueObject, subfields); + getFieldsFilteredObjectCollection((Collection) valueObject, subfields); valueMap.put(field.getKey(), subfieldObjectsList); } else if (Map.class.isAssignableFrom(valueObject.getClass())) { Map subfieldObjectsMap = new HashMap<>(); @@ -591,16 +738,16 @@ Map getFilteredObject(Object obj, Class clazz, Map ob : valueObjectMap.entrySet()) { Object subfieldValue; if (Collection.class.isAssignableFrom(ob.getValue().getClass())) { - subfieldValue = getFilteredObjectCollection((Collection)ob.getValue(), subfields); + subfieldValue = getFieldsFilteredObjectCollection((Collection)ob.getValue(), subfields); } else { - subfieldValue = getFilteredObject(ob.getValue(), ob.getValue().getClass(), subfields); + subfieldValue = getFieldsFilteredObject(ob.getValue(), ob.getValue().getClass(), subfields); } subfieldObjectsMap.put(ob.getKey(), subfieldValue); } valueMap.put(field.getKey(), subfieldObjectsMap); } else { valueMap.put(field.getKey(), - getFilteredObject(valueObject, valueClassField.getType(), subfields)); + getFieldsFilteredObject(valueObject, valueClassField.getType(), subfields)); } } } catch (NoSuchFieldException ex) { @@ -612,11 +759,11 @@ Map getFilteredObject(Object obj, Class clazz, Map getFilteredObjectCollection(Collection valueObject, Map fields) + List getFieldsFilteredObjectCollection(Collection valueObject, Map fields) throws NoSuchFieldException, IllegalAccessException { List subfieldObjectsList = new ArrayList<>(); for (Object ob : valueObject) { - Object subfieldValue = getFilteredObject(ob, ob.getClass(), fields); + Object subfieldValue = getFieldsFilteredObject(ob, ob.getClass(), fields); subfieldObjectsList.add(subfieldValue); } return subfieldObjectsList; diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/utils/Filter.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/utils/Filter.java new file mode 100644 index 00000000000..129e1a6158d --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/utils/Filter.java @@ -0,0 +1,107 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.utils; + +import java.util.Map; + +/** + * Represent class which has info of what operation and value a set of records should be filtered with. + */ +public class Filter { + private FilterOperator operator; + private Object value; + private Map nextLevel = null; + + public Filter() { + this.operator = null; + this.value = null; + } + + public Filter(FilterOperator operator, Object value) { + this.operator = operator; + this.value = value; + } + + public Filter(String op, Object value) { + this.operator = getFilterOperator(op); + this.value = value; + } + + public Filter(FilterOperator operator, Object value, Map next) { + this.operator = operator; + this.value = value; + this.nextLevel = next; + } + + public Filter(String op, Object value, Map next) { + this.operator = getFilterOperator(op); + this.value = value; + this.nextLevel = next; + } + + public FilterOperator getOperator() { + return operator; + } + + public void setOperator(FilterOperator operator) { + this.operator = operator; + } + + public Object getValue() { + return value; + } + + public void setValue(Object value) { + this.value = value; + } + + public Map getNextLevel() { + return nextLevel; + } + + public void setNextLevel(Map nextLevel) { + this.nextLevel = nextLevel; + } + + public FilterOperator getFilterOperator(String op) { + if (op.equalsIgnoreCase("equals")) { + return FilterOperator.EQUALS; + } else if (op.equalsIgnoreCase("max")) { + return FilterOperator.MAX; + } else if (op.equalsIgnoreCase("min")) { + return FilterOperator.MIN; + } else { + return null; + } + } + + @Override + public String toString() { + return "(" + operator + "," + value + "," + nextLevel + ")"; + } + + /** + * Operation of the filter. + */ + public enum FilterOperator { + EQUALS, + MAX, + MIN; + } +} From c365aa025fafdca042d920b7292fd6828b083a3f Mon Sep 17 00:00:00 2001 From: Nandakumar Vadivelu Date: Tue, 17 Sep 2024 22:42:31 +0530 Subject: [PATCH 22/43] HDDS-11181. 
Cleanup of unnecessary try-catch blocks (#7210) --- ...eManagerProtocolClientSideTranslatorPB.java | 8 ++------ .../hadoop/ozone/om/OzoneManagerUtils.java | 8 ++------ .../hadoop/ozone/s3/OzoneClientCache.java | 2 -- .../hadoop/ozone/freon/RandomKeyGenerator.java | 18 +++--------------- .../shell/volume/DeleteVolumeHandler.java | 6 +----- 5 files changed, 8 insertions(+), 34 deletions(-) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java index bdd1428b16d..432b55051da 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java @@ -2117,12 +2117,8 @@ public OzoneFileStatus getFileStatus(OmKeyArgs args) throws IOException { .setGetFileStatusRequest(req) .build(); - final GetFileStatusResponse resp; - try { - resp = handleError(submitRequest(omRequest)).getGetFileStatusResponse(); - } catch (IOException e) { - throw e; - } + final GetFileStatusResponse resp = handleError(submitRequest(omRequest)) + .getGetFileStatusResponse(); return OzoneFileStatus.getFromProtobuf(resp.getStatus()); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerUtils.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerUtils.java index 5a4ff643157..2301bbbdbf2 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerUtils.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerUtils.java @@ -164,12 +164,8 @@ private static OmBucketInfo resolveBucketInfoLink( * buck-src has the actual BucketLayout that will be used by the * links. */ - try { - return resolveBucketInfoLink(metadataManager, - buckInfo.getSourceVolume(), buckInfo.getSourceBucket(), visited); - } catch (IOException e) { - throw e; - } + return resolveBucketInfoLink(metadataManager, buckInfo.getSourceVolume(), + buckInfo.getSourceBucket(), visited); } return buckInfo; } diff --git a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/OzoneClientCache.java b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/OzoneClientCache.java index 4f08527668c..7614c4933a8 100644 --- a/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/OzoneClientCache.java +++ b/hadoop-ozone/s3gateway/src/main/java/org/apache/hadoop/ozone/s3/OzoneClientCache.java @@ -154,8 +154,6 @@ private void setCertificate(String omServiceID, } } catch (CertificateException ce) { throw new IOException(ce); - } catch (IOException e) { - throw e; } finally { if (certClient != null) { certClient.close(); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/RandomKeyGenerator.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/RandomKeyGenerator.java index c964676f266..58b62d22b98 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/RandomKeyGenerator.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/RandomKeyGenerator.java @@ -355,11 +355,7 @@ public Void call() throws Exception { // wait until all keys are added or exception occurred. 
while ((numberOfKeysAdded.get() != totalKeyCount) && exception == null) { - try { - Thread.sleep(CHECK_INTERVAL_MILLIS); - } catch (InterruptedException e) { - throw e; - } + Thread.sleep(CHECK_INTERVAL_MILLIS); } executor.shutdown(); executor.awaitTermination(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); @@ -373,11 +369,7 @@ public Void call() throws Exception { if (validateExecutor != null) { while (!validationQueue.isEmpty()) { - try { - Thread.sleep(CHECK_INTERVAL_MILLIS); - } catch (InterruptedException e) { - throw e; - } + Thread.sleep(CHECK_INTERVAL_MILLIS); } validateExecutor.shutdown(); validateExecutor.awaitTermination(Integer.MAX_VALUE, @@ -421,11 +413,7 @@ private void doCleanObjects() throws InterruptedException { // wait until all Buckets are cleaned or exception occurred. while ((numberOfBucketsCleaned.get() != totalBucketCount) && exception == null) { - try { - Thread.sleep(CHECK_INTERVAL_MILLIS); - } catch (InterruptedException e) { - throw e; - } + Thread.sleep(CHECK_INTERVAL_MILLIS); } } catch (InterruptedException e) { LOG.error("Failed to wait until all Buckets are cleaned", e); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/volume/DeleteVolumeHandler.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/volume/DeleteVolumeHandler.java index 8cc80502386..00270310737 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/volume/DeleteVolumeHandler.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/shell/volume/DeleteVolumeHandler.java @@ -227,11 +227,7 @@ private void doCleanBuckets() throws InterruptedException { // wait until all Buckets are cleaned or exception occurred. while (numberOfBucketsCleaned.get() != totalBucketCount && exception == null) { - try { - Thread.sleep(100); - } catch (InterruptedException e) { - throw e; - } + Thread.sleep(100); } } catch (InterruptedException e) { LOG.error("Failed to wait until all Buckets are cleaned", e); From 7a26afff5a06da2876d53cce69bc79efa1953251 Mon Sep 17 00:00:00 2001 From: Abhishek Pal <43001336+devabhishekpal@users.noreply.github.com> Date: Wed, 18 Sep 2024 13:50:00 +0530 Subject: [PATCH 23/43] HDDS-11158. 
Improve Pipelines page UI (#7171) --- .../src/v2/components/search/search.tsx | 2 + .../src/v2/components/tables/bucketsTable.tsx | 267 ++++++++++++++++++ .../v2/components/tables/pipelinesTable.tsx | 211 ++++++++++++++ .../src/v2/components/tables/volumesTable.tsx | 179 ++++++++++++ .../src/v2/pages/buckets/buckets.tsx | 236 +--------------- .../src/v2/pages/pipelines/pipelines.less | 48 ++++ .../src/v2/pages/pipelines/pipelines.tsx | 160 +++++++++++ .../src/v2/pages/volumes/volumes.tsx | 147 +--------- .../ozone-recon-web/src/v2/routes-v2.tsx | 5 + .../src/v2/types/bucket.types.ts | 12 +- .../src/v2/types/pipelines.types.ts | 62 ++++ .../src/v2/types/volume.types.ts | 10 +- .../src/v2/utils/momentUtils.ts | 63 +++++ 13 files changed, 1039 insertions(+), 363 deletions(-) create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/tables/bucketsTable.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/tables/pipelinesTable.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/tables/volumesTable.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/pipelines/pipelines.less create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/pipelines/pipelines.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/pipelines.types.ts create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/utils/momentUtils.ts diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/search/search.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/search/search.tsx index 8cac2a9c047..d320fd659a6 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/search/search.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/search/search.tsx @@ -20,6 +20,7 @@ import React from 'react'; import { Input, Select } from 'antd'; import { Option } from '@/v2/components/select/singleSelect'; +import { DownOutlined } from '@ant-design/icons'; // ------------- Types -------------- // type SearchProps = { @@ -51,6 +52,7 @@ const Search: React.FC = ({ const selectFilter = searchColumn ? (
+ + ) +} + +export default BucketsTable; \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/tables/pipelinesTable.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/tables/pipelinesTable.tsx new file mode 100644 index 00000000000..6c07749436d --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/tables/pipelinesTable.tsx @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import React from 'react'; + +import Table, { + ColumnsType, + TablePaginationConfig +} from 'antd/es/table'; +import Tooltip from 'antd/es/tooltip'; +import { InfoCircleOutlined } from '@ant-design/icons'; + +import { ReplicationIcon } from '@/utils/themeIcons'; +import { getDurationFromTimestamp, getTimeDiffFromTimestamp } from '@/v2/utils/momentUtils'; +import { Pipeline, PipelinesTableProps, PipelineStatusList } from '@/v2/types/pipelines.types'; + + +// TODO: When Datanodes PR gets merged remove these declarations +// And import from datanodes.types + +type SummaryDatanodeDetails = { + level: number; + parent: unknown | null; + cost: number; + uuid: string; + uuidString: string; + ipAddress: string; + hostName: string; + ports: { + name: string; + value: number + }[]; + certSerialId: null, + version: string | null; + setupTime: number; + revision: string | null; + buildDate: string; + persistedOpState: string; + persistedOpStateExpiryEpochSec: number; + initialVersion: number; + currentVersion: number; + signature: number; + decommissioned: boolean; + networkName: string; + networkLocation: string; + networkFullPath: string; + numOfLeaves: number; +} + +export const COLUMNS: ColumnsType = [ + { + title: 'Pipeline ID', + dataIndex: 'pipelineId', + key: 'pipelineId', + sorter: (a: Pipeline, b: Pipeline) => a.pipelineId.localeCompare(b.pipelineId), + + }, + { + title: 'Replication Type & Factor', + dataIndex: 'replicationType', + key: 'replicationType', + render: (replicationType: string, record: Pipeline) => { + const replicationFactor = record.replicationFactor; + return ( + + + {replicationType} ({replicationFactor}) + + ); + }, + sorter: (a: Pipeline, b: Pipeline) => + (a.replicationType + a.replicationFactor.toString()).localeCompare(b.replicationType + b.replicationFactor.toString()), + defaultSortOrder: 'descend' as const + }, + { + title: 'Status', + dataIndex: 'status', + key: 'status', + filterMultiple: true, + filters: PipelineStatusList.map(status => ({ text: status, value: status })), + onFilter: (value, record: Pipeline) => record.status === value, + sorter: (a: Pipeline, b: Pipeline) => a.status.localeCompare(b.status) + }, + { + title: 'Containers', + dataIndex: 
'containers', + key: 'containers', + sorter: (a: Pipeline, b: Pipeline) => a.containers - b.containers + }, + { + title: 'Datanodes', + dataIndex: 'datanodes', + key: 'datanodes', + render: (datanodes: SummaryDatanodeDetails[]) => ( +
+ {datanodes.map(datanode => ( +
+ triggerNode}> + {datanode?.hostName ?? 'N/A'} + +
+ ))} +
+ ) + }, + { + title: 'Leader', + dataIndex: 'leaderNode', + key: 'leaderNode', + sorter: (a: Pipeline, b: Pipeline) => a.leaderNode.localeCompare(b.leaderNode) + }, + { + title: () => ( + + Last Leader Election  + + + + + ), + dataIndex: 'lastLeaderElection', + key: 'lastLeaderElection', + render: (lastLeaderElection: number) => lastLeaderElection > 0 ? + getTimeDiffFromTimestamp(lastLeaderElection) : 'NA', + sorter: (a: Pipeline, b: Pipeline) => a.lastLeaderElection - b.lastLeaderElection + }, + { + title: 'Lifetime', + dataIndex: 'duration', + key: 'duration', + render: (duration: number) => getDurationFromTimestamp(duration), + sorter: (a: Pipeline, b: Pipeline) => a.duration - b.duration + }, + { + title: () => ( + + No. of Elections  + + + + + ), + dataIndex: 'leaderElections', + key: 'leaderElections', + render: (leaderElections: number) => leaderElections > 0 ? + leaderElections : 'NA', + sorter: (a: Pipeline, b: Pipeline) => a.leaderElections - b.leaderElections + } +]; + +const PipelinesTable: React.FC = ({ + loading = false, + data, + selectedColumns, + searchTerm = '' +}) => { + const paginationConfig: TablePaginationConfig = { + showTotal: (total: number, range) => `${range[0]}-${range[1]} of ${total} pipelines`, + showSizeChanger: true, + }; + + function filterSelectedColumns() { + const columnKeys = selectedColumns.map((column) => column.value); + return COLUMNS.filter( + (column) => columnKeys.indexOf(column.key as string) >= 0 + ) + } + + function getFilteredData(data: Pipeline[]) { + return data.filter( + (pipeline: Pipeline) => pipeline['pipelineId'].includes(searchTerm) + ) + } + + return ( +
+
+ + ) +} + +export default PipelinesTable; \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/tables/volumesTable.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/tables/volumesTable.tsx new file mode 100644 index 00000000000..4de0d713fce --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/tables/volumesTable.tsx @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import QuotaBar from '@/components/quotaBar/quotaBar'; +import { byteToSize } from '@/utils/common'; +import { Volume, VolumesTableProps } from '@/v2/types/volume.types'; +import Table, { ColumnsType, ColumnType, TablePaginationConfig } from 'antd/es/table'; +import moment from 'moment'; +import React from 'react'; +import { Link } from 'react-router-dom'; + +export const COLUMNS: ColumnsType = [ + { + title: 'Volume', + dataIndex: 'volume', + key: 'volume', + sorter: (a: Volume, b: Volume) => a.volume.localeCompare(b.volume), + defaultSortOrder: 'ascend' as const, + width: '15%' + }, + { + title: 'Owner', + dataIndex: 'owner', + key: 'owner', + sorter: (a: Volume, b: Volume) => a.owner.localeCompare(b.owner) + }, + { + title: 'Admin', + dataIndex: 'admin', + key: 'admin', + sorter: (a: Volume, b: Volume) => a.admin.localeCompare(b.admin) + }, + { + title: 'Creation Time', + dataIndex: 'creationTime', + key: 'creationTime', + sorter: (a: Volume, b: Volume) => a.creationTime - b.creationTime, + render: (creationTime: number) => { + return creationTime > 0 ? moment(creationTime).format('ll LTS') : 'NA'; + } + }, + { + title: 'Modification Time', + dataIndex: 'modificationTime', + key: 'modificationTime', + sorter: (a: Volume, b: Volume) => a.modificationTime - b.modificationTime, + render: (modificationTime: number) => { + return modificationTime > 0 ? moment(modificationTime).format('ll LTS') : 'NA'; + } + }, + { + title: 'Quota (Size)', + dataIndex: 'quotaInBytes', + key: 'quotaInBytes', + render: (quotaInBytes: number) => { + return quotaInBytes && quotaInBytes !== -1 ? 
byteToSize(quotaInBytes, 3) : 'NA'; + } + }, + { + title: 'Namespace Capacity', + key: 'namespaceCapacity', + sorter: (a: Volume, b: Volume) => a.usedNamespace - b.usedNamespace, + render: (text: string, record: Volume) => ( + + ) + }, +]; + +const VolumesTable: React.FC = ({ + loading = false, + data, + handleAclClick, + selectedColumns, + searchColumn = 'volume', + searchTerm = '' +}) => { + + React.useEffect(() => { + // On table mount add the actions column + console.log("Adding new column"); + const actionsColumn: ColumnType = { + title: 'Actions', + key: 'actions', + render: (_: any, record: Volume) => { + const searchParams = new URLSearchParams(); + searchParams.append('volume', record.volume); + + return ( + <> + + Show buckets + + handleAclClick(record)}> + Show ACL + + + ); + } + } + + if (COLUMNS.length > 0 && COLUMNS[COLUMNS.length - 1].key !== 'actions') { + // Push the ACL column for initial + COLUMNS.push(actionsColumn); + selectedColumns.push({ + label: actionsColumn.title as string, + value: actionsColumn.key as string + }); + } else { + // Replace old ACL column with new ACL column with correct reference + // e.g. After page is reloaded / redirect from other page + COLUMNS[COLUMNS.length - 1] = actionsColumn; + selectedColumns[selectedColumns.length - 1] = { + label: actionsColumn.title as string, + value: actionsColumn.key as string + } + } + + }, []); + + function filterSelectedColumns() { + const columnKeys = selectedColumns.map((column) => column.value); + return COLUMNS.filter( + (column) => columnKeys.indexOf(column.key as string) >= 0 + ) + } + + function getFilteredData(data: Volume[]) { + return data.filter( + (volume: Volume) => volume[searchColumn].includes(searchTerm) + ); + } + + const paginationConfig: TablePaginationConfig = { + showTotal: (total: number, range) => `${range[0]}-${range[1]} of ${total} volumes`, + showSizeChanger: true + }; + + return ( +
+
+ + ) +} + +export default VolumesTable; \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/buckets/buckets.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/buckets/buckets.tsx index bd8950e54c8..12af3bb4281 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/buckets/buckets.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/buckets/buckets.tsx @@ -18,25 +18,9 @@ import React, { useEffect, useState } from 'react'; import moment from 'moment'; -import { Table, Tag } from 'antd'; -import { - ColumnProps, - ColumnsType, - TablePaginationConfig -} from 'antd/es/table'; -import { - CheckCircleOutlined, - CloseCircleOutlined, - CloudServerOutlined, - FileUnknownOutlined, - HddOutlined, - LaptopOutlined, - SaveOutlined -} from '@ant-design/icons'; import { ValueType } from 'react-select'; import { useLocation } from 'react-router-dom'; -import QuotaBar from '@/components/quotaBar/quotaBar'; import AutoReloadPanel from '@/components/autoReloadPanel/autoReloadPanel'; import AclPanel from '@/v2/components/aclDrawer/aclDrawer'; import Search from '@/v2/components/search/search'; @@ -45,20 +29,17 @@ import SingleSelect, { Option } from '@/v2/components/select/singleSelect'; import { AutoReloadHelper } from '@/utils/autoReloadHelper'; import { AxiosGetHelper } from "@/utils/axiosRequestHelper"; -import { nullAwareLocaleCompare, showDataFetchError } from '@/utils/common'; +import { showDataFetchError } from '@/utils/common'; import { useDebounce } from '@/v2/hooks/debounce.hook'; import { Bucket, - BucketLayout, - BucketLayoutTypeList, BucketResponse, BucketsState, - BucketStorage, - BucketStorageTypeList } from '@/v2/types/bucket.types'; import './buckets.less'; +import BucketsTable, { COLUMNS } from '@/v2/components/tables/bucketsTable'; const LIMIT_OPTIONS: Option[] = [ @@ -80,38 +61,6 @@ const LIMIT_OPTIONS: Option[] = [ } ] -const renderIsVersionEnabled = (isVersionEnabled: boolean) => { - return isVersionEnabled - ? - : -}; - -const renderStorageType = (bucketStorage: BucketStorage) => { - const bucketStorageIconMap: Record = { - RAM_DISK: , - SSD: , - DISK: , - ARCHIVE: - }; - const icon = bucketStorage in bucketStorageIconMap - ? bucketStorageIconMap[bucketStorage] - : ; - return {icon} {bucketStorage}; -}; - -const renderBucketLayout = (bucketLayout: BucketLayout) => { - const bucketLayoutColorMap = { - FILE_SYSTEM_OPTIMIZED: 'green', - OBJECT_STORE: 'orange', - LEGACY: 'blue' - }; - const color = bucketLayout in bucketLayoutColorMap ? 
- bucketLayoutColorMap[bucketLayout] : ''; - return {bucketLayout}; -}; - const SearchableColumnOpts = [{ label: 'Bucket', value: 'name' @@ -120,113 +69,6 @@ const SearchableColumnOpts = [{ value: 'volumeName' }] -const COLUMNS: ColumnsType = [ - { - title: 'Bucket', - dataIndex: 'name', - key: 'name', - sorter: (a: Bucket, b: Bucket) => a.name.localeCompare(b.name), - defaultSortOrder: 'ascend' as const - }, - { - title: 'Volume', - dataIndex: 'volumeName', - key: 'volumeName', - sorter: (a: Bucket, b: Bucket) => a.volumeName.localeCompare(b.volumeName), - defaultSortOrder: 'ascend' as const - }, - { - title: 'Owner', - dataIndex: 'owner', - key: 'owner', - sorter: (a: Bucket, b: Bucket) => nullAwareLocaleCompare(a.owner, b.owner) - }, - { - title: 'Versioning', - dataIndex: 'versioning', - key: 'isVersionEnabled', - render: (isVersionEnabled: boolean) => renderIsVersionEnabled(isVersionEnabled) - }, - { - title: 'Storage Type', - dataIndex: 'storageType', - key: 'storageType', - filterMultiple: true, - filters: BucketStorageTypeList.map(state => ({ text: state, value: state })), - onFilter: (value, record: Bucket) => record.storageType === value, - sorter: (a: Bucket, b: Bucket) => a.storageType.localeCompare(b.storageType), - render: (storageType: BucketStorage) => renderStorageType(storageType) - }, - { - title: 'Bucket Layout', - dataIndex: 'bucketLayout', - key: 'bucketLayout', - filterMultiple: true, - filters: BucketLayoutTypeList.map(state => ({ text: state, value: state })), - onFilter: (value, record: Bucket) => record.bucketLayout === value, - sorter: (a: Bucket, b: Bucket) => a.bucketLayout.localeCompare(b.bucketLayout), - render: (bucketLayout: BucketLayout) => renderBucketLayout(bucketLayout) - }, - { - title: 'Creation Time', - dataIndex: 'creationTime', - key: 'creationTime', - sorter: (a: Bucket, b: Bucket) => a.creationTime - b.creationTime, - render: (creationTime: number) => { - return creationTime > 0 ? moment(creationTime).format('ll LTS') : 'NA'; - } - }, - { - title: 'Modification Time', - dataIndex: 'modificationTime', - key: 'modificationTime', - sorter: (a: Bucket, b: Bucket) => a.modificationTime - b.modificationTime, - render: (modificationTime: number) => { - return modificationTime > 0 ? moment(modificationTime).format('ll LTS') : 'NA'; - } - }, - { - title: 'Storage Capacity', - key: 'quotaCapacityBytes', - sorter: (a: Bucket, b: Bucket) => a.usedBytes - b.usedBytes, - render: (text: string, record: Bucket) => ( - - ) - }, - { - title: 'Namespace Capacity', - key: 'namespaceCapacity', - sorter: (a: Bucket, b: Bucket) => a.usedNamespace - b.usedNamespace, - render: (text: string, record: Bucket) => ( - - ) - }, - { - title: 'Source Volume', - dataIndex: 'sourceVolume', - key: 'sourceVolume', - render: (sourceVolume: string) => { - return sourceVolume ? sourceVolume : 'NA'; - } - }, - { - title: 'Source Bucket', - dataIndex: 'sourceBucket', - key: 'sourceBucket', - render: (sourceBucket: string) => { - return sourceBucket ? 
sourceBucket : 'NA'; - } - } -]; - const defaultColumns = COLUMNS.map(column => ({ label: column.title as string, value: column.key as string @@ -291,21 +133,10 @@ const Buckets: React.FC<{}> = () => { const debouncedSearch = useDebounce(searchTerm, 300); const { search } = useLocation(); - const paginationConfig: TablePaginationConfig = { - showTotal: (total: number, range) => `${range[0]}-${range[1]} of ${total} buckets`, - showSizeChanger: true - }; - function getVolumeSearchParam() { return new URLSearchParams(search).get('volume'); }; - function getFilteredData(data: Bucket[]) { - return data.filter( - (bucket: Bucket) => bucket[searchColumn].includes(debouncedSearch) - ); - } - function handleVolumeChange(selected: ValueType) { const { volumeBucketMap } = state; const volumeSelections = (selected as Option[]); @@ -327,50 +158,6 @@ const Buckets: React.FC<{}> = () => { setShowPanel(true); } - function filterSelectedColumns() { - const columnKeys = selectedColumns.map((column) => column.value); - return COLUMNS.filter( - (column) => columnKeys.indexOf(column.key as string) >= 0 - ) - } - - function addAclColumn() { - // Inside the class component to access the React internal state - const aclLinkColumn: ColumnProps = { - title: 'ACLs', - dataIndex: 'acls', - key: 'acls', - render: (_: any, record: Bucket) => { - return ( - { - handleAclLinkClick(record); - }} - > - Show ACL - - ); - } - }; - - if (COLUMNS.length > 0 && COLUMNS[COLUMNS.length - 1].key !== 'acls') { - // Push the ACL column for initial - COLUMNS.push(aclLinkColumn); - } else { - // Replace old ACL column with new ACL column with correct reference - // e.g. After page is reloaded / redirect from other page - COLUMNS[COLUMNS.length - 1] = aclLinkColumn; - } - - if (defaultColumns.length > 0 && defaultColumns[defaultColumns.length - 1].label !== 'acls') { - defaultColumns.push({ - label: aclLinkColumn.title as string, - value: aclLinkColumn.key as string - }); - } - }; - function handleColumnChange(selected: ValueType) { setSelectedColumns(selected as Option[]); } @@ -447,7 +234,6 @@ const Buckets: React.FC<{}> = () => { useEffect(() => { autoReloadHelper.startPolling(); - addAclColumn(); const initialVolume = getVolumeSearchParam(); if (initialVolume) { setSelectedVolumes([{ @@ -537,17 +323,13 @@ const Buckets: React.FC<{}> = () => { setSearchColumn(value as 'name' | 'volumeName'); }} /> -
-
- + ({ + label: (typeof column.title === 'string') + ? column.title + : (column.title as Function)().props.children[0], + value: column.key as string, +})); + +const Pipelines: React.FC<{}> = () => { + const cancelSignal = useRef(); + + const [state, setState] = useState({ + activeDataSource: [], + columnOptions: defaultColumns, + lastUpdated: 0, + }); + const [loading, setLoading] = useState(false); + const [selectedColumns, setSelectedColumns] = useState(defaultColumns); + const [searchTerm, setSearchTerm] = useState(''); + + const debouncedSearch = useDebounce(searchTerm, 300); + + const loadData = () => { + setLoading(true); + //Cancel any previous requests + cancelRequests([cancelSignal.current!]); + + const { request, controller } = AxiosGetHelper( + '/api/v1/pipelines', + cancelSignal.current + ); + + cancelSignal.current = controller; + request.then(response => { + const pipelinesResponse: PipelinesResponse = response.data; + const pipelines: Pipeline[] = pipelinesResponse?.pipelines ?? {}; + setState({ + ...state, + activeDataSource: pipelines, + lastUpdated: Number(moment()) + }) + setLoading(false); + }).catch(error => { + setLoading(false); + showDataFetchError(error.toString()); + }) + } + + const autoReloadHelper: AutoReloadHelper = new AutoReloadHelper(loadData); + + useEffect(() => { + autoReloadHelper.startPolling(); + loadData(); + return (() => { + autoReloadHelper.stopPolling(); + cancelRequests([cancelSignal.current!]); + }) + }, []); + + function handleColumnChange(selected: ValueType) { + setSelectedColumns(selected as Option[]); + } + + const { + activeDataSource, + columnOptions, + lastUpdated + } = state; + + return ( + <> +
+ Pipelines + +
+
+
+
+
+ { }} + fixedColumn='pipelineId' + columnLength={COLUMNS.length} /> +
+ ) => setSearchTerm(e.target.value) + } + onChange={() => { }} /> +
+ +
+
+ + ); +} +export default Pipelines; \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/volumes/volumes.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/volumes/volumes.tsx index 605883caff9..cb25cedbcec 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/volumes/volumes.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/volumes/volumes.tsx @@ -18,22 +18,16 @@ import React, { useEffect, useRef, useState } from 'react'; import moment from 'moment'; -import { Table } from 'antd'; -import { Link } from 'react-router-dom'; -import { - TablePaginationConfig, - ColumnsType -} from 'antd/es/table'; import { ValueType } from 'react-select/src/types'; -import QuotaBar from '@/components/quotaBar/quotaBar'; import AclPanel from '@/v2/components/aclDrawer/aclDrawer'; import AutoReloadPanel from '@/components/autoReloadPanel/autoReloadPanel'; -import MultiSelect, { Option } from '@/v2/components/select/multiSelect'; import SingleSelect from '@/v2/components/select/singleSelect'; +import MultiSelect, { Option } from '@/v2/components/select/multiSelect'; +import VolumesTable, { COLUMNS } from '@/v2/components/tables/volumesTable'; import Search from '@/v2/components/search/search'; -import { byteToSize, showDataFetchError } from '@/utils/common'; +import { showDataFetchError } from '@/utils/common'; import { AutoReloadHelper } from '@/utils/autoReloadHelper'; import { AxiosGetHelper, cancelRequests } from "@/utils/axiosRequestHelper"; import { useDebounce } from '@/v2/hooks/debounce.hook'; @@ -72,93 +66,6 @@ const Volumes: React.FC<{}> = () => { const cancelSignal = useRef(); - const COLUMNS: ColumnsType = [ - { - title: 'Volume', - dataIndex: 'volume', - key: 'volume', - sorter: (a: Volume, b: Volume) => a.volume.localeCompare(b.volume), - defaultSortOrder: 'ascend' as const, - width: '15%' - }, - { - title: 'Owner', - dataIndex: 'owner', - key: 'owner', - sorter: (a: Volume, b: Volume) => a.owner.localeCompare(b.owner) - }, - { - title: 'Admin', - dataIndex: 'admin', - key: 'admin', - sorter: (a: Volume, b: Volume) => a.admin.localeCompare(b.admin) - }, - { - title: 'Creation Time', - dataIndex: 'creationTime', - key: 'creationTime', - sorter: (a: Volume, b: Volume) => a.creationTime - b.creationTime, - render: (creationTime: number) => { - return creationTime > 0 ? moment(creationTime).format('ll LTS') : 'NA'; - } - }, - { - title: 'Modification Time', - dataIndex: 'modificationTime', - key: 'modificationTime', - sorter: (a: Volume, b: Volume) => a.modificationTime - b.modificationTime, - render: (modificationTime: number) => { - return modificationTime > 0 ? moment(modificationTime).format('ll LTS') : 'NA'; - } - }, - { - title: 'Quota (Size)', - dataIndex: 'quotaInBytes', - key: 'quotaInBytes', - render: (quotaInBytes: number) => { - return quotaInBytes && quotaInBytes !== -1 ? 
byteToSize(quotaInBytes, 3) : 'NA'; - } - }, - { - title: 'Namespace Capacity', - key: 'namespaceCapacity', - sorter: (a: Volume, b: Volume) => a.usedNamespace - b.usedNamespace, - render: (text: string, record: Volume) => ( - - ) - }, - { - title: 'Actions', - key: 'actions', - render: (_: any, record: Volume) => { - const searchParams = new URLSearchParams(); - searchParams.append('volume', record.volume); - - return ( - <> - - Show buckets - - handleAclLinkClick(record)}> - Show ACL - - - ); - } - } - ]; - const defaultColumns = COLUMNS.map(column => ({ label: column.title as string, value: column.key as string, @@ -167,10 +74,10 @@ const Volumes: React.FC<{}> = () => { const [state, setState] = useState({ data: [], lastUpdated: 0, - columnOptions: defaultColumns, - currentRow: {} + columnOptions: defaultColumns }); const [loading, setLoading] = useState(false); + const [currentRow, setCurrentRow] = useState>({}); const [selectedColumns, setSelectedColumns] = useState(defaultColumns); const [selectedLimit, setSelectedLimit] = useState
- + import('@/v2/pages/overview/overview')); const Volumes = lazy(() => import('@/v2/pages/volumes/volumes')) const Buckets = lazy(() => import('@/v2/pages/buckets/buckets')); +const Pipelines = lazy(() => import('@/v2/pages/pipelines/pipelines')); export const routesV2 = [ { @@ -33,5 +34,9 @@ export const routesV2 = [ { path: '/Buckets', component: Buckets + }, + { + path: '/Pipelines', + component: Pipelines } ]; diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/bucket.types.ts b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/bucket.types.ts index 5cfc89d85e6..eb499dc617e 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/bucket.types.ts +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/bucket.types.ts @@ -17,7 +17,6 @@ */ import { Acl } from "@/v2/types/acl.types"; -import { Option } from "@/v2/components/select/singleSelect"; import { Option as MultiOption } from "@/v2/components/select/multiSelect"; // Corresponds to OzoneManagerProtocolProtos.StorageTypeProto @@ -68,4 +67,13 @@ export type BucketsState = { volumeBucketMap: Map>; bucketsUnderVolume: Bucket[]; volumeOptions: MultiOption[]; -} \ No newline at end of file +} + +export type BucketsTableProps = { + loading: boolean; + data: Bucket[]; + handleAclClick: (arg0: Bucket) => void; + selectedColumns: MultiOption[]; + searchColumn: 'name' | 'volumeName'; + searchTerm: string; +} diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/pipelines.types.ts b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/pipelines.types.ts new file mode 100644 index 00000000000..7c5a23bc0af --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/pipelines.types.ts @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { Option } from "@/v2/components/select/multiSelect"; + +export const PipelineStatusList = [ + 'OPEN', + 'CLOSING', + 'QUASI_CLOSED', + 'CLOSED', + 'UNHEALTHY', + 'INVALID', + 'DELETED', + 'DORMANT' +] as const; +export type PipelineStatus = typeof PipelineStatusList[number]; + +export type Pipeline = { + pipelineId: string; + status: PipelineStatus; + replicationType: string; + leaderNode: string; + datanodes: string[]; + lastLeaderElection: number; + duration: number; + leaderElections: number; + replicationFactor: string; + containers: number; +} + +export type PipelinesResponse = { + totalCount: number; + pipelines: Pipeline[]; +} + +export type PipelinesState = { + activeDataSource: Pipeline[]; + columnOptions: Option[]; + lastUpdated: number; +} + +export type PipelinesTableProps = { + loading: boolean; + data: Pipeline[]; + selectedColumns: Option[]; + searchTerm: string; +} diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/volume.types.ts b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/volume.types.ts index 67f007706a4..b808d403584 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/volume.types.ts +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/volume.types.ts @@ -40,5 +40,13 @@ export type VolumesState = { data: Volume[]; lastUpdated: number; columnOptions: Option[]; - currentRow: Volume | Record; +} + +export type VolumesTableProps = { + loading: boolean; + data: Volume[]; + handleAclClick: (arg0: Volume) => void; + selectedColumns: Option[]; + searchColumn: 'volume' | 'owner' | 'admin'; + searchTerm: string; } diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/utils/momentUtils.ts b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/utils/momentUtils.ts new file mode 100644 index 00000000000..fb553d0db3f --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/utils/momentUtils.ts @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import moment from "moment"; + +moment.updateLocale('en', { + relativeTime: { + past: '%s ago', + s: '%ds', + m: '1min', + mm: '%dmins', + h: '1hr', + hh: '%dhrs', + d: '1d', + dd: '%dd', + M: '1m', + MM: '%dm', + y: '1y', + yy: '%dy' + } +}); + +export function getTimeDiffFromTimestamp(timestamp: number): string { + const timestampDate = new Date(timestamp); + return moment(timestampDate).fromNow(); +} + +export function getDurationFromTimestamp(timestamp: number): string { + const duration: moment.Duration = moment.duration(timestamp, 'milliseconds'); + // return nothing when the duration is falsy or not correctly parsed (P0D) + if(!duration || duration.toISOString() === "P0D") return ''; + + let elapsedTime = []; + const durationBreakdowns: Record = { + 'y': Math.floor(duration.years()), + 'm': Math.floor(duration.months()), + 'd': Math.floor(duration.days()), + 'h': Math.floor(duration.hours()), + 'min': Math.floor(duration.minutes()), + 's': Math.floor(duration.seconds()) + } + + for (const [key, value] of Object.entries(durationBreakdowns)) { + value > 0 && elapsedTime.push(value + key); + } + + return (elapsedTime.length === 0) ? 'Just now' : elapsedTime.join(' '); +} From 151709a5f713ce4b2fb3ec1f866bb853181f230b Mon Sep 17 00:00:00 2001 From: len548 <63490262+len548@users.noreply.github.com> Date: Wed, 18 Sep 2024 14:02:03 +0200 Subject: [PATCH 24/43] HDDS-11446. Downgrade picocli to 4.7.5 due to regression (#7215) This reverts commit 008602b3a3144e41a8c0c483c729632424baf24f. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index c3834286870..23d096fc0c3 100644 --- a/pom.xml +++ b/pom.xml @@ -183,7 +183,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 1.0.1 1.9.25 1.11 - 4.7.6 + 4.7.5 0.16.0 0.10.2 From 70b8dd5ea3b433c2f1b7f8816b269a71d9a2466e Mon Sep 17 00:00:00 2001 From: Abhishek Pal <43001336+devabhishekpal@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:10:45 +0530 Subject: [PATCH 25/43] HDDS-11157. 
Improve Datanodes page UI (#7168) --- .../src/utils/axiosRequestHelper.tsx | 2 +- .../decommissioningSummary.tsx | 139 ++++++++ .../v2/components/storageBar/storageBar.less | 45 +++ .../v2/components/storageBar/storageBar.tsx | 49 +-- .../v2/components/tables/datanodesTable.tsx | 314 ++++++++++++++++++ .../src/v2/pages/buckets/buckets.tsx | 14 +- .../src/v2/pages/datanodes/datanodes.less | 52 +++ .../src/v2/pages/datanodes/datanodes.tsx | 309 +++++++++++++++++ .../ozone-recon-web/src/v2/routes-v2.tsx | 5 + .../src/v2/types/datanode.types.ts | 167 ++++++++++ 10 files changed, 1064 insertions(+), 32 deletions(-) create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/decommissioningSummary/decommissioningSummary.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/storageBar/storageBar.less create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/tables/datanodesTable.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/datanodes/datanodes.less create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/datanodes/datanodes.tsx create mode 100644 hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/datanode.types.ts diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/utils/axiosRequestHelper.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/utils/axiosRequestHelper.tsx index 8fbe403dc37..53a76d83f19 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/utils/axiosRequestHelper.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/utils/axiosRequestHelper.tsx @@ -37,7 +37,7 @@ export const AxiosGetHelper = ( export const AxiosPutHelper = ( url: string, data: any = {}, - controller: AbortController, + controller: AbortController | undefined, message: string = '', //optional ): { request: Promise>; controller: AbortController } => { controller && controller.abort(message); diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/decommissioningSummary/decommissioningSummary.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/decommissioningSummary/decommissioningSummary.tsx new file mode 100644 index 00000000000..34e72b0889a --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/decommissioningSummary/decommissioningSummary.tsx @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import React, { useEffect } from 'react'; +import { AxiosError } from 'axios'; +import { Descriptions, Popover, Result } from 'antd'; +import { SummaryData } from '@/v2/types/datanode.types'; +import { AxiosGetHelper, cancelRequests } from '@/utils/axiosRequestHelper'; +import { showDataFetchError } from '@/utils/common'; +import Spin from 'antd/es/spin'; + +type DecommisioningSummaryProps = { + uuid: string; +} + +type DecommisioningSummaryState = { + loading: boolean; + summaryData: SummaryData | Record; +}; + +function getDescriptions(summaryData: SummaryData): React.ReactElement { + const { + datanodeDetails: { + uuid, + networkLocation, + ipAddress, + hostName + }, + containers: { UnderReplicated, UnClosed }, + metrics: { + decommissionStartTime, + numOfUnclosedPipelines, + numOfUnclosedContainers, + numOfUnderReplicatedContainers + } + } = summaryData; + return ( + + {uuid} + ({networkLocation}/{ipAddress}/{hostName}) + {decommissionStartTime} + {numOfUnclosedPipelines} + {numOfUnclosedContainers} + {numOfUnderReplicatedContainers} + {UnderReplicated} + {UnClosed} + + ); +} + + +const DecommissionSummary: React.FC = ({ + uuid = '' +}) => { + const [state, setState] = React.useState({ + summaryData: {}, + loading: false + }); + const cancelSignal = React.useRef(); + let content = ( + + ); + + async function fetchDecommissionSummary(selectedUuid: string) { + setState({ + ...state, + loading: true + }); + try { + const { request, controller } = AxiosGetHelper( + `/api/v1/datanodes/decommission/info/datanode?uuid=${selectedUuid}`, + cancelSignal.current + ); + cancelSignal.current = controller; + const datanodesInfoResponse = await request; + setState({ + ...state, + loading: false, + summaryData: datanodesInfoResponse?.data?.DatanodesDecommissionInfo[0] ?? {} + }); + } catch (error) { + setState({ + ...state, + loading: false, + summaryData: {} + }); + showDataFetchError((error as AxiosError).toString()); + content = ( + + ) + } + } + + useEffect(() => { + fetchDecommissionSummary(uuid); + return (() => { + cancelRequests([cancelSignal.current!]); + }) + }, []); + + const { summaryData } = state; + if (summaryData?.datanodeDetails + && summaryData?.metrics + && summaryData?.containers + ) { + content = getDescriptions(summaryData as SummaryData); + } + + return ( + +  {uuid} + + ); + +} + +export default DecommissionSummary; diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/storageBar/storageBar.less b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/storageBar/storageBar.less new file mode 100644 index 00000000000..798287366c3 --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/storageBar/storageBar.less @@ -0,0 +1,45 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +@progress-gray: #d0d0d0; +@progress-light-blue: rgb(230, 235, 248); +@progress-blue: #1890ff; +@progress-green: #52c41a; +@progress-red: #FFA39E; + +.storage-cell-container-v2 { + .capacity-bar-v2 { + font-size: 1em; + } +} + +.ozone-used-bg-v2 { + color: @progress-green !important; +} + +.non-ozone-used-bg-v2 { + color: @progress-blue !important; +} + +.remaining-bg-v2 { + color: @progress-light-blue !important; +} + +.committed-bg-v2 { + color: @progress-red !important; +} diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/storageBar/storageBar.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/storageBar/storageBar.tsx index 591b0088b04..fd6dd8dfe9b 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/storageBar/storageBar.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/storageBar/storageBar.tsx @@ -20,72 +20,73 @@ import React from 'react'; import { Progress } from 'antd'; import filesize from 'filesize'; import Icon from '@ant-design/icons'; -import { withRouter } from 'react-router-dom'; import Tooltip from 'antd/lib/tooltip'; import { FilledIcon } from '@/utils/themeIcons'; import { getCapacityPercent } from '@/utils/common'; import type { StorageReport } from '@/v2/types/overview.types'; +import './storageBar.less'; + const size = filesize.partial({ standard: 'iec', round: 1 }); type StorageReportProps = { - showMeta: boolean; + showMeta?: boolean; + strokeWidth?: number; } & StorageReport -const StorageBar = (props: StorageReportProps = { - capacity: 0, - used: 0, - remaining: 0, - committed: 0, - showMeta: true, +const StorageBar: React.FC = ({ + capacity = 0, + used = 0, + remaining = 0, + committed = 0, + showMeta = false, + strokeWidth = 3 }) => { - const { capacity, used, remaining, committed, showMeta } = props; const nonOzoneUsed = capacity - remaining - used; const totalUsed = capacity - remaining; const tooltip = ( <>
- + Ozone Used ({size(used)})
- + Non Ozone Used ({size(nonOzoneUsed)})
- + Remaining ({size(remaining)})
- + Container Pre-allocated ({size(committed)})
); - const metaElement = (showMeta) ? ( -
- {size(used + nonOzoneUsed)} / {size(capacity)} -
- ) : <>; - return ( -
- - {metaElement} + + {(showMeta) && +
+ {size(used + nonOzoneUsed)} / {size(capacity)} +
+ } + className='capacity-bar-v2' strokeWidth={strokeWidth} />
-
); } diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/tables/datanodesTable.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/tables/datanodesTable.tsx new file mode 100644 index 00000000000..494d898509b --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/components/tables/datanodesTable.tsx @@ -0,0 +1,314 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import React from 'react'; +import moment from 'moment'; +import { Popover, Tooltip } from 'antd' +import { + CheckCircleFilled, + CloseCircleFilled, + HourglassFilled, + InfoCircleOutlined, + WarningFilled +} from '@ant-design/icons'; +import Table, { + ColumnsType, + TablePaginationConfig +} from 'antd/es/table'; +import { TableRowSelection } from 'antd/es/table/interface'; + +import StorageBar from '@/v2/components/storageBar/storageBar'; +import DecommissionSummary from '@/v2/components/decommissioningSummary/decommissioningSummary'; + +import { ReplicationIcon } from '@/utils/themeIcons'; +import { getTimeDiffFromTimestamp } from '@/v2/utils/momentUtils'; + +import { + Datanode, + DatanodeOpState, + DatanodeOpStateList, + DatanodeState, + DatanodeStateList, + DatanodeTableProps +} from '@/v2/types/datanode.types'; +import { Pipeline } from '@/v2/types/pipelines.types'; + + +let decommissioningUuids: string | string[] = []; + +const headerIconStyles: React.CSSProperties = { + display: 'flex', + alignItems: 'center' +} + +const renderDatanodeState = (state: DatanodeState) => { + const stateIconMap = { + HEALTHY: , + STALE: , + DEAD: + }; + const icon = state in stateIconMap ? stateIconMap[state] : ''; + return {icon} {state}; +}; + +const renderDatanodeOpState = (opState: DatanodeOpState) => { + const opStateIconMap = { + IN_SERVICE: , + DECOMMISSIONING: , + DECOMMISSIONED: , + ENTERING_MAINTENANCE: , + IN_MAINTENANCE: + }; + const icon = opState in opStateIconMap ? 
opStateIconMap[opState] : ''; + return {icon} {opState}; +}; + +export const COLUMNS: ColumnsType = [ + { + title: 'Hostname', + dataIndex: 'hostname', + key: 'hostname', + sorter: (a: Datanode, b: Datanode) => a.hostname.localeCompare( + b.hostname, undefined, { numeric: true } + ), + defaultSortOrder: 'ascend' as const + }, + { + title: 'State', + dataIndex: 'state', + key: 'state', + filterMultiple: true, + filters: DatanodeStateList.map(state => ({ text: state, value: state })), + onFilter: (value, record: Datanode) => record.state === value, + render: (text: DatanodeState) => renderDatanodeState(text), + sorter: (a: Datanode, b: Datanode) => a.state.localeCompare(b.state) + }, + { + title: 'Operational State', + dataIndex: 'opState', + key: 'opState', + filterMultiple: true, + filters: DatanodeOpStateList.map(state => ({ text: state, value: state })), + onFilter: (value, record: Datanode) => record.opState === value, + render: (text: DatanodeOpState) => renderDatanodeOpState(text), + sorter: (a: Datanode, b: Datanode) => a.opState.localeCompare(b.opState) + }, + { + title: 'UUID', + dataIndex: 'uuid', + key: 'uuid', + sorter: (a: Datanode, b: Datanode) => a.uuid.localeCompare(b.uuid), + defaultSortOrder: 'ascend' as const, + render: (uuid: string, record: Datanode) => { + return ( + //1. Compare Decommission Api's UUID with all UUID in table and show Decommission Summary + (decommissioningUuids && decommissioningUuids.includes(record.uuid) && record.opState !== 'DECOMMISSIONED') ? + : {uuid} + ); + } + }, + { + title: 'Storage Capacity', + dataIndex: 'storageUsed', + key: 'storageUsed', + sorter: (a: Datanode, b: Datanode) => a.storageRemaining - b.storageRemaining, + render: (_: string, record: Datanode) => ( + + ) + }, + { + title: 'Last Heartbeat', + dataIndex: 'lastHeartbeat', + key: 'lastHeartbeat', + sorter: (a: Datanode, b: Datanode) => moment(a.lastHeartbeat).unix() - moment(b.lastHeartbeat).unix(), + render: (heartbeat: number) => { + return heartbeat > 0 ? getTimeDiffFromTimestamp(heartbeat) : 'NA'; + } + }, + { + title: 'Pipeline ID(s)', + dataIndex: 'pipelines', + key: 'pipelines', + render: (pipelines: Pipeline[], record: Datanode) => { + const renderPipelineIds = (pipelineIds: Pipeline[]) => { + return pipelineIds?.map((pipeline: any, index: any) => ( +
+ + {pipeline.pipelineID} +
+ )) + } + + return ( + + {pipelines.length} pipelines + + ); + } + }, + { + title: () => ( + + Leader Count + + + + + ), + dataIndex: 'leaderCount', + key: 'leaderCount', + sorter: (a: Datanode, b: Datanode) => a.leaderCount - b.leaderCount + }, + { + title: 'Containers', + dataIndex: 'containers', + key: 'containers', + sorter: (a: Datanode, b: Datanode) => a.containers - b.containers + }, + { + title: () => ( + + Open Container + + + + + ), + dataIndex: 'openContainers', + key: 'openContainers', + sorter: (a: Datanode, b: Datanode) => a.openContainers - b.openContainers + }, + { + title: 'Version', + dataIndex: 'version', + key: 'version', + sorter: (a: Datanode, b: Datanode) => a.version.localeCompare(b.version), + defaultSortOrder: 'ascend' as const + }, + { + title: 'Setup Time', + dataIndex: 'setupTime', + key: 'setupTime', + sorter: (a: Datanode, b: Datanode) => a.setupTime - b.setupTime, + render: (uptime: number) => { + return uptime > 0 ? moment(uptime).format('ll LTS') : 'NA'; + } + }, + { + title: 'Revision', + dataIndex: 'revision', + key: 'revision', + sorter: (a: Datanode, b: Datanode) => a.revision.localeCompare(b.revision), + defaultSortOrder: 'ascend' as const + }, + { + title: 'Build Date', + dataIndex: 'buildDate', + key: 'buildDate', + sorter: (a: Datanode, b: Datanode) => a.buildDate.localeCompare(b.buildDate), + defaultSortOrder: 'ascend' as const + }, + { + title: 'Network Location', + dataIndex: 'networkLocation', + key: 'networkLocation', + sorter: (a: Datanode, b: Datanode) => a.networkLocation.localeCompare(b.networkLocation), + defaultSortOrder: 'ascend' as const + } +]; + +const DatanodesTable: React.FC = ({ + data, + handleSelectionChange, + decommissionUuids, + selectedColumns, + loading = false, + selectedRows = [], + searchColumn = 'hostname', + searchTerm = '' +}) => { + + function filterSelectedColumns() { + const columnKeys = selectedColumns.map((column) => column.value); + return COLUMNS.filter( + (column) => columnKeys.indexOf(column.key as string) >= 0 + ); + } + + function getFilteredData(data: Datanode[]) { + return data?.filter( + (datanode: Datanode) => datanode[searchColumn].includes(searchTerm) + ) ?? []; + } + + function isSelectable(record: Datanode) { + // Disable checkbox for any datanode which is not DEAD to prevent removal + return record.state !== 'DEAD' && true; + } + + const paginationConfig: TablePaginationConfig = { + showTotal: (total: number, range) => ( + `${range[0]}-${range[1]} of ${total} Datanodes` + ), + showSizeChanger: true + }; + + const rowSelection: TableRowSelection = { + selectedRowKeys: selectedRows, + onChange: (rows: React.Key[]) => { handleSelectionChange(rows) }, + getCheckboxProps: (record: Datanode) => ({ + disabled: isSelectable(record) + }), + }; + + React.useEffect(() => { + decommissioningUuids = decommissionUuids; + }, [decommissionUuids]) + + return ( +
+
+ + ); +} + +export default DatanodesTable; diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/buckets/buckets.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/buckets/buckets.tsx index 12af3bb4281..1e2de307b17 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/buckets/buckets.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/buckets/buckets.tsx @@ -16,7 +16,7 @@ * limitations under the License. */ -import React, { useEffect, useState } from 'react'; +import React, { useEffect, useRef, useState } from 'react'; import moment from 'moment'; import { ValueType } from 'react-select'; import { useLocation } from 'react-router-dom'; @@ -28,7 +28,7 @@ import MultiSelect from '@/v2/components/select/multiSelect'; import SingleSelect, { Option } from '@/v2/components/select/singleSelect'; import { AutoReloadHelper } from '@/utils/autoReloadHelper'; -import { AxiosGetHelper } from "@/utils/axiosRequestHelper"; +import { AxiosGetHelper, cancelRequests } from "@/utils/axiosRequestHelper"; import { showDataFetchError } from '@/utils/common'; import { useDebounce } from '@/v2/hooks/debounce.hook'; @@ -111,7 +111,7 @@ function getFilteredBuckets( const Buckets: React.FC<{}> = () => { - let cancelSignal: AbortController; + const cancelSignal = useRef(); const [state, setState] = useState({ totalCount: 0, @@ -170,11 +170,11 @@ const Buckets: React.FC<{}> = () => { setLoading(true); const { request, controller } = AxiosGetHelper( '/api/v1/buckets', - cancelSignal, + cancelSignal.current, '', { limit: selectedLimit.value } ); - cancelSignal = controller; + cancelSignal.current = controller; request.then(response => { const bucketsResponse: BucketResponse = response.data; const totalCount = bucketsResponse.totalCount; @@ -230,7 +230,7 @@ const Buckets: React.FC<{}> = () => { }); } - let autoReloadHelper: AutoReloadHelper = new AutoReloadHelper(loadData); + const autoReloadHelper: AutoReloadHelper = new AutoReloadHelper(loadData); useEffect(() => { autoReloadHelper.startPolling(); @@ -245,7 +245,7 @@ const Buckets: React.FC<{}> = () => { return (() => { autoReloadHelper.stopPolling(); - cancelSignal && cancelSignal.abort(); + cancelRequests([cancelSignal.current!]); }) }, []); diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/datanodes/datanodes.less b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/datanodes/datanodes.less new file mode 100644 index 00000000000..a1eee385210 --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/datanodes/datanodes.less @@ -0,0 +1,52 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +.content-div { + min-height: unset; + + .table-header-section { + display: flex; + justify-content: space-between; + align-items: center; + + .table-filter-section { + font-size: 14px; + font-weight: normal; + display: flex; + column-gap: 8px; + padding: 16px 8px; + align-items: center; + } + } + + .tag-block { + display: flex; + column-gap: 8px; + padding: 0px 8px 16px 8px; + } +} + +.pipeline-container-v2 { + padding: 6px 0px; +} + +.decommission-summary-result { + .ant-result-title { + font-size: 15px; + } +} \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/datanodes/datanodes.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/datanodes/datanodes.tsx new file mode 100644 index 00000000000..13022dc05e0 --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/pages/datanodes/datanodes.tsx @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import React, { + useEffect, + useRef, + useState +} from 'react'; +import moment from 'moment'; +import { AxiosError } from 'axios'; +import { + Button, + Modal +} from 'antd'; +import { + DeleteOutlined, + WarningFilled, +} from '@ant-design/icons'; +import { ValueType } from 'react-select'; + +import Search from '@/v2/components/search/search'; +import MultiSelect, { Option } from '@/v2/components/select/multiSelect'; +import DatanodesTable, { COLUMNS } from '@/v2/components/tables/datanodesTable'; +import AutoReloadPanel from '@/components/autoReloadPanel/autoReloadPanel'; +import { showDataFetchError } from '@/utils/common'; +import { AutoReloadHelper } from '@/utils/autoReloadHelper'; +import { + AxiosGetHelper, + AxiosPutHelper, + cancelRequests +} from '@/utils/axiosRequestHelper'; + +import { useDebounce } from '@/v2/hooks/debounce.hook'; +import { + Datanode, + DatanodeDecomissionInfo, + DatanodeResponse, + DatanodesResponse, + DatanodesState +} from '@/v2/types/datanode.types'; + +import './datanodes.less' + + +const defaultColumns = COLUMNS.map(column => ({ + label: (typeof column.title === 'string') + ? 
column.title + : (column.title as Function)().props.children[0], + value: column.key as string +})); + +const SearchableColumnOpts = [{ + label: 'Hostname', + value: 'hostname' +}, { + label: 'UUID', + value: 'uuid' +}, { + label: 'Version', + value: 'version' +}]; + +let decommissionUuids: string | string[] = []; +const COLUMN_UPDATE_DECOMMISSIONING = 'DECOMMISSIONING'; + +const Datanodes: React.FC<{}> = () => { + + const cancelSignal = useRef(); + const cancelDecommissionSignal = useRef(); + + const [state, setState] = useState({ + lastUpdated: 0, + columnOptions: defaultColumns, + dataSource: [] + }); + const [loading, setLoading] = useState(false); + const [selectedColumns, setSelectedColumns] = useState(defaultColumns); + const [selectedRows, setSelectedRows] = useState([]); + const [searchTerm, setSearchTerm] = useState(''); + const [searchColumn, setSearchColumn] = useState<'hostname' | 'uuid' | 'version' | 'revision'>('hostname'); + const [modalOpen, setModalOpen] = useState(false); + + const debouncedSearch = useDebounce(searchTerm, 300); + + function handleColumnChange(selected: ValueType) { + setSelectedColumns(selected as Option[]); + } + + async function loadDecommisionAPI() { + decommissionUuids = []; + const { request, controller } = await AxiosGetHelper( + '/api/v1/datanodes/decommission/info', + cancelDecommissionSignal.current + ); + cancelDecommissionSignal.current = controller; + return request + }; + + async function loadDataNodeAPI() { + const { request, controller } = await AxiosGetHelper( + '/api/v1/datanodes', + cancelSignal.current + ); + cancelSignal.current = controller; + return request; + }; + + async function removeDatanode(selectedRowKeys: string[]) { + setLoading(true); + const { request, controller } = await AxiosPutHelper( + '/api/v1/datanodes/remove', + selectedRowKeys, + cancelSignal.current + ); + cancelSignal.current = controller; + request.then(() => { + loadData(); + }).catch((error) => { + showDataFetchError(error.toString()); + }).finally(() => { + setLoading(false); + setSelectedRows([]); + }); + } + + const loadData = async () => { + setLoading(true); + // Need to call decommission API on each interval to get updated status + // before datanode API call to compare UUID's + // update 'Operation State' column in table manually before rendering + try { + let decomissionResponse = await loadDecommisionAPI(); + decommissionUuids = decomissionResponse.data?.DatanodesDecommissionInfo?.map( + (item: DatanodeDecomissionInfo) => item.datanodeDetails.uuid + ); + } catch (error) { + decommissionUuids = []; + showDataFetchError((error as AxiosError).toString()); + } + + try { + const datanodesAPIResponse = await loadDataNodeAPI(); + const datanodesResponse: DatanodesResponse = datanodesAPIResponse.data; + const datanodes: DatanodeResponse[] = datanodesResponse.datanodes; + const dataSource: Datanode[] = datanodes?.map( + (datanode) => ({ + hostname: datanode.hostname, + uuid: datanode.uuid, + state: datanode.state, + opState: (decommissionUuids?.includes(datanode.uuid) && datanode.opState !== 'DECOMMISSIONED') + ? 
COLUMN_UPDATE_DECOMMISSIONING + : datanode.opState, + lastHeartbeat: datanode.lastHeartbeat, + storageUsed: datanode.storageReport.used, + storageTotal: datanode.storageReport.capacity, + storageCommitted: datanode.storageReport.committed, + storageRemaining: datanode.storageReport.remaining, + pipelines: datanode.pipelines, + containers: datanode.containers, + openContainers: datanode.openContainers, + leaderCount: datanode.leaderCount, + version: datanode.version, + setupTime: datanode.setupTime, + revision: datanode.revision, + buildDate: datanode.buildDate, + networkLocation: datanode.networkLocation + }) + ); + setLoading(false); + setState({ + ...state, + dataSource: dataSource, + lastUpdated: Number(moment()) + }); + } catch (error) { + setLoading(false); + showDataFetchError((error as AxiosError).toString()) + } + } + + const autoReloadHelper: AutoReloadHelper = new AutoReloadHelper(loadData); + + useEffect(() => { + autoReloadHelper.startPolling(); + loadData(); + + return (() => { + autoReloadHelper.stopPolling(); + cancelRequests([ + cancelSignal.current!, + cancelDecommissionSignal.current! + ]); + }); + }, []); + + function handleSelectionChange(rows: React.Key[]) { + setSelectedRows(rows); + } + + function handleModalOk() { + setModalOpen(false); + removeDatanode(selectedRows as string[]) + }; + + function handleModalCancel() { + setModalOpen(false); + setSelectedRows([]); + }; + + const { dataSource, lastUpdated, columnOptions } = state; + + return ( + <> +
+ Datanodes + +
+
+
+
+
+ { }} + fixedColumn='hostname' + columnLength={columnOptions.length} /> + {selectedRows.length > 0 && + + } +
+ ) => setSearchTerm(e.target.value) + } + onChange={(value) => { + setSearchTerm(''); + setSearchColumn(value as 'hostname' | 'uuid' | 'version' | 'revision') + }} /> +
+ +
+
+ +
+ + Stop Tracking Datanode
+ Are you sure you want Recon to stop tracking the selected {selectedRows.length} datanode(s)?
+ + ); +} + +export default Datanodes; \ No newline at end of file diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/routes-v2.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/routes-v2.tsx index 37ec3964bbb..0aea9e80402 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/routes-v2.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/routes-v2.tsx @@ -20,6 +20,7 @@ import { lazy } from 'react'; const Overview = lazy(() => import('@/v2/pages/overview/overview')); const Volumes = lazy(() => import('@/v2/pages/volumes/volumes')) const Buckets = lazy(() => import('@/v2/pages/buckets/buckets')); +const Datanodes = lazy(() => import('@/v2/pages/datanodes/datanodes')); const Pipelines = lazy(() => import('@/v2/pages/pipelines/pipelines')); export const routesV2 = [ @@ -35,6 +36,10 @@ export const routesV2 = [ path: '/Buckets', component: Buckets }, + { + path: '/Datanodes', + component: Datanodes + }, { path: '/Pipelines', component: Pipelines diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/datanode.types.ts b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/datanode.types.ts new file mode 100644 index 00000000000..96a37020153 --- /dev/null +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/v2/types/datanode.types.ts @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { Pipeline } from "@/v2/types/pipelines.types"; +import { StorageReport } from "@/v2/types/overview.types"; +import { Option as MultiOption } from "@/v2/components/select/multiSelect"; + +// Corresponds to HddsProtos.NodeState +export const DatanodeStateList = ['HEALTHY', 'STALE', 'DEAD'] as const; +type DatanodeStateType = typeof DatanodeStateList; +export type DatanodeState = DatanodeStateType[number]; + +// Corresponds to HddsProtos.NodeOperationalState +export const DatanodeOpStateList = [ + 'IN_SERVICE', + 'DECOMMISSIONING', + 'DECOMMISSIONED', + 'ENTERING_MAINTENANCE', + 'IN_MAINTENANCE' +] as const; +export type DatanodeOpState = typeof DatanodeOpStateList[number]; + +export type DatanodeResponse = { + hostname: string; + state: DatanodeState; + opState: DatanodeOpState; + lastHeartbeat: string; + storageReport: StorageReport; + pipelines: Pipeline[]; + containers: number; + openContainers: number; + leaderCount: number; + uuid: string; + version: string; + setupTime: number; + revision: string; + buildDate: string; + networkLocation: string; +} + +export type DatanodesResponse = { + totalCount: number; + datanodes: DatanodeResponse[]; +} + +export type Datanode = { + hostname: string; + state: DatanodeState; + opState: DatanodeOpState; + lastHeartbeat: string; + storageUsed: number; + storageTotal: number; + storageRemaining: number; + storageCommitted: number; + pipelines: Pipeline[]; + containers: number; + openContainers: number; + leaderCount: number; + uuid: string; + version: string; + setupTime: number; + revision: string; + buildDate: string; + networkLocation: string; +} + +export type DatanodeDetails = { + uuid: string; +} + +export type DatanodeDecomissionInfo = { + datanodeDetails: DatanodeDetails +} + +export type DatanodesState = { + dataSource: Datanode[]; + lastUpdated: number; + columnOptions: MultiOption[]; +} + +// Datanode Summary endpoint types +type summaryByteString = { + string: string; + bytes: { + validUtf8: boolean; + empty: boolean; + } +} + +type SummaryPort = { + name: string; + value: number; +} + +type SummaryDatanodeDetails = { + level: number; + parent: unknown | null; + cost: number; + uuid: string; + uuidString: string; + ipAddress: string; + hostName: string; + ports: SummaryPort; + certSerialId: null, + version: string | null; + setupTime: number; + revision: string | null; + buildDate: string; + persistedOpState: string; + persistedOpStateExpiryEpochSec: number; + initialVersion: number; + currentVersion: number; + decommissioned: boolean; + maintenance: boolean; + ipAddressAsByteString: summaryByteString; + hostNameAsByteString: summaryByteString; + networkName: string; + networkLocation: string; + networkFullPath: string; + numOfLeaves: number; + networkNameAsByteString: summaryByteString; + networkLocationAsByteString: summaryByteString +} + +type SummaryMetrics = { + decommissionStartTime: string; + numOfUnclosedPipelines: number; + numOfUnderReplicatedContainers: number; + numOfUnclosedContainers: number; +} + +type SummaryContainers = { + UnderReplicated: string[]; + UnClosed: string[]; +} + +export type SummaryData = { + datanodeDetails: SummaryDatanodeDetails; + metrics: SummaryMetrics; + containers: SummaryContainers; +} + +export type DatanodeTableProps = { + loading: boolean; + selectedRows: React.Key[]; + data: Datanode[]; + decommissionUuids: string | string[]; + searchColumn: 'hostname' | 'uuid' | 'version' | 'revision'; + searchTerm: string; + selectedColumns: MultiOption[]; + handleSelectionChange: (arg0: 
React.Key[]) => void; +} From d3899d2cc3912fe47314a6af0c7bceee89114789 Mon Sep 17 00:00:00 2001 From: Chung En Lee Date: Fri, 20 Sep 2024 00:13:13 +0800 Subject: [PATCH 26/43] Clean up files created after TestKeyValueHandlerWithUnhealthyContainer#testMarkContainerUnhealthyInFailedVolume (#7219) --- .../keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java index f0c8a2077ea..1db2d7ff53e 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java @@ -35,12 +35,14 @@ import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.UUID; @@ -72,6 +74,9 @@ public class TestKeyValueHandlerWithUnhealthyContainer { public static final Logger LOG = LoggerFactory.getLogger( TestKeyValueHandlerWithUnhealthyContainer.class); + @TempDir + private File tempDir; + private IncrementalReportSender mockIcrSender; @BeforeEach @@ -220,6 +225,7 @@ public void testMarkContainerUnhealthyInFailedVolume() throws IOException { KeyValueContainerData mockContainerData = mock(KeyValueContainerData.class); HddsVolume mockVolume = mock(HddsVolume.class); when(mockContainerData.getVolume()).thenReturn(mockVolume); + when(mockContainerData.getMetadataPath()).thenReturn(tempDir.getAbsolutePath()); KeyValueContainer container = new KeyValueContainer( mockContainerData, new OzoneConfiguration()); From 2b196d19790f0488b0803346e702dbb055c86e0c Mon Sep 17 00:00:00 2001 From: Abhishek Pal <43001336+devabhishekpal@users.noreply.github.com> Date: Fri, 20 Sep 2024 10:50:32 +0530 Subject: [PATCH 27/43] HDDS-11468. Enabled DB sync button (#7216) --- .../src/components/autoReloadPanel/autoReloadPanel.tsx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/components/autoReloadPanel/autoReloadPanel.tsx b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/components/autoReloadPanel/autoReloadPanel.tsx index 0230d4dd61d..6b2bab246b7 100644 --- a/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/components/autoReloadPanel/autoReloadPanel.tsx +++ b/hadoop-ozone/recon/src/main/resources/webapps/recon/ozone-recon-web/src/components/autoReloadPanel/autoReloadPanel.tsx @@ -76,10 +76,11 @@ class AutoReloadPanel extends React.Component { ); const lastUpdatedDeltaFullText = lastUpdatedOMDBDelta === 0 || lastUpdatedOMDBDelta === undefined || lastUpdatedOMDBFull === 0 || lastUpdatedOMDBFull === undefined ? '' : + //omSyncLoad should be clickable at all times. 
If the response from the DB sync is false, it shows 'DB update is already running'; otherwise it shows 'Triggered sync'. ( <>   | DB Synced at {lastUpdatedDeltaFullToolTip} -