diff --git a/.scripts/upload_new_boost_version.sh b/.scripts/upload_new_boost_version.sh
new file mode 100755
index 000000000..0b333be7a
--- /dev/null
+++ b/.scripts/upload_new_boost_version.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+# This script mirrors the boostorg/boost source bundle for the given version to Nexus.
+# The boost source bundle is architecture independent.
+# It contains its own build system (b2) which is also built from source before building boost itself, so we don't need to worry about architecture specific builds.
+# This artifact is used by the hadoop/boost local image.
+#
+# Usage: upload_new_boost_version.sh <version> <nexus-user>
+# The Nexus password is prompted for interactively and is not echoed.
+
+set -euo pipefail
+
+VERSION=${1:?"Missing version number argument (arg 1)"}
+NEXUS_USER=${2:?"Missing Nexus username argument (arg 2)"}
+
+read -r -s -p "Nexus Password: " NEXUS_PASSWORD
+echo ""
+
+# https://stackoverflow.com/questions/4632028/how-to-create-a-temporary-directory
+# Find the directory name of the script
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+# the temp directory used, within $DIR
+WORK_DIR=$(mktemp -d -p "$DIR")
+
+# check if tmp dir was created
+if [[ ! "$WORK_DIR" || ! -d "$WORK_DIR" ]]; then
+  echo "Could not create temp dir" >&2
+  exit 1
+fi
+
+# deletes the temp directory
+function cleanup {
+  # ":?" aborts the expansion instead of handing rm an empty path
+  rm -rf -- "${WORK_DIR:?}"
+}
+
+# register the cleanup function to run whenever the script exits
+trap cleanup EXIT
+
+cd "$WORK_DIR" || exit
+
+# boost does not currently publish signatures or SBOMs
+BOOST_UNDERSCORE="${VERSION//./_}"
+BOOST_TARBALL="boost_${BOOST_UNDERSCORE}.tar.bz2"
+DOWNLOAD_URL="https://archives.boost.io/release/$VERSION/source/$BOOST_TARBALL"
+
+echo "Downloading boost"
+if ! curl --fail -LsS -O "$DOWNLOAD_URL"; then
+  echo "Failed to download from $DOWNLOAD_URL" >&2
+  exit 1
+fi
+
+FILE_NAME=$(basename "$DOWNLOAD_URL")
+
+# Hand the credentials to curl through a config read from stdin rather than
+# "-u" on the command line, where they may be visible in the process list.
+# Backslashes and double quotes must be escaped inside the quoted config value.
+CURL_USER="${NEXUS_USER}:${NEXUS_PASSWORD}"
+CURL_USER=${CURL_USER//\\/\\\\}
+CURL_USER=${CURL_USER//\"/\\\"}
+
+echo "Uploading boost to Nexus"
+if ! curl --fail -o /dev/null --progress-bar --config - --upload-file "$FILE_NAME" 'https://repo.stackable.tech/repository/packages/boost/' <<<"user = \"${CURL_USER}\""; then
+  echo "Failed to upload boost to Nexus" >&2
+  exit 1
+fi
+
+echo "Successfully uploaded new version of boost ($VERSION) to Nexus"
+echo "https://repo.stackable.tech/service/rest/repository/browse/packages/boost/"
diff --git a/druid/boil-config.toml b/druid/boil-config.toml
index 8910b6818..ba6da3a1d 100644
--- a/druid/boil-config.toml
+++ b/druid/boil-config.toml
@@ -15,7 +15,7 @@ authorizer-version = "0.7.0"
 # https://druid.apache.org/docs/34.0.0/operations/java/
 java-base = "17"
 java-devel = "17"
-"hadoop/hadoop" = "3.4.2"
+"hadoop/hadoop" = "3.4.3"
 
 # Deprecated since 26.3
 [versions."34.0.0".build-arguments]
@@ -26,7 +26,7 @@ authorizer-version = "0.7.0"
 # https://druid.apache.org/docs/35.0.1/operations/java/
 java-base = "21"
 java-devel = "21"
-"hadoop/hadoop" = "3.4.2"
+"hadoop/hadoop" = "3.4.3"
 
 [versions."35.0.1".build-arguments]
 authorizer-version = "0.7.0"
diff --git a/hadoop/boil-config.toml b/hadoop/boil-config.toml
index 4c6ef2a64..e082c3fe2 100644
--- a/hadoop/boil-config.toml
+++ b/hadoop/boil-config.toml
@@ -20,4 +20,24 @@ java-devel = "11"
 [versions."3.4.2".build-arguments]
 async-profiler-version = "2.9"
 jmx-exporter-version = "1.3.0"
-hdfs-utils-version = "0.5.0"
+hdfs-utils-version = "0.6.0"
+
+[versions."3.4.3".local-images]
+"hadoop/hadoop" = "3.4.3"
+java-base = "11"
+java-devel = "11"
+
+[versions."3.4.3".build-arguments]
+async-profiler-version = "2.9"
+jmx-exporter-version = "1.3.0"
+hdfs-utils-version = "0.6.0"
+
+[versions."3.5.0".local-images]
+"hadoop/hadoop" = "3.5.0"
+java-base = "17"
+java-devel = "17"
+
+[versions."3.5.0".build-arguments]
+async-profiler-version = "2.9"
+jmx-exporter-version = "1.3.0"
+hdfs-utils-version = "0.6.0"
diff --git a/hadoop/hadoop/Dockerfile b/hadoop/hadoop/Dockerfile
index a17feec73..fe6429eee 100644
--- a/hadoop/hadoop/Dockerfile
+++ 
b/hadoop/hadoop/Dockerfile @@ -1,11 +1,15 @@ # syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7 # check=error=true +FROM local-image/shared/boost AS boost-builder FROM local-image/java-devel AS hadoop-builder ARG PRODUCT_VERSION ARG RELEASE_VERSION ARG PROTOBUF_VERSION +ARG AWS_JAVA_SDK_BUNDLE_VERSION +ARG AZURE_STORAGE_VERSION +ARG AZURE_KEYVAULT_CORE_VERSION ARG STACKABLE_USER_UID WORKDIR /stackable @@ -14,17 +18,22 @@ COPY --chown=${STACKABLE_USER_UID}:0 shared/protobuf/stackable/patches/patchable COPY --chown=${STACKABLE_USER_UID}:0 shared/protobuf/stackable/patches/${PROTOBUF_VERSION} /stackable/src/shared/protobuf/stackable/patches/${PROTOBUF_VERSION} RUN < 3.21 bundles abseil-cpp and utf8_range as git submodules + git submodule update --init --recursive + cmake -S . -B build -DCMAKE_POSITION_INDEPENDENT_CODE=ON -Dprotobuf_BUILD_TESTS=OFF + cmake --build build --parallel "$(nproc)" + cmake --install build --prefix /opt/protobuf + fi (cd .. 
&& rm -r ${PROTOBUF_VERSION}) EOF @@ -126,3 +143,31 @@ rm -rf /stackable/.m2 # Set correct groups; make sure only required artifacts for the final image are located in /stackable chmod -R g=u /stackable EOF + +RUN < +Date: Tue, 26 May 2026 15:40:40 +0200 +Subject: YARN-11527-Update-node.js + +--- + hadoop-project/pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index 0813904f98a..f837b1f5201 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -236,7 +236,7 @@ + 5.4.0 + 10.4 + 1.0-1 +- v12.22.1 ++ v14.17.0 + v1.22.5 + 1.10.13 + 1.20 diff --git a/hadoop/hadoop/stackable/patches/3.4.3/0002-Allow-overriding-datanode-registration-addresses.patch b/hadoop/hadoop/stackable/patches/3.4.3/0002-Allow-overriding-datanode-registration-addresses.patch new file mode 100644 index 000000000..508dbe02a --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/0002-Allow-overriding-datanode-registration-addresses.patch @@ -0,0 +1,259 @@ +From d1266f7390327e1882f2646ffe595a961b56b592 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:36:20 +0200 +Subject: Allow-overriding-datanode-registration-addresses + +--- + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 9 +++ + .../blockmanagement/DatanodeManager.java | 43 +++++++----- + .../hadoop/hdfs/server/datanode/DNConf.java | 70 +++++++++++++++++++ + .../hadoop/hdfs/server/datanode/DataNode.java | 35 ++++++++-- + 4 files changed, 135 insertions(+), 22 deletions(-) + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +index 6375778e4ea..7d49fbcfaa3 100755 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +@@ -152,6 +152,13 @@ public class 
DFSConfigKeys extends CommonConfigurationKeys { + public static final boolean DFS_DATANODE_DROP_CACHE_BEHIND_READS_DEFAULT = false; + public static final String DFS_DATANODE_USE_DN_HOSTNAME = "dfs.datanode.use.datanode.hostname"; + public static final boolean DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT = false; ++ ++ public static final String DFS_DATANODE_REGISTERED_HOSTNAME = "dfs.datanode.registered.hostname"; ++ public static final String DFS_DATANODE_REGISTERED_DATA_PORT = "dfs.datanode.registered.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTP_PORT = "dfs.datanode.registered.http.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTPS_PORT = "dfs.datanode.registered.https.port"; ++ public static final String DFS_DATANODE_REGISTERED_IPC_PORT = "dfs.datanode.registered.ipc.port"; ++ + public static final String DFS_DATANODE_MAX_LOCKED_MEMORY_KEY = "dfs.datanode.max.locked.memory"; + public static final long DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0; + public static final String DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume"; +@@ -491,6 +498,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { + public static final long DFS_DATANODE_PROCESS_COMMANDS_THRESHOLD_DEFAULT = + TimeUnit.SECONDS.toMillis(2); + ++ public static final String DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY = "dfs.namenode.datanode.registration.unsafe.allow-address-override"; ++ public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT = false; + public static final String DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY = "dfs.namenode.datanode.registration.ip-hostname-check"; + public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT = true; + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +index ebd2fa992e9..c56f254478b 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +@@ -181,6 +181,8 @@ public class DatanodeManager { + private boolean hasClusterEverBeenMultiRack = false; + + private final boolean checkIpHostnameInRegistration; ++ private final boolean allowRegistrationAddressOverride; ++ + /** + * Whether we should tell datanodes what to cache in replies to + * heartbeat messages. +@@ -314,6 +316,11 @@ public class DatanodeManager { + // Block invalidate limit also has some dependency on heartbeat interval. + // Check setBlockInvalidateLimit(). + setBlockInvalidateLimit(configuredBlockInvalidateLimit); ++ this.allowRegistrationAddressOverride = conf.getBoolean( ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY, ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT); ++ LOG.info(DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY ++ + "=" + allowRegistrationAddressOverride); + this.checkIpHostnameInRegistration = conf.getBoolean( + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY, + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT); +@@ -1158,27 +1165,29 @@ public class DatanodeManager { + */ + public void registerDatanode(DatanodeRegistration nodeReg) + throws DisallowedDatanodeException, UnresolvedTopologyException { +- InetAddress dnAddress = Server.getRemoteIp(); +- if (dnAddress != null) { +- // Mostly called inside an RPC, update ip and peer hostname +- String hostname = dnAddress.getHostName(); +- String ip = dnAddress.getHostAddress(); +- if (checkIpHostnameInRegistration && 
!isNameResolved(dnAddress)) { +- // Reject registration of unresolved datanode to prevent performance +- // impact of repetitive DNS lookups later. +- final String message = "hostname cannot be resolved (ip=" +- + ip + ", hostname=" + hostname + ")"; +- LOG.warn("Unresolved datanode registration: " + message); +- throw new DisallowedDatanodeException(nodeReg, message); ++ if (!allowRegistrationAddressOverride) { ++ InetAddress dnAddress = Server.getRemoteIp(); ++ if (dnAddress != null) { ++ // Mostly called inside an RPC, update ip and peer hostname ++ String hostname = dnAddress.getHostName(); ++ String ip = dnAddress.getHostAddress(); ++ if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { ++ // Reject registration of unresolved datanode to prevent performance ++ // impact of repetitive DNS lookups later. ++ final String message = "hostname cannot be resolved (ip=" ++ + ip + ", hostname=" + hostname + ")"; ++ LOG.warn("Unresolved datanode registration: " + message); ++ throw new DisallowedDatanodeException(nodeReg, message); ++ } ++ // update node registration with the ip and hostname from rpc request ++ nodeReg.setIpAddr(ip); ++ nodeReg.setPeerHostName(hostname); + } +- // update node registration with the ip and hostname from rpc request +- nodeReg.setIpAddr(ip); +- nodeReg.setPeerHostName(hostname); + } +- ++ + try { + nodeReg.setExportedKeys(blockManager.getBlockKeys()); +- ++ + // Checks if the node is not on the hosts list. If it is not, then + // it will be disallowed from registering. 
+ if (!hostConfigManager.isIncluded(nodeReg)) { +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +index 21b92db3073..5d3437239ce 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +@@ -101,6 +101,11 @@ public class DNConf { + final boolean syncOnClose; + final boolean encryptDataTransfer; + final boolean connectToDnViaHostname; ++ private final String registeredHostname; ++ private final int registeredDataPort; ++ private final int registeredHttpPort; ++ private final int registeredHttpsPort; ++ private final int registeredIpcPort; + final boolean overwriteDownstreamDerivedQOP; + private final boolean pmemCacheRecoveryEnabled; + +@@ -189,6 +194,11 @@ public class DNConf { + connectToDnViaHostname = getConf().getBoolean( + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME, + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT); ++ registeredHostname = getConf().get(DFSConfigKeys.DFS_DATANODE_REGISTERED_HOSTNAME); ++ registeredDataPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_DATA_PORT, -1); ++ registeredHttpPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTP_PORT, -1); ++ registeredHttpsPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTPS_PORT, -1); ++ registeredIpcPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_IPC_PORT, -1); + this.blockReportInterval = getConf().getLong( + DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, + DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT); +@@ -363,6 +373,66 @@ public class DNConf { + return connectToDnViaHostname; + } + ++ /** ++ * Returns a hostname to register with the cluster instead of the system ++ * hostname. 
++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected hostname. ++ * ++ * @return null if the system hostname should be used, otherwise a hostname ++ */ ++ public String getRegisteredHostname() { ++ return registeredHostname; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * data port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredDataPort() { ++ return registeredDataPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTP port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpPort() { ++ return registeredHttpPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTPS port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpsPort() { ++ return registeredHttpsPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * IPC port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. 
++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredIpcPort() { ++ return registeredIpcPort; ++ } ++ + /** + * Returns socket timeout + * +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +index 956f5bbe519..22ae127d98d 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +@@ -135,6 +135,7 @@ import java.util.HashSet; + import java.util.Iterator; + import java.util.List; + import java.util.Map; ++import java.util.Optional; + import java.util.Map.Entry; + import java.util.Set; + import java.util.UUID; +@@ -2076,11 +2077,35 @@ public class DataNode extends ReconfigurableBase + NodeType.DATA_NODE); + } + +- DatanodeID dnId = new DatanodeID( +- streamingAddr.getAddress().getHostAddress(), hostName, +- storage.getDatanodeUuid(), getXferPort(), getInfoPort(), +- infoSecurePort, getIpcPort()); +- return new DatanodeRegistration(dnId, storageInfo, ++ String registeredHostname = Optional ++ .ofNullable(dnConf.getRegisteredHostname()) ++ .orElseGet(() -> streamingAddr.getAddress().getHostAddress()); ++ int registeredDataPort = dnConf.getRegisteredDataPort(); ++ if (registeredDataPort == -1) { ++ registeredDataPort = getXferPort(); ++ } ++ int registeredHttpPort = dnConf.getRegisteredHttpPort(); ++ if (registeredHttpPort == -1) { ++ registeredHttpPort = getInfoPort(); ++ } ++ int registeredHttpsPort = dnConf.getRegisteredHttpsPort(); ++ if (registeredHttpsPort == -1) { ++ registeredHttpsPort = getInfoSecurePort(); ++ } ++ int registeredIpcPort = dnConf.getRegisteredIpcPort(); ++ if (registeredIpcPort == -1) { ++ registeredIpcPort = getIpcPort(); ++ } ++ ++ DatanodeID dnId = new 
DatanodeID(registeredHostname, ++ registeredHostname, ++ storage.getDatanodeUuid(), ++ registeredDataPort, ++ registeredHttpPort, ++ registeredHttpsPort, ++ registeredIpcPort); ++ ++ return new DatanodeRegistration(dnId, storageInfo, + new ExportedBlockKeys(), VersionInfo.getVersion()); + } + diff --git a/hadoop/hadoop/stackable/patches/3.4.3/0003-Async-profiler-also-grab-itimer-events.patch b/hadoop/hadoop/stackable/patches/3.4.3/0003-Async-profiler-also-grab-itimer-events.patch new file mode 100644 index 000000000..3cbccd3cd --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/0003-Async-profiler-also-grab-itimer-events.patch @@ -0,0 +1,29 @@ +From 8f29f6a32a0685577023398af3e7b2854ad47e96 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:39:20 +0200 +Subject: Async-profiler-also-grab-itimer-events + +--- + .../src/main/java/org/apache/hadoop/http/ProfileServlet.java | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java +index ce532741512..909892ff903 100644 +--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java ++++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java +@@ -76,6 +76,7 @@ import org.apache.hadoop.util.ProcessUtils; + * Following event types are supported (default is 'cpu') (NOTE: not all OS'es support all events) + * // Perf events: + * // cpu ++ * // itimer + * // page-faults + * // context-switches + * // cycles +@@ -118,6 +119,7 @@ public class ProfileServlet extends HttpServlet { + private enum Event { + + CPU("cpu"), ++ ITIMER("itimer"), + ALLOC("alloc"), + LOCK("lock"), + PAGE_FAULTS("page-faults"), diff --git a/hadoop/hadoop/stackable/patches/3.4.3/0004-HDFS-17378-Fix-missing-operationType-for-some-operat.patch 
b/hadoop/hadoop/stackable/patches/3.4.3/0004-HDFS-17378-Fix-missing-operationType-for-some-operat.patch new file mode 100644 index 000000000..1e5dd00b1 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/0004-HDFS-17378-Fix-missing-operationType-for-some-operat.patch @@ -0,0 +1,199 @@ +From a79df04a30b4477372e850a7628bd47dcb665955 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:40:41 +0200 +Subject: HDFS-17378-Fix-missing-operationType-for-some-operat + +--- + .../hdfs/server/namenode/FSNamesystem.java | 41 +++++++++++-------- + 1 file changed, 24 insertions(+), 17 deletions(-) + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +index cfc7f24381f..e9c5fc0da25 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +@@ -2618,15 +2618,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + * @throws IOException + */ + BlockStoragePolicy getStoragePolicy(String src) throws IOException { ++ final String operationName = "getStoragePolicy"; + checkOperation(OperationCategory.READ); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + readLock(); + try { + checkOperation(OperationCategory.READ); + return FSDirAttrOp.getStoragePolicy(dir, pc, blockManager, src); + } finally { +- readUnlock("getStoragePolicy"); ++ readUnlock(operationName); + } + } + +@@ -2646,15 +2647,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + } + + long getPreferredBlockSize(String src) throws IOException { ++ final String operationName = "getPreferredBlockSize"; + 
checkOperation(OperationCategory.READ); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + readLock(); + try { + checkOperation(OperationCategory.READ); + return FSDirAttrOp.getPreferredBlockSize(dir, pc, src); + } finally { +- readUnlock("getPreferredBlockSize"); ++ readUnlock(operationName); + } + } + +@@ -2707,7 +2709,6 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + boolean createParent, short replication, long blockSize, + CryptoProtocolVersion[] supportedVersions, String ecPolicyName, + String storagePolicy, boolean logRetryCache) throws IOException { +- + HdfsFileStatus status; + try { + status = startFileInt(src, permissions, holder, clientMachine, flag, +@@ -2727,6 +2728,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + long blockSize, CryptoProtocolVersion[] supportedVersions, + String ecPolicyName, String storagePolicy, boolean logRetryCache) + throws IOException { ++ final String operationName = "create"; + if (NameNode.stateChangeLog.isDebugEnabled()) { + StringBuilder builder = new StringBuilder(); + builder.append("DIR* NameSystem.startFile: src=").append(src) +@@ -2764,7 +2766,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -2827,7 +2829,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + dir.writeUnlock(); + } + } finally { +- writeUnlock("create", getLockReportInfoSupplier(src, null, stat)); ++ writeUnlock(operationName, getLockReportInfoSupplier(src, null, stat)); + // There might be transactions logged while trying to recover the lease. 
+ // They need to be sync'ed even when an exception was thrown. + if (!skipSync) { +@@ -2856,10 +2858,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + */ + boolean recoverLease(String src, String holder, String clientMachine) + throws IOException { ++ final String operationName = "recoverLease"; + boolean skipSync = false; + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -2880,7 +2883,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + skipSync = true; + throw se; + } finally { +- writeUnlock("recoverLease"); ++ writeUnlock(operationName); + // There might be transactions logged while trying to recover the lease. + // They need to be sync'ed even when an exception was thrown. + if (!skipSync) { +@@ -3096,6 +3099,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + final Set excludes, + final int numAdditionalNodes, final String clientName + ) throws IOException { ++ final String operationName = "getAdditionalDatanode"; + //check if the feature is enabled + dtpReplaceDatanodeOnFailure.checkEnabled(); + +@@ -3107,7 +3111,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + final BlockType blockType; + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + readLock(); + try { + // Changing this operation category to WRITE instead of making getAdditionalDatanode as a +@@ -3133,7 +3137,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + "src=%s, fileId=%d, blk=%s, clientName=%s, clientMachine=%s", + src, fileId, blk, clientName, clientMachine)); + } finally { +- 
readUnlock("getAdditionalDatanode"); ++ readUnlock(operationName); + } + + if (clientnode == null) { +@@ -3155,10 +3159,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + */ + void abandonBlock(ExtendedBlock b, long fileId, String src, String holder) + throws IOException { ++ final String operationName = "abandonBlock"; + NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: {} of file {}", b, src); + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -3167,7 +3172,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + NameNode.stateChangeLog.debug( + "BLOCK* NameSystem.abandonBlock: {} is removed from pendingCreates", b); + } finally { +- writeUnlock("abandonBlock"); ++ writeUnlock(operationName); + } + getEditLog().logSync(); + } +@@ -3221,10 +3226,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + boolean completeFile(final String src, String holder, + ExtendedBlock last, long fileId) + throws IOException { ++ final String operationName = "completeFile"; + boolean success = false; + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -3232,7 +3238,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + success = FSDirWriteFileOp.completeFile(this, pc, src, holder, last, + fileId); + } finally { +- writeUnlock("completeFile"); ++ writeUnlock(operationName); + } + getEditLog().logSync(); + if (success) { +@@ -3666,10 +3672,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + */ + void fsync(String src, long fileId, 
String clientName, long lastBlockLength) + throws IOException { ++ final String operationName = "fsync"; + NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName); + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -3683,7 +3690,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + } + FSDirWriteFileOp.persistBlocks(dir, src, pendingFile, false); + } finally { +- writeUnlock("fsync"); ++ writeUnlock(operationName); + } + getEditLog().logSync(); + } diff --git a/hadoop/hadoop/stackable/patches/3.4.3/0005-Update-CycloneDX-plugin.patch b/hadoop/hadoop/stackable/patches/3.4.3/0005-Update-CycloneDX-plugin.patch new file mode 100644 index 000000000..b479c49c2 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/0005-Update-CycloneDX-plugin.patch @@ -0,0 +1,44 @@ +From a1b7a73b7ebe8ad84ab3f417d2b475dd2b51c192 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:41:18 +0200 +Subject: Update-CycloneDX-plugin + +--- + pom.xml | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/pom.xml b/pom.xml +index d34fb58e22c..d30fd28be1f 100644 +--- a/pom.xml ++++ b/pom.xml +@@ -121,7 +121,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + 4.2.0 + 1.1.1 + 3.10.1 +- 2.7.10 ++ 2.8.0 + + bash + +@@ -773,6 +773,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + org.cyclonedx + cyclonedx-maven-plugin + ${cyclonedx.version} ++ ++ application ++ 1.5 ++ false ++ + + + package +@@ -781,9 +786,6 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + + + +- +- xml +- + + + diff --git a/hadoop/hadoop/stackable/patches/3.4.3/0006-HADOOP-19352.-Hadoop-OSS-Connector-adds-support-for.patch 
b/hadoop/hadoop/stackable/patches/3.4.3/0006-HADOOP-19352.-Hadoop-OSS-Connector-adds-support-for.patch new file mode 100644 index 000000000..1cd05abc0 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/0006-HADOOP-19352.-Hadoop-OSS-Connector-adds-support-for.patch @@ -0,0 +1,245 @@ +From 701aa813622cc8f78ac5c49ba9c4a9d638596657 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:41:46 +0200 +Subject: HADOOP-19352.-Hadoop-OSS-Connector-adds-support-for- + +--- + hadoop-project/pom.xml | 2 +- + hadoop-tools/hadoop-aliyun/pom.xml | 26 +++++ + .../aliyun/oss/AliyunOSSFileSystemStore.java | 16 +++ + .../hadoop/fs/aliyun/oss/Constants.java | 15 +++ + .../fs/aliyun/oss/ITAliyunOSSSignatureV4.java | 98 +++++++++++++++++++ + .../src/test/resources/log4j.properties | 3 + + 6 files changed, 159 insertions(+), 1 deletion(-) + create mode 100644 hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/ITAliyunOSSSignatureV4.java + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index f837b1f5201..f8c7f2388f3 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -1692,7 +1692,7 @@ + + com.aliyun.oss + aliyun-sdk-oss +- 3.13.2 ++ 3.18.1 + + + org.apache.httpcomponents +diff --git a/hadoop-tools/hadoop-aliyun/pom.xml b/hadoop-tools/hadoop-aliyun/pom.xml +index 50d6d4576c5..42fa66976af 100644 +--- a/hadoop-tools/hadoop-aliyun/pom.xml ++++ b/hadoop-tools/hadoop-aliyun/pom.xml +@@ -165,5 +165,31 @@ + test + jar + ++ ++ ++ org.junit.jupiter ++ junit-jupiter-api ++ test ++ ++ ++ org.junit.jupiter ++ junit-jupiter-engine ++ test ++ ++ ++ org.junit.jupiter ++ junit-jupiter-params ++ test ++ ++ ++ org.junit.platform ++ junit-platform-launcher ++ test ++ ++ ++ org.junit.vintage ++ junit-vintage-engine ++ test ++ + + +diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java 
b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java +index ccd5d1ea25c..30da259fd51 100644 +--- a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java ++++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java +@@ -73,6 +73,7 @@ import java.util.List; + import java.util.ListIterator; + import java.util.NoSuchElementException; + import java.util.stream.Collectors; ++import com.aliyun.oss.common.comm.SignVersion; + + import static org.apache.hadoop.fs.aliyun.oss.Constants.*; + +@@ -113,6 +114,16 @@ public class AliyunOSSFileSystemStore { + conf.get(USER_AGENT_PREFIX, USER_AGENT_PREFIX_DEFAULT) + ", Hadoop/" + + VersionInfo.getVersion()); + ++ String region = conf.get(REGION_KEY, ""); ++ String signatureVersion = conf.get(SIGNATURE_VERSION_KEY, SIGNATURE_VERSION_DEFAULT); ++ if ("V4".equalsIgnoreCase(signatureVersion)) { ++ clientConf.setSignatureVersion(SignVersion.V4); ++ if (StringUtils.isEmpty(region)) { ++ LOG.error("Signature version is V4 ,but region is empty."); ++ throw new IOException("SignVersion is V4 but region is empty"); ++ } ++ } ++ + String proxyHost = conf.getTrimmed(PROXY_HOST_KEY, ""); + int proxyPort = conf.getInt(PROXY_PORT_KEY, -1); + if (StringUtils.isNotEmpty(proxyHost)) { +@@ -171,6 +182,11 @@ public class AliyunOSSFileSystemStore { + statistics.incrementWriteOps(1); + } + ++ if (StringUtils.isNotEmpty(region)) { ++ ossClient.setRegion(region); ++ LOG.debug("ossClient setRegion {}", region); ++ } ++ + maxKeys = conf.getInt(MAX_PAGING_KEYS_KEY, MAX_PAGING_KEYS_DEFAULT); + int listVersion = conf.getInt(LIST_VERSION, DEFAULT_LIST_VERSION); + if (listVersion < 1 || listVersion > 2) { +diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java +index 
baeb9199377..176669ed152 100644 +--- a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java ++++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java +@@ -211,4 +211,19 @@ public final class Constants { + public static final String LIST_VERSION = "fs.oss.list.version"; + + public static final int DEFAULT_LIST_VERSION = 2; ++ ++ /** ++ * OSS signature version. ++ */ ++ public static final String SIGNATURE_VERSION_KEY = "fs.oss.signatureversion"; ++ ++ /** ++ * OSS signature version DEFAULT {@value}. ++ */ ++ public static final String SIGNATURE_VERSION_DEFAULT = "V1"; ++ ++ /** ++ * OSS region {@value}. ++ */ ++ public static final String REGION_KEY = "fs.oss.region"; + } +diff --git a/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/ITAliyunOSSSignatureV4.java b/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/ITAliyunOSSSignatureV4.java +new file mode 100644 +index 00000000000..5070f2a5816 +--- /dev/null ++++ b/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/ITAliyunOSSSignatureV4.java +@@ -0,0 +1,98 @@ ++/** ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. 
++ */ ++ ++package org.apache.hadoop.fs.aliyun.oss; ++ ++import org.apache.hadoop.conf.Configuration; ++import org.apache.hadoop.fs.FileStatus; ++import org.apache.hadoop.fs.Path; ++import org.junit.Before; ++import org.junit.Test; ++import org.slf4j.Logger; ++import org.slf4j.LoggerFactory; ++ ++import java.io.IOException; ++import java.net.URI; ++ ++import static org.apache.hadoop.fs.aliyun.oss.Constants.REGION_KEY; ++import static org.apache.hadoop.fs.aliyun.oss.Constants.SIGNATURE_VERSION_KEY; ++import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; ++import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; ++import static org.junit.Assert.*; ++import static org.junit.Assume.assumeNotNull; ++ ++/** ++ * Tests Aliyun OSS system. ++ */ ++public class ITAliyunOSSSignatureV4 { ++ private static final Logger LOG = LoggerFactory.getLogger(ITAliyunOSSSignatureV4.class); ++ private Configuration conf; ++ private URI testURI; ++ private Path testFile = new Path("ITAliyunOSSSignatureV4/atestr"); ++ ++ @Before ++ public void setUp() throws Exception { ++ conf = new Configuration(); ++ String bucketUri = conf.get("test.fs.oss.name"); ++ LOG.debug("bucketUri={}", bucketUri); ++ testURI = URI.create(bucketUri); ++ } ++ ++ @Test ++ public void testV4() throws IOException { ++ conf.set(SIGNATURE_VERSION_KEY, "V4"); ++ conf.set(REGION_KEY, "cn-hongkong"); ++ AliyunOSSFileSystem fs = new AliyunOSSFileSystem(); ++ fs.initialize(testURI, conf); ++ assumeNotNull(fs); ++ ++ createFile(fs, testFile, true, dataset(256, 0, 255)); ++ FileStatus status = fs.getFileStatus(testFile); ++ fs.delete(testFile); ++ fs.close(); ++ } ++ ++ @Test ++ public void testDefaultSignatureVersion() throws IOException { ++ AliyunOSSFileSystem fs = new AliyunOSSFileSystem(); ++ fs.initialize(testURI, conf); ++ assumeNotNull(fs); ++ ++ Path testFile2 = new Path("/test/atestr"); ++ createFile(fs, testFile2, true, dataset(256, 0, 255)); ++ FileStatus status = 
fs.getFileStatus(testFile2); ++ fs.delete(testFile2); ++ fs.close(); ++ } ++ ++ @Test ++ public void testV4WithoutRegion() throws IOException { ++ conf.set(SIGNATURE_VERSION_KEY, "V4"); ++ AliyunOSSFileSystem fs = new AliyunOSSFileSystem(); ++ IOException expectedException = null; ++ try { ++ fs.initialize(testURI, conf); ++ } catch (IOException e) { ++ LOG.warn("use V4 , but do not set region, get exception={}", e); ++ expectedException = e; ++ assertEquals("use V4 , but do not set region", e.getMessage(), ++ "SignVersion is V4 but region is empty"); ++ } ++ assertNotNull(expectedException); ++ } ++} +diff --git a/hadoop-tools/hadoop-aliyun/src/test/resources/log4j.properties b/hadoop-tools/hadoop-aliyun/src/test/resources/log4j.properties +index bb5cbe5ec32..2167f68811a 100644 +--- a/hadoop-tools/hadoop-aliyun/src/test/resources/log4j.properties ++++ b/hadoop-tools/hadoop-aliyun/src/test/resources/log4j.properties +@@ -21,3 +21,6 @@ log4j.threshold=ALL + log4j.appender.stdout=org.apache.log4j.ConsoleAppender + log4j.appender.stdout.layout=org.apache.log4j.PatternLayout + log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n ++ ++# Log all oss classes ++log4j.logger.org.apache.hadoop.fs.aliyun.oss=DEBUG +\ No newline at end of file diff --git a/hadoop/hadoop/stackable/patches/3.4.3/0007-YARN-11873-Update-nodejs-to-LTS-version.patch b/hadoop/hadoop/stackable/patches/3.4.3/0007-YARN-11873-Update-nodejs-to-LTS-version.patch new file mode 100644 index 000000000..05965b1e5 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/0007-YARN-11873-Update-nodejs-to-LTS-version.patch @@ -0,0 +1,22 @@ +From 8177d73d3dd9260f36c8bd349a0b6602e1a8fc2c Mon Sep 17 00:00:00 2001 +From: Sebastian Bernauer +Date: Tue, 26 May 2026 15:43:29 +0200 +Subject: YARN-11873 Update nodejs to LTS version + +--- + hadoop-project/pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index 
f8c7f2388f3..4522eea9fe0 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -236,7 +236,7 @@ + 5.4.0 + 10.4 + 1.0-1 +- v14.17.0 ++ v22.20.0 + v1.22.5 + 1.10.13 + 1.20 diff --git a/hadoop/hadoop/stackable/patches/3.4.3/0008-HDFS-17891-fix-for-hostname-resolution-bug-with-data.patch b/hadoop/hadoop/stackable/patches/3.4.3/0008-HDFS-17891-fix-for-hostname-resolution-bug-with-data.patch new file mode 100644 index 000000000..7e036ad78 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/0008-HDFS-17891-fix-for-hostname-resolution-bug-with-data.patch @@ -0,0 +1,48 @@ +From dff0783ae6c7026515a264bbf0fda137331a2864 Mon Sep 17 00:00:00 2001 +From: Jim Halfpenny +Date: Mon, 16 Mar 2026 09:18:46 +0000 +Subject: HDFS-17891 fix for hostname resolution bug with datanodes + +--- + .../hadoop/hdfs/server/blockmanagement/HostSet.java | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java +index d12e5fbae13..3da753f1cb0 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java +@@ -18,10 +18,11 @@ + package org.apache.hadoop.hdfs.server.blockmanagement; + + +-import org.apache.hadoop.util.Preconditions; + import org.apache.hadoop.thirdparty.com.google.common.collect.HashMultimap; + import org.apache.hadoop.thirdparty.com.google.common.collect.Multimap; + import org.apache.hadoop.thirdparty.com.google.common.collect.UnmodifiableIterator; ++import org.slf4j.Logger; ++import org.slf4j.LoggerFactory; + + import java.net.InetAddress; + import java.net.InetSocketAddress; +@@ -38,6 +39,8 @@ import java.util.Map; + * .getPort() || B.getPort() == 0. 
+ */ + public class HostSet implements Iterable { ++ private static final Logger LOG = LoggerFactory.getLogger(HostSet.class); ++ + // Host -> lists of ports + private final Multimap addrs = HashMultimap.create(); + +@@ -72,7 +75,11 @@ public class HostSet implements Iterable { + } + + void add(InetSocketAddress addr) { +- Preconditions.checkArgument(!addr.isUnresolved()); ++ LOG.debug("Adding address to HostSet: {}", addr); ++ if (addr.isUnresolved()) { ++ LOG.warn("Unresolved address not added to HostSet: {}", addr); ++ return; ++ } + addrs.put(addr.getAddress(), addr.getPort()); + } + diff --git a/hadoop/hadoop/stackable/patches/3.4.3/patchable.toml b/hadoop/hadoop/stackable/patches/3.4.3/patchable.toml new file mode 100644 index 000000000..45b693b8a --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/patchable.toml @@ -0,0 +1,2 @@ +mirror = "https://github.com/stackabletech/hadoop.git" +base = "9d50c6884666e794e45102260a4017bb31802e1b" diff --git a/hadoop/hadoop/stackable/patches/3.5.0/0001-YARN-11527-Update-node.js.patch b/hadoop/hadoop/stackable/patches/3.5.0/0001-YARN-11527-Update-node.js.patch new file mode 100644 index 000000000..14faf2a73 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.5.0/0001-YARN-11527-Update-node.js.patch @@ -0,0 +1,22 @@ +From 151c1c7e4e47849a940c6c60896269f1c7bc11c8 Mon Sep 17 00:00:00 2001 +From: Sebastian Bernauer +Date: Tue, 26 May 2026 15:40:40 +0200 +Subject: YARN-11527-Update-node.js + +--- + hadoop-project/pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index 4b9c22319cb..23657fb6e80 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -236,7 +236,7 @@ + 5.4.0 + 10.4 + 1.0-1 +- v12.22.1 ++ v14.17.0 + v1.22.5 + 1.10.13 + 1.20 diff --git a/hadoop/hadoop/stackable/patches/3.5.0/0002-Allow-overriding-datanode-registration-addresses.patch 
b/hadoop/hadoop/stackable/patches/3.5.0/0002-Allow-overriding-datanode-registration-addresses.patch new file mode 100644 index 000000000..69ce7c696 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.5.0/0002-Allow-overriding-datanode-registration-addresses.patch @@ -0,0 +1,259 @@ +From 7e5fff59ac2e2e73a6ede36fb4618c92ddf71302 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:36:20 +0200 +Subject: Allow-overriding-datanode-registration-addresses + +--- + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 9 +++ + .../blockmanagement/DatanodeManager.java | 43 +++++++----- + .../hadoop/hdfs/server/datanode/DNConf.java | 70 +++++++++++++++++++ + .../hadoop/hdfs/server/datanode/DataNode.java | 35 ++++++++-- + 4 files changed, 135 insertions(+), 22 deletions(-) + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +index 96226f45f6a..2dd29176df7 100755 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +@@ -154,6 +154,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys { + public static final boolean DFS_DATANODE_DROP_CACHE_BEHIND_READS_DEFAULT = false; + public static final String DFS_DATANODE_USE_DN_HOSTNAME = "dfs.datanode.use.datanode.hostname"; + public static final boolean DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT = false; ++ ++ public static final String DFS_DATANODE_REGISTERED_HOSTNAME = "dfs.datanode.registered.hostname"; ++ public static final String DFS_DATANODE_REGISTERED_DATA_PORT = "dfs.datanode.registered.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTP_PORT = "dfs.datanode.registered.http.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTPS_PORT = "dfs.datanode.registered.https.port"; ++ public static final String 
DFS_DATANODE_REGISTERED_IPC_PORT = "dfs.datanode.registered.ipc.port"; ++ + public static final String DFS_DATANODE_MAX_LOCKED_MEMORY_KEY = "dfs.datanode.max.locked.memory"; + public static final long DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0; + public static final String DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume"; +@@ -502,6 +509,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { + public static final long DFS_DATANODE_PROCESS_COMMANDS_THRESHOLD_DEFAULT = + TimeUnit.SECONDS.toMillis(2); + ++ public static final String DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY = "dfs.namenode.datanode.registration.unsafe.allow-address-override"; ++ public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT = false; + public static final String DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY = "dfs.namenode.datanode.registration.ip-hostname-check"; + public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT = true; + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +index 01f1af9624d..4e7c722d03f 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +@@ -182,6 +182,8 @@ public class DatanodeManager { + private boolean hasClusterEverBeenMultiRack = false; + + private final boolean checkIpHostnameInRegistration; ++ private final boolean allowRegistrationAddressOverride; ++ + /** + * Whether we should tell datanodes what to cache in replies to + * heartbeat messages. 
+@@ -317,6 +319,11 @@ public class DatanodeManager { + // Block invalidate limit also has some dependency on heartbeat interval. + // Check setBlockInvalidateLimit(). + setBlockInvalidateLimit(configuredBlockInvalidateLimit); ++ this.allowRegistrationAddressOverride = conf.getBoolean( ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY, ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT); ++ LOG.info(DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY ++ + "=" + allowRegistrationAddressOverride); + this.checkIpHostnameInRegistration = conf.getBoolean( + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY, + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT); +@@ -1184,27 +1191,29 @@ public class DatanodeManager { + */ + public void registerDatanode(DatanodeRegistration nodeReg) + throws DisallowedDatanodeException, UnresolvedTopologyException { +- InetAddress dnAddress = Server.getRemoteIp(); +- if (dnAddress != null) { +- // Mostly called inside an RPC, update ip and peer hostname +- String hostname = dnAddress.getHostName(); +- String ip = dnAddress.getHostAddress(); +- if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { +- // Reject registration of unresolved datanode to prevent performance +- // impact of repetitive DNS lookups later. 
+- final String message = "hostname cannot be resolved (ip=" +- + ip + ", hostname=" + hostname + ")"; +- LOG.warn("Unresolved datanode registration: " + message); +- throw new DisallowedDatanodeException(nodeReg, message); ++ if (!allowRegistrationAddressOverride) { ++ InetAddress dnAddress = Server.getRemoteIp(); ++ if (dnAddress != null) { ++ // Mostly called inside an RPC, update ip and peer hostname ++ String hostname = dnAddress.getHostName(); ++ String ip = dnAddress.getHostAddress(); ++ if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { ++ // Reject registration of unresolved datanode to prevent performance ++ // impact of repetitive DNS lookups later. ++ final String message = "hostname cannot be resolved (ip=" ++ + ip + ", hostname=" + hostname + ")"; ++ LOG.warn("Unresolved datanode registration: " + message); ++ throw new DisallowedDatanodeException(nodeReg, message); ++ } ++ // update node registration with the ip and hostname from rpc request ++ nodeReg.setIpAddr(ip); ++ nodeReg.setPeerHostName(hostname); + } +- // update node registration with the ip and hostname from rpc request +- nodeReg.setIpAddr(ip); +- nodeReg.setPeerHostName(hostname); + } +- ++ + try { + nodeReg.setExportedKeys(blockManager.getBlockKeys()); +- ++ + // Checks if the node is not on the hosts list. If it is not, then + // it will be disallowed from registering. 
+ if (!hostConfigManager.isIncluded(nodeReg)) { +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +index 21b92db3073..5d3437239ce 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +@@ -101,6 +101,11 @@ public class DNConf { + final boolean syncOnClose; + final boolean encryptDataTransfer; + final boolean connectToDnViaHostname; ++ private final String registeredHostname; ++ private final int registeredDataPort; ++ private final int registeredHttpPort; ++ private final int registeredHttpsPort; ++ private final int registeredIpcPort; + final boolean overwriteDownstreamDerivedQOP; + private final boolean pmemCacheRecoveryEnabled; + +@@ -189,6 +194,11 @@ public class DNConf { + connectToDnViaHostname = getConf().getBoolean( + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME, + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT); ++ registeredHostname = getConf().get(DFSConfigKeys.DFS_DATANODE_REGISTERED_HOSTNAME); ++ registeredDataPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_DATA_PORT, -1); ++ registeredHttpPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTP_PORT, -1); ++ registeredHttpsPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTPS_PORT, -1); ++ registeredIpcPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_IPC_PORT, -1); + this.blockReportInterval = getConf().getLong( + DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, + DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT); +@@ -363,6 +373,66 @@ public class DNConf { + return connectToDnViaHostname; + } + ++ /** ++ * Returns a hostname to register with the cluster instead of the system ++ * hostname. 
++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected hostname. ++ * ++ * @return null if the system hostname should be used, otherwise a hostname ++ */ ++ public String getRegisteredHostname() { ++ return registeredHostname; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * data port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredDataPort() { ++ return registeredDataPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTP port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpPort() { ++ return registeredHttpPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTPS port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpsPort() { ++ return registeredHttpsPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * IPC port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. 
++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredIpcPort() { ++ return registeredIpcPort; ++ } ++ + /** + * Returns socket timeout + * +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +index 3a1b1e07f36..6d4c356dfb6 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +@@ -135,6 +135,7 @@ import java.util.HashSet; + import java.util.Iterator; + import java.util.List; + import java.util.Map; ++import java.util.Optional; + import java.util.Map.Entry; + import java.util.Set; + import java.util.UUID; +@@ -2077,11 +2078,35 @@ public class DataNode extends ReconfigurableBase + NodeType.DATA_NODE); + } + +- DatanodeID dnId = new DatanodeID( +- streamingAddr.getAddress().getHostAddress(), hostName, +- storage.getDatanodeUuid(), getXferPort(), getInfoPort(), +- infoSecurePort, getIpcPort()); +- return new DatanodeRegistration(dnId, storageInfo, ++ String registeredHostname = Optional ++ .ofNullable(dnConf.getRegisteredHostname()) ++ .orElseGet(() -> streamingAddr.getAddress().getHostAddress()); ++ int registeredDataPort = dnConf.getRegisteredDataPort(); ++ if (registeredDataPort == -1) { ++ registeredDataPort = getXferPort(); ++ } ++ int registeredHttpPort = dnConf.getRegisteredHttpPort(); ++ if (registeredHttpPort == -1) { ++ registeredHttpPort = getInfoPort(); ++ } ++ int registeredHttpsPort = dnConf.getRegisteredHttpsPort(); ++ if (registeredHttpsPort == -1) { ++ registeredHttpsPort = getInfoSecurePort(); ++ } ++ int registeredIpcPort = dnConf.getRegisteredIpcPort(); ++ if (registeredIpcPort == -1) { ++ registeredIpcPort = getIpcPort(); ++ } ++ ++ DatanodeID dnId = new 
DatanodeID(registeredHostname, ++ registeredHostname, ++ storage.getDatanodeUuid(), ++ registeredDataPort, ++ registeredHttpPort, ++ registeredHttpsPort, ++ registeredIpcPort); ++ ++ return new DatanodeRegistration(dnId, storageInfo, + new ExportedBlockKeys(), VersionInfo.getVersion()); + } + diff --git a/hadoop/hadoop/stackable/patches/3.5.0/0003-Async-profiler-also-grab-itimer-events.patch b/hadoop/hadoop/stackable/patches/3.5.0/0003-Async-profiler-also-grab-itimer-events.patch new file mode 100644 index 000000000..25acd19ff --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.5.0/0003-Async-profiler-also-grab-itimer-events.patch @@ -0,0 +1,29 @@ +From eabd444dec4a472f4d4d3bb1a449799c14aea2d5 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:39:20 +0200 +Subject: Async-profiler-also-grab-itimer-events + +--- + .../src/main/java/org/apache/hadoop/http/ProfileServlet.java | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java +index ce532741512..909892ff903 100644 +--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java ++++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java +@@ -76,6 +76,7 @@ import org.apache.hadoop.util.ProcessUtils; + * Following event types are supported (default is 'cpu') (NOTE: not all OS'es support all events) + * // Perf events: + * // cpu ++ * // itimer + * // page-faults + * // context-switches + * // cycles +@@ -118,6 +119,7 @@ public class ProfileServlet extends HttpServlet { + private enum Event { + + CPU("cpu"), ++ ITIMER("itimer"), + ALLOC("alloc"), + LOCK("lock"), + PAGE_FAULTS("page-faults"), diff --git a/hadoop/hadoop/stackable/patches/3.5.0/0004-Update-CycloneDX-plugin.patch 
b/hadoop/hadoop/stackable/patches/3.5.0/0004-Update-CycloneDX-plugin.patch new file mode 100644 index 000000000..073949ac1 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.5.0/0004-Update-CycloneDX-plugin.patch @@ -0,0 +1,35 @@ +From dce9e64ac4ff8906108a1853e6d1003d3fe9a313 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:41:18 +0200 +Subject: Update-CycloneDX-plugin + +--- + pom.xml | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/pom.xml b/pom.xml +index bb9fb62180d..62dd33d6485 100644 +--- a/pom.xml ++++ b/pom.xml +@@ -830,6 +830,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + org.cyclonedx + cyclonedx-maven-plugin + ${cyclonedx.version} ++ ++ application ++ 1.5 ++ false ++ + + + package +@@ -838,9 +843,6 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + + + +- +- xml +- + + + diff --git a/hadoop/hadoop/stackable/patches/3.5.0/0005-YARN-11873-Update-nodejs-to-LTS-version.patch b/hadoop/hadoop/stackable/patches/3.5.0/0005-YARN-11873-Update-nodejs-to-LTS-version.patch new file mode 100644 index 000000000..ae977e796 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.5.0/0005-YARN-11873-Update-nodejs-to-LTS-version.patch @@ -0,0 +1,22 @@ +From a8543f42f9cb3d88b7c030df1870de809a07949b Mon Sep 17 00:00:00 2001 +From: Sebastian Bernauer +Date: Tue, 26 May 2026 15:43:29 +0200 +Subject: YARN-11873 Update nodejs to LTS version + +--- + hadoop-project/pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index 23657fb6e80..0880f5acdb5 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -236,7 +236,7 @@ + 5.4.0 + 10.4 + 1.0-1 +- v14.17.0 ++ v22.20.0 + v1.22.5 + 1.10.13 + 1.20 diff --git a/hadoop/hadoop/stackable/patches/3.5.0/0006-HDFS-17891-fix-for-hostname-resolution-bug-with-data.patch 
b/hadoop/hadoop/stackable/patches/3.5.0/0006-HDFS-17891-fix-for-hostname-resolution-bug-with-data.patch new file mode 100644 index 000000000..e74826088 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.5.0/0006-HDFS-17891-fix-for-hostname-resolution-bug-with-data.patch @@ -0,0 +1,48 @@ +From 065f4c5ccbd462ee04fb7a63fb38026c7c7d0628 Mon Sep 17 00:00:00 2001 +From: Jim Halfpenny +Date: Mon, 16 Mar 2026 09:18:46 +0000 +Subject: HDFS-17891 fix for hostname resolution bug with datanodes + +--- + .../hadoop/hdfs/server/blockmanagement/HostSet.java | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java +index d12e5fbae13..3da753f1cb0 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java +@@ -18,10 +18,11 @@ + package org.apache.hadoop.hdfs.server.blockmanagement; + + +-import org.apache.hadoop.util.Preconditions; + import org.apache.hadoop.thirdparty.com.google.common.collect.HashMultimap; + import org.apache.hadoop.thirdparty.com.google.common.collect.Multimap; + import org.apache.hadoop.thirdparty.com.google.common.collect.UnmodifiableIterator; ++import org.slf4j.Logger; ++import org.slf4j.LoggerFactory; + + import java.net.InetAddress; + import java.net.InetSocketAddress; +@@ -38,6 +39,8 @@ import java.util.Map; + * .getPort() || B.getPort() == 0. 
+ */ + public class HostSet implements Iterable { ++ private static final Logger LOG = LoggerFactory.getLogger(HostSet.class); ++ + // Host -> lists of ports + private final Multimap addrs = HashMultimap.create(); + +@@ -72,7 +75,11 @@ public class HostSet implements Iterable { + } + + void add(InetSocketAddress addr) { +- Preconditions.checkArgument(!addr.isUnresolved()); ++ LOG.debug("Adding address to HostSet: {}", addr); ++ if (addr.isUnresolved()) { ++ LOG.warn("Unresolved address not added to HostSet: {}", addr); ++ return; ++ } + addrs.put(addr.getAddress(), addr.getPort()); + } + diff --git a/hadoop/hadoop/stackable/patches/3.5.0/patchable.toml b/hadoop/hadoop/stackable/patches/3.5.0/patchable.toml new file mode 100644 index 000000000..972e228e7 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.5.0/patchable.toml @@ -0,0 +1,2 @@ +mirror = "https://github.com/stackabletech/hadoop.git" +base = "dbcc7cd797100e6b32cd84f85b53a5193a5f9af0" diff --git a/hadoop/hdfs-utils/stackable/patches/0.6.0/patchable.toml b/hadoop/hdfs-utils/stackable/patches/0.6.0/patchable.toml new file mode 100644 index 000000000..406dfb637 --- /dev/null +++ b/hadoop/hdfs-utils/stackable/patches/0.6.0/patchable.toml @@ -0,0 +1,2 @@ +# FIXME: Use released commit +base = "b3ebd854f36395cfc3ebbbb71f868f2a6ec7b0fa" diff --git a/hbase/Dockerfile b/hbase/Dockerfile index f954a7304..dcd4655a1 100644 --- a/hbase/Dockerfile +++ b/hbase/Dockerfile @@ -30,15 +30,8 @@ ARG STACKABLE_USER_UID USER ${STACKABLE_USER_UID} WORKDIR /stackable -COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \ - # The artifact name of the AWS bundle has changed between Haddop 3.3.6 and 3.4.1 - # from aws-java-sdk-bundle-*.jar to bundle-*.jar. 
- # See: https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html - # So we try to copy both and if one of them doesn't exist buildx will just ignore it :) - /stackable/hadoop/share/hadoop/tools/lib/bundle-*.jar \ - /stackable/hadoop/share/hadoop/tools/lib/aws-java-sdk-bundle-*.jar \ - /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar \ - /stackable/hadoop/share/hadoop/tools/lib/ +# Copy cloud libraries for s3a:// and abfs:// support +COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop-cloud-libraries/*.jar /stackable/hadoop/share/hadoop/tools/lib/ COPY --chown=${STACKABLE_USER_UID}:0 hbase/hbase/stackable/bin/export-snapshot-to-s3.env /stackable/bin/ diff --git a/hbase/boil-config.toml b/hbase/boil-config.toml index 66d1af206..2e5c06b00 100644 --- a/hbase/boil-config.toml +++ b/hbase/boil-config.toml @@ -6,7 +6,7 @@ "hbase/hbase-operator-tools" = "1.3.0-hbase2.6.3" "hbase/phoenix" = "5.2.1-hbase2.6.3" "hbase/hbase-opa-authorizer" = "0.3.0" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "11" java-devel = "11" @@ -19,7 +19,7 @@ delete-caches = "true" "hbase/hbase-operator-tools" = "1.3.0-hbase2.6.4" "hbase/phoenix" = "5.3.0-hbase2.6.4" "hbase/hbase-opa-authorizer" = "0.3.0" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "11" java-devel = "11" diff --git a/hbase/hbase-operator-tools/boil-config.toml b/hbase/hbase-operator-tools/boil-config.toml index ebde2a084..26d5b2381 100644 --- a/hbase/hbase-operator-tools/boil-config.toml +++ b/hbase/hbase-operator-tools/boil-config.toml @@ -1,5 +1,5 @@ [versions."1.3.0-hbase2.6.3".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" "hbase/hbase" = "2.6.3" "java-devel" = "11" @@ -11,7 +11,7 @@ delete-caches = "true" # Note: The next Hbase version should use 1.4.0 if it is released. # At the time of writing, the latest commit is b22878f. 
[versions."1.3.0-hbase2.6.4".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" "hbase/hbase" = "2.6.4" "java-devel" = "11" diff --git a/hbase/hbase/boil-config.toml b/hbase/hbase/boil-config.toml index 85a715450..af8073a05 100644 --- a/hbase/hbase/boil-config.toml +++ b/hbase/hbase/boil-config.toml @@ -1,5 +1,5 @@ [versions."2.6.3".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "11" java-devel = "11" @@ -8,7 +8,7 @@ async-profiler-version = "2.9" delete-caches = "true" [versions."2.6.4".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "11" java-devel = "11" diff --git a/hbase/phoenix/boil-config.toml b/hbase/phoenix/boil-config.toml index 3470920fc..76ca067c3 100644 --- a/hbase/phoenix/boil-config.toml +++ b/hbase/phoenix/boil-config.toml @@ -1,6 +1,6 @@ [versions."5.2.1-hbase2.6.3".local-images] "hbase/hbase" = "2.6.3" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-devel = "11" [versions."5.2.1-hbase2.6.3".build-arguments] @@ -10,7 +10,7 @@ delete-caches = "true" [versions."5.3.0-hbase2.6.4".local-images] "hbase/hbase" = "2.6.4" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-devel = "11" [versions."5.3.0-hbase2.6.4".build-arguments] diff --git a/hive/Dockerfile b/hive/Dockerfile index cf114407b..0986c3b66 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -18,9 +18,6 @@ ARG HADOOP_HADOOP_VERSION # Reassign the arg to `HADOOP_VERSION` for better readability. ENV HADOOP_VERSION=${HADOOP_HADOOP_VERSION} ARG JMX_EXPORTER_VERSION -ARG AWS_JAVA_SDK_BUNDLE_VERSION -ARG AZURE_STORAGE_VERSION -ARG AZURE_KEYVAULT_CORE_VERSION ARG STACKABLE_USER_UID # Setting this to anything other than "true" will keep the cache folders around (e.g. for Maven, NPM etc.) @@ -64,8 +61,8 @@ tar -czf /stackable/hive-${NEW_VERSION}-src.tar.gz . 
if [[ "${PRODUCT_VERSION}" == "3.1.3" ]] ; then mvn \ - clean package \ - -DskipTests \ + clean package \ + -DskipTests \ --projects standalone-metastore mv standalone-metastore/target/apache-hive-metastore-${NEW_VERSION}-bin/apache-hive-metastore-${NEW_VERSION}-bin /stackable mv standalone-metastore/target/bom.json /stackable/apache-hive-metastore-${NEW_VERSION}-bin/apache-hive-metastore-${NEW_VERSION}.cdx.json @@ -73,9 +70,9 @@ elif [[ "${PRODUCT_VERSION}" == 4.0.* ]]; then ( # https://issues.apache.org/jira/browse/HIVE-20451 switched the metastore server packaging starting with 4.0.0 mvn \ - clean package \ - -DskipTests \ - -Dhadoop.version=${HADOOP_VERSION}-stackable${RELEASE_VERSION} + clean package \ + -DskipTests \ + -Dhadoop.version=${HADOOP_VERSION}-stackable${RELEASE_VERSION} # We only seem to get a .tar.gz archive, so let's extract that to the correct location tar --extract --directory=/stackable -f standalone-metastore/metastore-server/target/apache-hive-standalone-metastore-server-${NEW_VERSION}-bin.tar.gz @@ -87,10 +84,10 @@ elif [[ "${PRODUCT_VERSION}" == 4.0.* ]]; then else # Starting with 4.1.0 the build process changed again in https://github.com/apache/hive/pull/5936 (HIVE-29062) mvn \ - clean package \ - -Dhadoop.version=${HADOOP_VERSION}-stackable${RELEASE_VERSION} \ - -DskipTests \ - -Pdist + clean package \ + -Dhadoop.version=${HADOOP_VERSION}-stackable${RELEASE_VERSION} \ + -DskipTests \ + -Pdist # Looks like we can not filter the projects using "--projects standalone-metastore/metastore-server --also-make", # as this does not build a *.tar.gz @@ -103,6 +100,9 @@ fi rm -rf "$BUILD_SRC_DIR" EOF +# Copy cloud libraries for s3a:// and abfs:// support +COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop-cloud-libraries/*.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/ + RUN < below cp 
/stackable/patched-libs/maven/org/apache/hadoop/hadoop-mapreduce-client-core/${HADOOP_VERSION}-stackable${RELEASE_VERSION}/hadoop-mapreduce-client-core-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/ -# The next two sections for S3 and Azure use hardcoded version numbers on purpose instead of wildcards -# This way the build will fail should one of the files not be available anymore in a later Hadoop version! - -# Add S3 Support for Hive (support for s3a://) -cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/hadoop-aws-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/ - -# According to https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html, the jar filename has changed from -# aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar to bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar. 
In future, you might need to do: -if [[ "${PRODUCT_VERSION}" == "3.1.3" || "${PRODUCT_VERSION}" == 4.0.* ]]; then -cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/ -else -cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/ -fi - -# Add Azure ABFS support (support for abfs://) -cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/hadoop-azure-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar /stackable/apache-hive-metastore-${NEW_VERSION}-bin/lib/ -cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE_VERSION}.jar /stackable/apache-hive-metastore-${NEW_VERSION}-bin/lib/ -cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE_VERSION}.jar /stackable/apache-hive-metastore-${NEW_VERSION}-bin/lib/ - # We're removing these to make the intermediate layer smaller # This can be necessary even though it's only a builder image because the GitHub Action Runners only have very limited space available # and we are sometimes running into errors because we're out of space. 
diff --git a/hive/boil-config.toml b/hive/boil-config.toml index 6731797dc..b2694f47b 100644 --- a/hive/boil-config.toml +++ b/hive/boil-config.toml @@ -11,10 +11,6 @@ java-devel = "8" [versions."3.1.3".build-arguments] jmx-exporter-version = "1.3.0" -# Keep consistent with the dependency from Hadoop: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6 -aws-java-sdk-bundle-version = "1.12.367" -azure-storage-version = "7.0.1" -azure-keyvault-core-version = "1.0.0" [versions."4.0.0".local-images] # Hive 4 must be built with Java 8 (according to GitHub README) but seems to run on Java 11 @@ -26,10 +22,6 @@ java-devel = "8" [versions."4.0.0".build-arguments] jmx-exporter-version = "1.3.0" -# Keep consistent with the dependency from Hadoop: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6 -aws-java-sdk-bundle-version = "1.12.367" -azure-storage-version = "7.0.1" -azure-keyvault-core-version = "1.0.0" [versions."4.0.1".local-images] # Hive 4.0 must be built with Java 8 (according to GitHub README) but seems to run on Java 11 @@ -41,41 +33,25 @@ java-devel = "8" [versions."4.0.1".build-arguments] jmx-exporter-version = "1.3.0" -# Keep consistent with the dependency from Hadoop: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6 -aws-java-sdk-bundle-version = "1.12.367" -azure-storage-version = "7.0.1" -azure-keyvault-core-version = "1.0.0" [versions."4.1.0".local-images] # Hive 4.1 requires Java 17 (according to GitHub README) java-base = "17" java-devel = "17" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" # hive-metastore-opa-authorizer from: https://github.com/boschglobal/hive-metastore-opa-authorizer -"hive/hive-metastore-opa-authorizer" = "v1.0.0-hive-4.1.0-hadoop-3.4.2" +"hive/hive-metastore-opa-authorizer" = "v1.0.0-hive-4.1.0-hadoop-3.4.3" [versions."4.1.0".build-arguments] jmx-exporter-version = "1.3.0" -# Keep consistent with the dependency from hadoop-aws: 
https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.2 -aws-java-sdk-bundle-version = "2.29.52" -# Keep consistent with the dependency from hadoop-azure: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.2 -azure-storage-version = "7.0.1" -# Keep consistent with the dependency from azure-storage: https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 -azure-keyvault-core-version = "1.0.0" [versions."4.2.0".local-images] # Hive 4.2 requires Java 21 (according to GitHub README) java-base = "21" java-devel = "21" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" # hive-metastore-opa-authorizer from: https://github.com/boschglobal/hive-metastore-opa-authorizer -"hive/hive-metastore-opa-authorizer" = "v1.0.0-hive-4.2.0-hadoop-3.4.2" +"hive/hive-metastore-opa-authorizer" = "v1.0.0-hive-4.2.0-hadoop-3.4.3" [versions."4.2.0".build-arguments] jmx-exporter-version = "1.3.0" -# Keep consistent with the dependency from hadoop-aws: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.2 -aws-java-sdk-bundle-version = "2.29.52" -# Keep consistent with the dependency from hadoop-azure: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.2 -azure-storage-version = "7.0.1" -# Keep consistent with the dependency from azure-storage: https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 -azure-keyvault-core-version = "1.0.0" diff --git a/hive/hive-metastore-opa-authorizer/boil-config.toml b/hive/hive-metastore-opa-authorizer/boil-config.toml index a2eb55fcd..2808be73d 100644 --- a/hive/hive-metastore-opa-authorizer/boil-config.toml +++ b/hive/hive-metastore-opa-authorizer/boil-config.toml @@ -25,20 +25,20 @@ authorizer-version = "v1.0.0" hive-version = "4.0.1" delete-caches = "true" -[versions."v1.0.0-hive-4.1.0-hadoop-3.4.2".local-images] +[versions."v1.0.0-hive-4.1.0-hadoop-3.4.3".local-images] "java-devel" = "17" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" 
-[versions."v1.0.0-hive-4.1.0-hadoop-3.4.2".build-arguments] +[versions."v1.0.0-hive-4.1.0-hadoop-3.4.3".build-arguments] authorizer-version = "v1.0.0" hive-version = "4.1.0" delete-caches = "true" -[versions."v1.0.0-hive-4.2.0-hadoop-3.4.2".local-images] +[versions."v1.0.0-hive-4.2.0-hadoop-3.4.3".local-images] "java-devel" = "21" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" -[versions."v1.0.0-hive-4.2.0-hadoop-3.4.2".build-arguments] +[versions."v1.0.0-hive-4.2.0-hadoop-3.4.3".build-arguments] authorizer-version = "v1.0.0" hive-version = "4.2.0" delete-caches = "true" diff --git a/shared/boost/Dockerfile b/shared/boost/Dockerfile new file mode 100644 index 000000000..a918042ab --- /dev/null +++ b/shared/boost/Dockerfile @@ -0,0 +1,54 @@ +# syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7 +# check=error=true + +# The boost library is a runtime dependency of the Hadoop native code +# but is not available in the RedHat UBI10 or EPEL10 repositories, so we need to build +# it ourselves. +# Additionally, UBI ships a single supported boost version, which works for some product versions, +# but not for all. +# +# NOTE: We use a published source bundle instead of the patchable workflow +# because boost uses git submodules for its build system +# and patchable doesn't support these. +# The source bundle contains everything needed to build b2 (the build system) +# and boost except for dependencies which are automatically discovered. +# +FROM local-image/stackable-base AS boost-builder + +ARG BOOST_VERSION + +WORKDIR /tmp + +RUN <= 2.34 makes PTHREAD_STACK_MIN a sysconf() call instead of a compile-time +# constant, which breaks the `#if PTHREAD_STACK_MIN > 0` preprocessor check in older +# boost versions (e.g. 1.72.0). Apply the upstream fix: use `#ifdef`. +# See: https://github.com/boostorg/thread/commit/74fb0a26099bc51d717f5f154b37231ce7df3e98 +# +# Can be removed once we drop boost 1.72.0 support. 
+THREAD_DATA="boost/thread/pthread/thread_data.hpp" +if [ -f "${THREAD_DATA}" ]; then + sed -i \ + -e 's/^#if PTHREAD_STACK_MIN > 0$/#ifdef PTHREAD_STACK_MIN/' \ + "${THREAD_DATA}" +fi + +./bootstrap.sh --prefix=/stackable/boost +./b2 -j"$(nproc)" \ + link=shared runtime-link=shared variant=release threading=multi \ + install +rm -rf /tmp/* +EOF diff --git a/shared/boost/boil-config.toml b/shared/boost/boil-config.toml new file mode 100644 index 000000000..02a259508 --- /dev/null +++ b/shared/boost/boil-config.toml @@ -0,0 +1,17 @@ +[versions."1.72.0".local-images] +stackable-base = "1.0.0" + +[versions."1.72.0".build-arguments] +boost-version = "1.72.0" + +[versions."1.78.0".local-images] +stackable-base = "1.0.0" + +[versions."1.78.0".build-arguments] +boost-version = "1.78.0" + +[versions."1.86.0".local-images] +stackable-base = "1.0.0" + +[versions."1.86.0".build-arguments] +boost-version = "1.86.0" diff --git a/shared/protobuf/stackable/patches/3.21.12/patchable.toml b/shared/protobuf/stackable/patches/3.21.12/patchable.toml new file mode 100644 index 000000000..6b38d2fb4 --- /dev/null +++ b/shared/protobuf/stackable/patches/3.21.12/patchable.toml @@ -0,0 +1,2 @@ +mirror = "https://github.com/stackabletech/protobuf.git" +base = "f0dc78d7e6e331b8c6bb2d5283e06aa26883ca7c" diff --git a/shared/protobuf/stackable/patches/3.25.5/patchable.toml b/shared/protobuf/stackable/patches/3.25.5/patchable.toml new file mode 100644 index 000000000..c707e1755 --- /dev/null +++ b/shared/protobuf/stackable/patches/3.25.5/patchable.toml @@ -0,0 +1,2 @@ +mirror = "https://github.com/stackabletech/protobuf.git" +base = "9d0ec0f92b5b5fdeeda11f9dcecc1872ff378014" diff --git a/spark-k8s/Dockerfile.3 b/spark-k8s/Dockerfile.3 index 71a786516..183a79ae9 100644 --- a/spark-k8s/Dockerfile.3 +++ b/spark-k8s/Dockerfile.3 @@ -44,9 +44,6 @@ ARG HADOOP_HADOOP_VERSION # Reassign the arg to `HADOOP_VERSION` for better readability. 
ENV HADOOP_VERSION=${HADOOP_HADOOP_VERSION} ARG HBASE_VERSION -ARG AWS_JAVA_SDK_BUNDLE_VERSION -ARG AZURE_STORAGE_VERSION -ARG AZURE_KEYVAULT_CORE_VERSION ARG JACKSON_DATAFORMAT_XML_VERSION ARG STAX2_API_VERSION ARG WOODSTOX_CORE_VERSION @@ -108,18 +105,8 @@ EOF WORKDIR /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/jars -# Copy modules required for s3a:// -COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \ - /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar \ - /stackable/hadoop/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar \ - ./ - -# Copy modules required for abfs:// -COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \ - /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar \ - /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE_VERSION}.jar \ - /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE_VERSION}.jar \ - ./ +# Copy cloud libraries for s3a:// and abfs:// support +COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop-cloud-libraries/*.jar ./ # Copy the HBase connector including required modules COPY --from=hbase-connectors-builder --chown=${STACKABLE_USER_UID}:0 \ diff --git a/spark-k8s/Dockerfile.4 b/spark-k8s/Dockerfile.4 index bc61acdaf..ba6e40a3c 100644 --- a/spark-k8s/Dockerfile.4 +++ b/spark-k8s/Dockerfile.4 @@ -36,9 +36,6 @@ ARG PRODUCT_VERSION ARG HADOOP_HADOOP_VERSION # Reassign the arg to `HADOOP_VERSION` for better readability. 
ENV HADOOP_VERSION=${HADOOP_HADOOP_VERSION} -ARG AWS_JAVA_SDK_BUNDLE_VERSION -ARG AZURE_STORAGE_VERSION -ARG AZURE_KEYVAULT_CORE_VERSION ARG JACKSON_DATAFORMAT_XML_VERSION ARG STAX2_API_VERSION ARG WOODSTOX_CORE_VERSION @@ -104,18 +101,8 @@ EOF WORKDIR /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/jars -# Copy modules required for s3a:// -COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \ - /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar \ - /stackable/hadoop/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar \ - ./ - -# Copy modules required for abfs:// -COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \ - /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar \ - /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE_VERSION}.jar \ - /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE_VERSION}.jar \ - ./ +# Copy cloud libraries for s3a:// and abfs:// support +COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop-cloud-libraries/*.jar ./ COPY spark-k8s/stackable/jmx /stackable/jmx diff --git a/spark-k8s/boil-config.toml b/spark-k8s/boil-config.toml index 1ebde375c..c724bcf9b 100644 --- a/spark-k8s/boil-config.toml +++ b/spark-k8s/boil-config.toml @@ -5,7 +5,7 @@ containerfile = "Dockerfile.3" [versions."3.5.7".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "17" java-devel = "17" hbase = "2.6.3" @@ -13,12 +13,10 @@ hbase = "2.6.3" [versions."3.5.7".build-arguments] python-version = "3.11" -aws-java-sdk-bundle-version = "2.29.52" # needs to match the version shipped by Hadoop -azure-storage-version = "7.0.1" # needs to match the version shipped by Hadoop -azure-keyvault-core-version = "1.0.0" # needs to match the version shipped by Hadoop -jackson-dataformat-xml-version = "2.15.2" # needs to match the 
version shipped by Spark https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.12/3.5.7 -stax2-api-version = "4.2.1" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 -woodstox-core-version = "6.5.1" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 + +jackson-dataformat-xml-version = "2.15.2" # Needs to match https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.12/3.5.7 +stax2-api-version = "4.2.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 +woodstox-core-version = "6.5.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 jmx-exporter-version = "1.3.0" tini-version = "0.19.0" hbase-connector-version = "1.0.1_3.5.7" @@ -27,7 +25,7 @@ hbase-connector-version = "1.0.1_3.5.7" containerfile = "Dockerfile.3" [versions."3.5.8".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "17" java-devel = "17" hbase = "2.6.3" @@ -35,12 +33,9 @@ hbase = "2.6.3" [versions."3.5.8".build-arguments] python-version = "3.11" -aws-java-sdk-bundle-version = "2.29.52" # needs to match the version shipped by Hadoop -azure-storage-version = "7.0.1" # needs to match the version shipped by Hadoop -azure-keyvault-core-version = "1.0.0" # needs to match the version shipped by Hadoop -jackson-dataformat-xml-version = "2.15.2" # needs to match the version shipped by Spark https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.12/3.5.7 -stax2-api-version = "4.2.1" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 -woodstox-core-version = "6.5.1" # needs to match the jackson version 
https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 +jackson-dataformat-xml-version = "2.15.2" # Needs to match https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.12/3.5.7 +stax2-api-version = "4.2.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 +woodstox-core-version = "6.5.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 jmx-exporter-version = "1.3.0" tini-version = "0.19.0" hbase-connector-version = "1.0.1_3.5.8" @@ -49,18 +44,15 @@ hbase-connector-version = "1.0.1_3.5.8" containerfile = "Dockerfile.4" [versions."4.0.1".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "21" java-devel = "21" [versions."4.0.1".build-arguments] python-version = "3.12" -aws-java-sdk-bundle-version = "2.29.52" # needs to match the version shipped by Hadoop -azure-storage-version = "7.0.1" # needs to match the version shipped by Hadoop -azure-keyvault-core-version = "1.0.0" # needs to match the version shipped by Hadoop -jackson-dataformat-xml-version = "2.18.2" # needs to match the version shipped by Spark https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/4.0.1 -stax2-api-version = "4.2.2" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.18.2 -woodstox-core-version = "7.0.0" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.18.2 +jackson-dataformat-xml-version = "2.18.2" # Needs to match https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/4.0.1 +stax2-api-version = "4.2.2" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.18.2 +woodstox-core-version = "7.0.0" # Needs to match
https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.18.2 jmx-exporter-version = "1.3.0" tini-version = "0.19.0" @@ -68,17 +60,14 @@ tini-version = "0.19.0" containerfile = "Dockerfile.4" [versions."4.1.1".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "21" java-devel = "21" [versions."4.1.1".build-arguments] python-version = "3.12" -aws-java-sdk-bundle-version = "2.29.52" # needs to match the version shipped by Hadoop -azure-storage-version = "7.0.1" # needs to match the version shipped by Hadoop -azure-keyvault-core-version = "1.0.0" # needs to match the version shipped by Hadoop -jackson-dataformat-xml-version = "2.20.0" # needs to match the version shipped by Spark https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/4.1.1/dependencies -stax2-api-version = "4.2.2" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.20.0/dependencies -woodstox-core-version = "7.1.1" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.20.0/dependencies +jackson-dataformat-xml-version = "2.20.0" # Needs to match https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/4.1.1/dependencies +stax2-api-version = "4.2.2" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.20.0/dependencies +woodstox-core-version = "7.1.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.20.0/dependencies jmx-exporter-version = "1.3.0" tini-version = "0.19.0" diff --git a/spark-k8s/hbase-connectors/boil-config.toml b/spark-k8s/hbase-connectors/boil-config.toml index 6cf4c0791..d661b5906 100644 --- a/spark-k8s/hbase-connectors/boil-config.toml +++ b/spark-k8s/hbase-connectors/boil-config.toml @@ -4,7 +4,7 @@ java-devel = "17" [versions."1.0.1_3.5.7".build-arguments]
spark-version = "3.5.7" -hadoop-version = "3.4.2" +hadoop-version = "3.4.3" hbase-version = "2.6.3" [versions."1.0.1_3.5.8".local-images] @@ -12,5 +12,5 @@ java-devel = "17" [versions."1.0.1_3.5.8".build-arguments] spark-version = "3.5.8" -hadoop-version = "3.4.2" +hadoop-version = "3.4.3" hbase-version = "2.6.3"