From 3674f5a4a8897cc6f4e1dfaaa25e2400f8be237e Mon Sep 17 00:00:00 2001 From: Umeshkumar9414 <9414umeshkumar@gmail.com> Date: Tue, 5 May 2026 13:34:19 +0530 Subject: [PATCH 1/3] HBASE-30142 Resolve NPE while running recoverUnknown command because null RegionLocation --- .../org/apache/hadoop/hbase/master/ServerManager.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 09184975a33c..8f7cf4488349 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -980,11 +980,14 @@ public synchronized boolean isServerDead(ServerName serverName) { /** * Check if a server is unknown. A server can be online, or known to be dead, or unknown to this * manager (i.e, not online, not known to be dead either; it is simply not tracked by the master - * any more, for example, a very old previous instance). + * any more, for example, a very old previous instance). A null serverName should not be + * considered unknown. We set regionLocation null before finding the assign candidate (in-between + * region transition) or while marking it OFFLINE/FAILED_OPEN For more info regarding null-check + * in this refer HBASE-30142 */ public boolean isServerUnknown(ServerName serverName) { - return serverName == null - || (!onlineServers.containsKey(serverName) && !deadservers.isDeadServer(serverName)); + return serverName != null + && (!onlineServers.containsKey(serverName) && !deadservers.isDeadServer(serverName)); } public void shutdownCluster() { From 03da5377c0dd09eb5c5216ad00dd9d9260f303ca Mon Sep 17 00:00:00 2001 From: Umeshkumar9414 <9414umeshkumar@gmail.com> Date: Tue, 12 May 2026 03:38:25 +0530 Subject: [PATCH 2/3] HBASE-30142 added JUnit Generated-by:us.anthropic.claude-opus-4-7 --- ...tRecoverUnknownWithNullRegionLocation.java | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRecoverUnknownWithNullRegionLocation.java diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRecoverUnknownWithNullRegionLocation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRecoverUnknownWithNullRegionLocation.java new file mode 100644 index 000000000000..abf7def5c838 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRecoverUnknownWithNullRegionLocation.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.List; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.master.assignment.AssignmentManager; +import org.apache.hadoop.hbase.master.assignment.RegionStateNode; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos; + +/** + * Test for HBASE-30142. Verifies that {@code scheduleSCPsForUnknownServers} (i.e. the + * recoverUnknown HBCK command) does not throw a NullPointerException when a region's + * {@code regionLocation} is null while its state is something other than OFFLINE. + *

+ * Before HBASE-30142, {@link ServerManager#isServerUnknown(org.apache.hadoop.hbase.ServerName)} + * returned {@code true} for a {@code null} server name, so a region whose location had been + * temporarily nulled (e.g. between region transitions or while marking it FAILED_OPEN / + * ABNORMALLY_CLOSED) was treated as living on an "unknown" server. The downstream call to + * {@code shouldSubmitSCP(null)} then dereferenced the null and crashed. + */ +@Tag(MasterTests.TAG) +@Tag(MediumTests.TAG) +public class TestRecoverUnknownWithNullRegionLocation { + + private static HBaseTestingUtil UTIL; + private static final TableName TABLE_NAME = + TableName.valueOf("TestRecoverUnknownWithNullRegionLocation"); + private static final byte[] FAMILY = Bytes.toBytes("cf"); + + @BeforeAll + public static void setUpBeforeClass() throws Exception { + UTIL = new HBaseTestingUtil(); + UTIL.startMiniCluster(2); + } + + @AfterAll + public static void tearDownAfterClass() throws Exception { + if (UTIL != null) { + UTIL.shutdownMiniCluster(); + } + } + + /** + * Drive the region into ABNORMALLY_CLOSED through the + * {@link AssignmentManager#regionClosedAbnormally(RegionStateNode)} path (the same path SCP + * uses). That method also nulls {@code regionLocation} while leaving state non-OFFLINE. Then call + * {@code scheduleSCPsForUnknownServers} and assert no NPE. + */ + @Test + public void testRecoverUnknownWithAbnormallyClosedRegion() throws Exception { + try (Table ignored = UTIL.createTable(TABLE_NAME, FAMILY)) { + UTIL.waitTableAvailable(TABLE_NAME); + + HMaster master = UTIL.getMiniHBaseCluster().getMaster(); + AssignmentManager am = master.getAssignmentManager(); + List regions = am.getTableRegions(TABLE_NAME, true); + assertFalse(regions.isEmpty(), "expected at least one region"); + RegionStateNode node = am.getRegionStates().getRegionStateNode(regions.get(0)); + assertNotNull(node, "expected a region state node for the test region"); + + node.lock(); + try { + am.regionClosedAbnormally(node).get(); + } finally { + node.unlock(); + } + + assertTrue(node.isInState(RegionState.State.ABNORMALLY_CLOSED), + "regionClosedAbnormally must move state to ABNORMALLY_CLOSED"); + assertNull(node.getRegionLocation(), "regionClosedAbnormally must null out the location"); + + MasterProtos.ScheduleSCPsForUnknownServersResponse response = + master.getMasterRpcServices().scheduleSCPsForUnknownServers(null, + MasterProtos.ScheduleSCPsForUnknownServersRequest.newBuilder().build()); + assertEquals(0, response.getPidCount(), + "no SCPs should be scheduled for an ABNORMALLY_CLOSED region with null location"); + } + } +} From 4488d62bcdc248e2d3ca0042e9ae9da19f65afd2 Mon Sep 17 00:00:00 2001 From: Umeshkumar9414 <9414umeshkumar@gmail.com> Date: Tue, 26 May 2026 00:55:53 +0530 Subject: [PATCH 3/3] HBASE-30142 review comments --- .../hadoop/hbase/master/ServerManager.java | 8 +++---- ...tRecoverUnknownWithNullRegionLocation.java | 22 +++++++++---------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 8f7cf4488349..1ea500fe36a3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -980,10 +980,10 @@ public synchronized boolean isServerDead(ServerName serverName) { /** * Check if a server is unknown. A server can be online, or known to be dead, or unknown to this * manager (i.e, not online, not known to be dead either; it is simply not tracked by the master - * any more, for example, a very old previous instance). A null serverName should not be - * considered unknown. We set regionLocation null before finding the assign candidate (in-between - * region transition) or while marking it OFFLINE/FAILED_OPEN For more info regarding null-check - * in this refer HBASE-30142 + * anymore, for example, a very old previous instance). A serverName with value null should not be + * considered unknown. We set value of regionLocation to null just before finding the assign + * candidate (in-between region transition) or while marking it OFFLINE/FAILED_OPEN. Refer + * HBASE-30142. */ public boolean isServerUnknown(ServerName serverName) { return serverName != null diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRecoverUnknownWithNullRegionLocation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRecoverUnknownWithNullRegionLocation.java index abf7def5c838..080feec06e42 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRecoverUnknownWithNullRegionLocation.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRecoverUnknownWithNullRegionLocation.java @@ -31,7 +31,7 @@ import org.apache.hadoop.hbase.master.assignment.AssignmentManager; import org.apache.hadoop.hbase.master.assignment.RegionStateNode; import org.apache.hadoop.hbase.testclassification.MasterTests; -import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.util.Bytes; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; @@ -52,7 +52,7 @@ * {@code shouldSubmitSCP(null)} then dereferenced the null and crashed. */ @Tag(MasterTests.TAG) -@Tag(MediumTests.TAG) +@Tag(SmallTests.TAG) public class TestRecoverUnknownWithNullRegionLocation { private static HBaseTestingUtil UTIL; @@ -94,19 +94,17 @@ public void testRecoverUnknownWithAbnormallyClosedRegion() throws Exception { node.lock(); try { am.regionClosedAbnormally(node).get(); + assertTrue(node.isInState(RegionState.State.ABNORMALLY_CLOSED), + "regionClosedAbnormally must move state to ABNORMALLY_CLOSED"); + assertNull(node.getRegionLocation(), "regionClosedAbnormally must null out the location"); + MasterProtos.ScheduleSCPsForUnknownServersResponse response = + master.getMasterRpcServices().scheduleSCPsForUnknownServers(null, + MasterProtos.ScheduleSCPsForUnknownServersRequest.newBuilder().build()); + assertEquals(0, response.getPidCount(), + "no SCPs should be scheduled for an ABNORMALLY_CLOSED region with null location"); } finally { node.unlock(); } - - assertTrue(node.isInState(RegionState.State.ABNORMALLY_CLOSED), - "regionClosedAbnormally must move state to ABNORMALLY_CLOSED"); - assertNull(node.getRegionLocation(), "regionClosedAbnormally must null out the location"); - - MasterProtos.ScheduleSCPsForUnknownServersResponse response = - master.getMasterRpcServices().scheduleSCPsForUnknownServers(null, - MasterProtos.ScheduleSCPsForUnknownServersRequest.newBuilder().build()); - assertEquals(0, response.getPidCount(), - "no SCPs should be scheduled for an ABNORMALLY_CLOSED region with null location"); } } }