/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.janitor;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.IOException;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import org.apache.hadoop.hbase.CatalogFamilyFormat;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotEnabledException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Mini-cluster test that corrupts hbase:meta in various ways (deleted regions, overlapping
 * regions, bad or empty info:server values, empty regioninfo cells) and checks that a
 * {@link CatalogJanitor} scan reports each problem in its {@link CatalogJanitorReport}.
 */
@Tag(MasterTests.TAG)
@Tag(LargeTests.TAG)
public class TestCatalogJanitorCluster {
  private static final Logger LOG = LoggerFactory.getLogger(TestCatalogJanitorCluster.class);

  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  private static final TableName T1 = TableName.valueOf("t1");
  private static final TableName T2 = TableName.valueOf("t2");
  private static final TableName T3 = TableName.valueOf("t3");
  private static final TableName T4 = TableName.valueOf("t4");
  private static final TableName T5 = TableName.valueOf("t5");
  private static final TableName T6 = TableName.valueOf("t6");
  private static final TableName T7 = TableName.valueOf("t7");

  /**
   * Starts a mini cluster and creates the seven tables the tests operate on. T4 and T5 get
   * explicit split keys so the tests can reason about specific region boundaries.
   */
  @BeforeEach
  public void before() throws Exception {
    TEST_UTIL.startMiniCluster();
    TEST_UTIL.createMultiRegionTable(T1, new byte[][] { HConstants.CATALOG_FAMILY });
    TEST_UTIL.createMultiRegionTable(T2, new byte[][] { HConstants.CATALOG_FAMILY });
    TEST_UTIL.createMultiRegionTable(T3, new byte[][] { HConstants.CATALOG_FAMILY });

    final byte[][] keysForT4 =
      { Bytes.toBytes("aa"), Bytes.toBytes("bb"), Bytes.toBytes("cc"), Bytes.toBytes("dd") };

    TEST_UTIL.createTable(T4, HConstants.CATALOG_FAMILY, keysForT4);

    final byte[][] keysForT5 = { Bytes.toBytes("bb"), Bytes.toBytes("cc"), Bytes.toBytes("dd") };

    TEST_UTIL.createTable(T5, HConstants.CATALOG_FAMILY, keysForT5);

    TEST_UTIL.createMultiRegionTable(T6, new byte[][] { HConstants.CATALOG_FAMILY });
    TEST_UTIL.createMultiRegionTable(T7, new byte[][] { HConstants.CATALOG_FAMILY });
  }

  /** Shuts the mini cluster down after each test. */
  @AfterEach
  public void after() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Fat method where we start with a fat hbase:meta and then gradually intro problems running
   * catalogjanitor for each to ensure it triggers complaint. Do one big method because takes a
   * while to build up the context we need. We create several tables and then make holes,
   * overlaps, add unknown servers and empty out regioninfo columns. Each should up counts in the
   * CatalogJanitor.Report produced.
   */
  @Test
  public void testConsistency() throws IOException {
    CatalogJanitor janitor = TEST_UTIL.getHBaseCluster().getMaster().getCatalogJanitor();
    RegionStateStore regionStateStore =
      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore();
    janitor.scan();
    CatalogJanitorReport report = janitor.getLastReport();
    // Assert no problems.
    assertTrue(report.isEmpty());
    // Now remove first region in table t2 to see if catalogjanitor scan notices.
    List<RegionInfo> t2Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T2);
    regionStateStore.deleteRegion(t2Ris.get(0));
    janitor.scan();
    report = janitor.getLastReport();
    assertFalse(report.isEmpty());
    assertEquals(1, report.getHoles().size());
    assertEquals(RegionInfoBuilder.UNDEFINED.getTable(),
      report.getHoles().get(0).getFirst().getTable());
    assertEquals(T2, report.getHoles().get(0).getSecond().getTable());
    assertEquals(0, report.getOverlaps().size());
    // Next, add overlaps to first row in t3
    List<RegionInfo> t3Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T3);
    RegionInfo ri = t3Ris.get(0);
    RegionInfo newRi1 = RegionInfoBuilder.newBuilder(ri.getTable())
      .setStartKey(incrementRow(ri.getStartKey())).setEndKey(incrementRow(ri.getEndKey())).build();
    Put p1 = MetaTableAccessor.makePutFromRegionInfo(newRi1, EnvironmentEdgeManager.currentTime());
    RegionInfo newRi2 = RegionInfoBuilder.newBuilder(newRi1.getTable())
      .setStartKey(incrementRow(newRi1.getStartKey())).setEndKey(incrementRow(newRi1.getEndKey()))
      .build();
    Put p2 = MetaTableAccessor.makePutFromRegionInfo(newRi2, EnvironmentEdgeManager.currentTime());
    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(p1, p2));
    janitor.scan();
    report = janitor.getLastReport();
    assertFalse(report.isEmpty());
    // We added two overlaps so total three.
    assertEquals(3, report.getOverlaps().size());
    // Assert hole is still there.
    assertEquals(1, report.getHoles().size());
    // Assert other attributes are empty still.
    assertTrue(report.getEmptyRegionInfo().isEmpty());
    assertTrue(report.getUnknownServers().isEmpty());
    // Now make bad server in t1.
    List<RegionInfo> t1Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T1);
    RegionInfo t1Ri1 = t1Ris.get(1);
    Put pServer = new Put(t1Ri1.getRegionName());
    pServer.addColumn(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(0),
      Bytes.toBytes("bad.server.example.org:1234"));
    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(pServer));
    janitor.scan();
    report = janitor.getLastReport();
    assertFalse(report.isEmpty());
    assertEquals(1, report.getUnknownServers().size());
    // Test what happens if we blow away an info:server row, if it is null. Should not kill CJ
    // and we should log the row that had the problem. HBASE-23192. Just make sure we don't
    // break if this happens.
    LOG.info("Make null info:server");
    Put emptyInfoServerPut = new Put(t1Ri1.getRegionName());
    emptyInfoServerPut.addColumn(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(0),
      Bytes.toBytes(""));
    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(emptyInfoServerPut));
    janitor.scan();
    report = janitor.getLastReport();
    assertEquals(0, report.getUnknownServers().size());
    // Make an empty regioninfo in t1.
    RegionInfo t1Ri2 = t1Ris.get(2);
    Put pEmptyRI = new Put(t1Ri2.getRegionName());
    pEmptyRI.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
      HConstants.EMPTY_BYTE_ARRAY);
    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(pEmptyRI));
    janitor.scan();
    report = janitor.getLastReport();
    assertEquals(1, report.getEmptyRegionInfo().size());

    int holesReported = report.getHoles().size();
    int overlapsReported = report.getOverlaps().size();

    // Test the case for T4
    // r1: [aa, bb), r2: [cc, dd), r3: [a, cc)
    // Make sure only overlaps and no holes are reported.
    List<RegionInfo> t4Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T4);
    // delete the region [bb, cc)
    regionStateStore.deleteRegion(t4Ris.get(2));

    // add a new region [a, cc)
    RegionInfo newRiT4 = RegionInfoBuilder.newBuilder(T4).setStartKey(Bytes.toBytes("a"))
      .setEndKey(Bytes.toBytes("cc")).build();
    Put putForT4 =
      MetaTableAccessor.makePutFromRegionInfo(newRiT4, EnvironmentEdgeManager.currentTime());
    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(putForT4));

    janitor.scan();
    report = janitor.getLastReport();
    // there is no new hole reported, 2 more overLaps added.
    assertEquals(holesReported, report.getHoles().size());
    assertEquals(overlapsReported + 2, report.getOverlaps().size());

    holesReported = report.getHoles().size();
    overlapsReported = report.getOverlaps().size();

    // Test the case for T5
    // r0: [, bb), r1: [a, g), r2: [bb, cc), r3: [dd, )
    // Make sure only overlaps and no holes are reported.
    List<RegionInfo> t5Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T5);
    // delete the region [cc, dd)
    regionStateStore.deleteRegion(t5Ris.get(2));

    // add a new region [a, g)
    RegionInfo newRiT5 = RegionInfoBuilder.newBuilder(T5).setStartKey(Bytes.toBytes("a"))
      .setEndKey(Bytes.toBytes("g")).build();
    Put putForT5 =
      MetaTableAccessor.makePutFromRegionInfo(newRiT5, EnvironmentEdgeManager.currentTime());
    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(putForT5));

    janitor.scan();
    report = janitor.getLastReport();
    // there is no new hole reported, 3 more overLaps added.
    // ([a, g), [, bb)), ([a, g), [bb, cc)), ([a, g), [dd, ))
    assertEquals(holesReported, report.getHoles().size());
    assertEquals(overlapsReported + 3, report.getOverlaps().size());
  }

  /**
   * Returns a row whose last byte is one greater than the last byte of {@code row}; an empty row
   * yields the single byte {@code '0'}. The result is always a fresh array: the argument is
   * typically the key array handed out by a {@link RegionInfo}, so it must not be modified in
   * place.
   * @param row the row to increment; not modified
   * @return a new array holding the incremented row
   */
  private static byte[] incrementRow(byte[] row) {
    if (row.length == 0) {
      return new byte[] { '0' };
    }
    byte[] incremented = Arrays.copyOf(row, row.length);
    incremented[incremented.length - 1] = (byte) (((int) incremented[incremented.length - 1]) + 1);
    return incremented;
  }

  /**
   * Deletes first, last and middle regions from several tables, checks that each shows up as a
   * hole in the janitor report, and finally checks that {@link MetaFixer} repairs them all.
   */
  @Test
  public void testHoles() throws IOException, InterruptedException {
    CatalogJanitor janitor = TEST_UTIL.getHBaseCluster().getMaster().getCatalogJanitor();

    CatalogJanitorReport report = janitor.getLastReport();
    // Assert no problems.
    assertTrue(report.isEmpty());
    // Verify start and end region holes
    verifyCornerHoles(janitor, T1);
    // Verify start and end region holes
    verifyCornerHoles(janitor, T2);
    // Verify start and end region holes when next table is disabled see: HBASE-27560
    disableTable(T7);
    verifyCornerHoles(janitor, T6);
    verifyMiddleHole(janitor);
    // Verify that MetaFixer is able to fix these holes
    fixHoles(janitor);
  }

  /**
   * Checks the total number of holes accumulated by {@link #testHoles()}, runs {@link MetaFixer}
   * and checks that a subsequent scan reports no holes.
   */
  private void fixHoles(CatalogJanitor janitor) throws IOException {
    MetaFixer metaFixer = new MetaFixer(TEST_UTIL.getHBaseCluster().getMaster());
    janitor.scan();
    CatalogJanitorReport report = janitor.getLastReport();
    // Verify total number of holes, 2 in t1, t2, t6 each and one in t3
    assertEquals(7, report.getHoles().size(), "Number of holes are not matching");
    metaFixer.fix();
    janitor.scan();
    report = janitor.getLastReport();
    assertEquals(0, report.getHoles().size(), "Holes are not fixed");
  }

  /**
   * Deletes the T3 region containing row "bbb" and checks that the reported hole is bounded by
   * the regions containing the empty row and row "ccc".
   */
  private void verifyMiddleHole(CatalogJanitor janitor) throws IOException {
    // Verify middle holes
    RegionInfo firstRegion = getRegionInfo(T3, HConstants.EMPTY_START_ROW);
    RegionInfo secondRegion = getRegionInfo(T3, Bytes.toBytes("bbb"));
    RegionInfo thirdRegion = getRegionInfo(T3, Bytes.toBytes("ccc"));
    TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore()
      .deleteRegion(secondRegion);
    List<Pair<RegionInfo, RegionInfo>> holes = getHoles(janitor, T3);
    // Fail with a readable message instead of an exception from the element access below.
    assertEquals(1, holes.size(), "Expected exactly one hole in " + T3);
    Pair<RegionInfo, RegionInfo> hole = holes.get(0);
    assertEquals(T3, hole.getFirst().getTable());
    assertEquals(T3, hole.getSecond().getTable());
    assertEquals(firstRegion.getEncodedName(), hole.getFirst().getEncodedName());
    assertEquals(thirdRegion.getEncodedName(), hole.getSecond().getEncodedName());
  }

  /**
   * Deletes the first and then the last region of {@code tableName} and checks that each
   * deletion is reported as a hole with {@link RegionInfoBuilder#UNDEFINED} on the open side.
   */
  private void verifyCornerHoles(CatalogJanitor janitor, TableName tableName) throws IOException {
    RegionStateStore regionStateStore =
      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore();
    RegionInfo firstRegion = getRegionInfo(tableName, HConstants.EMPTY_START_ROW);
    RegionInfo secondRegion = getRegionInfo(tableName, Bytes.toBytes("bbb"));
    regionStateStore.deleteRegion(firstRegion);
    List<Pair<RegionInfo, RegionInfo>> holes = getHoles(janitor, tableName);

    assertEquals(1, holes.size());
    Pair<RegionInfo, RegionInfo> hole = holes.get(0);
    assertEquals(RegionInfoBuilder.UNDEFINED.getTable(), hole.getFirst().getTable());
    assertEquals(tableName, hole.getSecond().getTable());
    assertEquals(secondRegion.getEncodedName(), hole.getSecond().getEncodedName());

    RegionInfo lastRegion = getRegionInfo(tableName, Bytes.toBytes("zzz"));
    RegionInfo secondLastRegion = getRegionInfo(tableName, Bytes.toBytes("yyy"));
    regionStateStore.deleteRegion(lastRegion);
    holes = getHoles(janitor, tableName);
    assertEquals(2, holes.size());
    hole = holes.get(1);
    assertEquals(secondLastRegion.getEncodedName(), hole.getFirst().getEncodedName());
    assertEquals(RegionInfoBuilder.UNDEFINED.getTable(), hole.getSecond().getTable());
  }

  /**
   * Runs a janitor scan and returns, in report order, the holes that have {@code tableName} on
   * either side. Asserts that the report is not empty.
   */
  private List<Pair<RegionInfo, RegionInfo>> getHoles(CatalogJanitor janitor, TableName tableName)
    throws IOException {
    janitor.scan();
    CatalogJanitorReport lastReport = janitor.getLastReport();
    assertFalse(lastReport.isEmpty());
    List<Pair<RegionInfo, RegionInfo>> holes = new LinkedList<>();
    for (Pair<RegionInfo, RegionInfo> hole : lastReport.getHoles()) {
      if (
        hole.getFirst().getTable().equals(tableName)
          || hole.getSecond().getTable().equals(tableName)
      ) {
        holes.add(hole);
      }
    }
    return holes;
  }

  /**
   * Looks up the region of {@code tableName} that contains {@code row}. The
   * {@link RegionLocator} is closed before returning so it is not leaked on every lookup.
   */
  private RegionInfo getRegionInfo(TableName tableName, byte[] row) throws IOException {
    try (RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
      RegionInfo regionInfo = locator.getRegionLocation(row).getRegion();
      assertNotNull(regionInfo);
      return regionInfo;
    }
  }

  /**
   * Disables {@code tableName} and waits up to 30 seconds for it to be disabled; a table that
   * is already disabled is ignored.
   */
  private void disableTable(TableName tableName) throws IOException, InterruptedException {
    try {
      TEST_UTIL.getAdmin().disableTable(tableName);
      TEST_UTIL.waitTableDisabled(tableName, 30000);
    } catch (TableNotEnabledException e) {
      LOG.debug("Table: {} already disabled, ignore.", tableName);
    }
  }
}