001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.janitor;
019
020import static org.junit.jupiter.api.Assertions.assertEquals;
021import static org.junit.jupiter.api.Assertions.assertFalse;
022import static org.junit.jupiter.api.Assertions.assertNotNull;
023import static org.junit.jupiter.api.Assertions.assertTrue;
024
025import java.io.IOException;
026import java.util.Arrays;
027import java.util.LinkedList;
028import java.util.List;
029import org.apache.hadoop.hbase.CatalogFamilyFormat;
030import org.apache.hadoop.hbase.HBaseTestingUtil;
031import org.apache.hadoop.hbase.HConstants;
032import org.apache.hadoop.hbase.MetaTableAccessor;
033import org.apache.hadoop.hbase.TableName;
034import org.apache.hadoop.hbase.TableNotEnabledException;
035import org.apache.hadoop.hbase.client.Put;
036import org.apache.hadoop.hbase.client.RegionInfo;
037import org.apache.hadoop.hbase.client.RegionInfoBuilder;
038import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
039import org.apache.hadoop.hbase.testclassification.LargeTests;
040import org.apache.hadoop.hbase.testclassification.MasterTests;
041import org.apache.hadoop.hbase.util.Bytes;
042import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
043import org.apache.hadoop.hbase.util.Pair;
044import org.junit.jupiter.api.AfterEach;
045import org.junit.jupiter.api.BeforeEach;
046import org.junit.jupiter.api.Tag;
047import org.junit.jupiter.api.Test;
048import org.slf4j.Logger;
049import org.slf4j.LoggerFactory;
050
051@Tag(MasterTests.TAG)
052@Tag(LargeTests.TAG)
053public class TestCatalogJanitorCluster {
054  private static final Logger LOG = LoggerFactory.getLogger(TestCatalogJanitorCluster.class);
055
056  private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
057  private static final TableName T1 = TableName.valueOf("t1");
058  private static final TableName T2 = TableName.valueOf("t2");
059  private static final TableName T3 = TableName.valueOf("t3");
060  private static final TableName T4 = TableName.valueOf("t4");
061  private static final TableName T5 = TableName.valueOf("t5");
062  private static final TableName T6 = TableName.valueOf("t6");
063  private static final TableName T7 = TableName.valueOf("t7");
064
065  @BeforeEach
066  public void before() throws Exception {
067    TEST_UTIL.startMiniCluster();
068    TEST_UTIL.createMultiRegionTable(T1, new byte[][] { HConstants.CATALOG_FAMILY });
069    TEST_UTIL.createMultiRegionTable(T2, new byte[][] { HConstants.CATALOG_FAMILY });
070    TEST_UTIL.createMultiRegionTable(T3, new byte[][] { HConstants.CATALOG_FAMILY });
071
072    final byte[][] keysForT4 =
073      { Bytes.toBytes("aa"), Bytes.toBytes("bb"), Bytes.toBytes("cc"), Bytes.toBytes("dd") };
074
075    TEST_UTIL.createTable(T4, HConstants.CATALOG_FAMILY, keysForT4);
076
077    final byte[][] keysForT5 = { Bytes.toBytes("bb"), Bytes.toBytes("cc"), Bytes.toBytes("dd") };
078
079    TEST_UTIL.createTable(T5, HConstants.CATALOG_FAMILY, keysForT5);
080
081    TEST_UTIL.createMultiRegionTable(T6, new byte[][] { HConstants.CATALOG_FAMILY });
082    TEST_UTIL.createMultiRegionTable(T7, new byte[][] { HConstants.CATALOG_FAMILY });
083  }
084
085  @AfterEach
086  public void after() throws Exception {
087    TEST_UTIL.shutdownMiniCluster();
088  }
089
090  /**
091   * Fat method where we start with a fat hbase:meta and then gradually intro problems running
092   * catalogjanitor for each to ensure it triggers complaint. Do one big method because takes a
093   * while to build up the context we need. We create three tables and then make holes, overlaps,
094   * add unknown servers and empty out regioninfo columns. Each should up counts in the
095   * CatalogJanitor.Report produced.
096   */
097  @Test
098  public void testConsistency() throws IOException {
099    CatalogJanitor janitor = TEST_UTIL.getHBaseCluster().getMaster().getCatalogJanitor();
100    RegionStateStore regionStateStore =
101      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore();
102    janitor.scan();
103    CatalogJanitorReport report = janitor.getLastReport();
104    // Assert no problems.
105    assertTrue(report.isEmpty());
106    // Now remove first region in table t2 to see if catalogjanitor scan notices.
107    List<RegionInfo> t2Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T2);
108    regionStateStore.deleteRegion(t2Ris.get(0));
109    janitor.scan();
110    report = janitor.getLastReport();
111    assertFalse(report.isEmpty());
112    assertEquals(1, report.getHoles().size());
113    assertTrue(report.getHoles().get(0).getFirst().getTable()
114      .equals(RegionInfoBuilder.UNDEFINED.getTable()));
115    assertTrue(report.getHoles().get(0).getSecond().getTable().equals(T2));
116    assertEquals(0, report.getOverlaps().size());
117    // Next, add overlaps to first row in t3
118    List<RegionInfo> t3Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T3);
119    RegionInfo ri = t3Ris.get(0);
120    RegionInfo newRi1 = RegionInfoBuilder.newBuilder(ri.getTable())
121      .setStartKey(incrementRow(ri.getStartKey())).setEndKey(incrementRow(ri.getEndKey())).build();
122    Put p1 = MetaTableAccessor.makePutFromRegionInfo(newRi1, EnvironmentEdgeManager.currentTime());
123    RegionInfo newRi2 = RegionInfoBuilder.newBuilder(newRi1.getTable())
124      .setStartKey(incrementRow(newRi1.getStartKey())).setEndKey(incrementRow(newRi1.getEndKey()))
125      .build();
126    Put p2 = MetaTableAccessor.makePutFromRegionInfo(newRi2, EnvironmentEdgeManager.currentTime());
127    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(p1, p2));
128    janitor.scan();
129    report = janitor.getLastReport();
130    assertFalse(report.isEmpty());
131    // We added two overlaps so total three.
132    assertEquals(3, report.getOverlaps().size());
133    // Assert hole is still there.
134    assertEquals(1, report.getHoles().size());
135    // Assert other attributes are empty still.
136    assertTrue(report.getEmptyRegionInfo().isEmpty());
137    assertTrue(report.getUnknownServers().isEmpty());
138    // Now make bad server in t1.
139    List<RegionInfo> t1Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T1);
140    RegionInfo t1Ri1 = t1Ris.get(1);
141    Put pServer = new Put(t1Ri1.getRegionName());
142    pServer.addColumn(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(0),
143      Bytes.toBytes("bad.server.example.org:1234"));
144    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(pServer));
145    janitor.scan();
146    report = janitor.getLastReport();
147    assertFalse(report.isEmpty());
148    assertEquals(1, report.getUnknownServers().size());
149    // Test what happens if we blow away an info:server row, if it is null. Should not kill CJ
150    // and we should log the row that had the problem. HBASE-23192. Just make sure we don't
151    // break if this happens.
152    LOG.info("Make null info:server");
153    Put emptyInfoServerPut = new Put(t1Ri1.getRegionName());
154    emptyInfoServerPut.addColumn(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(0),
155      Bytes.toBytes(""));
156    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(emptyInfoServerPut));
157    janitor.scan();
158    report = janitor.getLastReport();
159    assertEquals(0, report.getUnknownServers().size());
160    // Mke an empty regioninfo in t1.
161    RegionInfo t1Ri2 = t1Ris.get(2);
162    Put pEmptyRI = new Put(t1Ri2.getRegionName());
163    pEmptyRI.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
164      HConstants.EMPTY_BYTE_ARRAY);
165    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(pEmptyRI));
166    janitor.scan();
167    report = janitor.getLastReport();
168    assertEquals(1, report.getEmptyRegionInfo().size());
169
170    int holesReported = report.getHoles().size();
171    int overlapsReported = report.getOverlaps().size();
172
173    // Test the case for T4
174    // r1: [aa, bb), r2: [cc, dd), r3: [a, cc)
175    // Make sure only overlaps and no holes are reported.
176    List<RegionInfo> t4Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T4);
177    // delete the region [bb, cc)
178    regionStateStore.deleteRegion(t4Ris.get(2));
179
180    // add a new region [a, cc)
181    RegionInfo newRiT4 = RegionInfoBuilder.newBuilder(T4).setStartKey("a".getBytes())
182      .setEndKey("cc".getBytes()).build();
183    Put putForT4 =
184      MetaTableAccessor.makePutFromRegionInfo(newRiT4, EnvironmentEdgeManager.currentTime());
185    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(putForT4));
186
187    janitor.scan();
188    report = janitor.getLastReport();
189    // there is no new hole reported, 2 more overLaps added.
190    assertEquals(holesReported, report.getHoles().size());
191    assertEquals(overlapsReported + 2, report.getOverlaps().size());
192
193    holesReported = report.getHoles().size();
194    overlapsReported = report.getOverlaps().size();
195
196    // Test the case for T5
197    // r0: [, bb), r1: [a, g), r2: [bb, cc), r3: [dd, )
198    // Make sure only overlaps and no holes are reported.
199    List<RegionInfo> t5Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T5);
200    // delete the region [cc, dd)
201    regionStateStore.deleteRegion(t5Ris.get(2));
202
203    // add a new region [a, g)
204    RegionInfo newRiT5 = RegionInfoBuilder.newBuilder(T5).setStartKey("a".getBytes())
205      .setEndKey("g".getBytes()).build();
206    Put putForT5 =
207      MetaTableAccessor.makePutFromRegionInfo(newRiT5, EnvironmentEdgeManager.currentTime());
208    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(putForT5));
209
210    janitor.scan();
211    report = janitor.getLastReport();
212    // there is no new hole reported, 3 more overLaps added.
213    // ([a, g), [, bb)), ([a, g), [bb, cc)), ([a, g), [dd, ))
214    assertEquals(holesReported, report.getHoles().size());
215    assertEquals(overlapsReported + 3, report.getOverlaps().size());
216  }
217
218  /**
219   * Take last byte and add one to it.
220   */
221  private static byte[] incrementRow(byte[] row) {
222    if (row.length == 0) {
223      return new byte[] { '0' };
224    }
225    row[row.length - 1] = (byte) (((int) row[row.length - 1]) + 1);
226    return row;
227  }
228
229  @Test
230  public void testHoles() throws IOException, InterruptedException {
231    CatalogJanitor janitor = TEST_UTIL.getHBaseCluster().getMaster().getCatalogJanitor();
232
233    CatalogJanitorReport report = janitor.getLastReport();
234    // Assert no problems.
235    assertTrue(report.isEmpty());
236    // Verify start and end region holes
237    verifyCornerHoles(janitor, T1);
238    // Verify start and end region holes
239    verifyCornerHoles(janitor, T2);
240    // Verify start and end region holes when next table is disable see: HBASE-27560
241    disableTable(T7);
242    verifyCornerHoles(janitor, T6);
243    verifyMiddleHole(janitor);
244    // Verify that MetaFixer is able to fix these holes
245    fixHoles(janitor);
246  }
247
248  private void fixHoles(CatalogJanitor janitor) throws IOException {
249    MetaFixer metaFixer = new MetaFixer(TEST_UTIL.getHBaseCluster().getMaster());
250    janitor.scan();
251    CatalogJanitorReport report = janitor.getLastReport();
252    // Verify total number of holes, 2 in t1, t2, t6 each and one in t3
253    assertEquals(7, report.getHoles().size(), "Number of holes are not matching");
254    metaFixer.fix();
255    janitor.scan();
256    report = janitor.getLastReport();
257    assertEquals(0, report.getHoles().size(), "Holes are not fixed");
258  }
259
260  private void verifyMiddleHole(CatalogJanitor janitor) throws IOException {
261    // Verify middle holes
262    RegionInfo firstRegion = getRegionInfo(T3, "".getBytes());
263    RegionInfo secondRegion = getRegionInfo(T3, "bbb".getBytes());
264    RegionInfo thirdRegion = getRegionInfo(T3, "ccc".getBytes());
265    TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore()
266      .deleteRegion(secondRegion);
267    LinkedList<Pair<RegionInfo, RegionInfo>> holes = getHoles(janitor, T3);
268    Pair<RegionInfo, RegionInfo> regionInfoRegionInfoPair = holes.getFirst();
269    assertTrue(regionInfoRegionInfoPair.getFirst().getTable().equals(T3));
270    assertTrue(regionInfoRegionInfoPair.getSecond().getTable().equals(T3));
271    assertTrue(
272      regionInfoRegionInfoPair.getFirst().getEncodedName().equals(firstRegion.getEncodedName()));
273    assertTrue(
274      regionInfoRegionInfoPair.getSecond().getEncodedName().equals(thirdRegion.getEncodedName()));
275  }
276
277  private void verifyCornerHoles(CatalogJanitor janitor, TableName tableName) throws IOException {
278    RegionStateStore regionStateStore =
279      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore();
280    RegionInfo firstRegion = getRegionInfo(tableName, "".getBytes());
281    RegionInfo secondRegion = getRegionInfo(tableName, "bbb".getBytes());
282    regionStateStore.deleteRegion(firstRegion);
283    LinkedList<Pair<RegionInfo, RegionInfo>> holes = getHoles(janitor, tableName);
284
285    assertEquals(1, holes.size());
286    Pair<RegionInfo, RegionInfo> regionInfoRegionInfoPair = holes.get(0);
287    assertTrue(regionInfoRegionInfoPair.getFirst().getTable()
288      .equals(RegionInfoBuilder.UNDEFINED.getTable()));
289    assertTrue(regionInfoRegionInfoPair.getSecond().getTable().equals(tableName));
290    assertTrue(
291      regionInfoRegionInfoPair.getSecond().getEncodedName().equals(secondRegion.getEncodedName()));
292
293    RegionInfo lastRegion = getRegionInfo(tableName, "zzz".getBytes());
294    RegionInfo secondLastRegion = getRegionInfo(tableName, "yyy".getBytes());
295    regionStateStore.deleteRegion(lastRegion);
296    holes = getHoles(janitor, tableName);
297    assertEquals(2, holes.size());
298    regionInfoRegionInfoPair = holes.get(1);
299    assertTrue(regionInfoRegionInfoPair.getFirst().getEncodedName()
300      .equals(secondLastRegion.getEncodedName()));
301    assertTrue(regionInfoRegionInfoPair.getSecond().getTable()
302      .equals(RegionInfoBuilder.UNDEFINED.getTable()));
303  }
304
305  // Get Holes filter by table
306  private LinkedList<Pair<RegionInfo, RegionInfo>> getHoles(CatalogJanitor janitor,
307    TableName tableName) throws IOException {
308    janitor.scan();
309    CatalogJanitorReport lastReport = janitor.getLastReport();
310    assertFalse(lastReport.isEmpty());
311    LinkedList<Pair<RegionInfo, RegionInfo>> holes = new LinkedList<>();
312    for (Pair<RegionInfo, RegionInfo> hole : lastReport.getHoles()) {
313      if (
314        hole.getFirst().getTable().equals(tableName)
315          || hole.getSecond().getTable().equals(tableName)
316      ) {
317        holes.add(hole);
318      }
319    }
320    return holes;
321  }
322
323  private RegionInfo getRegionInfo(TableName tableName, byte[] row) throws IOException {
324    RegionInfo regionInfo =
325      TEST_UTIL.getConnection().getRegionLocator(tableName).getRegionLocation(row).getRegion();
326    assertNotNull(regionInfo);
327    return regionInfo;
328  }
329
330  private void disableTable(TableName tableName) throws IOException, InterruptedException {
331    try {
332      TEST_UTIL.getAdmin().disableTable(tableName);
333      TEST_UTIL.waitTableDisabled(tableName, 30000);
334    } catch (TableNotEnabledException e) {
335      LOG.debug("Table: " + tableName + " already disabled, ignore.");
336    }
337  }
338}