001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.janitor;
019
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import org.apache.hadoop.hbase.CatalogFamilyFormat;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotEnabledException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
054
055@Category({ MasterTests.class, LargeTests.class })
056public class TestCatalogJanitorCluster {
057  private static final Logger LOG = LoggerFactory.getLogger(TestCatalogJanitorCluster.class);
058
059  @ClassRule
060  public static final HBaseClassTestRule CLASS_RULE =
061    HBaseClassTestRule.forClass(TestCatalogJanitorCluster.class);
062
063  @Rule
064  public final TestName name = new TestName();
065
066  private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
067  private static final TableName T1 = TableName.valueOf("t1");
068  private static final TableName T2 = TableName.valueOf("t2");
069  private static final TableName T3 = TableName.valueOf("t3");
070  private static final TableName T4 = TableName.valueOf("t4");
071  private static final TableName T5 = TableName.valueOf("t5");
072  private static final TableName T6 = TableName.valueOf("t6");
073  private static final TableName T7 = TableName.valueOf("t7");
074
075  @Before
076  public void before() throws Exception {
077    TEST_UTIL.startMiniCluster();
078    TEST_UTIL.createMultiRegionTable(T1, new byte[][] { HConstants.CATALOG_FAMILY });
079    TEST_UTIL.createMultiRegionTable(T2, new byte[][] { HConstants.CATALOG_FAMILY });
080    TEST_UTIL.createMultiRegionTable(T3, new byte[][] { HConstants.CATALOG_FAMILY });
081
082    final byte[][] keysForT4 =
083      { Bytes.toBytes("aa"), Bytes.toBytes("bb"), Bytes.toBytes("cc"), Bytes.toBytes("dd") };
084
085    TEST_UTIL.createTable(T4, HConstants.CATALOG_FAMILY, keysForT4);
086
087    final byte[][] keysForT5 = { Bytes.toBytes("bb"), Bytes.toBytes("cc"), Bytes.toBytes("dd") };
088
089    TEST_UTIL.createTable(T5, HConstants.CATALOG_FAMILY, keysForT5);
090
091    TEST_UTIL.createMultiRegionTable(T6, new byte[][] { HConstants.CATALOG_FAMILY });
092    TEST_UTIL.createMultiRegionTable(T7, new byte[][] { HConstants.CATALOG_FAMILY });
093  }
094
095  @After
096  public void after() throws Exception {
097    TEST_UTIL.shutdownMiniCluster();
098  }
099
100  /**
101   * Fat method where we start with a fat hbase:meta and then gradually intro problems running
102   * catalogjanitor for each to ensure it triggers complaint. Do one big method because takes a
103   * while to build up the context we need. We create three tables and then make holes, overlaps,
104   * add unknown servers and empty out regioninfo columns. Each should up counts in the
105   * CatalogJanitor.Report produced.
106   */
107  @Test
108  public void testConsistency() throws IOException {
109    CatalogJanitor janitor = TEST_UTIL.getHBaseCluster().getMaster().getCatalogJanitor();
110    RegionStateStore regionStateStore =
111      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore();
112    janitor.scan();
113    CatalogJanitorReport report = janitor.getLastReport();
114    // Assert no problems.
115    assertTrue(report.isEmpty());
116    // Now remove first region in table t2 to see if catalogjanitor scan notices.
117    List<RegionInfo> t2Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T2);
118    regionStateStore.deleteRegion(t2Ris.get(0));
119    janitor.scan();
120    report = janitor.getLastReport();
121    assertFalse(report.isEmpty());
122    assertEquals(1, report.getHoles().size());
123    assertTrue(report.getHoles().get(0).getFirst().getTable()
124      .equals(RegionInfoBuilder.UNDEFINED.getTable()));
125    assertTrue(report.getHoles().get(0).getSecond().getTable().equals(T2));
126    assertEquals(0, report.getOverlaps().size());
127    // Next, add overlaps to first row in t3
128    List<RegionInfo> t3Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T3);
129    RegionInfo ri = t3Ris.get(0);
130    RegionInfo newRi1 = RegionInfoBuilder.newBuilder(ri.getTable())
131      .setStartKey(incrementRow(ri.getStartKey())).setEndKey(incrementRow(ri.getEndKey())).build();
132    Put p1 = MetaTableAccessor.makePutFromRegionInfo(newRi1, EnvironmentEdgeManager.currentTime());
133    RegionInfo newRi2 = RegionInfoBuilder.newBuilder(newRi1.getTable())
134      .setStartKey(incrementRow(newRi1.getStartKey())).setEndKey(incrementRow(newRi1.getEndKey()))
135      .build();
136    Put p2 = MetaTableAccessor.makePutFromRegionInfo(newRi2, EnvironmentEdgeManager.currentTime());
137    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(p1, p2));
138    janitor.scan();
139    report = janitor.getLastReport();
140    assertFalse(report.isEmpty());
141    // We added two overlaps so total three.
142    assertEquals(3, report.getOverlaps().size());
143    // Assert hole is still there.
144    assertEquals(1, report.getHoles().size());
145    // Assert other attributes are empty still.
146    assertTrue(report.getEmptyRegionInfo().isEmpty());
147    assertTrue(report.getUnknownServers().isEmpty());
148    // Now make bad server in t1.
149    List<RegionInfo> t1Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T1);
150    RegionInfo t1Ri1 = t1Ris.get(1);
151    Put pServer = new Put(t1Ri1.getRegionName());
152    pServer.addColumn(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(0),
153      Bytes.toBytes("bad.server.example.org:1234"));
154    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(pServer));
155    janitor.scan();
156    report = janitor.getLastReport();
157    assertFalse(report.isEmpty());
158    assertEquals(1, report.getUnknownServers().size());
159    // Test what happens if we blow away an info:server row, if it is null. Should not kill CJ
160    // and we should log the row that had the problem. HBASE-23192. Just make sure we don't
161    // break if this happens.
162    LOG.info("Make null info:server");
163    Put emptyInfoServerPut = new Put(t1Ri1.getRegionName());
164    emptyInfoServerPut.addColumn(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(0),
165      Bytes.toBytes(""));
166    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(emptyInfoServerPut));
167    janitor.scan();
168    report = janitor.getLastReport();
169    assertEquals(0, report.getUnknownServers().size());
170    // Mke an empty regioninfo in t1.
171    RegionInfo t1Ri2 = t1Ris.get(2);
172    Put pEmptyRI = new Put(t1Ri2.getRegionName());
173    pEmptyRI.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
174      HConstants.EMPTY_BYTE_ARRAY);
175    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(pEmptyRI));
176    janitor.scan();
177    report = janitor.getLastReport();
178    assertEquals(1, report.getEmptyRegionInfo().size());
179
180    int holesReported = report.getHoles().size();
181    int overlapsReported = report.getOverlaps().size();
182
183    // Test the case for T4
184    // r1: [aa, bb), r2: [cc, dd), r3: [a, cc)
185    // Make sure only overlaps and no holes are reported.
186    List<RegionInfo> t4Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T4);
187    // delete the region [bb, cc)
188    regionStateStore.deleteRegion(t4Ris.get(2));
189
190    // add a new region [a, cc)
191    RegionInfo newRiT4 = RegionInfoBuilder.newBuilder(T4).setStartKey("a".getBytes())
192      .setEndKey("cc".getBytes()).build();
193    Put putForT4 =
194      MetaTableAccessor.makePutFromRegionInfo(newRiT4, EnvironmentEdgeManager.currentTime());
195    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(putForT4));
196
197    janitor.scan();
198    report = janitor.getLastReport();
199    // there is no new hole reported, 2 more overLaps added.
200    assertEquals(holesReported, report.getHoles().size());
201    assertEquals(overlapsReported + 2, report.getOverlaps().size());
202
203    holesReported = report.getHoles().size();
204    overlapsReported = report.getOverlaps().size();
205
206    // Test the case for T5
207    // r0: [, bb), r1: [a, g), r2: [bb, cc), r3: [dd, )
208    // Make sure only overlaps and no holes are reported.
209    List<RegionInfo> t5Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T5);
210    // delete the region [cc, dd)
211    regionStateStore.deleteRegion(t5Ris.get(2));
212
213    // add a new region [a, g)
214    RegionInfo newRiT5 = RegionInfoBuilder.newBuilder(T5).setStartKey("a".getBytes())
215      .setEndKey("g".getBytes()).build();
216    Put putForT5 =
217      MetaTableAccessor.makePutFromRegionInfo(newRiT5, EnvironmentEdgeManager.currentTime());
218    MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(putForT5));
219
220    janitor.scan();
221    report = janitor.getLastReport();
222    // there is no new hole reported, 3 more overLaps added.
223    // ([a, g), [, bb)), ([a, g), [bb, cc)), ([a, g), [dd, ))
224    assertEquals(holesReported, report.getHoles().size());
225    assertEquals(overlapsReported + 3, report.getOverlaps().size());
226  }
227
228  /**
229   * Take last byte and add one to it.
230   */
231  private static byte[] incrementRow(byte[] row) {
232    if (row.length == 0) {
233      return new byte[] { '0' };
234    }
235    row[row.length - 1] = (byte) (((int) row[row.length - 1]) + 1);
236    return row;
237  }
238
239  @Test
240  public void testHoles() throws IOException, InterruptedException {
241    CatalogJanitor janitor = TEST_UTIL.getHBaseCluster().getMaster().getCatalogJanitor();
242
243    CatalogJanitorReport report = janitor.getLastReport();
244    // Assert no problems.
245    assertTrue(report.isEmpty());
246    // Verify start and end region holes
247    verifyCornerHoles(janitor, T1);
248    // Verify start and end region holes
249    verifyCornerHoles(janitor, T2);
250    // Verify start and end region holes when next table is disable see: HBASE-27560
251    disableTable(T7);
252    verifyCornerHoles(janitor, T6);
253    verifyMiddleHole(janitor);
254    // Verify that MetaFixer is able to fix these holes
255    fixHoles(janitor);
256  }
257
258  private void fixHoles(CatalogJanitor janitor) throws IOException {
259    MetaFixer metaFixer = new MetaFixer(TEST_UTIL.getHBaseCluster().getMaster());
260    janitor.scan();
261    CatalogJanitorReport report = janitor.getLastReport();
262    // Verify total number of holes, 2 in t1, t2, t6 each and one in t3
263    assertEquals("Number of holes are not matching", 7, report.getHoles().size());
264    metaFixer.fix();
265    janitor.scan();
266    report = janitor.getLastReport();
267    assertEquals("Holes are not fixed", 0, report.getHoles().size());
268  }
269
270  private void verifyMiddleHole(CatalogJanitor janitor) throws IOException {
271    // Verify middle holes
272    RegionInfo firstRegion = getRegionInfo(T3, "".getBytes());
273    RegionInfo secondRegion = getRegionInfo(T3, "bbb".getBytes());
274    RegionInfo thirdRegion = getRegionInfo(T3, "ccc".getBytes());
275    TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore()
276      .deleteRegion(secondRegion);
277    LinkedList<Pair<RegionInfo, RegionInfo>> holes = getHoles(janitor, T3);
278    Pair<RegionInfo, RegionInfo> regionInfoRegionInfoPair = holes.getFirst();
279    assertTrue(regionInfoRegionInfoPair.getFirst().getTable().equals(T3));
280    assertTrue(regionInfoRegionInfoPair.getSecond().getTable().equals(T3));
281    assertTrue(
282      regionInfoRegionInfoPair.getFirst().getEncodedName().equals(firstRegion.getEncodedName()));
283    assertTrue(
284      regionInfoRegionInfoPair.getSecond().getEncodedName().equals(thirdRegion.getEncodedName()));
285  }
286
287  private void verifyCornerHoles(CatalogJanitor janitor, TableName tableName) throws IOException {
288    RegionStateStore regionStateStore =
289      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore();
290    RegionInfo firstRegion = getRegionInfo(tableName, "".getBytes());
291    RegionInfo secondRegion = getRegionInfo(tableName, "bbb".getBytes());
292    regionStateStore.deleteRegion(firstRegion);
293    LinkedList<Pair<RegionInfo, RegionInfo>> holes = getHoles(janitor, tableName);
294
295    assertEquals(1, holes.size());
296    Pair<RegionInfo, RegionInfo> regionInfoRegionInfoPair = holes.get(0);
297    assertTrue(regionInfoRegionInfoPair.getFirst().getTable()
298      .equals(RegionInfoBuilder.UNDEFINED.getTable()));
299    assertTrue(regionInfoRegionInfoPair.getSecond().getTable().equals(tableName));
300    assertTrue(
301      regionInfoRegionInfoPair.getSecond().getEncodedName().equals(secondRegion.getEncodedName()));
302
303    RegionInfo lastRegion = getRegionInfo(tableName, "zzz".getBytes());
304    RegionInfo secondLastRegion = getRegionInfo(tableName, "yyy".getBytes());
305    regionStateStore.deleteRegion(lastRegion);
306    holes = getHoles(janitor, tableName);
307    assertEquals(2, holes.size());
308    regionInfoRegionInfoPair = holes.get(1);
309    assertTrue(regionInfoRegionInfoPair.getFirst().getEncodedName()
310      .equals(secondLastRegion.getEncodedName()));
311    assertTrue(regionInfoRegionInfoPair.getSecond().getTable()
312      .equals(RegionInfoBuilder.UNDEFINED.getTable()));
313  }
314
315  // Get Holes filter by table
316  private LinkedList<Pair<RegionInfo, RegionInfo>> getHoles(CatalogJanitor janitor,
317    TableName tableName) throws IOException {
318    janitor.scan();
319    CatalogJanitorReport lastReport = janitor.getLastReport();
320    assertFalse(lastReport.isEmpty());
321    LinkedList<Pair<RegionInfo, RegionInfo>> holes = new LinkedList<>();
322    for (Pair<RegionInfo, RegionInfo> hole : lastReport.getHoles()) {
323      if (
324        hole.getFirst().getTable().equals(tableName)
325          || hole.getSecond().getTable().equals(tableName)
326      ) {
327        holes.add(hole);
328      }
329    }
330    return holes;
331  }
332
333  private RegionInfo getRegionInfo(TableName tableName, byte[] row) throws IOException {
334    RegionInfo regionInfo =
335      TEST_UTIL.getConnection().getRegionLocator(tableName).getRegionLocation(row).getRegion();
336    assertNotNull(regionInfo);
337    return regionInfo;
338  }
339
340  private void disableTable(TableName tableName) throws IOException, InterruptedException {
341    try {
342      TEST_UTIL.getAdmin().disableTable(tableName);
343      TEST_UTIL.waitTableDisabled(tableName, 30000);
344    } catch (TableNotEnabledException e) {
345      LOG.debug("Table: " + tableName + " already disabled, ignore.");
346    }
347  }
348}