001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.junit.jupiter.api.Assertions.assertEquals;
021import static org.junit.jupiter.api.Assertions.assertTrue;
022
023import java.io.IOException;
024import java.util.ArrayList;
025import java.util.Arrays;
026import java.util.List;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.fs.Path;
029import org.apache.hadoop.hbase.Cell;
030import org.apache.hadoop.hbase.CellUtil;
031import org.apache.hadoop.hbase.HBaseTestingUtil;
032import org.apache.hadoop.hbase.TableName;
033import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
034import org.apache.hadoop.hbase.client.Delete;
035import org.apache.hadoop.hbase.client.Durability;
036import org.apache.hadoop.hbase.client.Get;
037import org.apache.hadoop.hbase.client.Put;
038import org.apache.hadoop.hbase.client.RegionInfo;
039import org.apache.hadoop.hbase.client.RegionInfoBuilder;
040import org.apache.hadoop.hbase.client.Scan;
041import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
042import org.apache.hadoop.hbase.io.hfile.BlockCache;
043import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
044import org.apache.hadoop.hbase.io.hfile.HFile;
045import org.apache.hadoop.hbase.testclassification.RegionServerTests;
046import org.apache.hadoop.hbase.testclassification.SmallTests;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
049import org.junit.jupiter.api.AfterAll;
050import org.junit.jupiter.api.BeforeAll;
051import org.junit.jupiter.api.BeforeEach;
052import org.junit.jupiter.api.Tag;
053import org.junit.jupiter.api.Test;
054import org.junit.jupiter.api.TestInfo;
055import org.slf4j.Logger;
056import org.slf4j.LoggerFactory;
057
058@Tag(RegionServerTests.TAG)
059@Tag(SmallTests.TAG)
060public class TestBlocksRead {
061
062  private static final Logger LOG = LoggerFactory.getLogger(TestBlocksRead.class);
063  private String testName;
064
065  @BeforeEach
066  public void setTestName(TestInfo testInfo) {
067    this.testName = testInfo.getTestMethod().get().getName();
068  }
069
070  static final BloomType[] BLOOM_TYPE =
071    new BloomType[] { BloomType.ROWCOL, BloomType.ROW, BloomType.NONE };
072
073  HRegion region = null;
074  private static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
075  private final String DIR = TEST_UTIL.getDataTestDir("TestBlocksRead").toString();
076  private Configuration conf = TEST_UTIL.getConfiguration();
077
078  @BeforeAll
079  public static void setUp() throws Exception {
080    // disable compactions in this test.
081    TEST_UTIL.getConfiguration().setInt("hbase.hstore.compactionThreshold", 10000);
082  }
083
084  @AfterAll
085  public static void tearDown() throws Exception {
086    EnvironmentEdgeManagerTestHelper.reset();
087  }
088
089  /**
090   * Callers must afterward call {@link HBaseTestingUtil#closeRegionAndWAL(HRegion)}
091   * @return created and initialized region.
092   */
093  private HRegion initHRegion(byte[] tableName, String callingMethod, Configuration conf,
094    String family) throws IOException {
095    return initHRegion(tableName, callingMethod, conf, family, null);
096  }
097
098  /**
099   * Callers must afterward call {@link HBaseTestingUtil#closeRegionAndWAL(HRegion)}
100   */
101  private HRegion initHRegion(byte[] tableName, String callingMethod, Configuration conf,
102    String family, BlockCache blockCache) throws IOException {
103    TableDescriptorBuilder builder =
104      TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName));
105    for (int i = 0; i < BLOOM_TYPE.length; i++) {
106      BloomType bloomType = BLOOM_TYPE[i];
107      builder.setColumnFamily(
108        ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(family + "_" + bloomType))
109          .setBlocksize(1).setBloomFilterType(bloomType).build());
110    }
111    RegionInfo info = RegionInfoBuilder.newBuilder(TableName.valueOf(tableName)).build();
112    Path path = new Path(DIR + callingMethod);
113    if (blockCache != null) {
114      return HBaseTestingUtil.createRegionAndWAL(info, path, conf, builder.build(), blockCache);
115    } else {
116      return HBaseTestingUtil.createRegionAndWAL(info, path, conf, builder.build());
117    }
118  }
119
120  private void putData(String family, String row, String col, long version) throws IOException {
121    for (int i = 0; i < BLOOM_TYPE.length; i++) {
122      putData(Bytes.toBytes(family + "_" + BLOOM_TYPE[i]), row, col, version, version);
123    }
124  }
125
126  // generates a value to put for a row/col/version.
127  private static byte[] genValue(String row, String col, long version) {
128    return Bytes.toBytes("Value:" + row + "#" + col + "#" + version);
129  }
130
131  private void putData(byte[] cf, String row, String col, long versionStart, long versionEnd)
132    throws IOException {
133    byte[] columnBytes = Bytes.toBytes(col);
134    Put put = new Put(Bytes.toBytes(row));
135    put.setDurability(Durability.SKIP_WAL);
136
137    for (long version = versionStart; version <= versionEnd; version++) {
138      put.addColumn(cf, columnBytes, version, genValue(row, col, version));
139    }
140    region.put(put);
141  }
142
143  private Cell[] getData(String family, String row, List<String> columns, int expBlocks)
144    throws IOException {
145    return getData(family, row, columns, expBlocks, expBlocks, expBlocks);
146  }
147
148  private Cell[] getData(String family, String row, List<String> columns, int expBlocksRowCol,
149    int expBlocksRow, int expBlocksNone) throws IOException {
150    int[] expBlocks = new int[] { expBlocksRowCol, expBlocksRow, expBlocksNone };
151    Cell[] kvs = null;
152
153    for (int i = 0; i < BLOOM_TYPE.length; i++) {
154      BloomType bloomType = BLOOM_TYPE[i];
155      byte[] cf = Bytes.toBytes(family + "_" + bloomType);
156      long blocksStart = getBlkAccessCount(cf);
157      Get get = new Get(Bytes.toBytes(row));
158
159      for (String column : columns) {
160        get.addColumn(cf, Bytes.toBytes(column));
161      }
162
163      kvs = region.get(get).rawCells();
164      long blocksEnd = getBlkAccessCount(cf);
165      if (expBlocks[i] != -1) {
166        assertEquals(expBlocks[i], blocksEnd - blocksStart,
167          "Blocks Read Check for Bloom: " + bloomType);
168      }
169      System.out.println("Blocks Read for Bloom: " + bloomType + " = " + (blocksEnd - blocksStart)
170        + "Expected = " + expBlocks[i]);
171    }
172    return kvs;
173  }
174
175  private Cell[] getData(String family, String row, String column, int expBlocks)
176    throws IOException {
177    return getData(family, row, Arrays.asList(column), expBlocks, expBlocks, expBlocks);
178  }
179
180  private Cell[] getData(String family, String row, String column, int expBlocksRowCol,
181    int expBlocksRow, int expBlocksNone) throws IOException {
182    return getData(family, row, Arrays.asList(column), expBlocksRowCol, expBlocksRow,
183      expBlocksNone);
184  }
185
186  private void deleteFamily(String family, String row, long version) throws IOException {
187    Delete del = new Delete(Bytes.toBytes(row));
188    del.addFamily(Bytes.toBytes(family + "_ROWCOL"), version);
189    del.addFamily(Bytes.toBytes(family + "_ROW"), version);
190    del.addFamily(Bytes.toBytes(family + "_NONE"), version);
191    region.delete(del);
192  }
193
194  private static void verifyData(Cell kv, String expectedRow, String expectedCol,
195    long expectedVersion) {
196    assertTrue(CellUtil.matchingRows(kv, Bytes.toBytes(expectedRow)), "RowCheck");
197    assertTrue(CellUtil.matchingQualifier(kv, Bytes.toBytes(expectedCol)), "ColumnCheck");
198    assertEquals(expectedVersion, kv.getTimestamp(), "TSCheck");
199    assertTrue(CellUtil.matchingValue(kv, genValue(expectedRow, expectedCol, expectedVersion)),
200      "ValueCheck");
201  }
202
203  private static long getBlkAccessCount(byte[] cf) {
204    return HFile.DATABLOCK_READ_COUNT.sum();
205  }
206
207  /**
208   * Test # of blocks read for some simple seek cases.
209   */
210  @Test
211  public void testBlocksRead() throws Exception {
212    byte[] TABLE = Bytes.toBytes("testBlocksRead");
213    String FAMILY = "cf1";
214    Cell[] kvs;
215    this.region = initHRegion(TABLE, testName, conf, FAMILY);
216
217    try {
218      putData(FAMILY, "row", "col1", 1);
219      putData(FAMILY, "row", "col2", 2);
220      putData(FAMILY, "row", "col3", 3);
221      putData(FAMILY, "row", "col4", 4);
222      putData(FAMILY, "row", "col5", 5);
223      putData(FAMILY, "row", "col6", 6);
224      putData(FAMILY, "row", "col7", 7);
225      region.flush(true);
226
227      // Expected block reads: 1
228      // The top block has the KV we are
229      // interested. So only 1 seek is needed.
230      kvs = getData(FAMILY, "row", "col1", 1);
231      assertEquals(1, kvs.length);
232      verifyData(kvs[0], "row", "col1", 1);
233
234      // Expected block reads: 2
235      // The top block and next block has the KVs we are
236      // interested. So only 2 seek is needed.
237      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
238      assertEquals(2, kvs.length);
239      verifyData(kvs[0], "row", "col1", 1);
240      verifyData(kvs[1], "row", "col2", 2);
241
242      // Expected block reads: 3
243      // The first 2 seeks is to find out col2. [HBASE-4443]
244      // One additional seek for col3
245      // So 3 seeks are needed.
246      kvs = getData(FAMILY, "row", Arrays.asList("col2", "col3"), 2);
247      assertEquals(2, kvs.length);
248      verifyData(kvs[0], "row", "col2", 2);
249      verifyData(kvs[1], "row", "col3", 3);
250
251      // Expected block reads: 1. [HBASE-4443]&[HBASE-7845]
252      kvs = getData(FAMILY, "row", Arrays.asList("col5"), 1);
253      assertEquals(1, kvs.length);
254      verifyData(kvs[0], "row", "col5", 5);
255    } finally {
256      HBaseTestingUtil.closeRegionAndWAL(this.region);
257      this.region = null;
258    }
259  }
260
261  /**
262   * Test # of blocks read (targeted at some of the cases Lazy Seek optimizes).
263   */
264  @Test
265  public void testLazySeekBlocksRead() throws Exception {
266    byte[] TABLE = Bytes.toBytes("testLazySeekBlocksRead");
267    String FAMILY = "cf1";
268    Cell[] kvs;
269    this.region = initHRegion(TABLE, testName, conf, FAMILY);
270
271    try {
272      // File 1
273      putData(FAMILY, "row", "col1", 1);
274      putData(FAMILY, "row", "col2", 2);
275      region.flush(true);
276
277      // File 2
278      putData(FAMILY, "row", "col1", 3);
279      putData(FAMILY, "row", "col2", 4);
280      region.flush(true);
281
282      // Expected blocks read: 1.
283      // File 2's top block is also the KV we are
284      // interested. So only 1 seek is needed.
285      kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1);
286      assertEquals(1, kvs.length);
287      verifyData(kvs[0], "row", "col1", 3);
288
289      // Expected blocks read: 2
290      // File 2's top block has the "col1" KV we are
291      // interested. We also need "col2" which is in a block
292      // of its own. So, we need that block as well.
293      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
294      assertEquals(2, kvs.length);
295      verifyData(kvs[0], "row", "col1", 3);
296      verifyData(kvs[1], "row", "col2", 4);
297
298      // File 3: Add another column
299      putData(FAMILY, "row", "col3", 5);
300      region.flush(true);
301
302      // Expected blocks read: 1
303      // File 3's top block has the "col3" KV we are
304      // interested. So only 1 seek is needed.
305      kvs = getData(FAMILY, "row", "col3", 1);
306      assertEquals(1, kvs.length);
307      verifyData(kvs[0], "row", "col3", 5);
308
309      // Get a column from older file.
310      // For ROWCOL Bloom filter: Expected blocks read: 1.
311      // For ROW Bloom filter: Expected blocks read: 2.
312      // For NONE Bloom filter: Expected blocks read: 2.
313      kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1, 2, 2);
314      assertEquals(1, kvs.length);
315      verifyData(kvs[0], "row", "col1", 3);
316
317      // File 4: Delete the entire row.
318      deleteFamily(FAMILY, "row", 6);
319      region.flush(true);
320
321      // For ROWCOL Bloom filter: Expected blocks read: 2.
322      // For ROW Bloom filter: Expected blocks read: 3.
323      // For NONE Bloom filter: Expected blocks read: 3.
324      kvs = getData(FAMILY, "row", "col1", 2, 3, 3);
325      assertEquals(0, kvs.length);
326      kvs = getData(FAMILY, "row", "col2", 2, 3, 3);
327      assertEquals(0, kvs.length);
328      kvs = getData(FAMILY, "row", "col3", 2);
329      assertEquals(0, kvs.length);
330      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 4);
331      assertEquals(0, kvs.length);
332
333      // File 5: Delete
334      deleteFamily(FAMILY, "row", 10);
335      region.flush(true);
336
337      // File 6: some more puts, but with timestamps older than the
338      // previous delete.
339      putData(FAMILY, "row", "col1", 7);
340      putData(FAMILY, "row", "col2", 8);
341      putData(FAMILY, "row", "col3", 9);
342      region.flush(true);
343
344      // Baseline expected blocks read: 6. [HBASE-4532]
345      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 6, 7, 7);
346      assertEquals(0, kvs.length);
347
348      // File 7: Put back new data
349      putData(FAMILY, "row", "col1", 11);
350      putData(FAMILY, "row", "col2", 12);
351      putData(FAMILY, "row", "col3", 13);
352      region.flush(true);
353
354      // Expected blocks read: 8. [HBASE-4585, HBASE-13109]
355      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 8, 9, 9);
356      assertEquals(3, kvs.length);
357      verifyData(kvs[0], "row", "col1", 11);
358      verifyData(kvs[1], "row", "col2", 12);
359      verifyData(kvs[2], "row", "col3", 13);
360    } finally {
361      HBaseTestingUtil.closeRegionAndWAL(this.region);
362      this.region = null;
363    }
364  }
365
366  /**
367   * Test # of blocks read to ensure disabling cache-fill on Scan works.
368   */
369  @Test
370  public void testBlocksStoredWhenCachingDisabled() throws Exception {
371    byte[] TABLE = Bytes.toBytes("testBlocksReadWhenCachingDisabled");
372    String FAMILY = "cf1";
373
374    BlockCache blockCache = BlockCacheFactory.createBlockCache(conf);
375    this.region = initHRegion(TABLE, testName, conf, FAMILY, blockCache);
376
377    try {
378      putData(FAMILY, "row", "col1", 1);
379      putData(FAMILY, "row", "col2", 2);
380      region.flush(true);
381
382      // Execute a scan with caching turned off
383      // Expected blocks stored: 0
384      long blocksStart = blockCache.getBlockCount();
385      Scan scan = new Scan();
386      scan.setCacheBlocks(false);
387      RegionScanner rs = region.getScanner(scan);
388      List<Cell> result = new ArrayList<>(2);
389      rs.next(result);
390      assertEquals(2 * BLOOM_TYPE.length, result.size());
391      rs.close();
392      long blocksEnd = blockCache.getBlockCount();
393
394      assertEquals(blocksStart, blocksEnd);
395
396      // Execute with caching turned on
397      // Expected blocks stored: 2
398      blocksStart = blocksEnd;
399      scan.setCacheBlocks(true);
400      rs = region.getScanner(scan);
401      result = new ArrayList<>(2);
402      rs.next(result);
403      assertEquals(2 * BLOOM_TYPE.length, result.size());
404      rs.close();
405      blocksEnd = blockCache.getBlockCount();
406
407      assertEquals(2 * BLOOM_TYPE.length, blocksEnd - blocksStart);
408    } finally {
409      HBaseTestingUtil.closeRegionAndWAL(this.region);
410      this.region = null;
411    }
412  }
413
414  @Test
415  public void testLazySeekBlocksReadWithDelete() throws Exception {
416    byte[] TABLE = Bytes.toBytes("testLazySeekBlocksReadWithDelete");
417    String FAMILY = "cf1";
418    Cell[] kvs;
419    this.region = initHRegion(TABLE, testName, conf, FAMILY);
420    try {
421      deleteFamily(FAMILY, "row", 200);
422      for (int i = 0; i < 100; i++) {
423        putData(FAMILY, "row", "col" + i, i);
424      }
425      putData(FAMILY, "row", "col99", 201);
426      region.flush(true);
427
428      kvs = getData(FAMILY, "row", Arrays.asList("col0"), 2);
429      assertEquals(0, kvs.length);
430
431      kvs = getData(FAMILY, "row", Arrays.asList("col99"), 2);
432      assertEquals(1, kvs.length);
433      verifyData(kvs[0], "row", "col99", 201);
434    } finally {
435      HBaseTestingUtil.closeRegionAndWAL(this.region);
436      this.region = null;
437    }
438  }
439
440}