001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static junit.framework.TestCase.assertTrue;
021import static org.junit.Assert.assertEquals;
022
023import java.io.IOException;
024import java.util.ArrayList;
025import java.util.Arrays;
026import java.util.List;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.fs.Path;
029import org.apache.hadoop.hbase.Cell;
030import org.apache.hadoop.hbase.CellUtil;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HBaseTestingUtility;
033import org.apache.hadoop.hbase.TableName;
034import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
035import org.apache.hadoop.hbase.client.Delete;
036import org.apache.hadoop.hbase.client.Durability;
037import org.apache.hadoop.hbase.client.Get;
038import org.apache.hadoop.hbase.client.Put;
039import org.apache.hadoop.hbase.client.RegionInfo;
040import org.apache.hadoop.hbase.client.RegionInfoBuilder;
041import org.apache.hadoop.hbase.client.Scan;
042import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
043import org.apache.hadoop.hbase.io.hfile.BlockCache;
044import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
045import org.apache.hadoop.hbase.io.hfile.HFile;
046import org.apache.hadoop.hbase.testclassification.RegionServerTests;
047import org.apache.hadoop.hbase.testclassification.SmallTests;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
050import org.junit.AfterClass;
051import org.junit.BeforeClass;
052import org.junit.ClassRule;
053import org.junit.Rule;
054import org.junit.Test;
055import org.junit.experimental.categories.Category;
056import org.junit.rules.TestName;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060@Category({ RegionServerTests.class, SmallTests.class })
061public class TestBlocksRead {
062
063  @ClassRule
064  public static final HBaseClassTestRule CLASS_RULE =
065    HBaseClassTestRule.forClass(TestBlocksRead.class);
066
067  private static final Logger LOG = LoggerFactory.getLogger(TestBlocksRead.class);
068  @Rule
069  public TestName testName = new TestName();
070
071  static final BloomType[] BLOOM_TYPE =
072    new BloomType[] { BloomType.ROWCOL, BloomType.ROW, BloomType.NONE };
073
074  HRegion region = null;
075  private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
076  private final String DIR = TEST_UTIL.getDataTestDir("TestBlocksRead").toString();
077  private Configuration conf = TEST_UTIL.getConfiguration();
078
079  @BeforeClass
080  public static void setUp() throws Exception {
081    // disable compactions in this test.
082    TEST_UTIL.getConfiguration().setInt("hbase.hstore.compactionThreshold", 10000);
083  }
084
085  @AfterClass
086  public static void tearDown() throws Exception {
087    EnvironmentEdgeManagerTestHelper.reset();
088  }
089
090  /**
091   * Callers must afterward call {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)}
092   * @return created and initialized region.
093   */
094  private HRegion initHRegion(byte[] tableName, String callingMethod, Configuration conf,
095    String family) throws IOException {
096    return initHRegion(tableName, callingMethod, conf, family, null);
097  }
098
099  /**
100   * Callers must afterward call {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)}
101   */
102  private HRegion initHRegion(byte[] tableName, String callingMethod, Configuration conf,
103    String family, BlockCache blockCache) throws IOException {
104    TableDescriptorBuilder builder =
105      TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName));
106    for (int i = 0; i < BLOOM_TYPE.length; i++) {
107      BloomType bloomType = BLOOM_TYPE[i];
108      builder.setColumnFamily(
109        ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(family + "_" + bloomType))
110          .setBlocksize(1).setBloomFilterType(bloomType).build());
111    }
112    RegionInfo info = RegionInfoBuilder.newBuilder(TableName.valueOf(tableName)).build();
113    Path path = new Path(DIR + callingMethod);
114    if (blockCache != null) {
115      return HBaseTestingUtility.createRegionAndWAL(info, path, conf, builder.build(), blockCache);
116    } else {
117      return HBaseTestingUtility.createRegionAndWAL(info, path, conf, builder.build());
118    }
119  }
120
121  private void putData(String family, String row, String col, long version) throws IOException {
122    for (int i = 0; i < BLOOM_TYPE.length; i++) {
123      putData(Bytes.toBytes(family + "_" + BLOOM_TYPE[i]), row, col, version, version);
124    }
125  }
126
127  // generates a value to put for a row/col/version.
128  private static byte[] genValue(String row, String col, long version) {
129    return Bytes.toBytes("Value:" + row + "#" + col + "#" + version);
130  }
131
132  private void putData(byte[] cf, String row, String col, long versionStart, long versionEnd)
133    throws IOException {
134    byte[] columnBytes = Bytes.toBytes(col);
135    Put put = new Put(Bytes.toBytes(row));
136    put.setDurability(Durability.SKIP_WAL);
137
138    for (long version = versionStart; version <= versionEnd; version++) {
139      put.addColumn(cf, columnBytes, version, genValue(row, col, version));
140    }
141    region.put(put);
142  }
143
144  private Cell[] getData(String family, String row, List<String> columns, int expBlocks)
145    throws IOException {
146    return getData(family, row, columns, expBlocks, expBlocks, expBlocks);
147  }
148
149  private Cell[] getData(String family, String row, List<String> columns, int expBlocksRowCol,
150    int expBlocksRow, int expBlocksNone) throws IOException {
151    int[] expBlocks = new int[] { expBlocksRowCol, expBlocksRow, expBlocksNone };
152    Cell[] kvs = null;
153
154    for (int i = 0; i < BLOOM_TYPE.length; i++) {
155      BloomType bloomType = BLOOM_TYPE[i];
156      byte[] cf = Bytes.toBytes(family + "_" + bloomType);
157      long blocksStart = getBlkAccessCount(cf);
158      Get get = new Get(Bytes.toBytes(row));
159
160      for (String column : columns) {
161        get.addColumn(cf, Bytes.toBytes(column));
162      }
163
164      kvs = region.get(get).rawCells();
165      long blocksEnd = getBlkAccessCount(cf);
166      if (expBlocks[i] != -1) {
167        assertEquals("Blocks Read Check for Bloom: " + bloomType, expBlocks[i],
168          blocksEnd - blocksStart);
169      }
170      System.out.println("Blocks Read for Bloom: " + bloomType + " = " + (blocksEnd - blocksStart)
171        + "Expected = " + expBlocks[i]);
172    }
173    return kvs;
174  }
175
176  private Cell[] getData(String family, String row, String column, int expBlocks)
177    throws IOException {
178    return getData(family, row, Arrays.asList(column), expBlocks, expBlocks, expBlocks);
179  }
180
181  private Cell[] getData(String family, String row, String column, int expBlocksRowCol,
182    int expBlocksRow, int expBlocksNone) throws IOException {
183    return getData(family, row, Arrays.asList(column), expBlocksRowCol, expBlocksRow,
184      expBlocksNone);
185  }
186
187  private void deleteFamily(String family, String row, long version) throws IOException {
188    Delete del = new Delete(Bytes.toBytes(row));
189    del.addFamily(Bytes.toBytes(family + "_ROWCOL"), version);
190    del.addFamily(Bytes.toBytes(family + "_ROW"), version);
191    del.addFamily(Bytes.toBytes(family + "_NONE"), version);
192    region.delete(del);
193  }
194
195  private static void verifyData(Cell kv, String expectedRow, String expectedCol,
196    long expectedVersion) {
197    assertTrue("RowCheck", CellUtil.matchingRows(kv, Bytes.toBytes(expectedRow)));
198    assertTrue("ColumnCheck", CellUtil.matchingQualifier(kv, Bytes.toBytes(expectedCol)));
199    assertEquals("TSCheck", expectedVersion, kv.getTimestamp());
200    assertTrue("ValueCheck",
201      CellUtil.matchingValue(kv, genValue(expectedRow, expectedCol, expectedVersion)));
202  }
203
204  private static long getBlkAccessCount(byte[] cf) {
205    return HFile.DATABLOCK_READ_COUNT.sum();
206  }
207
208  /**
209   * Test # of blocks read for some simple seek cases.
210   */
211  @Test
212  public void testBlocksRead() throws Exception {
213    byte[] TABLE = Bytes.toBytes("testBlocksRead");
214    String FAMILY = "cf1";
215    Cell[] kvs;
216    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY);
217
218    try {
219      putData(FAMILY, "row", "col1", 1);
220      putData(FAMILY, "row", "col2", 2);
221      putData(FAMILY, "row", "col3", 3);
222      putData(FAMILY, "row", "col4", 4);
223      putData(FAMILY, "row", "col5", 5);
224      putData(FAMILY, "row", "col6", 6);
225      putData(FAMILY, "row", "col7", 7);
226      region.flush(true);
227
228      // Expected block reads: 1
229      // The top block has the KV we are
230      // interested. So only 1 seek is needed.
231      kvs = getData(FAMILY, "row", "col1", 1);
232      assertEquals(1, kvs.length);
233      verifyData(kvs[0], "row", "col1", 1);
234
235      // Expected block reads: 2
236      // The top block and next block has the KVs we are
237      // interested. So only 2 seek is needed.
238      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
239      assertEquals(2, kvs.length);
240      verifyData(kvs[0], "row", "col1", 1);
241      verifyData(kvs[1], "row", "col2", 2);
242
243      // Expected block reads: 3
244      // The first 2 seeks is to find out col2. [HBASE-4443]
245      // One additional seek for col3
246      // So 3 seeks are needed.
247      kvs = getData(FAMILY, "row", Arrays.asList("col2", "col3"), 2);
248      assertEquals(2, kvs.length);
249      verifyData(kvs[0], "row", "col2", 2);
250      verifyData(kvs[1], "row", "col3", 3);
251
252      // Expected block reads: 1. [HBASE-4443]&[HBASE-7845]
253      kvs = getData(FAMILY, "row", Arrays.asList("col5"), 1);
254      assertEquals(1, kvs.length);
255      verifyData(kvs[0], "row", "col5", 5);
256    } finally {
257      HBaseTestingUtility.closeRegionAndWAL(this.region);
258      this.region = null;
259    }
260  }
261
262  /**
263   * Test # of blocks read (targeted at some of the cases Lazy Seek optimizes).
264   */
265  @Test
266  public void testLazySeekBlocksRead() throws Exception {
267    byte[] TABLE = Bytes.toBytes("testLazySeekBlocksRead");
268    String FAMILY = "cf1";
269    Cell[] kvs;
270    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY);
271
272    try {
273      // File 1
274      putData(FAMILY, "row", "col1", 1);
275      putData(FAMILY, "row", "col2", 2);
276      region.flush(true);
277
278      // File 2
279      putData(FAMILY, "row", "col1", 3);
280      putData(FAMILY, "row", "col2", 4);
281      region.flush(true);
282
283      // Expected blocks read: 1.
284      // File 2's top block is also the KV we are
285      // interested. So only 1 seek is needed.
286      kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1);
287      assertEquals(1, kvs.length);
288      verifyData(kvs[0], "row", "col1", 3);
289
290      // Expected blocks read: 2
291      // File 2's top block has the "col1" KV we are
292      // interested. We also need "col2" which is in a block
293      // of its own. So, we need that block as well.
294      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
295      assertEquals(2, kvs.length);
296      verifyData(kvs[0], "row", "col1", 3);
297      verifyData(kvs[1], "row", "col2", 4);
298
299      // File 3: Add another column
300      putData(FAMILY, "row", "col3", 5);
301      region.flush(true);
302
303      // Expected blocks read: 1
304      // File 3's top block has the "col3" KV we are
305      // interested. So only 1 seek is needed.
306      kvs = getData(FAMILY, "row", "col3", 1);
307      assertEquals(1, kvs.length);
308      verifyData(kvs[0], "row", "col3", 5);
309
310      // Get a column from older file.
311      // For ROWCOL Bloom filter: Expected blocks read: 1.
312      // For ROW Bloom filter: Expected blocks read: 2.
313      // For NONE Bloom filter: Expected blocks read: 2.
314      kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1, 2, 2);
315      assertEquals(1, kvs.length);
316      verifyData(kvs[0], "row", "col1", 3);
317
318      // File 4: Delete the entire row.
319      deleteFamily(FAMILY, "row", 6);
320      region.flush(true);
321
322      // For ROWCOL Bloom filter: Expected blocks read: 2.
323      // For ROW Bloom filter: Expected blocks read: 3.
324      // For NONE Bloom filter: Expected blocks read: 3.
325      kvs = getData(FAMILY, "row", "col1", 2, 3, 3);
326      assertEquals(0, kvs.length);
327      kvs = getData(FAMILY, "row", "col2", 2, 3, 3);
328      assertEquals(0, kvs.length);
329      kvs = getData(FAMILY, "row", "col3", 2);
330      assertEquals(0, kvs.length);
331      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 4);
332      assertEquals(0, kvs.length);
333
334      // File 5: Delete
335      deleteFamily(FAMILY, "row", 10);
336      region.flush(true);
337
338      // File 6: some more puts, but with timestamps older than the
339      // previous delete.
340      putData(FAMILY, "row", "col1", 7);
341      putData(FAMILY, "row", "col2", 8);
342      putData(FAMILY, "row", "col3", 9);
343      region.flush(true);
344
345      // Baseline expected blocks read: 6. [HBASE-4532]
346      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 6, 7, 7);
347      assertEquals(0, kvs.length);
348
349      // File 7: Put back new data
350      putData(FAMILY, "row", "col1", 11);
351      putData(FAMILY, "row", "col2", 12);
352      putData(FAMILY, "row", "col3", 13);
353      region.flush(true);
354
355      // Expected blocks read: 8. [HBASE-4585, HBASE-13109]
356      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 8, 9, 9);
357      assertEquals(3, kvs.length);
358      verifyData(kvs[0], "row", "col1", 11);
359      verifyData(kvs[1], "row", "col2", 12);
360      verifyData(kvs[2], "row", "col3", 13);
361    } finally {
362      HBaseTestingUtility.closeRegionAndWAL(this.region);
363      this.region = null;
364    }
365  }
366
367  /**
368   * Test # of blocks read to ensure disabling cache-fill on Scan works.
369   */
370  @Test
371  public void testBlocksStoredWhenCachingDisabled() throws Exception {
372    byte[] TABLE = Bytes.toBytes("testBlocksReadWhenCachingDisabled");
373    String FAMILY = "cf1";
374
375    BlockCache blockCache = BlockCacheFactory.createBlockCache(conf);
376    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY, blockCache);
377
378    try {
379      putData(FAMILY, "row", "col1", 1);
380      putData(FAMILY, "row", "col2", 2);
381      region.flush(true);
382
383      // Execute a scan with caching turned off
384      // Expected blocks stored: 0
385      long blocksStart = blockCache.getBlockCount();
386      Scan scan = new Scan();
387      scan.setCacheBlocks(false);
388      RegionScanner rs = region.getScanner(scan);
389      List<Cell> result = new ArrayList<>(2);
390      rs.next(result);
391      assertEquals(2 * BLOOM_TYPE.length, result.size());
392      rs.close();
393      long blocksEnd = blockCache.getBlockCount();
394
395      assertEquals(blocksStart, blocksEnd);
396
397      // Execute with caching turned on
398      // Expected blocks stored: 2
399      blocksStart = blocksEnd;
400      scan.setCacheBlocks(true);
401      rs = region.getScanner(scan);
402      result = new ArrayList<>(2);
403      rs.next(result);
404      assertEquals(2 * BLOOM_TYPE.length, result.size());
405      rs.close();
406      blocksEnd = blockCache.getBlockCount();
407
408      assertEquals(2 * BLOOM_TYPE.length, blocksEnd - blocksStart);
409    } finally {
410      HBaseTestingUtility.closeRegionAndWAL(this.region);
411      this.region = null;
412    }
413  }
414
415  @Test
416  public void testLazySeekBlocksReadWithDelete() throws Exception {
417    byte[] TABLE = Bytes.toBytes("testLazySeekBlocksReadWithDelete");
418    String FAMILY = "cf1";
419    Cell[] kvs;
420    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY);
421    try {
422      deleteFamily(FAMILY, "row", 200);
423      for (int i = 0; i < 100; i++) {
424        putData(FAMILY, "row", "col" + i, i);
425      }
426      putData(FAMILY, "row", "col99", 201);
427      region.flush(true);
428
429      kvs = getData(FAMILY, "row", Arrays.asList("col0"), 2);
430      assertEquals(0, kvs.length);
431
432      kvs = getData(FAMILY, "row", Arrays.asList("col99"), 2);
433      assertEquals(1, kvs.length);
434      verifyData(kvs[0], "row", "col99", 201);
435    } finally {
436      HBaseTestingUtility.closeRegionAndWAL(this.region);
437      this.region = null;
438    }
439  }
440
441}