001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static junit.framework.TestCase.assertTrue;
021import static org.junit.Assert.assertEquals;
022import java.io.IOException;
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.List;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.fs.Path;
028import org.apache.hadoop.hbase.Cell;
029import org.apache.hadoop.hbase.CellUtil;
030import org.apache.hadoop.hbase.HBaseClassTestRule;
031import org.apache.hadoop.hbase.HBaseTestingUtility;
032import org.apache.hadoop.hbase.TableName;
033import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
034import org.apache.hadoop.hbase.client.Delete;
035import org.apache.hadoop.hbase.client.Durability;
036import org.apache.hadoop.hbase.client.Get;
037import org.apache.hadoop.hbase.client.Put;
038import org.apache.hadoop.hbase.client.RegionInfo;
039import org.apache.hadoop.hbase.client.RegionInfoBuilder;
040import org.apache.hadoop.hbase.client.Scan;
041import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
042import org.apache.hadoop.hbase.io.hfile.BlockCache;
043import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
044import org.apache.hadoop.hbase.io.hfile.HFile;
045import org.apache.hadoop.hbase.testclassification.RegionServerTests;
046import org.apache.hadoop.hbase.testclassification.SmallTests;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
049import org.junit.AfterClass;
050import org.junit.BeforeClass;
051import org.junit.ClassRule;
052import org.junit.Rule;
053import org.junit.Test;
054import org.junit.experimental.categories.Category;
055import org.junit.rules.TestName;
056import org.slf4j.Logger;
057import org.slf4j.LoggerFactory;
058
059@Category({RegionServerTests.class, SmallTests.class})
060public class TestBlocksRead  {
061
062  @ClassRule
063  public static final HBaseClassTestRule CLASS_RULE =
064      HBaseClassTestRule.forClass(TestBlocksRead.class);
065
066  private static final Logger LOG = LoggerFactory.getLogger(TestBlocksRead.class);
067  @Rule
068  public TestName testName = new TestName();
069
070  static final BloomType[] BLOOM_TYPE = new BloomType[] { BloomType.ROWCOL,
071    BloomType.ROW, BloomType.NONE };
072
073  HRegion region = null;
074  private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
075  private final String DIR = TEST_UTIL.getDataTestDir("TestBlocksRead").toString();
076  private Configuration conf = TEST_UTIL.getConfiguration();
077
078  @BeforeClass
079  public static void setUp() throws Exception {
080    // disable compactions in this test.
081    TEST_UTIL.getConfiguration().setInt("hbase.hstore.compactionThreshold", 10000);
082  }
083
084  @AfterClass
085  public static void tearDown() throws Exception {
086    EnvironmentEdgeManagerTestHelper.reset();
087  }
088
089  /**
090   * Callers must afterward call {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)}
091   * @return created and initialized region.
092   */
093  private HRegion initHRegion(byte[] tableName, String callingMethod, Configuration conf,
094      String family) throws IOException {
095    return initHRegion(tableName, callingMethod, conf, family, null);
096  }
097
098  /**
099   * Callers must afterward call {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)}
100   */
101  private HRegion initHRegion(byte[] tableName, String callingMethod, Configuration conf,
102      String family, BlockCache blockCache) throws IOException {
103    TableDescriptorBuilder builder =
104        TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName));
105    for (int i = 0; i < BLOOM_TYPE.length; i++) {
106      BloomType bloomType = BLOOM_TYPE[i];
107      builder.setColumnFamily(
108          ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(family + "_" + bloomType))
109              .setBlocksize(1).setBloomFilterType(bloomType).build());
110    }
111    RegionInfo info = RegionInfoBuilder.newBuilder(TableName.valueOf(tableName)).build();
112    Path path = new Path(DIR + callingMethod);
113    if (blockCache != null) {
114      return HBaseTestingUtility.createRegionAndWAL(info, path, conf, builder.build(), blockCache);
115    } else {
116      return HBaseTestingUtility.createRegionAndWAL(info, path, conf, builder.build());
117    }
118  }
119
120  private void putData(String family, String row, String col, long version)
121      throws IOException {
122    for (int i = 0; i < BLOOM_TYPE.length; i++) {
123      putData(Bytes.toBytes(family + "_" + BLOOM_TYPE[i]), row, col, version,
124          version);
125    }
126  }
127
128  // generates a value to put for a row/col/version.
129  private static byte[] genValue(String row, String col, long version) {
130    return Bytes.toBytes("Value:" + row + "#" + col + "#" + version);
131  }
132
133  private void putData(byte[] cf, String row, String col, long versionStart,
134      long versionEnd) throws IOException {
135    byte [] columnBytes = Bytes.toBytes(col);
136    Put put = new Put(Bytes.toBytes(row));
137    put.setDurability(Durability.SKIP_WAL);
138
139    for (long version = versionStart; version <= versionEnd; version++) {
140      put.addColumn(cf, columnBytes, version, genValue(row, col, version));
141    }
142    region.put(put);
143  }
144
145  private Cell[] getData(String family, String row, List<String> columns,
146      int expBlocks) throws IOException {
147    return getData(family, row, columns, expBlocks, expBlocks, expBlocks);
148  }
149
150  private Cell[] getData(String family, String row, List<String> columns,
151      int expBlocksRowCol, int expBlocksRow, int expBlocksNone)
152      throws IOException {
153    int[] expBlocks = new int[] { expBlocksRowCol, expBlocksRow, expBlocksNone };
154    Cell[] kvs = null;
155
156    for (int i = 0; i < BLOOM_TYPE.length; i++) {
157      BloomType bloomType = BLOOM_TYPE[i];
158      byte[] cf = Bytes.toBytes(family + "_" + bloomType);
159      long blocksStart = getBlkAccessCount(cf);
160      Get get = new Get(Bytes.toBytes(row));
161
162      for (String column : columns) {
163        get.addColumn(cf, Bytes.toBytes(column));
164      }
165
166      kvs = region.get(get).rawCells();
167      long blocksEnd = getBlkAccessCount(cf);
168      if (expBlocks[i] != -1) {
169        assertEquals("Blocks Read Check for Bloom: " + bloomType, expBlocks[i],
170            blocksEnd - blocksStart);
171      }
172      System.out.println("Blocks Read for Bloom: " + bloomType + " = "
173          + (blocksEnd - blocksStart) + "Expected = " + expBlocks[i]);
174    }
175    return kvs;
176  }
177
178  private Cell[] getData(String family, String row, String column,
179      int expBlocks) throws IOException {
180    return getData(family, row, Arrays.asList(column), expBlocks, expBlocks,
181        expBlocks);
182  }
183
184  private Cell[] getData(String family, String row, String column,
185      int expBlocksRowCol, int expBlocksRow, int expBlocksNone)
186      throws IOException {
187    return getData(family, row, Arrays.asList(column), expBlocksRowCol,
188        expBlocksRow, expBlocksNone);
189  }
190
191  private void deleteFamily(String family, String row, long version)
192      throws IOException {
193    Delete del = new Delete(Bytes.toBytes(row));
194    del.addFamily(Bytes.toBytes(family + "_ROWCOL"), version);
195    del.addFamily(Bytes.toBytes(family + "_ROW"), version);
196    del.addFamily(Bytes.toBytes(family + "_NONE"), version);
197    region.delete(del);
198  }
199
200  private static void verifyData(Cell kv, String expectedRow,
201      String expectedCol, long expectedVersion) {
202    assertTrue("RowCheck", CellUtil.matchingRows(kv,  Bytes.toBytes(expectedRow)));
203    assertTrue("ColumnCheck", CellUtil.matchingQualifier(kv, Bytes.toBytes(expectedCol)));
204    assertEquals("TSCheck", expectedVersion, kv.getTimestamp());
205    assertTrue("ValueCheck", CellUtil.matchingValue(kv, genValue(expectedRow, expectedCol,
206      expectedVersion)));
207  }
208
209  private static long getBlkAccessCount(byte[] cf) {
210    return HFile.DATABLOCK_READ_COUNT.sum();
211  }
212
213  /**
214   * Test # of blocks read for some simple seek cases.
215   */
216  @Test
217  public void testBlocksRead() throws Exception {
218    byte[] TABLE = Bytes.toBytes("testBlocksRead");
219    String FAMILY = "cf1";
220    Cell [] kvs;
221    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY);
222
223    try {
224      putData(FAMILY, "row", "col1", 1);
225      putData(FAMILY, "row", "col2", 2);
226      putData(FAMILY, "row", "col3", 3);
227      putData(FAMILY, "row", "col4", 4);
228      putData(FAMILY, "row", "col5", 5);
229      putData(FAMILY, "row", "col6", 6);
230      putData(FAMILY, "row", "col7", 7);
231      region.flush(true);
232
233      // Expected block reads: 1
234      // The top block has the KV we are
235      // interested. So only 1 seek is needed.
236      kvs = getData(FAMILY, "row", "col1", 1);
237      assertEquals(1, kvs.length);
238      verifyData(kvs[0], "row", "col1", 1);
239
240      // Expected block reads: 2
241      // The top block and next block has the KVs we are
242      // interested. So only 2 seek is needed.
243      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
244      assertEquals(2, kvs.length);
245      verifyData(kvs[0], "row", "col1", 1);
246      verifyData(kvs[1], "row", "col2", 2);
247
248      // Expected block reads: 3
249      // The first 2 seeks is to find out col2. [HBASE-4443]
250      // One additional seek for col3
251      // So 3 seeks are needed.
252      kvs = getData(FAMILY, "row", Arrays.asList("col2", "col3"), 2);
253      assertEquals(2, kvs.length);
254      verifyData(kvs[0], "row", "col2", 2);
255      verifyData(kvs[1], "row", "col3", 3);
256
257      // Expected block reads: 1. [HBASE-4443]&[HBASE-7845]
258      kvs = getData(FAMILY, "row", Arrays.asList("col5"), 1);
259      assertEquals(1, kvs.length);
260      verifyData(kvs[0], "row", "col5", 5);
261    } finally {
262      HBaseTestingUtility.closeRegionAndWAL(this.region);
263      this.region = null;
264    }
265  }
266
267  /**
268   * Test # of blocks read (targeted at some of the cases Lazy Seek optimizes).
269   */
270  @Test
271  public void testLazySeekBlocksRead() throws Exception {
272    byte[] TABLE = Bytes.toBytes("testLazySeekBlocksRead");
273    String FAMILY = "cf1";
274    Cell [] kvs;
275    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY);
276
277    try {
278      // File 1
279      putData(FAMILY, "row", "col1", 1);
280      putData(FAMILY, "row", "col2", 2);
281      region.flush(true);
282
283      // File 2
284      putData(FAMILY, "row", "col1", 3);
285      putData(FAMILY, "row", "col2", 4);
286      region.flush(true);
287
288      // Expected blocks read: 1.
289      // File 2's top block is also the KV we are
290      // interested. So only 1 seek is needed.
291      kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1);
292      assertEquals(1, kvs.length);
293      verifyData(kvs[0], "row", "col1", 3);
294
295      // Expected blocks read: 2
296      // File 2's top block has the "col1" KV we are
297      // interested. We also need "col2" which is in a block
298      // of its own. So, we need that block as well.
299      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
300      assertEquals(2, kvs.length);
301      verifyData(kvs[0], "row", "col1", 3);
302      verifyData(kvs[1], "row", "col2", 4);
303
304      // File 3: Add another column
305      putData(FAMILY, "row", "col3", 5);
306      region.flush(true);
307
308      // Expected blocks read: 1
309      // File 3's top block has the "col3" KV we are
310      // interested. So only 1 seek is needed.
311      kvs = getData(FAMILY, "row", "col3", 1);
312      assertEquals(1, kvs.length);
313      verifyData(kvs[0], "row", "col3", 5);
314
315      // Get a column from older file.
316      // For ROWCOL Bloom filter: Expected blocks read: 1.
317      // For ROW Bloom filter: Expected blocks read: 2.
318      // For NONE Bloom filter: Expected blocks read: 2.
319      kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1, 2, 2);
320      assertEquals(1, kvs.length);
321      verifyData(kvs[0], "row", "col1", 3);
322
323      // File 4: Delete the entire row.
324      deleteFamily(FAMILY, "row", 6);
325      region.flush(true);
326
327      // For ROWCOL Bloom filter: Expected blocks read: 2.
328      // For ROW Bloom filter: Expected blocks read: 3.
329      // For NONE Bloom filter: Expected blocks read: 3.
330      kvs = getData(FAMILY, "row", "col1", 2, 3, 3);
331      assertEquals(0, kvs.length);
332      kvs = getData(FAMILY, "row", "col2", 2, 3, 3);
333      assertEquals(0, kvs.length);
334      kvs = getData(FAMILY, "row", "col3", 2);
335      assertEquals(0, kvs.length);
336      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 4);
337      assertEquals(0, kvs.length);
338
339      // File 5: Delete
340      deleteFamily(FAMILY, "row", 10);
341      region.flush(true);
342
343      // File 6: some more puts, but with timestamps older than the
344      // previous delete.
345      putData(FAMILY, "row", "col1", 7);
346      putData(FAMILY, "row", "col2", 8);
347      putData(FAMILY, "row", "col3", 9);
348      region.flush(true);
349
350      // Baseline expected blocks read: 6. [HBASE-4532]
351      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 6, 7, 7);
352      assertEquals(0, kvs.length);
353
354      // File 7: Put back new data
355      putData(FAMILY, "row", "col1", 11);
356      putData(FAMILY, "row", "col2", 12);
357      putData(FAMILY, "row", "col3", 13);
358      region.flush(true);
359
360
361      // Expected blocks read: 8. [HBASE-4585, HBASE-13109]
362      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 8, 9, 9);
363      assertEquals(3, kvs.length);
364      verifyData(kvs[0], "row", "col1", 11);
365      verifyData(kvs[1], "row", "col2", 12);
366      verifyData(kvs[2], "row", "col3", 13);
367    } finally {
368      HBaseTestingUtility.closeRegionAndWAL(this.region);
369      this.region = null;
370    }
371  }
372
373  /**
374   * Test # of blocks read to ensure disabling cache-fill on Scan works.
375   */
376  @Test
377  public void testBlocksStoredWhenCachingDisabled() throws Exception {
378    byte [] TABLE = Bytes.toBytes("testBlocksReadWhenCachingDisabled");
379    String FAMILY = "cf1";
380
381    BlockCache blockCache = BlockCacheFactory.createBlockCache(conf);
382    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY, blockCache);
383
384    try {
385      putData(FAMILY, "row", "col1", 1);
386      putData(FAMILY, "row", "col2", 2);
387      region.flush(true);
388
389      // Execute a scan with caching turned off
390      // Expected blocks stored: 0
391      long blocksStart = blockCache.getBlockCount();
392      Scan scan = new Scan();
393      scan.setCacheBlocks(false);
394      RegionScanner rs = region.getScanner(scan);
395      List<Cell> result = new ArrayList<>(2);
396      rs.next(result);
397      assertEquals(2 * BLOOM_TYPE.length, result.size());
398      rs.close();
399      long blocksEnd = blockCache.getBlockCount();
400
401      assertEquals(blocksStart, blocksEnd);
402
403      // Execute with caching turned on
404      // Expected blocks stored: 2
405      blocksStart = blocksEnd;
406      scan.setCacheBlocks(true);
407      rs = region.getScanner(scan);
408      result = new ArrayList<>(2);
409      rs.next(result);
410      assertEquals(2 * BLOOM_TYPE.length, result.size());
411      rs.close();
412      blocksEnd = blockCache.getBlockCount();
413
414      assertEquals(2 * BLOOM_TYPE.length, blocksEnd - blocksStart);
415    } finally {
416      HBaseTestingUtility.closeRegionAndWAL(this.region);
417      this.region = null;
418    }
419  }
420
421  @Test
422  public void testLazySeekBlocksReadWithDelete() throws Exception {
423    byte[] TABLE = Bytes.toBytes("testLazySeekBlocksReadWithDelete");
424    String FAMILY = "cf1";
425    Cell [] kvs;
426    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY);
427    try {
428      deleteFamily(FAMILY, "row", 200);
429      for (int i = 0; i < 100; i++) {
430        putData(FAMILY, "row", "col" + i, i);
431      }
432      putData(FAMILY, "row", "col99", 201);
433      region.flush(true);
434
435      kvs = getData(FAMILY, "row", Arrays.asList("col0"), 2);
436      assertEquals(0, kvs.length);
437
438      kvs = getData(FAMILY, "row", Arrays.asList("col99"), 2);
439      assertEquals(1, kvs.length);
440      verifyData(kvs[0], "row", "col99", 201);
441    } finally {
442      HBaseTestingUtility.closeRegionAndWAL(this.region);
443      this.region = null;
444    }
445  }
446
447}