/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static junit.framework.TestCase.assertTrue;
021import static org.junit.Assert.assertEquals;
022
023import java.io.IOException;
024import java.util.ArrayList;
025import java.util.Arrays;
026import java.util.List;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.fs.Path;
029import org.apache.hadoop.hbase.Cell;
030import org.apache.hadoop.hbase.CellUtil;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HBaseTestingUtility;
033import org.apache.hadoop.hbase.TableName;
034import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
035import org.apache.hadoop.hbase.client.Delete;
036import org.apache.hadoop.hbase.client.Durability;
037import org.apache.hadoop.hbase.client.Get;
038import org.apache.hadoop.hbase.client.Put;
039import org.apache.hadoop.hbase.client.RegionInfo;
040import org.apache.hadoop.hbase.client.RegionInfoBuilder;
041import org.apache.hadoop.hbase.client.Scan;
042import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
043import org.apache.hadoop.hbase.io.hfile.BlockCache;
044import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
045import org.apache.hadoop.hbase.io.hfile.HFile;
046import org.apache.hadoop.hbase.testclassification.MediumTests;
047import org.apache.hadoop.hbase.testclassification.RegionServerTests;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
050import org.junit.AfterClass;
051import org.junit.BeforeClass;
052import org.junit.ClassRule;
053import org.junit.Rule;
054import org.junit.Test;
055import org.junit.experimental.categories.Category;
056import org.junit.rules.TestName;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060@Category({RegionServerTests.class, MediumTests.class})
061public class TestBlocksRead  {
062
063  @ClassRule
064  public static final HBaseClassTestRule CLASS_RULE =
065      HBaseClassTestRule.forClass(TestBlocksRead.class);
066
067  private static final Logger LOG = LoggerFactory.getLogger(TestBlocksRead.class);
068  @Rule
069  public TestName testName = new TestName();
070
071  static final BloomType[] BLOOM_TYPE = new BloomType[] { BloomType.ROWCOL,
072      BloomType.ROW, BloomType.NONE };
073
074  HRegion region = null;
075  private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
076  private final String DIR = TEST_UTIL.getDataTestDir("TestBlocksRead").toString();
077  private Configuration conf = TEST_UTIL.getConfiguration();
078
079  @BeforeClass
080  public static void setUp() throws Exception {
081    // disable compactions in this test.
082    TEST_UTIL.getConfiguration().setInt("hbase.hstore.compactionThreshold", 10000);
083  }
084
085  @AfterClass
086  public static void tearDown() throws Exception {
087    EnvironmentEdgeManagerTestHelper.reset();
088  }
089
090  /**
091   * Callers must afterward call {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)}
092   * @param tableName
093   * @param callingMethod
094   * @param conf
095   * @param family
096   * @throws IOException
097   * @return created and initialized region.
098   */
099  private HRegion initHRegion(byte[] tableName, String callingMethod, Configuration conf,
100      String family) throws IOException {
101    return initHRegion(tableName, callingMethod, conf, family, null);
102  }
103
104  /**
105   * Callers must afterward call {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)}
106   */
107  private HRegion initHRegion(byte[] tableName, String callingMethod, Configuration conf,
108      String family, BlockCache blockCache) throws IOException {
109    TableDescriptorBuilder builder =
110        TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName));
111    for (int i = 0; i < BLOOM_TYPE.length; i++) {
112      BloomType bloomType = BLOOM_TYPE[i];
113      builder.setColumnFamily(
114          ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(family + "_" + bloomType))
115              .setBlocksize(1).setBloomFilterType(bloomType).build());
116    }
117    RegionInfo info = RegionInfoBuilder.newBuilder(TableName.valueOf(tableName)).build();
118    Path path = new Path(DIR + callingMethod);
119    if (blockCache != null) {
120      return HBaseTestingUtility.createRegionAndWAL(info, path, conf, builder.build(), blockCache);
121    } else {
122      return HBaseTestingUtility.createRegionAndWAL(info, path, conf, builder.build());
123    }
124  }
125
126  private void putData(String family, String row, String col, long version)
127      throws IOException {
128    for (int i = 0; i < BLOOM_TYPE.length; i++) {
129      putData(Bytes.toBytes(family + "_" + BLOOM_TYPE[i]), row, col, version,
130          version);
131    }
132  }
133
134  // generates a value to put for a row/col/version.
135  private static byte[] genValue(String row, String col, long version) {
136    return Bytes.toBytes("Value:" + row + "#" + col + "#" + version);
137  }
138
139  private void putData(byte[] cf, String row, String col, long versionStart,
140      long versionEnd) throws IOException {
141    byte columnBytes[] = Bytes.toBytes(col);
142    Put put = new Put(Bytes.toBytes(row));
143    put.setDurability(Durability.SKIP_WAL);
144
145    for (long version = versionStart; version <= versionEnd; version++) {
146      put.addColumn(cf, columnBytes, version, genValue(row, col, version));
147    }
148    region.put(put);
149  }
150
151  private Cell[] getData(String family, String row, List<String> columns,
152      int expBlocks) throws IOException {
153    return getData(family, row, columns, expBlocks, expBlocks, expBlocks);
154  }
155
156  private Cell[] getData(String family, String row, List<String> columns,
157      int expBlocksRowCol, int expBlocksRow, int expBlocksNone)
158      throws IOException {
159    int[] expBlocks = new int[] { expBlocksRowCol, expBlocksRow, expBlocksNone };
160    Cell[] kvs = null;
161
162    for (int i = 0; i < BLOOM_TYPE.length; i++) {
163      BloomType bloomType = BLOOM_TYPE[i];
164      byte[] cf = Bytes.toBytes(family + "_" + bloomType);
165      long blocksStart = getBlkAccessCount(cf);
166      Get get = new Get(Bytes.toBytes(row));
167
168      for (String column : columns) {
169        get.addColumn(cf, Bytes.toBytes(column));
170      }
171
172      kvs = region.get(get).rawCells();
173      long blocksEnd = getBlkAccessCount(cf);
174      if (expBlocks[i] != -1) {
175        assertEquals("Blocks Read Check for Bloom: " + bloomType, expBlocks[i],
176            blocksEnd - blocksStart);
177      }
178      System.out.println("Blocks Read for Bloom: " + bloomType + " = "
179          + (blocksEnd - blocksStart) + "Expected = " + expBlocks[i]);
180    }
181    return kvs;
182  }
183
184  private Cell[] getData(String family, String row, String column,
185      int expBlocks) throws IOException {
186    return getData(family, row, Arrays.asList(column), expBlocks, expBlocks,
187        expBlocks);
188  }
189
190  private Cell[] getData(String family, String row, String column,
191      int expBlocksRowCol, int expBlocksRow, int expBlocksNone)
192      throws IOException {
193    return getData(family, row, Arrays.asList(column), expBlocksRowCol,
194        expBlocksRow, expBlocksNone);
195  }
196
197  private void deleteFamily(String family, String row, long version)
198      throws IOException {
199    Delete del = new Delete(Bytes.toBytes(row));
200    del.addFamily(Bytes.toBytes(family + "_ROWCOL"), version);
201    del.addFamily(Bytes.toBytes(family + "_ROW"), version);
202    del.addFamily(Bytes.toBytes(family + "_NONE"), version);
203    region.delete(del);
204  }
205
206  private static void verifyData(Cell kv, String expectedRow,
207      String expectedCol, long expectedVersion) {
208    assertTrue("RowCheck", CellUtil.matchingRows(kv,  Bytes.toBytes(expectedRow)));
209    assertTrue("ColumnCheck", CellUtil.matchingQualifier(kv, Bytes.toBytes(expectedCol)));
210    assertEquals("TSCheck", expectedVersion, kv.getTimestamp());
211    assertTrue("ValueCheck", CellUtil.matchingValue(kv, genValue(expectedRow, expectedCol, expectedVersion)));
212  }
213
214  private static long getBlkAccessCount(byte[] cf) {
215      return HFile.DATABLOCK_READ_COUNT.sum();
216  }
217
218  /**
219   * Test # of blocks read for some simple seek cases.
220   *
221   * @throws Exception
222   */
223  @Test
224  public void testBlocksRead() throws Exception {
225    byte[] TABLE = Bytes.toBytes("testBlocksRead");
226    String FAMILY = "cf1";
227    Cell kvs[];
228    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY);
229
230    try {
231      putData(FAMILY, "row", "col1", 1);
232      putData(FAMILY, "row", "col2", 2);
233      putData(FAMILY, "row", "col3", 3);
234      putData(FAMILY, "row", "col4", 4);
235      putData(FAMILY, "row", "col5", 5);
236      putData(FAMILY, "row", "col6", 6);
237      putData(FAMILY, "row", "col7", 7);
238      region.flush(true);
239
240      // Expected block reads: 1
241      // The top block has the KV we are
242      // interested. So only 1 seek is needed.
243      kvs = getData(FAMILY, "row", "col1", 1);
244      assertEquals(1, kvs.length);
245      verifyData(kvs[0], "row", "col1", 1);
246
247      // Expected block reads: 2
248      // The top block and next block has the KVs we are
249      // interested. So only 2 seek is needed.
250      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
251      assertEquals(2, kvs.length);
252      verifyData(kvs[0], "row", "col1", 1);
253      verifyData(kvs[1], "row", "col2", 2);
254
255      // Expected block reads: 3
256      // The first 2 seeks is to find out col2. [HBASE-4443]
257      // One additional seek for col3
258      // So 3 seeks are needed.
259      kvs = getData(FAMILY, "row", Arrays.asList("col2", "col3"), 2);
260      assertEquals(2, kvs.length);
261      verifyData(kvs[0], "row", "col2", 2);
262      verifyData(kvs[1], "row", "col3", 3);
263
264      // Expected block reads: 1. [HBASE-4443]&[HBASE-7845]
265      kvs = getData(FAMILY, "row", Arrays.asList("col5"), 1);
266      assertEquals(1, kvs.length);
267      verifyData(kvs[0], "row", "col5", 5);
268    } finally {
269      HBaseTestingUtility.closeRegionAndWAL(this.region);
270      this.region = null;
271    }
272  }
273
274  /**
275   * Test # of blocks read (targeted at some of the cases Lazy Seek optimizes).
276   *
277   * @throws Exception
278   */
279  @Test
280  public void testLazySeekBlocksRead() throws Exception {
281    byte[] TABLE = Bytes.toBytes("testLazySeekBlocksRead");
282    String FAMILY = "cf1";
283    Cell kvs[];
284    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY);
285
286    try {
287      // File 1
288      putData(FAMILY, "row", "col1", 1);
289      putData(FAMILY, "row", "col2", 2);
290      region.flush(true);
291
292      // File 2
293      putData(FAMILY, "row", "col1", 3);
294      putData(FAMILY, "row", "col2", 4);
295      region.flush(true);
296
297      // Expected blocks read: 1.
298      // File 2's top block is also the KV we are
299      // interested. So only 1 seek is needed.
300      kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1);
301      assertEquals(1, kvs.length);
302      verifyData(kvs[0], "row", "col1", 3);
303
304      // Expected blocks read: 2
305      // File 2's top block has the "col1" KV we are
306      // interested. We also need "col2" which is in a block
307      // of its own. So, we need that block as well.
308      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
309      assertEquals(2, kvs.length);
310      verifyData(kvs[0], "row", "col1", 3);
311      verifyData(kvs[1], "row", "col2", 4);
312
313      // File 3: Add another column
314      putData(FAMILY, "row", "col3", 5);
315      region.flush(true);
316
317      // Expected blocks read: 1
318      // File 3's top block has the "col3" KV we are
319      // interested. So only 1 seek is needed.
320      kvs = getData(FAMILY, "row", "col3", 1);
321      assertEquals(1, kvs.length);
322      verifyData(kvs[0], "row", "col3", 5);
323
324      // Get a column from older file.
325      // For ROWCOL Bloom filter: Expected blocks read: 1.
326      // For ROW Bloom filter: Expected blocks read: 2.
327      // For NONE Bloom filter: Expected blocks read: 2.
328      kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1, 2, 2);
329      assertEquals(1, kvs.length);
330      verifyData(kvs[0], "row", "col1", 3);
331
332      // File 4: Delete the entire row.
333      deleteFamily(FAMILY, "row", 6);
334      region.flush(true);
335
336      // For ROWCOL Bloom filter: Expected blocks read: 2.
337      // For ROW Bloom filter: Expected blocks read: 3.
338      // For NONE Bloom filter: Expected blocks read: 3.
339      kvs = getData(FAMILY, "row", "col1", 2, 3, 3);
340      assertEquals(0, kvs.length);
341      kvs = getData(FAMILY, "row", "col2", 2, 3, 3);
342      assertEquals(0, kvs.length);
343      kvs = getData(FAMILY, "row", "col3", 2);
344      assertEquals(0, kvs.length);
345      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 4);
346      assertEquals(0, kvs.length);
347
348      // File 5: Delete
349      deleteFamily(FAMILY, "row", 10);
350      region.flush(true);
351
352      // File 6: some more puts, but with timestamps older than the
353      // previous delete.
354      putData(FAMILY, "row", "col1", 7);
355      putData(FAMILY, "row", "col2", 8);
356      putData(FAMILY, "row", "col3", 9);
357      region.flush(true);
358
359      // Baseline expected blocks read: 6. [HBASE-4532]
360      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 6, 7, 7);
361      assertEquals(0, kvs.length);
362
363      // File 7: Put back new data
364      putData(FAMILY, "row", "col1", 11);
365      putData(FAMILY, "row", "col2", 12);
366      putData(FAMILY, "row", "col3", 13);
367      region.flush(true);
368
369
370      // Expected blocks read: 8. [HBASE-4585, HBASE-13109]
371      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 8, 9, 9);
372      assertEquals(3, kvs.length);
373      verifyData(kvs[0], "row", "col1", 11);
374      verifyData(kvs[1], "row", "col2", 12);
375      verifyData(kvs[2], "row", "col3", 13);
376    } finally {
377      HBaseTestingUtility.closeRegionAndWAL(this.region);
378      this.region = null;
379    }
380  }
381
382  /**
383   * Test # of blocks read to ensure disabling cache-fill on Scan works.
384   * @throws Exception
385   */
386  @Test
387  public void testBlocksStoredWhenCachingDisabled() throws Exception {
388    byte [] TABLE = Bytes.toBytes("testBlocksReadWhenCachingDisabled");
389    String FAMILY = "cf1";
390
391    BlockCache blockCache = BlockCacheFactory.createBlockCache(conf);
392    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY, blockCache);
393
394    try {
395      putData(FAMILY, "row", "col1", 1);
396      putData(FAMILY, "row", "col2", 2);
397      region.flush(true);
398
399      // Execute a scan with caching turned off
400      // Expected blocks stored: 0
401      long blocksStart = blockCache.getBlockCount();
402      Scan scan = new Scan();
403      scan.setCacheBlocks(false);
404      RegionScanner rs = region.getScanner(scan);
405      List<Cell> result = new ArrayList<>(2);
406      rs.next(result);
407      assertEquals(2 * BLOOM_TYPE.length, result.size());
408      rs.close();
409      long blocksEnd = blockCache.getBlockCount();
410
411      assertEquals(blocksStart, blocksEnd);
412
413      // Execute with caching turned on
414      // Expected blocks stored: 2
415      blocksStart = blocksEnd;
416      scan.setCacheBlocks(true);
417      rs = region.getScanner(scan);
418      result = new ArrayList<>(2);
419      rs.next(result);
420      assertEquals(2 * BLOOM_TYPE.length, result.size());
421      rs.close();
422      blocksEnd = blockCache.getBlockCount();
423
424      assertEquals(2 * BLOOM_TYPE.length, blocksEnd - blocksStart);
425    } finally {
426      HBaseTestingUtility.closeRegionAndWAL(this.region);
427      this.region = null;
428    }
429  }
430
431  @Test
432  public void testLazySeekBlocksReadWithDelete() throws Exception {
433    byte[] TABLE = Bytes.toBytes("testLazySeekBlocksReadWithDelete");
434    String FAMILY = "cf1";
435    Cell kvs[];
436    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY);
437    try {
438      deleteFamily(FAMILY, "row", 200);
439      for (int i = 0; i < 100; i++) {
440        putData(FAMILY, "row", "col" + i, i);
441      }
442      putData(FAMILY, "row", "col99", 201);
443      region.flush(true);
444
445      kvs = getData(FAMILY, "row", Arrays.asList("col0"), 2);
446      assertEquals(0, kvs.length);
447
448      kvs = getData(FAMILY, "row", Arrays.asList("col99"), 2);
449      assertEquals(1, kvs.length);
450      verifyData(kvs[0], "row", "col99", 201);
451    } finally {
452      HBaseTestingUtility.closeRegionAndWAL(this.region);
453      this.region = null;
454    }
455  }
456
457}