001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static junit.framework.TestCase.assertTrue;
021import static org.junit.Assert.assertEquals;
022
023import java.io.IOException;
024import java.util.ArrayList;
025import java.util.Arrays;
026import java.util.List;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.fs.Path;
029import org.apache.hadoop.hbase.Cell;
030import org.apache.hadoop.hbase.CellUtil;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HBaseTestingUtility;
033import org.apache.hadoop.hbase.HColumnDescriptor;
034import org.apache.hadoop.hbase.HRegionInfo;
035import org.apache.hadoop.hbase.HTableDescriptor;
036import org.apache.hadoop.hbase.TableName;
037import org.apache.hadoop.hbase.client.Delete;
038import org.apache.hadoop.hbase.client.Durability;
039import org.apache.hadoop.hbase.client.Get;
040import org.apache.hadoop.hbase.client.Put;
041import org.apache.hadoop.hbase.client.Scan;
042import org.apache.hadoop.hbase.io.hfile.BlockCache;
043import org.apache.hadoop.hbase.io.hfile.CacheConfig;
044import org.apache.hadoop.hbase.io.hfile.HFile;
045import org.apache.hadoop.hbase.testclassification.MediumTests;
046import org.apache.hadoop.hbase.testclassification.RegionServerTests;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
049import org.junit.AfterClass;
050import org.junit.BeforeClass;
051import org.junit.ClassRule;
052import org.junit.Rule;
053import org.junit.Test;
054import org.junit.experimental.categories.Category;
055import org.junit.rules.TestName;
056import org.slf4j.Logger;
057import org.slf4j.LoggerFactory;
058
059@Category({RegionServerTests.class, MediumTests.class})
060public class TestBlocksRead  {
061
062  @ClassRule
063  public static final HBaseClassTestRule CLASS_RULE =
064      HBaseClassTestRule.forClass(TestBlocksRead.class);
065
066  private static final Logger LOG = LoggerFactory.getLogger(TestBlocksRead.class);
067  @Rule
068  public TestName testName = new TestName();
069
070  static final BloomType[] BLOOM_TYPE = new BloomType[] { BloomType.ROWCOL,
071      BloomType.ROW, BloomType.NONE };
072
073  private static BlockCache blockCache;
074  HRegion region = null;
075  private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
076  private final String DIR = TEST_UTIL.getDataTestDir("TestBlocksRead").toString();
077  private Configuration conf = TEST_UTIL.getConfiguration();
078
079  @BeforeClass
080  public static void setUp() throws Exception {
081    // disable compactions in this test.
082    TEST_UTIL.getConfiguration().setInt("hbase.hstore.compactionThreshold", 10000);
083    CacheConfig.instantiateBlockCache(TEST_UTIL.getConfiguration());
084  }
085
086  @AfterClass
087  public static void tearDown() throws Exception {
088    EnvironmentEdgeManagerTestHelper.reset();
089  }
090
091  /**
092   * Callers must afterward call {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)}
093   * @param tableName
094   * @param callingMethod
095   * @param conf
096   * @param family
097   * @throws IOException
098   * @return created and initialized region.
099   */
100  private HRegion initHRegion(byte[] tableName, String callingMethod,
101      Configuration conf, String family) throws IOException {
102    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
103    HColumnDescriptor familyDesc;
104    for (int i = 0; i < BLOOM_TYPE.length; i++) {
105      BloomType bloomType = BLOOM_TYPE[i];
106      familyDesc = new HColumnDescriptor(family + "_" + bloomType)
107          .setBlocksize(1)
108          .setBloomFilterType(BLOOM_TYPE[i]);
109      htd.addFamily(familyDesc);
110    }
111
112    HRegionInfo info = new HRegionInfo(htd.getTableName(), null, null, false);
113    Path path = new Path(DIR + callingMethod);
114    HRegion r = HBaseTestingUtility.createRegionAndWAL(info, path, conf, htd);
115    blockCache = new CacheConfig(conf).getBlockCache();
116    return r;
117  }
118
119  private void putData(String family, String row, String col, long version)
120      throws IOException {
121    for (int i = 0; i < BLOOM_TYPE.length; i++) {
122      putData(Bytes.toBytes(family + "_" + BLOOM_TYPE[i]), row, col, version,
123          version);
124    }
125  }
126
127  // generates a value to put for a row/col/version.
128  private static byte[] genValue(String row, String col, long version) {
129    return Bytes.toBytes("Value:" + row + "#" + col + "#" + version);
130  }
131
132  private void putData(byte[] cf, String row, String col, long versionStart,
133      long versionEnd) throws IOException {
134    byte columnBytes[] = Bytes.toBytes(col);
135    Put put = new Put(Bytes.toBytes(row));
136    put.setDurability(Durability.SKIP_WAL);
137
138    for (long version = versionStart; version <= versionEnd; version++) {
139      put.addColumn(cf, columnBytes, version, genValue(row, col, version));
140    }
141    region.put(put);
142  }
143
144  private Cell[] getData(String family, String row, List<String> columns,
145      int expBlocks) throws IOException {
146    return getData(family, row, columns, expBlocks, expBlocks, expBlocks);
147  }
148
149  private Cell[] getData(String family, String row, List<String> columns,
150      int expBlocksRowCol, int expBlocksRow, int expBlocksNone)
151      throws IOException {
152    int[] expBlocks = new int[] { expBlocksRowCol, expBlocksRow, expBlocksNone };
153    Cell[] kvs = null;
154
155    for (int i = 0; i < BLOOM_TYPE.length; i++) {
156      BloomType bloomType = BLOOM_TYPE[i];
157      byte[] cf = Bytes.toBytes(family + "_" + bloomType);
158      long blocksStart = getBlkAccessCount(cf);
159      Get get = new Get(Bytes.toBytes(row));
160
161      for (String column : columns) {
162        get.addColumn(cf, Bytes.toBytes(column));
163      }
164
165      kvs = region.get(get).rawCells();
166      long blocksEnd = getBlkAccessCount(cf);
167      if (expBlocks[i] != -1) {
168        assertEquals("Blocks Read Check for Bloom: " + bloomType, expBlocks[i],
169            blocksEnd - blocksStart);
170      }
171      System.out.println("Blocks Read for Bloom: " + bloomType + " = "
172          + (blocksEnd - blocksStart) + "Expected = " + expBlocks[i]);
173    }
174    return kvs;
175  }
176
177  private Cell[] getData(String family, String row, String column,
178      int expBlocks) throws IOException {
179    return getData(family, row, Arrays.asList(column), expBlocks, expBlocks,
180        expBlocks);
181  }
182
183  private Cell[] getData(String family, String row, String column,
184      int expBlocksRowCol, int expBlocksRow, int expBlocksNone)
185      throws IOException {
186    return getData(family, row, Arrays.asList(column), expBlocksRowCol,
187        expBlocksRow, expBlocksNone);
188  }
189
190  private void deleteFamily(String family, String row, long version)
191      throws IOException {
192    Delete del = new Delete(Bytes.toBytes(row));
193    del.addFamily(Bytes.toBytes(family + "_ROWCOL"), version);
194    del.addFamily(Bytes.toBytes(family + "_ROW"), version);
195    del.addFamily(Bytes.toBytes(family + "_NONE"), version);
196    region.delete(del);
197  }
198
199  private static void verifyData(Cell kv, String expectedRow,
200      String expectedCol, long expectedVersion) {
201    assertTrue("RowCheck", CellUtil.matchingRows(kv,  Bytes.toBytes(expectedRow)));
202    assertTrue("ColumnCheck", CellUtil.matchingQualifier(kv, Bytes.toBytes(expectedCol)));
203    assertEquals("TSCheck", expectedVersion, kv.getTimestamp());
204    assertTrue("ValueCheck", CellUtil.matchingValue(kv, genValue(expectedRow, expectedCol, expectedVersion)));
205  }
206
207  private static long getBlkAccessCount(byte[] cf) {
208      return HFile.DATABLOCK_READ_COUNT.sum();
209  }
210
211  private static long getBlkCount() {
212    return blockCache.getBlockCount();
213  }
214
215  /**
216   * Test # of blocks read for some simple seek cases.
217   *
218   * @throws Exception
219   */
220  @Test
221  public void testBlocksRead() throws Exception {
222    byte[] TABLE = Bytes.toBytes("testBlocksRead");
223    String FAMILY = "cf1";
224    Cell kvs[];
225    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY);
226
227    try {
228      putData(FAMILY, "row", "col1", 1);
229      putData(FAMILY, "row", "col2", 2);
230      putData(FAMILY, "row", "col3", 3);
231      putData(FAMILY, "row", "col4", 4);
232      putData(FAMILY, "row", "col5", 5);
233      putData(FAMILY, "row", "col6", 6);
234      putData(FAMILY, "row", "col7", 7);
235      region.flush(true);
236
237      // Expected block reads: 1
238      // The top block has the KV we are
239      // interested. So only 1 seek is needed.
240      kvs = getData(FAMILY, "row", "col1", 1);
241      assertEquals(1, kvs.length);
242      verifyData(kvs[0], "row", "col1", 1);
243
244      // Expected block reads: 2
245      // The top block and next block has the KVs we are
246      // interested. So only 2 seek is needed.
247      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
248      assertEquals(2, kvs.length);
249      verifyData(kvs[0], "row", "col1", 1);
250      verifyData(kvs[1], "row", "col2", 2);
251
252      // Expected block reads: 3
253      // The first 2 seeks is to find out col2. [HBASE-4443]
254      // One additional seek for col3
255      // So 3 seeks are needed.
256      kvs = getData(FAMILY, "row", Arrays.asList("col2", "col3"), 2);
257      assertEquals(2, kvs.length);
258      verifyData(kvs[0], "row", "col2", 2);
259      verifyData(kvs[1], "row", "col3", 3);
260
261      // Expected block reads: 1. [HBASE-4443]&[HBASE-7845]
262      kvs = getData(FAMILY, "row", Arrays.asList("col5"), 1);
263      assertEquals(1, kvs.length);
264      verifyData(kvs[0], "row", "col5", 5);
265    } finally {
266      HBaseTestingUtility.closeRegionAndWAL(this.region);
267      this.region = null;
268    }
269  }
270
271  /**
272   * Test # of blocks read (targeted at some of the cases Lazy Seek optimizes).
273   *
274   * @throws Exception
275   */
276  @Test
277  public void testLazySeekBlocksRead() throws Exception {
278    byte[] TABLE = Bytes.toBytes("testLazySeekBlocksRead");
279    String FAMILY = "cf1";
280    Cell kvs[];
281    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY);
282
283    try {
284      // File 1
285      putData(FAMILY, "row", "col1", 1);
286      putData(FAMILY, "row", "col2", 2);
287      region.flush(true);
288
289      // File 2
290      putData(FAMILY, "row", "col1", 3);
291      putData(FAMILY, "row", "col2", 4);
292      region.flush(true);
293
294      // Expected blocks read: 1.
295      // File 2's top block is also the KV we are
296      // interested. So only 1 seek is needed.
297      kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1);
298      assertEquals(1, kvs.length);
299      verifyData(kvs[0], "row", "col1", 3);
300
301      // Expected blocks read: 2
302      // File 2's top block has the "col1" KV we are
303      // interested. We also need "col2" which is in a block
304      // of its own. So, we need that block as well.
305      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 2);
306      assertEquals(2, kvs.length);
307      verifyData(kvs[0], "row", "col1", 3);
308      verifyData(kvs[1], "row", "col2", 4);
309
310      // File 3: Add another column
311      putData(FAMILY, "row", "col3", 5);
312      region.flush(true);
313
314      // Expected blocks read: 1
315      // File 3's top block has the "col3" KV we are
316      // interested. So only 1 seek is needed.
317      kvs = getData(FAMILY, "row", "col3", 1);
318      assertEquals(1, kvs.length);
319      verifyData(kvs[0], "row", "col3", 5);
320
321      // Get a column from older file.
322      // For ROWCOL Bloom filter: Expected blocks read: 1.
323      // For ROW Bloom filter: Expected blocks read: 2.
324      // For NONE Bloom filter: Expected blocks read: 2.
325      kvs = getData(FAMILY, "row", Arrays.asList("col1"), 1, 2, 2);
326      assertEquals(1, kvs.length);
327      verifyData(kvs[0], "row", "col1", 3);
328
329      // File 4: Delete the entire row.
330      deleteFamily(FAMILY, "row", 6);
331      region.flush(true);
332
333      // For ROWCOL Bloom filter: Expected blocks read: 2.
334      // For ROW Bloom filter: Expected blocks read: 3.
335      // For NONE Bloom filter: Expected blocks read: 3.
336      kvs = getData(FAMILY, "row", "col1", 2, 3, 3);
337      assertEquals(0, kvs.length);
338      kvs = getData(FAMILY, "row", "col2", 2, 3, 3);
339      assertEquals(0, kvs.length);
340      kvs = getData(FAMILY, "row", "col3", 2);
341      assertEquals(0, kvs.length);
342      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 4);
343      assertEquals(0, kvs.length);
344
345      // File 5: Delete
346      deleteFamily(FAMILY, "row", 10);
347      region.flush(true);
348
349      // File 6: some more puts, but with timestamps older than the
350      // previous delete.
351      putData(FAMILY, "row", "col1", 7);
352      putData(FAMILY, "row", "col2", 8);
353      putData(FAMILY, "row", "col3", 9);
354      region.flush(true);
355
356      // Baseline expected blocks read: 6. [HBASE-4532]
357      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 6, 7, 7);
358      assertEquals(0, kvs.length);
359
360      // File 7: Put back new data
361      putData(FAMILY, "row", "col1", 11);
362      putData(FAMILY, "row", "col2", 12);
363      putData(FAMILY, "row", "col3", 13);
364      region.flush(true);
365
366
367      // Expected blocks read: 8. [HBASE-4585, HBASE-13109]
368      kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 8, 9, 9);
369      assertEquals(3, kvs.length);
370      verifyData(kvs[0], "row", "col1", 11);
371      verifyData(kvs[1], "row", "col2", 12);
372      verifyData(kvs[2], "row", "col3", 13);
373    } finally {
374      HBaseTestingUtility.closeRegionAndWAL(this.region);
375      this.region = null;
376    }
377  }
378
379  /**
380   * Test # of blocks read to ensure disabling cache-fill on Scan works.
381   * @throws Exception
382   */
383  @Test
384  public void testBlocksStoredWhenCachingDisabled() throws Exception {
385    byte [] TABLE = Bytes.toBytes("testBlocksReadWhenCachingDisabled");
386    String FAMILY = "cf1";
387
388    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY);
389
390    try {
391      putData(FAMILY, "row", "col1", 1);
392      putData(FAMILY, "row", "col2", 2);
393      region.flush(true);
394
395      // Execute a scan with caching turned off
396      // Expected blocks stored: 0
397      long blocksStart = getBlkCount();
398      Scan scan = new Scan();
399      scan.setCacheBlocks(false);
400      RegionScanner rs = region.getScanner(scan);
401      List<Cell> result = new ArrayList<>(2);
402      rs.next(result);
403      assertEquals(2 * BLOOM_TYPE.length, result.size());
404      rs.close();
405      long blocksEnd = getBlkCount();
406
407      assertEquals(blocksStart, blocksEnd);
408
409      // Execute with caching turned on
410      // Expected blocks stored: 2
411      blocksStart = blocksEnd;
412      scan.setCacheBlocks(true);
413      rs = region.getScanner(scan);
414      result = new ArrayList<>(2);
415      rs.next(result);
416      assertEquals(2 * BLOOM_TYPE.length, result.size());
417      rs.close();
418      blocksEnd = getBlkCount();
419
420      assertEquals(2 * BLOOM_TYPE.length, blocksEnd - blocksStart);
421    } finally {
422      HBaseTestingUtility.closeRegionAndWAL(this.region);
423      this.region = null;
424    }
425  }
426
427  @Test
428  public void testLazySeekBlocksReadWithDelete() throws Exception {
429    byte[] TABLE = Bytes.toBytes("testLazySeekBlocksReadWithDelete");
430    String FAMILY = "cf1";
431    Cell kvs[];
432    this.region = initHRegion(TABLE, testName.getMethodName(), conf, FAMILY);
433    try {
434      deleteFamily(FAMILY, "row", 200);
435      for (int i = 0; i < 100; i++) {
436        putData(FAMILY, "row", "col" + i, i);
437      }
438      putData(FAMILY, "row", "col99", 201);
439      region.flush(true);
440
441      kvs = getData(FAMILY, "row", Arrays.asList("col0"), 2);
442      assertEquals(0, kvs.length);
443
444      kvs = getData(FAMILY, "row", Arrays.asList("col99"), 2);
445      assertEquals(1, kvs.length);
446      verifyData(kvs[0], "row", "col99", 201);
447    } finally {
448      HBaseTestingUtility.closeRegionAndWAL(this.region);
449      this.region = null;
450    }
451  }
452
453}