001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.apache.hadoop.hbase.HBaseTestingUtility.START_KEY;
021import static org.apache.hadoop.hbase.HBaseTestingUtility.fam1;
022import static org.apache.hadoop.hbase.regionserver.HStoreFile.BULKLOAD_TIME_KEY;
023import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_CELLS_COUNT;
024import static org.junit.Assert.assertEquals;
025import static org.junit.Assert.assertTrue;
026
027import java.io.IOException;
028import java.util.ArrayList;
029import java.util.HashSet;
030import java.util.List;
031import java.util.Map;
032import java.util.Random;
033import java.util.Set;
034import org.apache.hadoop.conf.Configuration;
035import org.apache.hadoop.fs.FileStatus;
036import org.apache.hadoop.fs.FileSystem;
037import org.apache.hadoop.fs.Path;
038import org.apache.hadoop.hbase.Cell;
039import org.apache.hadoop.hbase.CellComparatorImpl;
040import org.apache.hadoop.hbase.CellUtil;
041import org.apache.hadoop.hbase.HBaseClassTestRule;
042import org.apache.hadoop.hbase.HBaseTestingUtility;
043import org.apache.hadoop.hbase.HColumnDescriptor;
044import org.apache.hadoop.hbase.HConstants;
045import org.apache.hadoop.hbase.HTableDescriptor;
046import org.apache.hadoop.hbase.KeyValue;
047import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
048import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
049import org.apache.hadoop.hbase.client.Delete;
050import org.apache.hadoop.hbase.client.Durability;
051import org.apache.hadoop.hbase.client.Put;
052import org.apache.hadoop.hbase.client.RegionInfo;
053import org.apache.hadoop.hbase.client.RegionInfoBuilder;
054import org.apache.hadoop.hbase.client.Scan;
055import org.apache.hadoop.hbase.client.Table;
056import org.apache.hadoop.hbase.client.TableDescriptor;
057import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
058import org.apache.hadoop.hbase.io.hfile.CacheConfig;
059import org.apache.hadoop.hbase.io.hfile.HFile;
060import org.apache.hadoop.hbase.io.hfile.HFileContext;
061import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
062import org.apache.hadoop.hbase.mob.MobConstants;
063import org.apache.hadoop.hbase.mob.MobFileCache;
064import org.apache.hadoop.hbase.mob.MobUtils;
065import org.apache.hadoop.hbase.testclassification.SmallTests;
066import org.apache.hadoop.hbase.util.Bytes;
067import org.apache.hadoop.hbase.util.CommonFSUtils;
068import org.apache.hadoop.hbase.util.Pair;
069import org.junit.After;
070import org.junit.ClassRule;
071import org.junit.Rule;
072import org.junit.Test;
073import org.junit.experimental.categories.Category;
074import org.junit.rules.TestName;
075import org.slf4j.Logger;
076import org.slf4j.LoggerFactory;
077
078/**
079 * Test mob store compaction
080 */
081@Category(SmallTests.class)
082public class TestMobStoreCompaction {
083
084  @ClassRule
085  public static final HBaseClassTestRule CLASS_RULE =
086      HBaseClassTestRule.forClass(TestMobStoreCompaction.class);
087
088  @Rule
089  public TestName name = new TestName();
090  static final Logger LOG = LoggerFactory.getLogger(TestMobStoreCompaction.class.getName());
091  private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
092  private Configuration conf = null;
093
094  private HRegion region = null;
095  private HTableDescriptor htd = null;
096  private HColumnDescriptor hcd = null;
097  private long mobCellThreshold = 1000;
098
099  private FileSystem fs;
100
101  private static final byte[] COLUMN_FAMILY = fam1;
102  private final byte[] STARTROW = Bytes.toBytes(START_KEY);
103  private int compactionThreshold;
104
105  private void init(Configuration conf, long mobThreshold) throws Exception {
106    this.conf = conf;
107    this.mobCellThreshold = mobThreshold;
108    HBaseTestingUtility UTIL = new HBaseTestingUtility(conf);
109
110    compactionThreshold = conf.getInt("hbase.hstore.compactionThreshold", 3);
111    htd = UTIL.createTableDescriptor(name.getMethodName());
112    hcd = new HColumnDescriptor(COLUMN_FAMILY);
113    hcd.setMobEnabled(true);
114    hcd.setMobThreshold(mobThreshold);
115    hcd.setMaxVersions(1);
116    htd.modifyFamily(hcd);
117
118    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(htd.getTableName()).build();
119    region = HBaseTestingUtility
120        .createRegionAndWAL(regionInfo, UTIL.getDataTestDir(), conf, htd, new MobFileCache(conf));
121    fs = FileSystem.get(conf);
122  }
123
124  @After
125  public void tearDown() throws Exception {
126    region.close();
127    fs.delete(UTIL.getDataTestDir(), true);
128  }
129
130  /**
131   * During compaction, cells smaller than the threshold won't be affected.
132   */
133  @Test
134  public void testSmallerValue() throws Exception {
135    init(UTIL.getConfiguration(), 500);
136    byte[] dummyData = makeDummyData(300); // smaller than mob threshold
137    Table loader = new RegionAsTable(region);
138    // one hfile per row
139    for (int i = 0; i < compactionThreshold; i++) {
140      Put p = createPut(i, dummyData);
141      loader.put(p);
142      region.flush(true);
143    }
144    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
145    assertEquals("Before compaction: mob file count", 0, countMobFiles());
146    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
147    assertEquals("Before compaction: mob rows", 0, countMobRows());
148
149    region.compactStores();
150
151    assertEquals("After compaction: store files", 1, countStoreFiles());
152    assertEquals("After compaction: mob file count", 0, countMobFiles());
153    assertEquals("After compaction: referenced mob file count", 0, countReferencedMobFiles());
154    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
155    assertEquals("After compaction: mob rows", 0, countMobRows());
156  }
157
158  /**
159   * During compaction, the mob threshold size is changed.
160   */
161  @Test
162  public void testLargerValue() throws Exception {
163    init(UTIL.getConfiguration(), 200);
164    byte[] dummyData = makeDummyData(300); // larger than mob threshold
165    Table loader = new RegionAsTable(region);
166    for (int i = 0; i < compactionThreshold; i++) {
167      Put p = createPut(i, dummyData);
168      loader.put(p);
169      region.flush(true);
170    }
171    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
172    assertEquals("Before compaction: mob file count", compactionThreshold, countMobFiles());
173    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
174    assertEquals("Before compaction: mob rows", compactionThreshold, countMobRows());
175    assertEquals("Before compaction: number of mob cells", compactionThreshold,
176        countMobCellsInMetadata());
177    // Change the threshold larger than the data size
178    setMobThreshold(region, COLUMN_FAMILY, 500);
179    region.initialize();
180    region.compactStores();
181
182    assertEquals("After compaction: store files", 1, countStoreFiles());
183    assertEquals("After compaction: mob file count", compactionThreshold, countMobFiles());
184    assertEquals("After compaction: referenced mob file count", 0, countReferencedMobFiles());
185    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
186    assertEquals("After compaction: mob rows", 0, countMobRows());
187  }
188
189  private static HRegion setMobThreshold(HRegion region, byte[] cfName, long modThreshold) {
190    ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder
191            .newBuilder(region.getTableDescriptor().getColumnFamily(cfName))
192            .setMobThreshold(modThreshold)
193            .build();
194    TableDescriptor td = TableDescriptorBuilder
195            .newBuilder(region.getTableDescriptor())
196            .removeColumnFamily(cfName)
197            .setColumnFamily(cfd)
198            .build();
199    region.setTableDescriptor(td);
200    return region;
201  }
202
203  /**
204   * This test will first generate store files, then bulk load them and trigger the compaction.
205   * When compaction, the cell value will be larger than the threshold.
206   */
207  @Test
208  public void testMobCompactionWithBulkload() throws Exception {
209    // The following will produce store files of 600.
210    init(UTIL.getConfiguration(), 300);
211    byte[] dummyData = makeDummyData(600);
212
213    Path hbaseRootDir = CommonFSUtils.getRootDir(conf);
214    Path basedir = new Path(hbaseRootDir, htd.getNameAsString());
215    List<Pair<byte[], String>> hfiles = new ArrayList<>(1);
216    for (int i = 0; i < compactionThreshold; i++) {
217      Path hpath = new Path(basedir, "hfile" + i);
218      hfiles.add(Pair.newPair(COLUMN_FAMILY, hpath.toString()));
219      createHFile(hpath, i, dummyData);
220    }
221
222    // The following will bulk load the above generated store files and compact, with 600(fileSize)
223    // > 300(threshold)
224    Map<byte[], List<Path>> map = region.bulkLoadHFiles(hfiles, true, null);
225    assertTrue("Bulkload result:", !map.isEmpty());
226    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
227    assertEquals("Before compaction: mob file count", 0, countMobFiles());
228    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
229    assertEquals("Before compaction: mob rows", 0, countMobRows());
230    assertEquals("Before compaction: referenced mob file count", 0, countReferencedMobFiles());
231
232    region.compactStores();
233
234    assertEquals("After compaction: store files", 1, countStoreFiles());
235    assertEquals("After compaction: mob file count:", 1, countMobFiles());
236    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
237    assertEquals("After compaction: mob rows", compactionThreshold, countMobRows());
238    assertEquals("After compaction: referenced mob file count", 1, countReferencedMobFiles());
239    assertEquals("After compaction: number of mob cells", compactionThreshold,
240        countMobCellsInMetadata());
241  }
242
243  @Test
244  public void testMajorCompactionAfterDelete() throws Exception {
245    init(UTIL.getConfiguration(), 100);
246    byte[] dummyData = makeDummyData(200); // larger than mob threshold
247    Table loader = new RegionAsTable(region);
248    // create hfiles and mob hfiles but don't trigger compaction
249    int numHfiles = compactionThreshold - 1;
250    byte[] deleteRow = Bytes.add(STARTROW, Bytes.toBytes(0));
251    for (int i = 0; i < numHfiles; i++) {
252      Put p = createPut(i, dummyData);
253      loader.put(p);
254      region.flush(true);
255    }
256    assertEquals("Before compaction: store files", numHfiles, countStoreFiles());
257    assertEquals("Before compaction: mob file count", numHfiles, countMobFiles());
258    assertEquals("Before compaction: rows", numHfiles, UTIL.countRows(region));
259    assertEquals("Before compaction: mob rows", numHfiles, countMobRows());
260    assertEquals("Before compaction: number of mob cells", numHfiles, countMobCellsInMetadata());
261    // now let's delete some cells that contain mobs
262    Delete delete = new Delete(deleteRow);
263    delete.addFamily(COLUMN_FAMILY);
264    region.delete(delete);
265    region.flush(true);
266
267    assertEquals("Before compaction: store files", numHfiles + 1, countStoreFiles());
268    assertEquals("Before compaction: mob files", numHfiles, countMobFiles());
269    // region.compactStores();
270    region.compact(true);
271    assertEquals("After compaction: store files", 1, countStoreFiles());
272    // still have original mob hfiles and now added a mob del file
273    assertEquals("After compaction: mob files", numHfiles + 1, countMobFiles());
274
275    Scan scan = new Scan();
276    scan.setRaw(true);
277    InternalScanner scanner = region.getScanner(scan);
278    List<Cell> results = new ArrayList<>();
279    scanner.next(results);
280    int deleteCount = 0;
281    while (!results.isEmpty()) {
282      for (Cell c : results) {
283        if (c.getTypeByte() == KeyValue.Type.DeleteFamily.getCode()) {
284          deleteCount++;
285          assertTrue(Bytes.equals(CellUtil.cloneRow(c), deleteRow));
286        }
287      }
288      results.clear();
289      scanner.next(results);
290    }
291    // assert the delete mark is retained after the major compaction
292    assertEquals(1, deleteCount);
293    scanner.close();
294    // assert the deleted cell is not counted
295    assertEquals("The cells in mob files", numHfiles - 1, countMobCellsInMobFiles(1));
296  }
297
298  private int countStoreFiles() throws IOException {
299    HStore store = region.getStore(COLUMN_FAMILY);
300    return store.getStorefilesCount();
301  }
302
303  private int countMobFiles() throws IOException {
304    Path mobDirPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(), hcd.getNameAsString());
305    if (fs.exists(mobDirPath)) {
306      FileStatus[] files = UTIL.getTestFileSystem().listStatus(mobDirPath);
307      return files.length;
308    }
309    return 0;
310  }
311
312  private long countMobCellsInMetadata() throws IOException {
313    long mobCellsCount = 0;
314    Path mobDirPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(), hcd.getNameAsString());
315    Configuration copyOfConf = new Configuration(conf);
316    copyOfConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
317    CacheConfig cacheConfig = new CacheConfig(copyOfConf);
318    if (fs.exists(mobDirPath)) {
319      FileStatus[] files = UTIL.getTestFileSystem().listStatus(mobDirPath);
320      for (FileStatus file : files) {
321        HStoreFile sf = new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true);
322        sf.initReader();
323        Map<byte[], byte[]> fileInfo = sf.getReader().loadFileInfo();
324        byte[] count = fileInfo.get(MOB_CELLS_COUNT);
325        assertTrue(count != null);
326        mobCellsCount += Bytes.toLong(count);
327      }
328    }
329    return mobCellsCount;
330  }
331
332  private Put createPut(int rowIdx, byte[] dummyData) throws IOException {
333    Put p = new Put(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)));
334    p.setDurability(Durability.SKIP_WAL);
335    p.addColumn(COLUMN_FAMILY, Bytes.toBytes("colX"), dummyData);
336    return p;
337  }
338
339  /**
340   * Create an HFile with the given number of bytes
341   */
342  private void createHFile(Path path, int rowIdx, byte[] dummyData) throws IOException {
343    HFileContext meta = new HFileContextBuilder().build();
344    HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf)).withPath(fs, path)
345        .withFileContext(meta).create();
346    long now = System.currentTimeMillis();
347    try {
348      KeyValue kv = new KeyValue(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)), COLUMN_FAMILY,
349          Bytes.toBytes("colX"), now, dummyData);
350      writer.append(kv);
351    } finally {
352      writer.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
353      writer.close();
354    }
355  }
356
357  private int countMobRows() throws IOException {
358    Scan scan = new Scan();
359    // Do not retrieve the mob data when scanning
360    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
361    InternalScanner scanner = region.getScanner(scan);
362
363    int scannedCount = 0;
364    List<Cell> results = new ArrayList<>();
365    boolean hasMore = true;
366    while (hasMore) {
367      hasMore = scanner.next(results);
368      for (Cell c : results) {
369        if (MobUtils.isMobReferenceCell(c)) {
370          scannedCount++;
371        }
372      }
373      results.clear();
374    }
375    scanner.close();
376
377    return scannedCount;
378  }
379
380  private byte[] makeDummyData(int size) {
381    byte[] dummyData = new byte[size];
382    new Random().nextBytes(dummyData);
383    return dummyData;
384  }
385
386  private int countReferencedMobFiles() throws IOException {
387    Scan scan = new Scan();
388    // Do not retrieve the mob data when scanning
389    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
390    InternalScanner scanner = region.getScanner(scan);
391
392    List<Cell> kvs = new ArrayList<>();
393    boolean hasMore = true;
394    String fileName;
395    Set<String> files = new HashSet<>();
396    do {
397      kvs.clear();
398      hasMore = scanner.next(kvs);
399      for (Cell kv : kvs) {
400        if (!MobUtils.isMobReferenceCell(kv)) {
401          continue;
402        }
403        if (!MobUtils.hasValidMobRefCellValue(kv)) {
404          continue;
405        }
406        int size = MobUtils.getMobValueLength(kv);
407        if (size <= mobCellThreshold) {
408          continue;
409        }
410        fileName = MobUtils.getMobFileName(kv);
411        if (fileName.isEmpty()) {
412          continue;
413        }
414        files.add(fileName);
415        Path familyPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(),
416            hcd.getNameAsString());
417        assertTrue(fs.exists(new Path(familyPath, fileName)));
418      }
419    } while (hasMore);
420
421    scanner.close();
422
423    return files.size();
424  }
425
426  private int countMobCellsInMobFiles(int expectedNumDelfiles) throws IOException {
427    Configuration copyOfConf = new Configuration(conf);
428    copyOfConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
429    CacheConfig cacheConfig = new CacheConfig(copyOfConf);
430    Path mobDirPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(), hcd.getNameAsString());
431    List<HStoreFile> sfs = new ArrayList<>();
432    int numDelfiles = 0;
433    int size = 0;
434    if (fs.exists(mobDirPath)) {
435      for (FileStatus f : fs.listStatus(mobDirPath)) {
436        HStoreFile sf = new HStoreFile(fs, f.getPath(), conf, cacheConfig, BloomType.NONE, true);
437        sfs.add(sf);
438        if (StoreFileInfo.isDelFile(sf.getPath())) {
439          numDelfiles++;
440        }
441      }
442
443      List<StoreFileScanner> scanners = StoreFileScanner.getScannersForStoreFiles(sfs, false, true,
444        false, false, HConstants.LATEST_TIMESTAMP);
445      long timeToPurgeDeletes = Math.max(conf.getLong("hbase.hstore.time.to.purge.deletes", 0), 0);
446      long ttl = HStore.determineTTLFromFamily(hcd);
447      ScanInfo scanInfo = new ScanInfo(copyOfConf, hcd, ttl, timeToPurgeDeletes,
448        CellComparatorImpl.COMPARATOR);
449      StoreScanner scanner = new StoreScanner(scanInfo, ScanType.COMPACT_DROP_DELETES, scanners);
450      try {
451        size += UTIL.countRows(scanner);
452      } finally {
453        scanner.close();
454      }
455    }
456    // assert the number of the existing del files
457    assertEquals(expectedNumDelfiles, numDelfiles);
458    return size;
459  }
460}