001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.mob;
019
020import static org.apache.hadoop.hbase.HBaseTestingUtility.START_KEY;
021import static org.apache.hadoop.hbase.HBaseTestingUtility.fam1;
022import static org.apache.hadoop.hbase.regionserver.HStoreFile.BULKLOAD_TIME_KEY;
023import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_CELLS_COUNT;
024import static org.junit.Assert.assertEquals;
025import static org.junit.Assert.assertTrue;
026
027import java.io.IOException;
028import java.util.ArrayList;
029import java.util.Arrays;
030import java.util.Collection;
031import java.util.HashSet;
032import java.util.List;
033import java.util.Map;
034import java.util.Optional;
035import java.util.Set;
036import java.util.UUID;
037import org.apache.hadoop.conf.Configuration;
038import org.apache.hadoop.fs.FileStatus;
039import org.apache.hadoop.fs.FileSystem;
040import org.apache.hadoop.fs.Path;
041import org.apache.hadoop.hbase.Cell;
042import org.apache.hadoop.hbase.HBaseClassTestRule;
043import org.apache.hadoop.hbase.HBaseTestingUtility;
044import org.apache.hadoop.hbase.HColumnDescriptor;
045import org.apache.hadoop.hbase.HConstants;
046import org.apache.hadoop.hbase.HTableDescriptor;
047import org.apache.hadoop.hbase.KeyValue;
048import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
049import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
050import org.apache.hadoop.hbase.client.Delete;
051import org.apache.hadoop.hbase.client.Durability;
052import org.apache.hadoop.hbase.client.Put;
053import org.apache.hadoop.hbase.client.RegionInfo;
054import org.apache.hadoop.hbase.client.RegionInfoBuilder;
055import org.apache.hadoop.hbase.client.Scan;
056import org.apache.hadoop.hbase.client.Table;
057import org.apache.hadoop.hbase.client.TableDescriptor;
058import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
059import org.apache.hadoop.hbase.io.hfile.CacheConfig;
060import org.apache.hadoop.hbase.io.hfile.HFile;
061import org.apache.hadoop.hbase.io.hfile.HFileContext;
062import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
063import org.apache.hadoop.hbase.regionserver.BloomType;
064import org.apache.hadoop.hbase.regionserver.HRegion;
065import org.apache.hadoop.hbase.regionserver.HStore;
066import org.apache.hadoop.hbase.regionserver.HStoreFile;
067import org.apache.hadoop.hbase.regionserver.InternalScanner;
068import org.apache.hadoop.hbase.regionserver.RegionAsTable;
069import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
070import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker;
071import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
072import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
073import org.apache.hadoop.hbase.security.User;
074import org.apache.hadoop.hbase.testclassification.MediumTests;
075import org.apache.hadoop.hbase.util.Bytes;
076import org.apache.hadoop.hbase.util.CommonFSUtils;
077import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
078import org.apache.hadoop.hbase.util.Pair;
079import org.junit.After;
080import org.junit.ClassRule;
081import org.junit.Rule;
082import org.junit.Test;
083import org.junit.experimental.categories.Category;
084import org.junit.rules.TestName;
085import org.junit.runner.RunWith;
086import org.junit.runners.Parameterized;
087import org.slf4j.Logger;
088import org.slf4j.LoggerFactory;
089
090/**
091 * Test mob store compaction
092 */
093@RunWith(Parameterized.class)
094@Category(MediumTests.class)
095public class TestMobStoreCompaction {
096
097  @ClassRule
098  public static final HBaseClassTestRule CLASS_RULE =
099    HBaseClassTestRule.forClass(TestMobStoreCompaction.class);
100
101  @Rule
102  public TestName name = new TestName();
103  static final Logger LOG = LoggerFactory.getLogger(TestMobStoreCompaction.class.getName());
104  private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
105  private Configuration conf = null;
106
107  private HRegion region = null;
108  private HTableDescriptor htd = null;
109  private HColumnDescriptor hcd = null;
110  private long mobCellThreshold = 1000;
111
112  private FileSystem fs;
113
114  private static final byte[] COLUMN_FAMILY = fam1;
115  private final byte[] STARTROW = Bytes.toBytes(START_KEY);
116  private int compactionThreshold;
117
118  private Boolean useFileBasedSFT;
119
120  public TestMobStoreCompaction(Boolean useFileBasedSFT) {
121    this.useFileBasedSFT = useFileBasedSFT;
122  }
123
124  @Parameterized.Parameters
125  public static Collection<Boolean> data() {
126    Boolean[] data = { false, true };
127    return Arrays.asList(data);
128  }
129
130  private void init(Configuration conf, long mobThreshold) throws Exception {
131    if (useFileBasedSFT) {
132      conf.set(StoreFileTrackerFactory.TRACKER_IMPL,
133        "org.apache.hadoop.hbase.regionserver.storefiletracker.FileBasedStoreFileTracker");
134    }
135
136    this.conf = conf;
137    this.mobCellThreshold = mobThreshold;
138    HBaseTestingUtility UTIL = new HBaseTestingUtility(conf);
139
140    compactionThreshold = conf.getInt("hbase.hstore.compactionThreshold", 3);
141    htd = UTIL.createTableDescriptor(TestMobUtils.getTableName(name));
142    hcd = new HColumnDescriptor(COLUMN_FAMILY);
143    hcd.setMobEnabled(true);
144    hcd.setMobThreshold(mobThreshold);
145    hcd.setMaxVersions(1);
146    htd.modifyFamily(hcd);
147
148    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(htd.getTableName()).build();
149    region = HBaseTestingUtility.createRegionAndWAL(regionInfo, UTIL.getDataTestDir(), conf, htd,
150      new MobFileCache(conf));
151    fs = FileSystem.get(conf);
152  }
153
154  @After
155  public void tearDown() throws Exception {
156    region.close();
157    fs.delete(UTIL.getDataTestDir(), true);
158  }
159
160  /**
161   * During compaction, cells smaller than the threshold won't be affected.
162   */
163  @Test
164  public void testSmallerValue() throws Exception {
165    init(UTIL.getConfiguration(), 500);
166    byte[] dummyData = makeDummyData(300); // smaller than mob threshold
167    Table loader = new RegionAsTable(region);
168    // one hfile per row
169    for (int i = 0; i < compactionThreshold; i++) {
170      Put p = createPut(i, dummyData);
171      loader.put(p);
172      region.flush(true);
173    }
174    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
175    assertEquals("Before compaction: mob file count", 0, countMobFiles());
176    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
177    assertEquals("Before compaction: mob rows", 0, countMobRows());
178
179    region.compactStores();
180
181    assertEquals("After compaction: store files", 1, countStoreFiles());
182    assertEquals("After compaction: mob file count", 0, countMobFiles());
183    assertEquals("After compaction: referenced mob file count", 0, countReferencedMobFiles());
184    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
185    assertEquals("After compaction: mob rows", 0, countMobRows());
186  }
187
188  /**
189   * During compaction, the mob threshold size is changed.
190   */
191  @Test
192  public void testLargerValue() throws Exception {
193    init(UTIL.getConfiguration(), 200);
194    byte[] dummyData = makeDummyData(300); // larger than mob threshold
195    Table loader = new RegionAsTable(region);
196    for (int i = 0; i < compactionThreshold; i++) {
197      Put p = createPut(i, dummyData);
198      loader.put(p);
199      region.flush(true);
200    }
201    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
202    assertEquals("Before compaction: mob file count", compactionThreshold, countMobFiles());
203    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
204    assertEquals("Before compaction: mob rows", compactionThreshold, countMobRows());
205    assertEquals("Before compaction: number of mob cells", compactionThreshold,
206      countMobCellsInMetadata());
207    // Change the threshold larger than the data size
208    setMobThreshold(region, COLUMN_FAMILY, 500);
209    region.initialize();
210
211    List<HStore> stores = region.getStores();
212    for (HStore store : stores) {
213      // Force major compaction
214      store.triggerMajorCompaction();
215      Optional<CompactionContext> context = store.requestCompaction(HStore.PRIORITY_USER,
216        CompactionLifeCycleTracker.DUMMY, User.getCurrent());
217      if (!context.isPresent()) {
218        continue;
219      }
220      region.compact(context.get(), store, NoLimitThroughputController.INSTANCE, User.getCurrent());
221    }
222
223    assertEquals("After compaction: store files", 1, countStoreFiles());
224    assertEquals("After compaction: mob file count", compactionThreshold, countMobFiles());
225    assertEquals("After compaction: referenced mob file count", 0, countReferencedMobFiles());
226    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
227    assertEquals("After compaction: mob rows", 0, countMobRows());
228  }
229
230  private static HRegion setMobThreshold(HRegion region, byte[] cfName, long modThreshold) {
231    ColumnFamilyDescriptor cfd =
232      ColumnFamilyDescriptorBuilder.newBuilder(region.getTableDescriptor().getColumnFamily(cfName))
233        .setMobThreshold(modThreshold).build();
234    TableDescriptor td = TableDescriptorBuilder.newBuilder(region.getTableDescriptor())
235      .removeColumnFamily(cfName).setColumnFamily(cfd).build();
236    region.setTableDescriptor(td);
237    return region;
238  }
239
240  /**
241   * This test will first generate store files, then bulk load them and trigger the compaction. When
242   * compaction, the cell value will be larger than the threshold.
243   */
244  @Test
245  public void testMobCompactionWithBulkload() throws Exception {
246    // The following will produce store files of 600.
247    init(UTIL.getConfiguration(), 300);
248    byte[] dummyData = makeDummyData(600);
249
250    Path hbaseRootDir = CommonFSUtils.getRootDir(conf);
251    Path basedir = new Path(hbaseRootDir, htd.getNameAsString());
252    List<Pair<byte[], String>> hfiles = new ArrayList<>(1);
253    for (int i = 0; i < compactionThreshold; i++) {
254      Path hpath = new Path(basedir, UUID.randomUUID().toString().replace("-", ""));
255      hfiles.add(Pair.newPair(COLUMN_FAMILY, hpath.toString()));
256      createHFile(hpath, i, dummyData);
257    }
258
259    // The following will bulk load the above generated store files and compact, with 600(fileSize)
260    // > 300(threshold)
261    Map<byte[], List<Path>> map = region.bulkLoadHFiles(hfiles, true, null);
262    assertTrue("Bulkload result:", !map.isEmpty());
263    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
264    assertEquals("Before compaction: mob file count", 0, countMobFiles());
265    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
266    assertEquals("Before compaction: mob rows", 0, countMobRows());
267    assertEquals("Before compaction: referenced mob file count", 0, countReferencedMobFiles());
268
269    region.compactStores();
270
271    assertEquals("After compaction: store files", 1, countStoreFiles());
272    assertEquals("After compaction: mob file count:", 1, countMobFiles());
273    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
274    assertEquals("After compaction: mob rows", compactionThreshold, countMobRows());
275    assertEquals("After compaction: referenced mob file count", 1, countReferencedMobFiles());
276    assertEquals("After compaction: number of mob cells", compactionThreshold,
277      countMobCellsInMetadata());
278  }
279
280  @Test
281  public void testMajorCompactionAfterDelete() throws Exception {
282    init(UTIL.getConfiguration(), 100);
283    byte[] dummyData = makeDummyData(200); // larger than mob threshold
284    Table loader = new RegionAsTable(region);
285    // create hfiles and mob hfiles but don't trigger compaction
286    int numHfiles = compactionThreshold - 1;
287    byte[] deleteRow = Bytes.add(STARTROW, Bytes.toBytes(0));
288    for (int i = 0; i < numHfiles; i++) {
289      Put p = createPut(i, dummyData);
290      loader.put(p);
291      region.flush(true);
292    }
293    assertEquals("Before compaction: store files", numHfiles, countStoreFiles());
294    assertEquals("Before compaction: mob file count", numHfiles, countMobFiles());
295    assertEquals("Before compaction: rows", numHfiles, UTIL.countRows(region));
296    assertEquals("Before compaction: mob rows", numHfiles, countMobRows());
297    assertEquals("Before compaction: number of mob cells", numHfiles, countMobCellsInMetadata());
298    // now let's delete some cells that contain mobs
299    Delete delete = new Delete(deleteRow);
300    delete.addFamily(COLUMN_FAMILY);
301    region.delete(delete);
302    region.flush(true);
303
304    assertEquals("Before compaction: store files", numHfiles + 1, countStoreFiles());
305    assertEquals("Before compaction: mob files", numHfiles, countMobFiles());
306    // region.compactStores();
307    region.compact(true);
308    assertEquals("After compaction: store files", 1, countStoreFiles());
309  }
310
311  private int countStoreFiles() throws IOException {
312    HStore store = region.getStore(COLUMN_FAMILY);
313    return store.getStorefilesCount();
314  }
315
316  private int countMobFiles() throws IOException {
317    Path mobDirPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(), hcd.getNameAsString());
318    if (fs.exists(mobDirPath)) {
319      FileStatus[] files = UTIL.getTestFileSystem().listStatus(mobDirPath);
320      return files.length;
321    }
322    return 0;
323  }
324
325  private long countMobCellsInMetadata() throws IOException {
326    long mobCellsCount = 0;
327    Path mobDirPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(), hcd.getNameAsString());
328    Configuration copyOfConf = new Configuration(conf);
329    copyOfConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
330    CacheConfig cacheConfig = new CacheConfig(copyOfConf);
331    if (fs.exists(mobDirPath)) {
332      FileStatus[] files = UTIL.getTestFileSystem().listStatus(mobDirPath);
333      for (FileStatus file : files) {
334        HStoreFile sf = new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true);
335        sf.initReader();
336        Map<byte[], byte[]> fileInfo = sf.getReader().loadFileInfo();
337        byte[] count = fileInfo.get(MOB_CELLS_COUNT);
338        assertTrue(count != null);
339        mobCellsCount += Bytes.toLong(count);
340      }
341    }
342    return mobCellsCount;
343  }
344
345  private Put createPut(int rowIdx, byte[] dummyData) throws IOException {
346    Put p = new Put(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)));
347    p.setDurability(Durability.SKIP_WAL);
348    p.addColumn(COLUMN_FAMILY, Bytes.toBytes("colX"), dummyData);
349    return p;
350  }
351
352  /**
353   * Create an HFile with the given number of bytes
354   */
355  private void createHFile(Path path, int rowIdx, byte[] dummyData) throws IOException {
356    HFileContext meta = new HFileContextBuilder().build();
357    HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf)).withPath(fs, path)
358      .withFileContext(meta).create();
359    long now = EnvironmentEdgeManager.currentTime();
360    try {
361      KeyValue kv = new KeyValue(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)), COLUMN_FAMILY,
362        Bytes.toBytes("colX"), now, dummyData);
363      writer.append(kv);
364    } finally {
365      writer.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(EnvironmentEdgeManager.currentTime()));
366      writer.close();
367    }
368  }
369
370  private int countMobRows() throws IOException {
371    Scan scan = new Scan();
372    // Do not retrieve the mob data when scanning
373    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
374    InternalScanner scanner = region.getScanner(scan);
375
376    int scannedCount = 0;
377    List<Cell> results = new ArrayList<>();
378    boolean hasMore = true;
379    while (hasMore) {
380      hasMore = scanner.next(results);
381      for (Cell c : results) {
382        if (MobUtils.isMobReferenceCell(c)) {
383          scannedCount++;
384        }
385      }
386      results.clear();
387    }
388    scanner.close();
389
390    return scannedCount;
391  }
392
393  private byte[] makeDummyData(int size) {
394    byte[] dummyData = new byte[size];
395    Bytes.random(dummyData);
396    return dummyData;
397  }
398
399  private int countReferencedMobFiles() throws IOException {
400    Scan scan = new Scan();
401    // Do not retrieve the mob data when scanning
402    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
403    InternalScanner scanner = region.getScanner(scan);
404
405    List<Cell> kvs = new ArrayList<>();
406    boolean hasMore = true;
407    String fileName;
408    Set<String> files = new HashSet<>();
409    do {
410      kvs.clear();
411      hasMore = scanner.next(kvs);
412      for (Cell kv : kvs) {
413        if (!MobUtils.isMobReferenceCell(kv)) {
414          continue;
415        }
416        if (!MobUtils.hasValidMobRefCellValue(kv)) {
417          continue;
418        }
419        int size = MobUtils.getMobValueLength(kv);
420        if (size <= mobCellThreshold) {
421          continue;
422        }
423        fileName = MobUtils.getMobFileName(kv);
424        if (fileName.isEmpty()) {
425          continue;
426        }
427        files.add(fileName);
428        Path familyPath =
429          MobUtils.getMobFamilyPath(conf, htd.getTableName(), hcd.getNameAsString());
430        assertTrue(fs.exists(new Path(familyPath, fileName)));
431      }
432    } while (hasMore);
433
434    scanner.close();
435
436    return files.size();
437  }
438}