/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mob;

import static org.apache.hadoop.hbase.HBaseTestingUtil.START_KEY;
import static org.apache.hadoop.hbase.HBaseTestingUtil.fam1;
import static org.apache.hadoop.hbase.regionserver.HStoreFile.BULKLOAD_TIME_KEY;
import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_CELLS_COUNT;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.regionserver.RegionAsTable;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.After;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Test mob store compaction
 */
@RunWith(Parameterized.class)
@Category(MediumTests.class)
public class TestMobStoreCompaction {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestMobStoreCompaction.class);

  @Rule
  public TestName name = new TestName();
  static final Logger LOG = LoggerFactory.getLogger(TestMobStoreCompaction.class.getName());
  private final static HBaseTestingUtil UTIL = new HBaseTestingUtil();
  private Configuration conf = null;

  private HRegion region = null;
  private TableDescriptor tableDescriptor = null;
  private ColumnFamilyDescriptor familyDescriptor = null;
  private long mobCellThreshold = 1000;

  private FileSystem fs;

  private static final byte[] COLUMN_FAMILY = fam1;
  private final byte[] STARTROW = Bytes.toBytes(START_KEY);
  private int compactionThreshold;

  private Boolean useFileBasedSFT;

  public TestMobStoreCompaction(Boolean useFileBasedSFT) {
    this.useFileBasedSFT = useFileBasedSFT;
  }

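  /**
   * Run each test with both the default store file tracker and the file-based one.
   */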
  @Parameterized.Parameters
  public static Collection<Boolean> data() {
    Boolean[] data = { false, true };
    return Arrays.asList(data);
  }

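  /**
   * Creates a MOB-enabled test region for {@link #COLUMN_FAMILY} with the given mob threshold.
   * When the file-based store file tracker parameter is set, the tracker implementation is
   * switched before the region is created.
   */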
  private void init(Configuration conf, long mobThreshold) throws Exception {
    if (useFileBasedSFT) {
      conf.set(StoreFileTrackerFactory.TRACKER_IMPL,
        "org.apache.hadoop.hbase.regionserver.storefiletracker.FileBasedStoreFileTracker");
    }

    this.conf = conf;
    this.mobCellThreshold = mobThreshold;

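    // Note: this local HBaseTestingUtil is built from the supplied conf and shadows the static
    // UTIL field for the descriptor and data-dir helpers below.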
    HBaseTestingUtil UTIL = new HBaseTestingUtil(conf);

    compactionThreshold = conf.getInt("hbase.hstore.compactionThreshold", 3);
    familyDescriptor = ColumnFamilyDescriptorBuilder.newBuilder(COLUMN_FAMILY).setMobEnabled(true)
      .setMobThreshold(mobThreshold).setMaxVersions(1).build();
    tableDescriptor = UTIL.createModifyableTableDescriptor(TestMobUtils.getTableName(name))
      .modifyColumnFamily(familyDescriptor).build();

    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build();
    region = HBaseTestingUtil.createRegionAndWAL(regionInfo, UTIL.getDataTestDir(), conf,
      tableDescriptor, new MobFileCache(conf));
    fs = FileSystem.get(conf);
  }

  @After
  public void tearDown() throws Exception {
    region.close();
    fs.delete(UTIL.getDataTestDir(), true);
  }

  /**
   * During compaction, cells smaller than the threshold won't be affected.
   */
  @Test
  public void testSmallerValue() throws Exception {
    init(UTIL.getConfiguration(), 500);
    byte[] dummyData = makeDummyData(300); // smaller than mob threshold
    Table loader = new RegionAsTable(region);
    // one hfile per row
    for (int i = 0; i < compactionThreshold; i++) {
      Put p = createPut(i, dummyData);
      loader.put(p);
      region.flush(true);
    }
    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
    assertEquals("Before compaction: mob file count", 0, countMobFiles());
    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", 0, countMobRows());

    region.compactStores();

    assertEquals("After compaction: store files", 1, countStoreFiles());
    assertEquals("After compaction: mob file count", 0, countMobFiles());
    assertEquals("After compaction: referenced mob file count", 0, countReferencedMobFiles());
    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("After compaction: mob rows", 0, countMobRows());
  }

  /**
   * During compaction, the mob threshold size is changed.
   */
  @Test
  public void testLargerValue() throws Exception {
    init(UTIL.getConfiguration(), 200);
    byte[] dummyData = makeDummyData(300); // larger than mob threshold
    Table loader = new RegionAsTable(region);
    for (int i = 0; i < compactionThreshold; i++) {
      Put p = createPut(i, dummyData);
      loader.put(p);
      region.flush(true);
    }
    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
    assertEquals("Before compaction: mob file count", compactionThreshold, countMobFiles());
    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", compactionThreshold, countMobRows());
    assertEquals("Before compaction: number of mob cells", compactionThreshold,
      countMobCellsInMetadata());
    // Raise the mob threshold above the value size
    setMobThreshold(region, COLUMN_FAMILY, 500);
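    // Re-initialize the region so its stores pick up the updated table descriptor.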
    region.initialize();

    List<HStore> stores = region.getStores();
    for (HStore store : stores) {
      // Force major compaction
      store.triggerMajorCompaction();
      Optional<CompactionContext> context = store.requestCompaction(HStore.PRIORITY_USER,
        CompactionLifeCycleTracker.DUMMY, User.getCurrent());
      if (!context.isPresent()) {
        continue;
      }
      region.compact(context.get(), store, NoLimitThroughputController.INSTANCE, User.getCurrent());
    }

    assertEquals("After compaction: store files", 1, countStoreFiles());
    assertEquals("After compaction: mob file count", compactionThreshold, countMobFiles());
    assertEquals("After compaction: referenced mob file count", 0, countReferencedMobFiles());
    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("After compaction: mob rows", 0, countMobRows());
  }

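  /**
   * Rebuilds the table descriptor with a new mob threshold for the given family and applies it to
   * the region.
   */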
  private static HRegion setMobThreshold(HRegion region, byte[] cfName, long modThreshold) {
    ColumnFamilyDescriptor cfd =
      ColumnFamilyDescriptorBuilder.newBuilder(region.getTableDescriptor().getColumnFamily(cfName))
        .setMobThreshold(modThreshold).build();
    TableDescriptor td = TableDescriptorBuilder.newBuilder(region.getTableDescriptor())
      .removeColumnFamily(cfName).setColumnFamily(cfd).build();
    region.setTableDescriptor(td);
    return region;
  }

  /**
   * This test first generates store files, then bulk loads them and triggers a compaction. During
   * the compaction, the cell values are larger than the mob threshold.
   */
  @Test
  public void testMobCompactionWithBulkload() throws Exception {
    // The following produces store files with 600-byte values, above the 300-byte mob threshold.
    init(UTIL.getConfiguration(), 300);
    byte[] dummyData = makeDummyData(600);

    Path hbaseRootDir = CommonFSUtils.getRootDir(conf);
    Path basedir = new Path(hbaseRootDir, tableDescriptor.getTableName().getNameAsString());
    List<Pair<byte[], String>> hfiles = new ArrayList<>(1);
    for (int i = 0; i < compactionThreshold; i++) {
      Path hpath = new Path(basedir, UUID.randomUUID().toString().replace("-", ""));
      hfiles.add(Pair.newPair(COLUMN_FAMILY, hpath.toString()));
      createHFile(hpath, i, dummyData);
    }

    // Bulk load the generated store files and compact; the 600-byte values exceed the 300-byte
    // mob threshold.
    Map<byte[], List<Path>> map = region.bulkLoadHFiles(hfiles, true, null);
    assertTrue("Bulkload result:", !map.isEmpty());
    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
    assertEquals("Before compaction: mob file count", 0, countMobFiles());
    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", 0, countMobRows());
    assertEquals("Before compaction: referenced mob file count", 0, countReferencedMobFiles());

    region.compactStores();

    assertEquals("After compaction: store files", 1, countStoreFiles());
    assertEquals("After compaction: mob file count", 1, countMobFiles());
    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("After compaction: mob rows", compactionThreshold, countMobRows());
    assertEquals("After compaction: referenced mob file count", 1, countReferencedMobFiles());
    assertEquals("After compaction: number of mob cells", compactionThreshold,
      countMobCellsInMetadata());
  }

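  /**
   * Deletes a row backed by mob data and verifies that a major compaction merges the store files.
   */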
  @Test
  public void testMajorCompactionAfterDelete() throws Exception {
    init(UTIL.getConfiguration(), 100);
    byte[] dummyData = makeDummyData(200); // larger than mob threshold
    Table loader = new RegionAsTable(region);
    // create hfiles and mob hfiles but don't trigger compaction
    int numHfiles = compactionThreshold - 1;
    byte[] deleteRow = Bytes.add(STARTROW, Bytes.toBytes(0));
    for (int i = 0; i < numHfiles; i++) {
      Put p = createPut(i, dummyData);
      loader.put(p);
      region.flush(true);
    }
    assertEquals("Before compaction: store files", numHfiles, countStoreFiles());
    assertEquals("Before compaction: mob file count", numHfiles, countMobFiles());
    assertEquals("Before compaction: rows", numHfiles, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", numHfiles, countMobRows());
    assertEquals("Before compaction: number of mob cells", numHfiles, countMobCellsInMetadata());
    // now let's delete some cells that contain mobs
    Delete delete = new Delete(deleteRow);
    delete.addFamily(COLUMN_FAMILY);
    region.delete(delete);
    region.flush(true);

    assertEquals("Before compaction: store files", numHfiles + 1, countStoreFiles());
    assertEquals("Before compaction: mob files", numHfiles, countMobFiles());
    // Force a major compaction so the deleted mob row is cleaned up and the store files merge.
    region.compact(true);
    assertEquals("After compaction: store files", 1, countStoreFiles());
  }

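  /** Returns the number of store files in the test column family. */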
  private int countStoreFiles() throws IOException {
    HStore store = region.getStore(COLUMN_FAMILY);
    return store.getStorefilesCount();
  }

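  /** Returns the number of files under the mob family directory, or 0 if it does not exist. */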
  private int countMobFiles() throws IOException {
    Path mobDirPath = MobUtils.getMobFamilyPath(conf, tableDescriptor.getTableName(),
      familyDescriptor.getNameAsString());
    if (fs.exists(mobDirPath)) {
      FileStatus[] files = UTIL.getTestFileSystem().listStatus(mobDirPath);
      return files.length;
    }
    return 0;
  }

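  /** Sums the {@code MOB_CELLS_COUNT} file-info entries across all files in the mob directory. */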
  private long countMobCellsInMetadata() throws IOException {
    long mobCellsCount = 0;
    Path mobDirPath = MobUtils.getMobFamilyPath(conf, tableDescriptor.getTableName(),
      familyDescriptor.getNameAsString());
    Configuration copyOfConf = new Configuration(conf);
    copyOfConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
    CacheConfig cacheConfig = new CacheConfig(copyOfConf);
    if (fs.exists(mobDirPath)) {
      FileStatus[] files = UTIL.getTestFileSystem().listStatus(mobDirPath);
      for (FileStatus file : files) {
        HStoreFile sf = new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true);
        sf.initReader();
        Map<byte[], byte[]> fileInfo = sf.getReader().loadFileInfo();
        byte[] count = fileInfo.get(MOB_CELLS_COUNT);
        assertTrue(count != null);
        mobCellsCount += Bytes.toLong(count);
      }
    }
    return mobCellsCount;
  }

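  /** Creates a SKIP_WAL put for row {@code STARTROW + rowIdx} with the given value. */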
  private Put createPut(int rowIdx, byte[] dummyData) throws IOException {
    Put p = new Put(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)));
    p.setDurability(Durability.SKIP_WAL);
    p.addColumn(COLUMN_FAMILY, Bytes.toBytes("colX"), dummyData);
    return p;
  }

  /**
   * Create an HFile containing a single cell with the given dummy data, for use with bulk load.
   */
  private void createHFile(Path path, int rowIdx, byte[] dummyData) throws IOException {
    HFileContext meta = new HFileContextBuilder().build();
    HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf)).withPath(fs, path)
      .withFileContext(meta).create();
    long now = EnvironmentEdgeManager.currentTime();
    try {
      KeyValue kv = new KeyValue(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)), COLUMN_FAMILY,
        Bytes.toBytes("colX"), now, dummyData);
      writer.append(kv);
    } finally {
      writer.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(EnvironmentEdgeManager.currentTime()));
      writer.close();
    }
  }

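  /** Counts the mob reference cells returned by a raw mob scan over the region. */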
  private int countMobRows() throws IOException {
    Scan scan = new Scan();
    // Do not retrieve the mob data when scanning
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
    InternalScanner scanner = region.getScanner(scan);

    int scannedCount = 0;
    List<Cell> results = new ArrayList<>();
    boolean hasMore = true;
    while (hasMore) {
      hasMore = scanner.next(results);
      for (Cell c : results) {
        if (MobUtils.isMobReferenceCell(c)) {
          scannedCount++;
        }
      }
      results.clear();
    }
    scanner.close();

    return scannedCount;
  }

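  /** Returns a byte array of the given size filled with random data. */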
  private byte[] makeDummyData(int size) {
    byte[] dummyData = new byte[size];
    Bytes.random(dummyData);
    return dummyData;
  }

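  /**
   * Scans the region with a raw mob scan and returns the number of distinct mob files referenced
   * by cells whose value length exceeds the mob threshold, asserting that each file exists.
   */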
  private int countReferencedMobFiles() throws IOException {
    Scan scan = new Scan();
    // Do not retrieve the mob data when scanning
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
    InternalScanner scanner = region.getScanner(scan);

    List<Cell> kvs = new ArrayList<>();
    boolean hasMore = true;
    String fileName;
    Set<String> files = new HashSet<>();
    do {
      kvs.clear();
      hasMore = scanner.next(kvs);
      for (Cell kv : kvs) {
        if (!MobUtils.isMobReferenceCell(kv)) {
          continue;
        }
        if (!MobUtils.hasValidMobRefCellValue(kv)) {
          continue;
        }
        int size = MobUtils.getMobValueLength(kv);
        if (size <= mobCellThreshold) {
          continue;
        }
        fileName = MobUtils.getMobFileName(kv);
        if (fileName.isEmpty()) {
          continue;
        }
        files.add(fileName);
        Path familyPath = MobUtils.getMobFamilyPath(conf, tableDescriptor.getTableName(),
          familyDescriptor.getNameAsString());
        assertTrue(fs.exists(new Path(familyPath, fileName)));
      }
    } while (hasMore);

    scanner.close();

    return files.size();
  }

}