001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.mob;
019
020import static org.apache.hadoop.hbase.mob.MobConstants.DEFAULT_MOB_CLEANER_BATCH_SIZE_UPPER_BOUND;
021import static org.apache.hadoop.hbase.mob.MobConstants.MOB_CLEANER_BATCH_SIZE_UPPER_BOUND;
022
023import java.io.FileNotFoundException;
024import java.io.IOException;
025import java.nio.ByteBuffer;
026import java.text.ParseException;
027import java.text.SimpleDateFormat;
028import java.util.ArrayList;
029import java.util.Arrays;
030import java.util.Calendar;
031import java.util.Collection;
032import java.util.Date;
033import java.util.List;
034import java.util.Optional;
035import java.util.UUID;
036import java.util.function.Consumer;
037import org.apache.hadoop.conf.Configuration;
038import org.apache.hadoop.fs.FileStatus;
039import org.apache.hadoop.fs.FileSystem;
040import org.apache.hadoop.fs.Path;
041import org.apache.hadoop.hbase.Cell;
042import org.apache.hadoop.hbase.HConstants;
043import org.apache.hadoop.hbase.PrivateCellUtil;
044import org.apache.hadoop.hbase.TableName;
045import org.apache.hadoop.hbase.Tag;
046import org.apache.hadoop.hbase.TagType;
047import org.apache.hadoop.hbase.TagUtil;
048import org.apache.hadoop.hbase.backup.HFileArchiver;
049import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
050import org.apache.hadoop.hbase.client.RegionInfo;
051import org.apache.hadoop.hbase.client.RegionInfoBuilder;
052import org.apache.hadoop.hbase.client.Scan;
053import org.apache.hadoop.hbase.client.TableDescriptor;
054import org.apache.hadoop.hbase.io.HFileLink;
055import org.apache.hadoop.hbase.io.compress.Compression;
056import org.apache.hadoop.hbase.io.crypto.Encryption;
057import org.apache.hadoop.hbase.io.hfile.CacheConfig;
058import org.apache.hadoop.hbase.io.hfile.HFile;
059import org.apache.hadoop.hbase.io.hfile.HFileContext;
060import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
061import org.apache.hadoop.hbase.regionserver.BloomType;
062import org.apache.hadoop.hbase.regionserver.HStoreFile;
063import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
064import org.apache.hadoop.hbase.regionserver.StoreUtils;
065import org.apache.hadoop.hbase.util.Bytes;
066import org.apache.hadoop.hbase.util.ChecksumType;
067import org.apache.hadoop.hbase.util.CommonFSUtils;
068import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
069import org.apache.yetus.audience.InterfaceAudience;
070import org.slf4j.Logger;
071import org.slf4j.LoggerFactory;
072
073import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSetMultimap;
074import org.apache.hbase.thirdparty.com.google.common.collect.SetMultimap;
075
076/**
077 * The mob utilities
078 */
079@InterfaceAudience.Private
080public final class MobUtils {
081
082  private static final Logger LOG = LoggerFactory.getLogger(MobUtils.class);
083  public static final String SEP = "_";
084
085  private static final ThreadLocal<SimpleDateFormat> LOCAL_FORMAT =
086    new ThreadLocal<SimpleDateFormat>() {
087      @Override
088      protected SimpleDateFormat initialValue() {
089        return new SimpleDateFormat("yyyyMMdd");
090      }
091    };
092
093  /**
094   * Private constructor to keep this class from being instantiated.
095   */
096  private MobUtils() {
097  }
098
099  /**
100   * Formats a date to a string.
101   * @param date The date.
102   * @return The string format of the date, it's yyyymmdd.
103   */
104  public static String formatDate(Date date) {
105    return LOCAL_FORMAT.get().format(date);
106  }
107
108  /**
109   * Parses the string to a date.
110   * @param dateString The string format of a date, it's yyyymmdd.
111   * @return A date.
112   */
113  public static Date parseDate(String dateString) throws ParseException {
114    return LOCAL_FORMAT.get().parse(dateString);
115  }
116
117  /**
118   * Whether the current cell is a mob reference cell.
119   * @param cell The current cell.
120   * @return True if the cell has a mob reference tag, false if it doesn't.
121   */
122  public static boolean isMobReferenceCell(Cell cell) {
123    if (cell.getTagsLength() > 0) {
124      Optional<Tag> tag = PrivateCellUtil.getTag(cell, TagType.MOB_REFERENCE_TAG_TYPE);
125      if (tag.isPresent()) {
126        return true;
127      }
128    }
129    return false;
130  }
131
132  /**
133   * Gets the table name tag.
134   * @param cell The current cell.
135   * @return The table name tag.
136   */
137  private static Optional<Tag> getTableNameTag(Cell cell) {
138    Optional<Tag> tag = Optional.empty();
139    if (cell.getTagsLength() > 0) {
140      tag = PrivateCellUtil.getTag(cell, TagType.MOB_TABLE_NAME_TAG_TYPE);
141    }
142    return tag;
143  }
144
145  /**
146   * Gets the table name from when this cell was written into a mob hfile as a string.
147   * @param cell to extract tag from
148   * @return table name as a string. empty if the tag is not found.
149   */
150  public static Optional<String> getTableNameString(Cell cell) {
151    Optional<Tag> tag = getTableNameTag(cell);
152    Optional<String> name = Optional.empty();
153    if (tag.isPresent()) {
154      name = Optional.of(Tag.getValueAsString(tag.get()));
155    }
156    return name;
157  }
158
159  /**
160   * Get the table name from when this cell was written into a mob hfile as a TableName.
161   * @param cell to extract tag from
162   * @return name of table as a TableName. empty if the tag is not found.
163   */
164  public static Optional<TableName> getTableName(Cell cell) {
165    Optional<Tag> maybe = getTableNameTag(cell);
166    Optional<TableName> name = Optional.empty();
167    if (maybe.isPresent()) {
168      final Tag tag = maybe.get();
169      if (tag.hasArray()) {
170        name = Optional
171          .of(TableName.valueOf(tag.getValueArray(), tag.getValueOffset(), tag.getValueLength()));
172      } else {
173        // TODO ByteBuffer handling in tags looks busted. revisit.
174        ByteBuffer buffer = tag.getValueByteBuffer().duplicate();
175        buffer.mark();
176        buffer.position(tag.getValueOffset());
177        buffer.limit(tag.getValueOffset() + tag.getValueLength());
178        name = Optional.of(TableName.valueOf(buffer));
179      }
180    }
181    return name;
182  }
183
184  /**
185   * Whether the tag list has a mob reference tag.
186   * @param tags The tag list.
187   * @return True if the list has a mob reference tag, false if it doesn't.
188   */
189  public static boolean hasMobReferenceTag(List<Tag> tags) {
190    if (!tags.isEmpty()) {
191      for (Tag tag : tags) {
192        if (tag.getType() == TagType.MOB_REFERENCE_TAG_TYPE) {
193          return true;
194        }
195      }
196    }
197    return false;
198  }
199
200  /**
201   * Indicates whether it's a raw scan. The information is set in the attribute "hbase.mob.scan.raw"
202   * of scan. For a mob cell, in a normal scan the scanners retrieves the mob cell from the mob
203   * file. In a raw scan, the scanner directly returns cell in HBase without retrieve the one in the
204   * mob file.
205   * @param scan The current scan.
206   * @return True if it's a raw scan.
207   */
208  public static boolean isRawMobScan(Scan scan) {
209    byte[] raw = scan.getAttribute(MobConstants.MOB_SCAN_RAW);
210    try {
211      return raw != null && Bytes.toBoolean(raw);
212    } catch (IllegalArgumentException e) {
213      return false;
214    }
215  }
216
217  /**
218   * Indicates whether it's a reference only scan. The information is set in the attribute
219   * "hbase.mob.scan.ref.only" of scan. If it's a ref only scan, only the cells with ref tag are
220   * returned.
221   * @param scan The current scan.
222   * @return True if it's a ref only scan.
223   */
224  public static boolean isRefOnlyScan(Scan scan) {
225    byte[] refOnly = scan.getAttribute(MobConstants.MOB_SCAN_REF_ONLY);
226    try {
227      return refOnly != null && Bytes.toBoolean(refOnly);
228    } catch (IllegalArgumentException e) {
229      return false;
230    }
231  }
232
233  /**
234   * Indicates whether the scan contains the information of caching blocks. The information is set
235   * in the attribute "hbase.mob.cache.blocks" of scan.
236   * @param scan The current scan.
237   * @return True when the Scan attribute specifies to cache the MOB blocks.
238   */
239  public static boolean isCacheMobBlocks(Scan scan) {
240    byte[] cache = scan.getAttribute(MobConstants.MOB_CACHE_BLOCKS);
241    try {
242      return cache != null && Bytes.toBoolean(cache);
243    } catch (IllegalArgumentException e) {
244      return false;
245    }
246  }
247
248  /**
249   * Sets the attribute of caching blocks in the scan.
250   * @param scan        The current scan.
251   * @param cacheBlocks True, set the attribute of caching blocks into the scan, the scanner with
252   *                    this scan caches blocks. False, the scanner doesn't cache blocks for this
253   *                    scan.
254   */
255  public static void setCacheMobBlocks(Scan scan, boolean cacheBlocks) {
256    scan.setAttribute(MobConstants.MOB_CACHE_BLOCKS, Bytes.toBytes(cacheBlocks));
257  }
258
259  /**
260   * Cleans the expired mob files. Cleans the files whose creation date is older than (current -
261   * columnFamily.ttl), and the minVersions of that column family is 0.
262   * @param fs               The current file system.
263   * @param conf             The current configuration.
264   * @param tableName        The current table name.
265   * @param columnDescriptor The descriptor of the current column family.
266   * @param cacheConfig      The cacheConfig that disables the block cache.
267   * @param current          The current time.
268   */
269  public static void cleanExpiredMobFiles(FileSystem fs, Configuration conf, TableName tableName,
270    ColumnFamilyDescriptor columnDescriptor, CacheConfig cacheConfig, long current)
271    throws IOException {
272    long timeToLive = columnDescriptor.getTimeToLive();
273    if (Integer.MAX_VALUE == timeToLive) {
274      // no need to clean, because the TTL is not set.
275      return;
276    }
277
278    Calendar calendar = Calendar.getInstance();
279    calendar.setTimeInMillis(current - timeToLive * 1000);
280    calendar.set(Calendar.HOUR_OF_DAY, 0);
281    calendar.set(Calendar.MINUTE, 0);
282    calendar.set(Calendar.SECOND, 0);
283    calendar.set(Calendar.MILLISECOND, 0);
284
285    Date expireDate = calendar.getTime();
286
287    LOG.info("MOB HFiles older than " + expireDate.toGMTString() + " will be deleted!");
288
289    FileStatus[] stats = null;
290    Path mobTableDir = CommonFSUtils.getTableDir(getMobHome(conf), tableName);
291    Path path = getMobFamilyPath(conf, tableName, columnDescriptor.getNameAsString());
292    try {
293      stats = fs.listStatus(path);
294    } catch (FileNotFoundException e) {
295      LOG.warn("Failed to find the mob file " + path, e);
296    }
297    if (null == stats) {
298      // no file found
299      return;
300    }
301    List<HStoreFile> filesToClean = new ArrayList<>();
302    int deletedFileCount = 0;
303    for (FileStatus file : stats) {
304      String fileName = file.getPath().getName();
305      try {
306        if (HFileLink.isHFileLink(file.getPath())) {
307          HFileLink hfileLink = HFileLink.buildFromHFileLinkPattern(conf, file.getPath());
308          fileName = hfileLink.getOriginPath().getName();
309        }
310
311        Date fileDate = parseDate(MobFileName.getDateFromName(fileName));
312
313        if (LOG.isDebugEnabled()) {
314          LOG.debug("Checking file {}", fileName);
315        }
316        if (fileDate.getTime() < expireDate.getTime()) {
317          if (LOG.isDebugEnabled()) {
318            LOG.debug("{} is an expired file", fileName);
319          }
320          filesToClean
321            .add(new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true));
322          if (
323            filesToClean.size() >= conf.getInt(MOB_CLEANER_BATCH_SIZE_UPPER_BOUND,
324              DEFAULT_MOB_CLEANER_BATCH_SIZE_UPPER_BOUND)
325          ) {
326            if (
327              removeMobFiles(conf, fs, tableName, mobTableDir, columnDescriptor.getName(),
328                filesToClean)
329            ) {
330              deletedFileCount += filesToClean.size();
331            }
332            filesToClean.clear();
333          }
334        }
335      } catch (Exception e) {
336        LOG.error("Cannot parse the fileName " + fileName, e);
337      }
338    }
339    if (
340      !filesToClean.isEmpty() && removeMobFiles(conf, fs, tableName, mobTableDir,
341        columnDescriptor.getName(), filesToClean)
342    ) {
343      deletedFileCount += filesToClean.size();
344    }
345    LOG.info("Table {} {} expired mob files in total are deleted", tableName, deletedFileCount);
346  }
347
348  /**
349   * Gets the root dir of the mob files. It's {HBASE_DIR}/mobdir.
350   * @param conf The current configuration.
351   * @return the root dir of the mob file.
352   */
353  public static Path getMobHome(Configuration conf) {
354    Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR));
355    return getMobHome(hbaseDir);
356  }
357
358  /**
359   * Gets the root dir of the mob files under the qualified HBase root dir. It's {rootDir}/mobdir.
360   * @param rootDir The qualified path of HBase root directory.
361   * @return The root dir of the mob file.
362   */
363  public static Path getMobHome(Path rootDir) {
364    return new Path(rootDir, MobConstants.MOB_DIR_NAME);
365  }
366
367  /**
368   * Gets the qualified root dir of the mob files.
369   * @param conf The current configuration.
370   * @return The qualified root dir.
371   */
372  public static Path getQualifiedMobRootDir(Configuration conf) throws IOException {
373    Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR));
374    Path mobRootDir = new Path(hbaseDir, MobConstants.MOB_DIR_NAME);
375    FileSystem fs = mobRootDir.getFileSystem(conf);
376    return mobRootDir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
377  }
378
379  /**
380   * Gets the table dir of the mob files under the qualified HBase root dir. It's
381   * {rootDir}/mobdir/data/${namespace}/${tableName}
382   * @param rootDir   The qualified path of HBase root directory.
383   * @param tableName The name of table.
384   * @return The table dir of the mob file.
385   */
386  public static Path getMobTableDir(Path rootDir, TableName tableName) {
387    return CommonFSUtils.getTableDir(getMobHome(rootDir), tableName);
388  }
389
390  /**
391   * Gets the region dir of the mob files. It's
392   * {HBASE_DIR}/mobdir/data/{namespace}/{tableName}/{regionEncodedName}.
393   * @param conf      The current configuration.
394   * @param tableName The current table name.
395   * @return The region dir of the mob files.
396   */
397  public static Path getMobRegionPath(Configuration conf, TableName tableName) {
398    return getMobRegionPath(new Path(conf.get(HConstants.HBASE_DIR)), tableName);
399  }
400
401  /**
402   * Gets the region dir of the mob files under the specified root dir. It's
403   * {rootDir}/mobdir/data/{namespace}/{tableName}/{regionEncodedName}.
404   * @param rootDir   The qualified path of HBase root directory.
405   * @param tableName The current table name.
406   * @return The region dir of the mob files.
407   */
408  public static Path getMobRegionPath(Path rootDir, TableName tableName) {
409    Path tablePath = CommonFSUtils.getTableDir(getMobHome(rootDir), tableName);
410    RegionInfo regionInfo = getMobRegionInfo(tableName);
411    return new Path(tablePath, regionInfo.getEncodedName());
412  }
413
414  /**
415   * Gets the family dir of the mob files. It's
416   * {HBASE_DIR}/mobdir/{namespace}/{tableName}/{regionEncodedName}/{columnFamilyName}.
417   * @param conf       The current configuration.
418   * @param tableName  The current table name.
419   * @param familyName The current family name.
420   * @return The family dir of the mob files.
421   */
422  public static Path getMobFamilyPath(Configuration conf, TableName tableName, String familyName) {
423    return new Path(getMobRegionPath(conf, tableName), familyName);
424  }
425
426  /**
427   * Gets the family dir of the mob files. It's
428   * {HBASE_DIR}/mobdir/{namespace}/{tableName}/{regionEncodedName}/{columnFamilyName}.
429   * @param regionPath The path of mob region which is a dummy one.
430   * @param familyName The current family name.
431   * @return The family dir of the mob files.
432   */
433  public static Path getMobFamilyPath(Path regionPath, String familyName) {
434    return new Path(regionPath, familyName);
435  }
436
437  /**
438   * Gets the RegionInfo of the mob files. This is a dummy region. The mob files are not saved in a
439   * region in HBase. It's internally used only.
440   * @return A dummy mob region info.
441   */
442  public static RegionInfo getMobRegionInfo(TableName tableName) {
443    return RegionInfoBuilder.newBuilder(tableName).setStartKey(MobConstants.MOB_REGION_NAME_BYTES)
444      .setEndKey(HConstants.EMPTY_END_ROW).setSplit(false).setRegionId(0).build();
445  }
446
447  /**
448   * Gets whether the current RegionInfo is a mob one.
449   * @param regionInfo The current RegionInfo.
450   * @return If true, the current RegionInfo is a mob one.
451   */
452  public static boolean isMobRegionInfo(RegionInfo regionInfo) {
453    return regionInfo == null
454      ? false
455      : getMobRegionInfo(regionInfo.getTable()).getEncodedName()
456        .equals(regionInfo.getEncodedName());
457  }
458
459  /**
460   * Gets whether the current region name follows the pattern of a mob region name.
461   * @param tableName  The current table name.
462   * @param regionName The current region name.
463   * @return True if the current region name follows the pattern of a mob region name.
464   */
465  public static boolean isMobRegionName(TableName tableName, byte[] regionName) {
466    return Bytes.equals(regionName, getMobRegionInfo(tableName).getRegionName());
467  }
468
469  /**
470   * Archives the mob files.
471   * @param conf       The current configuration.
472   * @param fs         The current file system.
473   * @param tableName  The table name.
474   * @param tableDir   The table directory.
475   * @param family     The name of the column family.
476   * @param storeFiles The files to be deleted.
477   */
478  public static boolean removeMobFiles(Configuration conf, FileSystem fs, TableName tableName,
479    Path tableDir, byte[] family, Collection<HStoreFile> storeFiles) {
480    try {
481      HFileArchiver.archiveStoreFiles(conf, fs, getMobRegionInfo(tableName), tableDir, family,
482        storeFiles);
483      LOG.info("Table {} {} expired mob files are deleted", tableName, storeFiles.size());
484      return true;
485    } catch (IOException e) {
486      LOG.error("Failed to delete the mob files, table {}", tableName, e);
487    }
488    return false;
489  }
490
491  /**
492   * Creates a mob reference KeyValue. The value of the mob reference KeyValue is mobCellValueSize +
493   * mobFileName.
494   * @param cell         The original Cell.
495   * @param fileName     The mob file name where the mob reference KeyValue is written.
496   * @param tableNameTag The tag of the current table name. It's very important in cloning the
497   *                     snapshot.
498   * @return The mob reference KeyValue.
499   */
500  public static Cell createMobRefCell(Cell cell, byte[] fileName, Tag tableNameTag) {
501    // Append the tags to the KeyValue.
502    // The key is same, the value is the filename of the mob file
503    List<Tag> tags = new ArrayList<>();
504    // Add the ref tag as the 1st one.
505    tags.add(MobConstants.MOB_REF_TAG);
506    // Add the tag of the source table name, this table is where this mob file is flushed
507    // from.
508    // It's very useful in cloning the snapshot. When reading from the cloning table, we need to
509    // find the original mob files by this table name. For details please see cloning
510    // snapshot for mob files.
511    tags.add(tableNameTag);
512    return createMobRefCell(cell, fileName, TagUtil.fromList(tags));
513  }
514
515  public static Cell createMobRefCell(Cell cell, byte[] fileName, byte[] refCellTags) {
516    byte[] refValue = Bytes.add(Bytes.toBytes(cell.getValueLength()), fileName);
517    return PrivateCellUtil.createCell(cell, refValue, TagUtil.concatTags(refCellTags, cell));
518  }
519
520  /**
521   * Creates a writer for the mob file in temp directory.
522   * @param conf          The current configuration.
523   * @param fs            The current file system.
524   * @param family        The descriptor of the current column family.
525   * @param date          The date string, its format is yyyymmmdd.
526   * @param basePath      The basic path for a temp directory.
527   * @param maxKeyCount   The key count.
528   * @param compression   The compression algorithm.
529   * @param startKey      The hex string of the start key.
530   * @param cacheConfig   The current cache config.
531   * @param cryptoContext The encryption context.
532   * @param isCompaction  If the writer is used in compaction.
533   * @return The writer for the mob file.
534   */
535  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
536    ColumnFamilyDescriptor family, String date, Path basePath, long maxKeyCount,
537    Compression.Algorithm compression, String startKey, CacheConfig cacheConfig,
538    Encryption.Context cryptoContext, boolean isCompaction, String regionName) throws IOException {
539    MobFileName mobFileName = MobFileName.create(startKey, date,
540      UUID.randomUUID().toString().replaceAll("-", ""), regionName);
541    return createWriter(conf, fs, family, mobFileName, basePath, maxKeyCount, compression,
542      cacheConfig, cryptoContext, isCompaction);
543  }
544
545  /**
546   * Creates a writer for the mob file in temp directory.
547   * @param conf          The current configuration.
548   * @param fs            The current file system.
549   * @param family        The descriptor of the current column family.
550   * @param mobFileName   The mob file name.
551   * @param basePath      The basic path for a temp directory.
552   * @param maxKeyCount   The key count.
553   * @param compression   The compression algorithm.
554   * @param cacheConfig   The current cache config.
555   * @param cryptoContext The encryption context.
556   * @param isCompaction  If the writer is used in compaction.
557   * @return The writer for the mob file.
558   */
559  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
560    ColumnFamilyDescriptor family, MobFileName mobFileName, Path basePath, long maxKeyCount,
561    Compression.Algorithm compression, CacheConfig cacheConfig, Encryption.Context cryptoContext,
562    boolean isCompaction) throws IOException {
563    return createWriter(conf, fs, family, new Path(basePath, mobFileName.getFileName()),
564      maxKeyCount, compression, cacheConfig, cryptoContext, StoreUtils.getChecksumType(conf),
565      StoreUtils.getBytesPerChecksum(conf), family.getBlocksize(), BloomType.NONE, isCompaction);
566  }
567
568  /**
569   * Creates a writer for the mob file in temp directory.
570   * @param conf             The current configuration.
571   * @param fs               The current file system.
572   * @param family           The descriptor of the current column family.
573   * @param path             The path for a temp directory.
574   * @param maxKeyCount      The key count.
575   * @param compression      The compression algorithm.
576   * @param cacheConfig      The current cache config.
577   * @param cryptoContext    The encryption context.
578   * @param checksumType     The checksum type.
579   * @param bytesPerChecksum The bytes per checksum.
580   * @param blocksize        The HFile block size.
581   * @param bloomType        The bloom filter type.
582   * @param isCompaction     If the writer is used in compaction.
583   * @return The writer for the mob file.
584   */
585  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
586    ColumnFamilyDescriptor family, Path path, long maxKeyCount, Compression.Algorithm compression,
587    CacheConfig cacheConfig, Encryption.Context cryptoContext, ChecksumType checksumType,
588    int bytesPerChecksum, int blocksize, BloomType bloomType, boolean isCompaction)
589    throws IOException {
590    return createWriter(conf, fs, family, path, maxKeyCount, compression, cacheConfig,
591      cryptoContext, checksumType, bytesPerChecksum, blocksize, bloomType, isCompaction, null);
592  }
593
594  /**
595   * Creates a writer for the mob file in temp directory.
596   * @param conf                  The current configuration.
597   * @param fs                    The current file system.
598   * @param family                The descriptor of the current column family.
599   * @param path                  The path for a temp directory.
600   * @param maxKeyCount           The key count.
601   * @param compression           The compression algorithm.
602   * @param cacheConfig           The current cache config.
603   * @param cryptoContext         The encryption context.
604   * @param checksumType          The checksum type.
605   * @param bytesPerChecksum      The bytes per checksum.
606   * @param blocksize             The HFile block size.
607   * @param bloomType             The bloom filter type.
608   * @param isCompaction          If the writer is used in compaction.
609   * @param writerCreationTracker to track the current writer in the store
610   * @return The writer for the mob file.
611   */
612  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
613    ColumnFamilyDescriptor family, Path path, long maxKeyCount, Compression.Algorithm compression,
614    CacheConfig cacheConfig, Encryption.Context cryptoContext, ChecksumType checksumType,
615    int bytesPerChecksum, int blocksize, BloomType bloomType, boolean isCompaction,
616    Consumer<Path> writerCreationTracker) throws IOException {
617    if (compression == null) {
618      compression = HFile.DEFAULT_COMPRESSION_ALGORITHM;
619    }
620    final CacheConfig writerCacheConf;
621    if (isCompaction) {
622      writerCacheConf = new CacheConfig(cacheConfig);
623      writerCacheConf.setCacheDataOnWrite(false);
624    } else {
625      writerCacheConf = cacheConfig;
626    }
627    HFileContext hFileContext = new HFileContextBuilder().withCompression(compression)
628      .withIncludesMvcc(true).withIncludesTags(true).withCompressTags(family.isCompressTags())
629      .withChecksumType(checksumType).withBytesPerCheckSum(bytesPerChecksum)
630      .withBlockSize(blocksize).withHBaseCheckSum(true)
631      .withDataBlockEncoding(family.getDataBlockEncoding()).withEncryptionContext(cryptoContext)
632      .withCreateTime(EnvironmentEdgeManager.currentTime()).build();
633
634    StoreFileWriter w = new StoreFileWriter.Builder(conf, writerCacheConf, fs).withFilePath(path)
635      .withBloomType(bloomType).withMaxKeyCount(maxKeyCount).withFileContext(hFileContext)
636      .withWriterCreationTracker(writerCreationTracker).build();
637    return w;
638  }
639
640  /**
641   * Indicates whether the current mob ref cell has a valid value. A mob ref cell has a mob
642   * reference tag. The value of a mob ref cell consists of two parts, real mob value length and mob
643   * file name. The real mob value length takes 4 bytes. The remaining part is the mob file name.
644   * @param cell The mob ref cell.
645   * @return True if the cell has a valid value.
646   */
647  public static boolean hasValidMobRefCellValue(Cell cell) {
648    return cell.getValueLength() > Bytes.SIZEOF_INT;
649  }
650
651  /**
652   * Gets the mob value length from the mob ref cell. A mob ref cell has a mob reference tag. The
653   * value of a mob ref cell consists of two parts, real mob value length and mob file name. The
654   * real mob value length takes 4 bytes. The remaining part is the mob file name.
655   * @param cell The mob ref cell.
656   * @return The real mob value length.
657   */
658  public static int getMobValueLength(Cell cell) {
659    return PrivateCellUtil.getValueAsInt(cell);
660  }
661
662  /**
663   * Gets the mob file name from the mob ref cell. A mob ref cell has a mob reference tag. The value
664   * of a mob ref cell consists of two parts, real mob value length and mob file name. The real mob
665   * value length takes 4 bytes. The remaining part is the mob file name.
666   * @param cell The mob ref cell.
667   * @return The mob file name.
668   */
669  public static String getMobFileName(Cell cell) {
670    return Bytes.toString(cell.getValueArray(), cell.getValueOffset() + Bytes.SIZEOF_INT,
671      cell.getValueLength() - Bytes.SIZEOF_INT);
672  }
673
674  /**
675   * Checks whether this table has mob-enabled columns.
676   * @param htd The current table descriptor.
677   * @return Whether this table has mob-enabled columns.
678   */
679  public static boolean hasMobColumns(TableDescriptor htd) {
680    ColumnFamilyDescriptor[] hcds = htd.getColumnFamilies();
681    for (ColumnFamilyDescriptor hcd : hcds) {
682      if (hcd.isMobEnabled()) {
683        return true;
684      }
685    }
686    return false;
687  }
688
689  /**
690   * Get list of Mob column families (if any exists)
691   * @param htd table descriptor
692   * @return list of Mob column families
693   */
694  public static List<ColumnFamilyDescriptor> getMobColumnFamilies(TableDescriptor htd) {
695
696    List<ColumnFamilyDescriptor> fams = new ArrayList<ColumnFamilyDescriptor>();
697    ColumnFamilyDescriptor[] hcds = htd.getColumnFamilies();
698    for (ColumnFamilyDescriptor hcd : hcds) {
699      if (hcd.isMobEnabled()) {
700        fams.add(hcd);
701      }
702    }
703    return fams;
704  }
705
706  /**
707   * Indicates whether return null value when the mob file is missing or corrupt. The information is
708   * set in the attribute "empty.value.on.mobcell.miss" of scan.
709   * @param scan The current scan.
710   * @return True if the readEmptyValueOnMobCellMiss is enabled.
711   */
712  public static boolean isReadEmptyValueOnMobCellMiss(Scan scan) {
713    byte[] readEmptyValueOnMobCellMiss =
714      scan.getAttribute(MobConstants.EMPTY_VALUE_ON_MOBCELL_MISS);
715    try {
716      return readEmptyValueOnMobCellMiss != null && Bytes.toBoolean(readEmptyValueOnMobCellMiss);
717    } catch (IllegalArgumentException e) {
718      return false;
719    }
720  }
721
722  /**
723   * Checks if the mob file is expired.
724   * @param column   The descriptor of the current column family.
725   * @param current  The current time.
726   * @param fileDate The date string parsed from the mob file name.
727   * @return True if the mob file is expired.
728   */
729  public static boolean isMobFileExpired(ColumnFamilyDescriptor column, long current,
730    String fileDate) {
731    if (column.getMinVersions() > 0) {
732      return false;
733    }
734    long timeToLive = column.getTimeToLive();
735    if (Integer.MAX_VALUE == timeToLive) {
736      return false;
737    }
738
739    Date expireDate = new Date(current - timeToLive * 1000);
740    expireDate = new Date(expireDate.getYear(), expireDate.getMonth(), expireDate.getDate());
741    try {
742      Date date = parseDate(fileDate);
743      if (date.getTime() < expireDate.getTime()) {
744        return true;
745      }
746    } catch (ParseException e) {
747      LOG.warn("Failed to parse the date " + fileDate, e);
748      return false;
749    }
750    return false;
751  }
752
753  /**
754   * Serialize a set of referenced mob hfiles
755   * @param mobRefSet to serialize, may be null
756   * @return byte array to i.e. put into store file metadata. will not be null
757   */
758  public static byte[] serializeMobFileRefs(SetMultimap<TableName, String> mobRefSet) {
759    if (mobRefSet != null && mobRefSet.size() > 0) {
760      // Here we rely on the fact that '/' and ',' are not allowed in either table names nor hfile
761      // names for serialization.
762      //
763      // exampleTable/filename1,filename2//example:table/filename5//otherTable/filename3,filename4
764      //
765      // to approximate the needed capacity we use the fact that there will usually be 1 table name
766      // and each mob filename is around 105 bytes. we pick an arbitrary number to cover "most"
767      // single table name lengths
768      StringBuilder sb = new StringBuilder(100 + mobRefSet.size() * 105);
769      boolean doubleSlash = false;
770      for (TableName tableName : mobRefSet.keySet()) {
771        if (doubleSlash) {
772          sb.append("//");
773        } else {
774          doubleSlash = true;
775        }
776        sb.append(tableName).append("/");
777        boolean comma = false;
778        for (String refs : mobRefSet.get(tableName)) {
779          if (comma) {
780            sb.append(",");
781          } else {
782            comma = true;
783          }
784          sb.append(refs);
785        }
786      }
787      return Bytes.toBytes(sb.toString());
788    } else {
789      return HStoreFile.NULL_VALUE;
790    }
791  }
792
793  /**
794   * Deserialize the set of referenced mob hfiles from store file metadata.
795   * @param bytes compatibly serialized data. can not be null
796   * @return a setmultimap of original table to list of hfile names. will be empty if no values.
797   * @throws IllegalStateException if there are values but no table name
798   */
799  public static ImmutableSetMultimap.Builder<TableName, String> deserializeMobFileRefs(byte[] bytes)
800    throws IllegalStateException {
801    ImmutableSetMultimap.Builder<TableName, String> map = ImmutableSetMultimap.builder();
802    if (bytes.length > 1) {
803      // TODO avoid turning the tablename pieces in to strings.
804      String s = Bytes.toString(bytes);
805      String[] tables = s.split("//");
806      for (String tableEnc : tables) {
807        final int delim = tableEnc.indexOf('/');
808        if (delim <= 0) {
809          throw new IllegalStateException("MOB reference data does not match expected encoding: "
810            + "no table name included before list of mob refs.");
811        }
812        TableName table = TableName.valueOf(tableEnc.substring(0, delim));
813        String[] refs = tableEnc.substring(delim + 1).split(",");
814        map.putAll(table, refs);
815      }
816    } else {
817      if (LOG.isDebugEnabled()) {
818        // array length 1 should be the NULL_VALUE.
819        if (!Arrays.equals(HStoreFile.NULL_VALUE, bytes)) {
820          LOG.debug(
821            "Serialized MOB file refs array was treated as the placeholder 'no entries' but"
822              + " didn't have the expected placeholder byte. expected={} and actual={}",
823            Arrays.toString(HStoreFile.NULL_VALUE), Arrays.toString(bytes));
824        }
825      }
826    }
827    return map;
828  }
829}