001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.mob;
019
020import static org.apache.hadoop.hbase.mob.MobConstants.DEFAULT_MOB_CLEANER_BATCH_SIZE_UPPER_BOUND;
021import static org.apache.hadoop.hbase.mob.MobConstants.MOB_CLEANER_BATCH_SIZE_UPPER_BOUND;
022
023import java.io.FileNotFoundException;
024import java.io.IOException;
025import java.nio.ByteBuffer;
026import java.text.ParseException;
027import java.text.SimpleDateFormat;
028import java.util.ArrayList;
029import java.util.Arrays;
030import java.util.Calendar;
031import java.util.Collection;
032import java.util.Date;
033import java.util.List;
034import java.util.Optional;
035import java.util.UUID;
036import java.util.function.Consumer;
037import org.apache.hadoop.conf.Configuration;
038import org.apache.hadoop.fs.FileStatus;
039import org.apache.hadoop.fs.FileSystem;
040import org.apache.hadoop.fs.Path;
041import org.apache.hadoop.hbase.Cell;
042import org.apache.hadoop.hbase.HConstants;
043import org.apache.hadoop.hbase.PrivateCellUtil;
044import org.apache.hadoop.hbase.TableName;
045import org.apache.hadoop.hbase.Tag;
046import org.apache.hadoop.hbase.TagType;
047import org.apache.hadoop.hbase.TagUtil;
048import org.apache.hadoop.hbase.backup.HFileArchiver;
049import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
050import org.apache.hadoop.hbase.client.RegionInfo;
051import org.apache.hadoop.hbase.client.RegionInfoBuilder;
052import org.apache.hadoop.hbase.client.Scan;
053import org.apache.hadoop.hbase.client.TableDescriptor;
054import org.apache.hadoop.hbase.io.HFileLink;
055import org.apache.hadoop.hbase.io.compress.Compression;
056import org.apache.hadoop.hbase.io.crypto.Encryption;
057import org.apache.hadoop.hbase.io.hfile.CacheConfig;
058import org.apache.hadoop.hbase.io.hfile.HFile;
059import org.apache.hadoop.hbase.io.hfile.HFileContext;
060import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
061import org.apache.hadoop.hbase.regionserver.BloomType;
062import org.apache.hadoop.hbase.regionserver.HStoreFile;
063import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
064import org.apache.hadoop.hbase.regionserver.StoreUtils;
065import org.apache.hadoop.hbase.util.Bytes;
066import org.apache.hadoop.hbase.util.ChecksumType;
067import org.apache.hadoop.hbase.util.CommonFSUtils;
068import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
069import org.apache.yetus.audience.InterfaceAudience;
070import org.slf4j.Logger;
071import org.slf4j.LoggerFactory;
072
073import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSetMultimap;
074import org.apache.hbase.thirdparty.com.google.common.collect.SetMultimap;
075
076/**
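 * Static helper methods for the mob feature: date formatting for mob file names, mob tag and scan
 * attribute checks, mob directory paths, mob file writers, expired mob file cleanup and
 * serialization of mob file references.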
078 */
079@InterfaceAudience.Private
080public final class MobUtils {
081
  // Logger shared by the static helpers of this class.
  private static final Logger LOG = LoggerFactory.getLogger(MobUtils.class);
  // Underscore separator constant; it is not referenced by any other code in this class.
  public static final String SEP = "_";
084
085  private static final ThreadLocal<SimpleDateFormat> LOCAL_FORMAT =
086    new ThreadLocal<SimpleDateFormat>() {
087      @Override
088      protected SimpleDateFormat initialValue() {
089        return new SimpleDateFormat("yyyyMMdd");
090      }
091    };
092
  /**
   * Not instantiable: this class only exposes static helpers, so the constructor is private.
   */
  private MobUtils() {
  }
098
099  /**
100   * Formats a date to a string.
101   * @param date The date.
102   * @return The string format of the date, it's yyyymmdd.
103   */
104  public static String formatDate(Date date) {
105    return LOCAL_FORMAT.get().format(date);
106  }
107
108  /**
109   * Parses the string to a date.
110   * @param dateString The string format of a date, it's yyyymmdd.
111   * @return A date.
112   */
113  public static Date parseDate(String dateString) throws ParseException {
114    return LOCAL_FORMAT.get().parse(dateString);
115  }
116
117  /**
118   * Whether the current cell is a mob reference cell.
119   * @param cell The current cell.
120   * @return True if the cell has a mob reference tag, false if it doesn't.
121   */
122  public static boolean isMobReferenceCell(Cell cell) {
123    if (cell.getTagsLength() > 0) {
124      Optional<Tag> tag = PrivateCellUtil.getTag(cell, TagType.MOB_REFERENCE_TAG_TYPE);
125      if (tag.isPresent()) {
126        return true;
127      }
128    }
129    return false;
130  }
131
132  /**
133   * Gets the table name tag.
134   * @param cell The current cell.
135   * @return The table name tag.
136   */
137  private static Optional<Tag> getTableNameTag(Cell cell) {
138    Optional<Tag> tag = Optional.empty();
139    if (cell.getTagsLength() > 0) {
140      tag = PrivateCellUtil.getTag(cell, TagType.MOB_TABLE_NAME_TAG_TYPE);
141    }
142    return tag;
143  }
144
145  /**
146   * Gets the table name from when this cell was written into a mob hfile as a string.
147   * @param cell to extract tag from
148   * @return table name as a string. empty if the tag is not found.
149   */
150  public static Optional<String> getTableNameString(Cell cell) {
151    Optional<Tag> tag = getTableNameTag(cell);
152    Optional<String> name = Optional.empty();
153    if (tag.isPresent()) {
154      name = Optional.of(Tag.getValueAsString(tag.get()));
155    }
156    return name;
157  }
158
159  /**
160   * Get the table name from when this cell was written into a mob hfile as a TableName.
161   * @param cell to extract tag from
162   * @return name of table as a TableName. empty if the tag is not found.
163   */
164  public static Optional<TableName> getTableName(Cell cell) {
165    Optional<Tag> maybe = getTableNameTag(cell);
166    Optional<TableName> name = Optional.empty();
167    if (maybe.isPresent()) {
168      final Tag tag = maybe.get();
169      if (tag.hasArray()) {
170        name = Optional
171          .of(TableName.valueOf(tag.getValueArray(), tag.getValueOffset(), tag.getValueLength()));
172      } else {
173        // TODO ByteBuffer handling in tags looks busted. revisit.
174        ByteBuffer buffer = tag.getValueByteBuffer().duplicate();
175        buffer.mark();
176        buffer.position(tag.getValueOffset());
177        buffer.limit(tag.getValueOffset() + tag.getValueLength());
178        name = Optional.of(TableName.valueOf(buffer));
179      }
180    }
181    return name;
182  }
183
184  /**
185   * Whether the tag list has a mob reference tag.
186   * @param tags The tag list.
187   * @return True if the list has a mob reference tag, false if it doesn't.
188   */
189  public static boolean hasMobReferenceTag(List<Tag> tags) {
190    if (!tags.isEmpty()) {
191      for (Tag tag : tags) {
192        if (tag.getType() == TagType.MOB_REFERENCE_TAG_TYPE) {
193          return true;
194        }
195      }
196    }
197    return false;
198  }
199
200  /**
201   * Indicates whether it's a raw scan. The information is set in the attribute "hbase.mob.scan.raw"
202   * of scan. For a mob cell, in a normal scan the scanners retrieves the mob cell from the mob
203   * file. In a raw scan, the scanner directly returns cell in HBase without retrieve the one in the
204   * mob file.
205   * @param scan The current scan.
206   * @return True if it's a raw scan.
207   */
208  public static boolean isRawMobScan(Scan scan) {
209    byte[] raw = scan.getAttribute(MobConstants.MOB_SCAN_RAW);
210    try {
211      return raw != null && Bytes.toBoolean(raw);
212    } catch (IllegalArgumentException e) {
213      return false;
214    }
215  }
216
217  /**
218   * Indicates whether it's a reference only scan. The information is set in the attribute
219   * "hbase.mob.scan.ref.only" of scan. If it's a ref only scan, only the cells with ref tag are
220   * returned.
221   * @param scan The current scan.
222   * @return True if it's a ref only scan.
223   */
224  public static boolean isRefOnlyScan(Scan scan) {
225    byte[] refOnly = scan.getAttribute(MobConstants.MOB_SCAN_REF_ONLY);
226    try {
227      return refOnly != null && Bytes.toBoolean(refOnly);
228    } catch (IllegalArgumentException e) {
229      return false;
230    }
231  }
232
233  /**
234   * Indicates whether the scan contains the information of caching blocks. The information is set
235   * in the attribute "hbase.mob.cache.blocks" of scan.
236   * @param scan The current scan.
237   * @return True when the Scan attribute specifies to cache the MOB blocks.
238   */
239  public static boolean isCacheMobBlocks(Scan scan) {
240    byte[] cache = scan.getAttribute(MobConstants.MOB_CACHE_BLOCKS);
241    try {
242      return cache != null && Bytes.toBoolean(cache);
243    } catch (IllegalArgumentException e) {
244      return false;
245    }
246  }
247
248  /**
249   * Sets the attribute of caching blocks in the scan.
250   * @param scan        The current scan.
251   * @param cacheBlocks True, set the attribute of caching blocks into the scan, the scanner with
252   *                    this scan caches blocks. False, the scanner doesn't cache blocks for this
253   *                    scan.
254   */
255  public static void setCacheMobBlocks(Scan scan, boolean cacheBlocks) {
256    scan.setAttribute(MobConstants.MOB_CACHE_BLOCKS, Bytes.toBytes(cacheBlocks));
257  }
258
259  /**
260   * Cleans the expired mob files. Cleans the files whose creation date is older than (current -
261   * columnFamily.ttl), and the minVersions of that column family is 0.
262   * @param fs               The current file system.
263   * @param conf             The current configuration.
264   * @param tableName        The current table name.
265   * @param columnDescriptor The descriptor of the current column family.
266   * @param cacheConfig      The cacheConfig that disables the block cache.
267   * @param current          The current time.
268   */
269  public static void cleanExpiredMobFiles(FileSystem fs, Configuration conf, TableName tableName,
270    ColumnFamilyDescriptor columnDescriptor, CacheConfig cacheConfig, long current)
271    throws IOException {
272    long timeToLive = columnDescriptor.getTimeToLive();
273    if (Integer.MAX_VALUE == timeToLive) {
274      // no need to clean, because the TTL is not set.
275      return;
276    }
277
278    Calendar calendar = Calendar.getInstance();
279    calendar.setTimeInMillis(current - timeToLive * 1000);
280    calendar.set(Calendar.HOUR_OF_DAY, 0);
281    calendar.set(Calendar.MINUTE, 0);
282    calendar.set(Calendar.SECOND, 0);
283
284    Date expireDate = calendar.getTime();
285
286    LOG.info("MOB HFiles older than " + expireDate.toGMTString() + " will be deleted!");
287
288    FileStatus[] stats = null;
289    Path mobTableDir = CommonFSUtils.getTableDir(getMobHome(conf), tableName);
290    Path path = getMobFamilyPath(conf, tableName, columnDescriptor.getNameAsString());
291    try {
292      stats = fs.listStatus(path);
293    } catch (FileNotFoundException e) {
294      LOG.warn("Failed to find the mob file " + path, e);
295    }
296    if (null == stats) {
297      // no file found
298      return;
299    }
300    List<HStoreFile> filesToClean = new ArrayList<>();
301    int deletedFileCount = 0;
302    for (FileStatus file : stats) {
303      String fileName = file.getPath().getName();
304      try {
305        if (HFileLink.isHFileLink(file.getPath())) {
306          HFileLink hfileLink = HFileLink.buildFromHFileLinkPattern(conf, file.getPath());
307          fileName = hfileLink.getOriginPath().getName();
308        }
309
310        Date fileDate = parseDate(MobFileName.getDateFromName(fileName));
311
312        if (LOG.isDebugEnabled()) {
313          LOG.debug("Checking file {}", fileName);
314        }
315        if (fileDate.getTime() < expireDate.getTime()) {
316          if (LOG.isDebugEnabled()) {
317            LOG.debug("{} is an expired file", fileName);
318          }
319          filesToClean
320            .add(new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true));
321          if (
322            filesToClean.size() >= conf.getInt(MOB_CLEANER_BATCH_SIZE_UPPER_BOUND,
323              DEFAULT_MOB_CLEANER_BATCH_SIZE_UPPER_BOUND)
324          ) {
325            if (
326              removeMobFiles(conf, fs, tableName, mobTableDir, columnDescriptor.getName(),
327                filesToClean)
328            ) {
329              deletedFileCount += filesToClean.size();
330            }
331            filesToClean.clear();
332          }
333        }
334      } catch (Exception e) {
335        LOG.error("Cannot parse the fileName " + fileName, e);
336      }
337    }
338    if (
339      !filesToClean.isEmpty() && removeMobFiles(conf, fs, tableName, mobTableDir,
340        columnDescriptor.getName(), filesToClean)
341    ) {
342      deletedFileCount += filesToClean.size();
343    }
344    LOG.info("Table {} {} expired mob files in total are deleted", tableName, deletedFileCount);
345  }
346
347  /**
348   * Gets the root dir of the mob files. It's {HBASE_DIR}/mobdir.
349   * @param conf The current configuration.
350   * @return the root dir of the mob file.
351   */
352  public static Path getMobHome(Configuration conf) {
353    Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR));
354    return getMobHome(hbaseDir);
355  }
356
357  /**
358   * Gets the root dir of the mob files under the qualified HBase root dir. It's {rootDir}/mobdir.
359   * @param rootDir The qualified path of HBase root directory.
360   * @return The root dir of the mob file.
361   */
362  public static Path getMobHome(Path rootDir) {
363    return new Path(rootDir, MobConstants.MOB_DIR_NAME);
364  }
365
366  /**
367   * Gets the qualified root dir of the mob files.
368   * @param conf The current configuration.
369   * @return The qualified root dir.
370   */
371  public static Path getQualifiedMobRootDir(Configuration conf) throws IOException {
372    Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR));
373    Path mobRootDir = new Path(hbaseDir, MobConstants.MOB_DIR_NAME);
374    FileSystem fs = mobRootDir.getFileSystem(conf);
375    return mobRootDir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
376  }
377
378  /**
379   * Gets the table dir of the mob files under the qualified HBase root dir. It's
380   * {rootDir}/mobdir/data/${namespace}/${tableName}
381   * @param rootDir   The qualified path of HBase root directory.
382   * @param tableName The name of table.
383   * @return The table dir of the mob file.
384   */
385  public static Path getMobTableDir(Path rootDir, TableName tableName) {
386    return CommonFSUtils.getTableDir(getMobHome(rootDir), tableName);
387  }
388
389  /**
390   * Gets the region dir of the mob files. It's
391   * {HBASE_DIR}/mobdir/data/{namespace}/{tableName}/{regionEncodedName}.
392   * @param conf      The current configuration.
393   * @param tableName The current table name.
394   * @return The region dir of the mob files.
395   */
396  public static Path getMobRegionPath(Configuration conf, TableName tableName) {
397    return getMobRegionPath(new Path(conf.get(HConstants.HBASE_DIR)), tableName);
398  }
399
400  /**
401   * Gets the region dir of the mob files under the specified root dir. It's
402   * {rootDir}/mobdir/data/{namespace}/{tableName}/{regionEncodedName}.
403   * @param rootDir   The qualified path of HBase root directory.
404   * @param tableName The current table name.
405   * @return The region dir of the mob files.
406   */
407  public static Path getMobRegionPath(Path rootDir, TableName tableName) {
408    Path tablePath = CommonFSUtils.getTableDir(getMobHome(rootDir), tableName);
409    RegionInfo regionInfo = getMobRegionInfo(tableName);
410    return new Path(tablePath, regionInfo.getEncodedName());
411  }
412
413  /**
414   * Gets the family dir of the mob files. It's
415   * {HBASE_DIR}/mobdir/{namespace}/{tableName}/{regionEncodedName}/{columnFamilyName}.
416   * @param conf       The current configuration.
417   * @param tableName  The current table name.
418   * @param familyName The current family name.
419   * @return The family dir of the mob files.
420   */
421  public static Path getMobFamilyPath(Configuration conf, TableName tableName, String familyName) {
422    return new Path(getMobRegionPath(conf, tableName), familyName);
423  }
424
425  /**
426   * Gets the family dir of the mob files. It's
427   * {HBASE_DIR}/mobdir/{namespace}/{tableName}/{regionEncodedName}/{columnFamilyName}.
428   * @param regionPath The path of mob region which is a dummy one.
429   * @param familyName The current family name.
430   * @return The family dir of the mob files.
431   */
432  public static Path getMobFamilyPath(Path regionPath, String familyName) {
433    return new Path(regionPath, familyName);
434  }
435
436  /**
437   * Gets the RegionInfo of the mob files. This is a dummy region. The mob files are not saved in a
438   * region in HBase. It's internally used only.
439   */
440  public static RegionInfo getMobRegionInfo(TableName tableName) {
441    return RegionInfoBuilder.newBuilder(tableName).setStartKey(MobConstants.MOB_REGION_NAME_BYTES)
442      .setEndKey(HConstants.EMPTY_END_ROW).setSplit(false).setRegionId(0).build();
443  }
444
445  /**
446   * Gets whether the current RegionInfo is a mob one.
447   * @param regionInfo The current RegionInfo.
448   * @return If true, the current RegionInfo is a mob one.
449   */
450  public static boolean isMobRegionInfo(RegionInfo regionInfo) {
451    return regionInfo == null
452      ? false
453      : getMobRegionInfo(regionInfo.getTable()).getEncodedName()
454        .equals(regionInfo.getEncodedName());
455  }
456
457  /**
458   * Gets whether the current region name follows the pattern of a mob region name.
459   * @param tableName  The current table name.
460   * @param regionName The current region name.
461   * @return True if the current region name follows the pattern of a mob region name.
462   */
463  public static boolean isMobRegionName(TableName tableName, byte[] regionName) {
464    return Bytes.equals(regionName, getMobRegionInfo(tableName).getRegionName());
465  }
466
467  /**
468   * Archives the mob files.
469   * @param conf       The current configuration.
470   * @param fs         The current file system.
471   * @param tableName  The table name.
472   * @param tableDir   The table directory.
473   * @param family     The name of the column family.
474   * @param storeFiles The files to be deleted.
475   */
476  public static boolean removeMobFiles(Configuration conf, FileSystem fs, TableName tableName,
477    Path tableDir, byte[] family, Collection<HStoreFile> storeFiles) {
478    try {
479      HFileArchiver.archiveStoreFiles(conf, fs, getMobRegionInfo(tableName), tableDir, family,
480        storeFiles);
481      LOG.info("Table {} {} expired mob files are deleted", tableName, storeFiles.size());
482      return true;
483    } catch (IOException e) {
484      LOG.error("Failed to delete the mob files, table {}", tableName, e);
485    }
486    return false;
487  }
488
489  /**
490   * Creates a mob reference KeyValue. The value of the mob reference KeyValue is mobCellValueSize +
491   * mobFileName.
492   * @param cell         The original Cell.
493   * @param fileName     The mob file name where the mob reference KeyValue is written.
494   * @param tableNameTag The tag of the current table name. It's very important in cloning the
495   *                     snapshot.
496   * @return The mob reference KeyValue.
497   */
498  public static Cell createMobRefCell(Cell cell, byte[] fileName, Tag tableNameTag) {
499    // Append the tags to the KeyValue.
500    // The key is same, the value is the filename of the mob file
501    List<Tag> tags = new ArrayList<>();
502    // Add the ref tag as the 1st one.
503    tags.add(MobConstants.MOB_REF_TAG);
504    // Add the tag of the source table name, this table is where this mob file is flushed
505    // from.
506    // It's very useful in cloning the snapshot. When reading from the cloning table, we need to
507    // find the original mob files by this table name. For details please see cloning
508    // snapshot for mob files.
509    tags.add(tableNameTag);
510    return createMobRefCell(cell, fileName, TagUtil.fromList(tags));
511  }
512
513  public static Cell createMobRefCell(Cell cell, byte[] fileName, byte[] refCellTags) {
514    byte[] refValue = Bytes.add(Bytes.toBytes(cell.getValueLength()), fileName);
515    return PrivateCellUtil.createCell(cell, refValue, TagUtil.concatTags(refCellTags, cell));
516  }
517
518  /**
519   * Creates a writer for the mob file in temp directory.
520   * @param conf          The current configuration.
521   * @param fs            The current file system.
522   * @param family        The descriptor of the current column family.
523   * @param date          The date string, its format is yyyymmmdd.
524   * @param basePath      The basic path for a temp directory.
525   * @param maxKeyCount   The key count.
526   * @param compression   The compression algorithm.
527   * @param startKey      The hex string of the start key.
528   * @param cacheConfig   The current cache config.
529   * @param cryptoContext The encryption context.
530   * @param isCompaction  If the writer is used in compaction.
531   * @return The writer for the mob file.
532   */
533  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
534    ColumnFamilyDescriptor family, String date, Path basePath, long maxKeyCount,
535    Compression.Algorithm compression, String startKey, CacheConfig cacheConfig,
536    Encryption.Context cryptoContext, boolean isCompaction, String regionName) throws IOException {
537    MobFileName mobFileName = MobFileName.create(startKey, date,
538      UUID.randomUUID().toString().replaceAll("-", ""), regionName);
539    return createWriter(conf, fs, family, mobFileName, basePath, maxKeyCount, compression,
540      cacheConfig, cryptoContext, isCompaction);
541  }
542
543  /**
544   * Creates a writer for the mob file in temp directory.
545   * @param conf          The current configuration.
546   * @param fs            The current file system.
547   * @param family        The descriptor of the current column family.
548   * @param mobFileName   The mob file name.
549   * @param basePath      The basic path for a temp directory.
550   * @param maxKeyCount   The key count.
551   * @param compression   The compression algorithm.
552   * @param cacheConfig   The current cache config.
553   * @param cryptoContext The encryption context.
554   * @param isCompaction  If the writer is used in compaction.
555   * @return The writer for the mob file.
556   */
557  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
558    ColumnFamilyDescriptor family, MobFileName mobFileName, Path basePath, long maxKeyCount,
559    Compression.Algorithm compression, CacheConfig cacheConfig, Encryption.Context cryptoContext,
560    boolean isCompaction) throws IOException {
561    return createWriter(conf, fs, family, new Path(basePath, mobFileName.getFileName()),
562      maxKeyCount, compression, cacheConfig, cryptoContext, StoreUtils.getChecksumType(conf),
563      StoreUtils.getBytesPerChecksum(conf), family.getBlocksize(), BloomType.NONE, isCompaction);
564  }
565
566  /**
567   * Creates a writer for the mob file in temp directory.
568   * @param conf             The current configuration.
569   * @param fs               The current file system.
570   * @param family           The descriptor of the current column family.
571   * @param path             The path for a temp directory.
572   * @param maxKeyCount      The key count.
573   * @param compression      The compression algorithm.
574   * @param cacheConfig      The current cache config.
575   * @param cryptoContext    The encryption context.
576   * @param checksumType     The checksum type.
577   * @param bytesPerChecksum The bytes per checksum.
578   * @param blocksize        The HFile block size.
579   * @param bloomType        The bloom filter type.
580   * @param isCompaction     If the writer is used in compaction.
581   * @return The writer for the mob file.
582   */
583  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
584    ColumnFamilyDescriptor family, Path path, long maxKeyCount, Compression.Algorithm compression,
585    CacheConfig cacheConfig, Encryption.Context cryptoContext, ChecksumType checksumType,
586    int bytesPerChecksum, int blocksize, BloomType bloomType, boolean isCompaction)
587    throws IOException {
588    return createWriter(conf, fs, family, path, maxKeyCount, compression, cacheConfig,
589      cryptoContext, checksumType, bytesPerChecksum, blocksize, bloomType, isCompaction, null);
590  }
591
592  /**
593   * Creates a writer for the mob file in temp directory.
594   * @param conf                  The current configuration.
595   * @param fs                    The current file system.
596   * @param family                The descriptor of the current column family.
597   * @param path                  The path for a temp directory.
598   * @param maxKeyCount           The key count.
599   * @param compression           The compression algorithm.
600   * @param cacheConfig           The current cache config.
601   * @param cryptoContext         The encryption context.
602   * @param checksumType          The checksum type.
603   * @param bytesPerChecksum      The bytes per checksum.
604   * @param blocksize             The HFile block size.
605   * @param bloomType             The bloom filter type.
606   * @param isCompaction          If the writer is used in compaction.
607   * @param writerCreationTracker to track the current writer in the store
608   * @return The writer for the mob file.
609   */
610  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
611    ColumnFamilyDescriptor family, Path path, long maxKeyCount, Compression.Algorithm compression,
612    CacheConfig cacheConfig, Encryption.Context cryptoContext, ChecksumType checksumType,
613    int bytesPerChecksum, int blocksize, BloomType bloomType, boolean isCompaction,
614    Consumer<Path> writerCreationTracker) throws IOException {
615    if (compression == null) {
616      compression = HFile.DEFAULT_COMPRESSION_ALGORITHM;
617    }
618    final CacheConfig writerCacheConf;
619    if (isCompaction) {
620      writerCacheConf = new CacheConfig(cacheConfig);
621      writerCacheConf.setCacheDataOnWrite(false);
622    } else {
623      writerCacheConf = cacheConfig;
624    }
625    HFileContext hFileContext = new HFileContextBuilder().withCompression(compression)
626      .withIncludesMvcc(true).withIncludesTags(true).withCompressTags(family.isCompressTags())
627      .withChecksumType(checksumType).withBytesPerCheckSum(bytesPerChecksum)
628      .withBlockSize(blocksize).withHBaseCheckSum(true)
629      .withDataBlockEncoding(family.getDataBlockEncoding()).withEncryptionContext(cryptoContext)
630      .withCreateTime(EnvironmentEdgeManager.currentTime()).build();
631
632    StoreFileWriter w = new StoreFileWriter.Builder(conf, writerCacheConf, fs).withFilePath(path)
633      .withBloomType(bloomType).withMaxKeyCount(maxKeyCount).withFileContext(hFileContext)
634      .withWriterCreationTracker(writerCreationTracker).build();
635    return w;
636  }
637
638  /**
639   * Indicates whether the current mob ref cell has a valid value. A mob ref cell has a mob
640   * reference tag. The value of a mob ref cell consists of two parts, real mob value length and mob
641   * file name. The real mob value length takes 4 bytes. The remaining part is the mob file name.
642   * @param cell The mob ref cell.
643   * @return True if the cell has a valid value.
644   */
645  public static boolean hasValidMobRefCellValue(Cell cell) {
646    return cell.getValueLength() > Bytes.SIZEOF_INT;
647  }
648
649  /**
650   * Gets the mob value length from the mob ref cell. A mob ref cell has a mob reference tag. The
651   * value of a mob ref cell consists of two parts, real mob value length and mob file name. The
652   * real mob value length takes 4 bytes. The remaining part is the mob file name.
653   * @param cell The mob ref cell.
654   * @return The real mob value length.
655   */
656  public static int getMobValueLength(Cell cell) {
657    return PrivateCellUtil.getValueAsInt(cell);
658  }
659
660  /**
661   * Gets the mob file name from the mob ref cell. A mob ref cell has a mob reference tag. The value
662   * of a mob ref cell consists of two parts, real mob value length and mob file name. The real mob
663   * value length takes 4 bytes. The remaining part is the mob file name.
664   * @param cell The mob ref cell.
665   * @return The mob file name.
666   */
667  public static String getMobFileName(Cell cell) {
668    return Bytes.toString(cell.getValueArray(), cell.getValueOffset() + Bytes.SIZEOF_INT,
669      cell.getValueLength() - Bytes.SIZEOF_INT);
670  }
671
672  /**
673   * Checks whether this table has mob-enabled columns.
674   * @param htd The current table descriptor.
675   * @return Whether this table has mob-enabled columns.
676   */
677  public static boolean hasMobColumns(TableDescriptor htd) {
678    ColumnFamilyDescriptor[] hcds = htd.getColumnFamilies();
679    for (ColumnFamilyDescriptor hcd : hcds) {
680      if (hcd.isMobEnabled()) {
681        return true;
682      }
683    }
684    return false;
685  }
686
687  /**
688   * Get list of Mob column families (if any exists)
689   * @param htd table descriptor
690   * @return list of Mob column families
691   */
692  public static List<ColumnFamilyDescriptor> getMobColumnFamilies(TableDescriptor htd) {
693
694    List<ColumnFamilyDescriptor> fams = new ArrayList<ColumnFamilyDescriptor>();
695    ColumnFamilyDescriptor[] hcds = htd.getColumnFamilies();
696    for (ColumnFamilyDescriptor hcd : hcds) {
697      if (hcd.isMobEnabled()) {
698        fams.add(hcd);
699      }
700    }
701    return fams;
702  }
703
704  /**
705   * Indicates whether return null value when the mob file is missing or corrupt. The information is
706   * set in the attribute "empty.value.on.mobcell.miss" of scan.
707   * @param scan The current scan.
708   * @return True if the readEmptyValueOnMobCellMiss is enabled.
709   */
710  public static boolean isReadEmptyValueOnMobCellMiss(Scan scan) {
711    byte[] readEmptyValueOnMobCellMiss =
712      scan.getAttribute(MobConstants.EMPTY_VALUE_ON_MOBCELL_MISS);
713    try {
714      return readEmptyValueOnMobCellMiss != null && Bytes.toBoolean(readEmptyValueOnMobCellMiss);
715    } catch (IllegalArgumentException e) {
716      return false;
717    }
718  }
719
720  /**
721   * Checks if the mob file is expired.
722   * @param column   The descriptor of the current column family.
723   * @param current  The current time.
724   * @param fileDate The date string parsed from the mob file name.
725   * @return True if the mob file is expired.
726   */
727  public static boolean isMobFileExpired(ColumnFamilyDescriptor column, long current,
728    String fileDate) {
729    if (column.getMinVersions() > 0) {
730      return false;
731    }
732    long timeToLive = column.getTimeToLive();
733    if (Integer.MAX_VALUE == timeToLive) {
734      return false;
735    }
736
737    Date expireDate = new Date(current - timeToLive * 1000);
738    expireDate = new Date(expireDate.getYear(), expireDate.getMonth(), expireDate.getDate());
739    try {
740      Date date = parseDate(fileDate);
741      if (date.getTime() < expireDate.getTime()) {
742        return true;
743      }
744    } catch (ParseException e) {
745      LOG.warn("Failed to parse the date " + fileDate, e);
746      return false;
747    }
748    return false;
749  }
750
751  /**
752   * Serialize a set of referenced mob hfiles
753   * @param mobRefSet to serialize, may be null
754   * @return byte array to i.e. put into store file metadata. will not be null
755   */
756  public static byte[] serializeMobFileRefs(SetMultimap<TableName, String> mobRefSet) {
757    if (mobRefSet != null && mobRefSet.size() > 0) {
758      // Here we rely on the fact that '/' and ',' are not allowed in either table names nor hfile
759      // names for serialization.
760      //
761      // exampleTable/filename1,filename2//example:table/filename5//otherTable/filename3,filename4
762      //
763      // to approximate the needed capacity we use the fact that there will usually be 1 table name
764      // and each mob filename is around 105 bytes. we pick an arbitrary number to cover "most"
765      // single table name lengths
766      StringBuilder sb = new StringBuilder(100 + mobRefSet.size() * 105);
767      boolean doubleSlash = false;
768      for (TableName tableName : mobRefSet.keySet()) {
769        if (doubleSlash) {
770          sb.append("//");
771        } else {
772          doubleSlash = true;
773        }
774        sb.append(tableName).append("/");
775        boolean comma = false;
776        for (String refs : mobRefSet.get(tableName)) {
777          if (comma) {
778            sb.append(",");
779          } else {
780            comma = true;
781          }
782          sb.append(refs);
783        }
784      }
785      return Bytes.toBytes(sb.toString());
786    } else {
787      return HStoreFile.NULL_VALUE;
788    }
789  }
790
791  /**
792   * Deserialize the set of referenced mob hfiles from store file metadata.
793   * @param bytes compatibly serialized data. can not be null
794   * @return a setmultimap of original table to list of hfile names. will be empty if no values.
795   * @throws IllegalStateException if there are values but no table name
796   */
797  public static ImmutableSetMultimap.Builder<TableName, String> deserializeMobFileRefs(byte[] bytes)
798    throws IllegalStateException {
799    ImmutableSetMultimap.Builder<TableName, String> map = ImmutableSetMultimap.builder();
800    if (bytes.length > 1) {
801      // TODO avoid turning the tablename pieces in to strings.
802      String s = Bytes.toString(bytes);
803      String[] tables = s.split("//");
804      for (String tableEnc : tables) {
805        final int delim = tableEnc.indexOf('/');
806        if (delim <= 0) {
807          throw new IllegalStateException("MOB reference data does not match expected encoding: "
808            + "no table name included before list of mob refs.");
809        }
810        TableName table = TableName.valueOf(tableEnc.substring(0, delim));
811        String[] refs = tableEnc.substring(delim + 1).split(",");
812        map.putAll(table, refs);
813      }
814    } else {
815      if (LOG.isDebugEnabled()) {
816        // array length 1 should be the NULL_VALUE.
817        if (!Arrays.equals(HStoreFile.NULL_VALUE, bytes)) {
818          LOG.debug(
819            "Serialized MOB file refs array was treated as the placeholder 'no entries' but"
820              + " didn't have the expected placeholder byte. expected={} and actual={}",
821            Arrays.toString(HStoreFile.NULL_VALUE), Arrays.toString(bytes));
822        }
823      }
824
825    }
826    return map;
827  }
828
829}