/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mob;

import static org.apache.hadoop.hbase.mob.MobConstants.DEFAULT_MOB_CLEANER_BATCH_SIZE_UPPER_BOUND;
import static org.apache.hadoop.hbase.mob.MobConstants.MOB_CLEANER_BATCH_SIZE_UPPER_BOUND;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import java.util.function.Consumer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.TagType;
import org.apache.hadoop.hbase.TagUtil;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.crypto.Encryption;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
import org.apache.hadoop.hbase.regionserver.StoreUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSetMultimap;
import org.apache.hbase.thirdparty.com.google.common.collect.SetMultimap;

/**
 * The mob utilities
 */
@InterfaceAudience.Private
public final class MobUtils {

  private static final Logger LOG = LoggerFactory.getLogger(MobUtils.class);
  public static final String SEP = "_";

  // SimpleDateFormat is not thread-safe, so every thread works on its own instance.
  private static final ThreadLocal<SimpleDateFormat> LOCAL_FORMAT =
    ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyyMMdd"));

  /**
   * Private constructor to keep this class from being instantiated.
   */
  private MobUtils() {
  }

  /**
   * Formats a date to a string.
   * @param date The date.
   * @return The string format of the date, it's yyyyMMdd.
   */
  public static String formatDate(Date date) {
    return LOCAL_FORMAT.get().format(date);
  }

  /**
   * Parses the string to a date.
   * @param dateString The string format of a date, it's yyyyMMdd.
   * @return A date.
   * @throws ParseException if the string cannot be parsed with the yyyyMMdd pattern.
   */
  public static Date parseDate(String dateString) throws ParseException {
    return LOCAL_FORMAT.get().parse(dateString);
  }

  /**
   * Truncates the given instant to the start of its day (00:00:00.000) using the default
   * {@link Calendar}. The millisecond field is cleared as well, so the result is directly
   * comparable with the dates produced by {@link #parseDate(String)}.
   * @param timeMillis The instant, in milliseconds since the epoch.
   * @return The date at the beginning of the day that contains the instant.
   */
  private static Date startOfDay(long timeMillis) {
    Calendar calendar = Calendar.getInstance();
    calendar.setTimeInMillis(timeMillis);
    calendar.set(Calendar.HOUR_OF_DAY, 0);
    calendar.set(Calendar.MINUTE, 0);
    calendar.set(Calendar.SECOND, 0);
    // Without this a file dated exactly on the expiry day would compare as "older" by a few
    // milliseconds and be treated as expired too early.
    calendar.set(Calendar.MILLISECOND, 0);
    return calendar.getTime();
  }

  /**
   * Whether the current cell is a mob reference cell.
   * @param cell The current cell.
   * @return True if the cell has a mob reference tag, false if it doesn't.
   */
  public static boolean isMobReferenceCell(Cell cell) {
    return cell.getTagsLength() > 0
      && PrivateCellUtil.getTag(cell, TagType.MOB_REFERENCE_TAG_TYPE).isPresent();
  }

  /**
   * Gets the table name tag.
   * @param cell The current cell.
   * @return The table name tag, empty if the cell carries no tags or no table name tag.
   */
  private static Optional<Tag> getTableNameTag(Cell cell) {
    if (cell.getTagsLength() <= 0) {
      return Optional.empty();
    }
    return PrivateCellUtil.getTag(cell, TagType.MOB_TABLE_NAME_TAG_TYPE);
  }

  /**
   * Gets the table name from when this cell was written into a mob hfile as a string.
   * @param cell to extract tag from
   * @return table name as a string. empty if the tag is not found.
   */
  public static Optional<String> getTableNameString(Cell cell) {
    return getTableNameTag(cell).map(tag -> Tag.getValueAsString(tag));
  }

  /**
   * Get the table name from when this cell was written into a mob hfile as a TableName.
   * @param cell to extract tag from
   * @return name of table as a TableName. empty if the tag is not found.
   */
  public static Optional<TableName> getTableName(Cell cell) {
    Optional<Tag> maybe = getTableNameTag(cell);
    if (!maybe.isPresent()) {
      return Optional.empty();
    }
    final Tag tag = maybe.get();
    if (tag.hasArray()) {
      return Optional
        .of(TableName.valueOf(tag.getValueArray(), tag.getValueOffset(), tag.getValueLength()));
    }
    // TODO ByteBuffer handling in tags looks busted. revisit.
    // Work on a duplicate so the tag's own buffer position/limit are left untouched.
    ByteBuffer buffer = tag.getValueByteBuffer().duplicate();
    buffer.mark();
    // Set the limit before the position: position() rejects values beyond the current limit,
    // while limit() only has to stay within the capacity.
    buffer.limit(tag.getValueOffset() + tag.getValueLength());
    buffer.position(tag.getValueOffset());
    return Optional.of(TableName.valueOf(buffer));
  }

  /**
   * Whether the tag list has a mob reference tag.
   * @param tags The tag list.
   * @return True if the list has a mob reference tag, false if it doesn't.
   */
  public static boolean hasMobReferenceTag(List<Tag> tags) {
    for (Tag tag : tags) {
      if (tag.getType() == TagType.MOB_REFERENCE_TAG_TYPE) {
        return true;
      }
    }
    return false;
  }

  /**
   * Reads a boolean flag stored as a scan attribute.
   * @param scan          The current scan.
   * @param attributeName The name of the attribute holding the serialized boolean.
   * @return True only if the attribute is present and decodes to true. A missing attribute, or a
   *         value that {@link Bytes#toBoolean(byte[])} rejects, is treated as false.
   */
  private static boolean isScanAttributeTrue(Scan scan, String attributeName) {
    byte[] value = scan.getAttribute(attributeName);
    try {
      return value != null && Bytes.toBoolean(value);
    } catch (IllegalArgumentException ignored) {
      // A malformed attribute value is deliberately treated the same as an unset flag.
      return false;
    }
  }

  /**
   * Indicates whether it's a raw scan. The information is set in the attribute "hbase.mob.scan.raw"
   * of scan. For a mob cell, in a normal scan the scanners retrieves the mob cell from the mob
   * file. In a raw scan, the scanner directly returns cell in HBase without retrieve the one in the
   * mob file.
   * @param scan The current scan.
   * @return True if it's a raw scan.
   */
  public static boolean isRawMobScan(Scan scan) {
    return isScanAttributeTrue(scan, MobConstants.MOB_SCAN_RAW);
  }

  /**
   * Indicates whether it's a reference only scan. The information is set in the attribute
   * "hbase.mob.scan.ref.only" of scan. If it's a ref only scan, only the cells with ref tag are
   * returned.
   * @param scan The current scan.
   * @return True if it's a ref only scan.
   */
  public static boolean isRefOnlyScan(Scan scan) {
    return isScanAttributeTrue(scan, MobConstants.MOB_SCAN_REF_ONLY);
  }

  /**
   * Indicates whether the scan contains the information of caching blocks. The information is set
   * in the attribute "hbase.mob.cache.blocks" of scan.
   * @param scan The current scan.
   * @return True when the Scan attribute specifies to cache the MOB blocks.
   */
  public static boolean isCacheMobBlocks(Scan scan) {
    return isScanAttributeTrue(scan, MobConstants.MOB_CACHE_BLOCKS);
  }

  /**
   * Sets the attribute of caching blocks in the scan.
   * @param scan        The current scan.
   * @param cacheBlocks True, set the attribute of caching blocks into the scan, the scanner with
   *                    this scan caches blocks. False, the scanner doesn't cache blocks for this
   *                    scan.
   */
  public static void setCacheMobBlocks(Scan scan, boolean cacheBlocks) {
    scan.setAttribute(MobConstants.MOB_CACHE_BLOCKS, Bytes.toBytes(cacheBlocks));
  }

  /**
   * Cleans the expired mob files. Cleans the files whose creation date is older than the day of
   * (current - columnFamily.ttl). Nothing is done when the TTL of the family is
   * {@link Integer#MAX_VALUE}, i.e. not set.
   * <p>
   * Expired files are archived in batches whose size is bounded by the
   * {@code MOB_CLEANER_BATCH_SIZE_UPPER_BOUND} configuration value.
   * <p>
   * NOTE: this method does not inspect the minVersions of the column family itself; callers are
   * presumably expected to invoke it only for families whose minVersions is 0.
   * @param fs               The current file system.
   * @param conf             The current configuration.
   * @param tableName        The current table name.
   * @param columnDescriptor The descriptor of the current column family.
   * @param cacheConfig      The cacheConfig that disables the block cache.
   * @param current          The current time.
   * @throws IOException if listing the mob family directory fails for a reason other than the
   *                     directory being absent.
   */
  public static void cleanExpiredMobFiles(FileSystem fs, Configuration conf, TableName tableName,
    ColumnFamilyDescriptor columnDescriptor, CacheConfig cacheConfig, long current)
    throws IOException {
    long timeToLive = columnDescriptor.getTimeToLive();
    if (Integer.MAX_VALUE == timeToLive) {
      // no need to clean, because the TTL is not set.
      return;
    }

    // Mob file names only carry a day-granularity date, so compare against the start of the day.
    Date expireDate = startOfDay(current - timeToLive * 1000);

    LOG.info("MOB HFiles older than {} will be deleted!", expireDate.toGMTString());

    FileStatus[] stats = null;
    Path mobTableDir = CommonFSUtils.getTableDir(getMobHome(conf), tableName);
    Path path = getMobFamilyPath(conf, tableName, columnDescriptor.getNameAsString());
    try {
      stats = fs.listStatus(path);
    } catch (FileNotFoundException e) {
      LOG.warn("Failed to find the mob file " + path, e);
    }
    if (null == stats) {
      // no file found
      return;
    }

    // The batch size does not change while iterating, so read it once.
    final int batchSize =
      conf.getInt(MOB_CLEANER_BATCH_SIZE_UPPER_BOUND, DEFAULT_MOB_CLEANER_BATCH_SIZE_UPPER_BOUND);
    List<HStoreFile> filesToClean = new ArrayList<>();
    int deletedFileCount = 0;
    for (FileStatus file : stats) {
      String fileName = file.getPath().getName();
      try {
        if (HFileLink.isHFileLink(file.getPath())) {
          // For a link, the date is encoded in the name of the file it points to.
          HFileLink hfileLink = HFileLink.buildFromHFileLinkPattern(conf, file.getPath());
          fileName = hfileLink.getOriginPath().getName();
        }

        Date fileDate = parseDate(MobFileName.getDateFromName(fileName));

        LOG.debug("Checking file {}", fileName);
        if (fileDate.getTime() < expireDate.getTime()) {
          LOG.debug("{} is an expired file", fileName);
          filesToClean
            .add(new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true));
          if (filesToClean.size() >= batchSize) {
            if (
              removeMobFiles(conf, fs, tableName, mobTableDir, columnDescriptor.getName(),
                filesToClean)
            ) {
              deletedFileCount += filesToClean.size();
            }
            // The batch is dropped whether or not archiving succeeded.
            filesToClean.clear();
          }
        }
      } catch (Exception e) {
        // Best effort: a file that cannot be examined is skipped, the others are still processed.
        LOG.error("Cannot parse the fileName " + fileName, e);
      }
    }
    // Archive whatever is left in the last, partially filled batch.
    if (
      !filesToClean.isEmpty() && removeMobFiles(conf, fs, tableName, mobTableDir,
        columnDescriptor.getName(), filesToClean)
    ) {
      deletedFileCount += filesToClean.size();
    }
    LOG.info("Table {} {} expired mob files in total are deleted", tableName, deletedFileCount);
  }

  /**
   * Gets the root dir of the mob files. It's {HBASE_DIR}/mobdir.
   * @param conf The current configuration.
   * @return the root dir of the mob file.
   */
  public static Path getMobHome(Configuration conf) {
    Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR));
    return getMobHome(hbaseDir);
  }

  /**
   * Gets the root dir of the mob files under the qualified HBase root dir. It's {rootDir}/mobdir.
   * @param rootDir The qualified path of HBase root directory.
   * @return The root dir of the mob file.
   */
  public static Path getMobHome(Path rootDir) {
    return new Path(rootDir, MobConstants.MOB_DIR_NAME);
  }

  /**
   * Gets the qualified root dir of the mob files.
   * @param conf The current configuration.
   * @return The qualified root dir.
   * @throws IOException if the file system of the mob root dir cannot be obtained.
   */
  public static Path getQualifiedMobRootDir(Configuration conf) throws IOException {
    Path mobRootDir = getMobHome(conf);
    FileSystem fs = mobRootDir.getFileSystem(conf);
    return mobRootDir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
  }

  /**
   * Gets the table dir of the mob files under the qualified HBase root dir. It's
   * {rootDir}/mobdir/data/${namespace}/${tableName}
   * @param rootDir   The qualified path of HBase root directory.
   * @param tableName The name of table.
   * @return The table dir of the mob file.
   */
  public static Path getMobTableDir(Path rootDir, TableName tableName) {
    return CommonFSUtils.getTableDir(getMobHome(rootDir), tableName);
  }

  /**
   * Gets the region dir of the mob files. It's
   * {HBASE_DIR}/mobdir/data/{namespace}/{tableName}/{regionEncodedName}.
   * @param conf      The current configuration.
   * @param tableName The current table name.
   * @return The region dir of the mob files.
   */
  public static Path getMobRegionPath(Configuration conf, TableName tableName) {
    return getMobRegionPath(new Path(conf.get(HConstants.HBASE_DIR)), tableName);
  }

  /**
   * Gets the region dir of the mob files under the specified root dir. It's
   * {rootDir}/mobdir/data/{namespace}/{tableName}/{regionEncodedName}.
   * @param rootDir   The qualified path of HBase root directory.
   * @param tableName The current table name.
   * @return The region dir of the mob files.
   */
  public static Path getMobRegionPath(Path rootDir, TableName tableName) {
    Path tablePath = getMobTableDir(rootDir, tableName);
    return new Path(tablePath, getMobRegionInfo(tableName).getEncodedName());
  }

  /**
   * Gets the family dir of the mob files. It's
   * {HBASE_DIR}/mobdir/data/{namespace}/{tableName}/{regionEncodedName}/{columnFamilyName}.
   * @param conf       The current configuration.
   * @param tableName  The current table name.
   * @param familyName The current family name.
   * @return The family dir of the mob files.
   */
  public static Path getMobFamilyPath(Configuration conf, TableName tableName, String familyName) {
    return new Path(getMobRegionPath(conf, tableName), familyName);
  }

  /**
   * Gets the family dir of the mob files. It's {regionPath}/{columnFamilyName}.
   * @param regionPath The path of mob region which is a dummy one.
   * @param familyName The current family name.
   * @return The family dir of the mob files.
   */
  public static Path getMobFamilyPath(Path regionPath, String familyName) {
    return new Path(regionPath, familyName);
  }

  /**
   * Gets the RegionInfo of the mob files. This is a dummy region. The mob files are not saved in a
   * region in HBase. It's internally used only.
   * @param tableName The current table name.
   * @return The dummy RegionInfo that stands for the mob region of the table.
   */
  public static RegionInfo getMobRegionInfo(TableName tableName) {
    return RegionInfoBuilder.newBuilder(tableName).setStartKey(MobConstants.MOB_REGION_NAME_BYTES)
      .setEndKey(HConstants.EMPTY_END_ROW).setSplit(false).setRegionId(0).build();
  }

  /**
   * Gets whether the current RegionInfo is a mob one.
   * @param regionInfo The current RegionInfo, may be null.
   * @return If true, the current RegionInfo is a mob one. False for a null RegionInfo.
   */
  public static boolean isMobRegionInfo(RegionInfo regionInfo) {
    return regionInfo != null && getMobRegionInfo(regionInfo.getTable()).getEncodedName()
      .equals(regionInfo.getEncodedName());
  }

  /**
   * Gets whether the current region name follows the pattern of a mob region name.
   * @param tableName  The current table name.
   * @param regionName The current region name.
   * @return True if the current region name follows the pattern of a mob region name.
   */
  public static boolean isMobRegionName(TableName tableName, byte[] regionName) {
    return Bytes.equals(regionName, getMobRegionInfo(tableName).getRegionName());
  }

  /**
   * Archives the mob files.
   * @param conf       The current configuration.
   * @param fs         The current file system.
   * @param tableName  The table name.
   * @param tableDir   The table directory.
   * @param family     The name of the column family.
   * @param storeFiles The files to be deleted.
   * @return True if the files were archived, false if archiving failed with an IOException (the
   *         failure is logged, not rethrown).
   */
  public static boolean removeMobFiles(Configuration conf, FileSystem fs, TableName tableName,
    Path tableDir, byte[] family, Collection<HStoreFile> storeFiles) {
    try {
      HFileArchiver.archiveStoreFiles(conf, fs, getMobRegionInfo(tableName), tableDir, family,
        storeFiles);
      LOG.info("Table {} {} expired mob files are deleted", tableName, storeFiles.size());
      return true;
    } catch (IOException e) {
      LOG.error("Failed to delete the mob files, table {}", tableName, e);
      return false;
    }
  }

  /**
   * Creates a mob reference KeyValue. The value of the mob reference KeyValue is mobCellValueSize +
   * mobFileName.
   * @param cell         The original Cell.
   * @param fileName     The mob file name where the mob reference KeyValue is written.
   * @param tableNameTag The tag of the current table name. It's very important in cloning the
   *                     snapshot.
   * @return The mob reference KeyValue.
   */
  public static Cell createMobRefCell(Cell cell, byte[] fileName, Tag tableNameTag) {
    // Append the tags to the KeyValue.
    // The key is same, the value is the filename of the mob file
    List<Tag> tags = new ArrayList<>(2);
    // Add the ref tag as the 1st one.
    tags.add(MobConstants.MOB_REF_TAG);
    // Add the tag of the source table name, this table is where this mob file is flushed
    // from.
    // It's very useful in cloning the snapshot. When reading from the cloning table, we need to
    // find the original mob files by this table name. For details please see cloning
    // snapshot for mob files.
    tags.add(tableNameTag);
    return createMobRefCell(cell, fileName, TagUtil.fromList(tags));
  }

  /**
   * Creates a mob reference cell from already serialized reference tags. The value of the created
   * cell is the 4-byte value length of the original cell followed by the mob file name.
   * @param cell        The original Cell.
   * @param fileName    The mob file name where the mob reference cell is written.
   * @param refCellTags The serialized tags to put on the reference cell; they are concatenated
   *                    with the tags of the original cell.
   * @return The mob reference cell.
   */
  public static Cell createMobRefCell(Cell cell, byte[] fileName, byte[] refCellTags) {
    byte[] refValue = Bytes.add(Bytes.toBytes(cell.getValueLength()), fileName);
    return PrivateCellUtil.createCell(cell, refValue, TagUtil.concatTags(refCellTags, cell));
  }

  /**
   * Creates a writer for the mob file in temp directory.
   * @param conf          The current configuration.
   * @param fs            The current file system.
   * @param family        The descriptor of the current column family.
   * @param date          The date string, its format is yyyyMMdd.
   * @param basePath      The basic path for a temp directory.
   * @param maxKeyCount   The key count.
   * @param compression   The compression algorithm.
   * @param startKey      The hex string of the start key.
   * @param cacheConfig   The current cache config.
   * @param cryptoContext The encryption context.
   * @param isCompaction  If the writer is used in compaction.
   * @param regionName    The region name handed to {@link MobFileName} when the file name is
   *                      generated.
   * @return The writer for the mob file.
   */
  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
    ColumnFamilyDescriptor family, String date, Path basePath, long maxKeyCount,
    Compression.Algorithm compression, String startKey, CacheConfig cacheConfig,
    Encryption.Context cryptoContext, boolean isCompaction, String regionName) throws IOException {
    // A dash-less random UUID makes the generated file name unique.
    MobFileName mobFileName = MobFileName.create(startKey, date,
      UUID.randomUUID().toString().replaceAll("-", ""), regionName);
    return createWriter(conf, fs, family, mobFileName, basePath, maxKeyCount, compression,
      cacheConfig, cryptoContext, isCompaction);
  }

  /**
   * Creates a writer for the mob file in temp directory.
   * @param conf          The current configuration.
   * @param fs            The current file system.
   * @param family        The descriptor of the current column family.
   * @param mobFileName   The mob file name.
   * @param basePath      The basic path for a temp directory.
   * @param maxKeyCount   The key count.
   * @param compression   The compression algorithm.
   * @param cacheConfig   The current cache config.
   * @param cryptoContext The encryption context.
   * @param isCompaction  If the writer is used in compaction.
   * @return The writer for the mob file.
   */
  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
    ColumnFamilyDescriptor family, MobFileName mobFileName, Path basePath, long maxKeyCount,
    Compression.Algorithm compression, CacheConfig cacheConfig, Encryption.Context cryptoContext,
    boolean isCompaction) throws IOException {
    return createWriter(conf, fs, family, new Path(basePath, mobFileName.getFileName()),
      maxKeyCount, compression, cacheConfig, cryptoContext, StoreUtils.getChecksumType(conf),
      StoreUtils.getBytesPerChecksum(conf), family.getBlocksize(), BloomType.NONE, isCompaction);
  }

  /**
   * Creates a writer for the mob file in temp directory.
   * @param conf             The current configuration.
   * @param fs               The current file system.
   * @param family           The descriptor of the current column family.
   * @param path             The path for a temp directory.
   * @param maxKeyCount      The key count.
   * @param compression      The compression algorithm.
   * @param cacheConfig      The current cache config.
   * @param cryptoContext    The encryption context.
   * @param checksumType     The checksum type.
   * @param bytesPerChecksum The bytes per checksum.
   * @param blocksize        The HFile block size.
   * @param bloomType        The bloom filter type.
   * @param isCompaction     If the writer is used in compaction.
   * @return The writer for the mob file.
   */
  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
    ColumnFamilyDescriptor family, Path path, long maxKeyCount, Compression.Algorithm compression,
    CacheConfig cacheConfig, Encryption.Context cryptoContext, ChecksumType checksumType,
    int bytesPerChecksum, int blocksize, BloomType bloomType, boolean isCompaction)
    throws IOException {
    return createWriter(conf, fs, family, path, maxKeyCount, compression, cacheConfig,
      cryptoContext, checksumType, bytesPerChecksum, blocksize, bloomType, isCompaction, null);
  }

  /**
   * Creates a writer for the mob file in temp directory.
   * @param conf                  The current configuration.
   * @param fs                    The current file system.
   * @param family                The descriptor of the current column family.
   * @param path                  The path for a temp directory.
   * @param maxKeyCount           The key count.
   * @param compression           The compression algorithm. When null,
   *                              {@link HFile#DEFAULT_COMPRESSION_ALGORITHM} is used.
   * @param cacheConfig           The current cache config.
   * @param cryptoContext         The encryption context.
   * @param checksumType          The checksum type.
   * @param bytesPerChecksum      The bytes per checksum.
   * @param blocksize             The HFile block size.
   * @param bloomType             The bloom filter type.
   * @param isCompaction          If the writer is used in compaction.
   * @param writerCreationTracker to track the current writer in the store
   * @return The writer for the mob file.
   */
  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
    ColumnFamilyDescriptor family, Path path, long maxKeyCount, Compression.Algorithm compression,
    CacheConfig cacheConfig, Encryption.Context cryptoContext, ChecksumType checksumType,
    int bytesPerChecksum, int blocksize, BloomType bloomType, boolean isCompaction,
    Consumer<Path> writerCreationTracker) throws IOException {
    if (compression == null) {
      compression = HFile.DEFAULT_COMPRESSION_ALGORITHM;
    }
    final CacheConfig writerCacheConf;
    if (isCompaction) {
      // Work on a copy so that turning off cache-on-write does not alter the caller's config.
      writerCacheConf = new CacheConfig(cacheConfig);
      writerCacheConf.setCacheDataOnWrite(false);
    } else {
      writerCacheConf = cacheConfig;
    }
    HFileContext hFileContext = new HFileContextBuilder().withCompression(compression)
      .withIncludesMvcc(true).withIncludesTags(true).withCompressTags(family.isCompressTags())
      .withChecksumType(checksumType).withBytesPerCheckSum(bytesPerChecksum)
      .withBlockSize(blocksize).withHBaseCheckSum(true)
      .withDataBlockEncoding(family.getDataBlockEncoding()).withEncryptionContext(cryptoContext)
      .withCreateTime(EnvironmentEdgeManager.currentTime()).build();

    return new StoreFileWriter.Builder(conf, writerCacheConf, fs).withFilePath(path)
      .withBloomType(bloomType).withMaxKeyCount(maxKeyCount).withFileContext(hFileContext)
      .withWriterCreationTracker(writerCreationTracker).build();
  }

  /**
   * Indicates whether the current mob ref cell has a valid value. A mob ref cell has a mob
   * reference tag. The value of a mob ref cell consists of two parts, real mob value length and mob
   * file name. The real mob value length takes 4 bytes. The remaining part is the mob file name.
   * @param cell The mob ref cell.
   * @return True if the cell has a valid value.
   */
  public static boolean hasValidMobRefCellValue(Cell cell) {
    return cell.getValueLength() > Bytes.SIZEOF_INT;
  }

  /**
   * Gets the mob value length from the mob ref cell. A mob ref cell has a mob reference tag. The
   * value of a mob ref cell consists of two parts, real mob value length and mob file name. The
   * real mob value length takes 4 bytes. The remaining part is the mob file name.
   * @param cell The mob ref cell.
   * @return The real mob value length.
   */
  public static int getMobValueLength(Cell cell) {
    return PrivateCellUtil.getValueAsInt(cell);
  }

  /**
   * Gets the mob file name from the mob ref cell. A mob ref cell has a mob reference tag. The value
   * of a mob ref cell consists of two parts, real mob value length and mob file name. The real mob
   * value length takes 4 bytes. The remaining part is the mob file name.
   * @param cell The mob ref cell.
   * @return The mob file name.
   */
  public static String getMobFileName(Cell cell) {
    // Skip the leading 4-byte value length; the rest of the value is the file name.
    return Bytes.toString(cell.getValueArray(), cell.getValueOffset() + Bytes.SIZEOF_INT,
      cell.getValueLength() - Bytes.SIZEOF_INT);
  }

  /**
   * Checks whether this table has mob-enabled columns.
   * @param htd The current table descriptor.
   * @return Whether this table has mob-enabled columns.
   */
  public static boolean hasMobColumns(TableDescriptor htd) {
    for (ColumnFamilyDescriptor hcd : htd.getColumnFamilies()) {
      if (hcd.isMobEnabled()) {
        return true;
      }
    }
    return false;
  }

  /**
   * Get list of Mob column families (if any exists)
   * @param htd table descriptor
   * @return list of Mob column families, empty if the table has none
   */
  public static List<ColumnFamilyDescriptor> getMobColumnFamilies(TableDescriptor htd) {
    List<ColumnFamilyDescriptor> fams = new ArrayList<>();
    for (ColumnFamilyDescriptor hcd : htd.getColumnFamilies()) {
      if (hcd.isMobEnabled()) {
        fams.add(hcd);
      }
    }
    return fams;
  }

  /**
   * Indicates whether return null value when the mob file is missing or corrupt. The information is
   * set in the attribute "empty.value.on.mobcell.miss" of scan.
   * @param scan The current scan.
   * @return True if the readEmptyValueOnMobCellMiss is enabled.
   */
  public static boolean isReadEmptyValueOnMobCellMiss(Scan scan) {
    return isScanAttributeTrue(scan, MobConstants.EMPTY_VALUE_ON_MOBCELL_MISS);
  }

  /**
   * Checks if the mob file is expired. A file is never expired when the minVersions of the column
   * family is greater than 0 or when the TTL is {@link Integer#MAX_VALUE}, i.e. not set.
   * @param column   The descriptor of the current column family.
   * @param current  The current time.
   * @param fileDate The date string parsed from the mob file name.
   * @return True if the mob file is expired. False also when the date string cannot be parsed.
   */
  public static boolean isMobFileExpired(ColumnFamilyDescriptor column, long current,
    String fileDate) {
    if (column.getMinVersions() > 0) {
      return false;
    }
    long timeToLive = column.getTimeToLive();
    if (Integer.MAX_VALUE == timeToLive) {
      return false;
    }

    // Mob file names only carry a day-granularity date, so compare against the start of the day.
    Date expireDate = startOfDay(current - timeToLive * 1000);
    try {
      return parseDate(fileDate).getTime() < expireDate.getTime();
    } catch (ParseException e) {
      LOG.warn("Failed to parse the date " + fileDate, e);
      return false;
    }
  }

  /**
   * Serialize a set of referenced mob hfiles
   * @param mobRefSet to serialize, may be null
   * @return byte array to i.e. put into store file metadata. will not be null
   */
  public static byte[] serializeMobFileRefs(SetMultimap<TableName, String> mobRefSet) {
    if (mobRefSet == null || mobRefSet.isEmpty()) {
      return HStoreFile.NULL_VALUE;
    }
    // Here we rely on the fact that '/' and ',' are not allowed in either table names nor hfile
    // names for serialization.
    //
    // exampleTable/filename1,filename2//example:table/filename5//otherTable/filename3,filename4
    //
    // to approximate the needed capacity we use the fact that there will usually be 1 table name
    // and each mob filename is around 105 bytes. we pick an arbitrary number to cover "most"
    // single table name lengths
    StringBuilder sb = new StringBuilder(100 + mobRefSet.size() * 105);
    boolean firstTable = true;
    for (TableName tableName : mobRefSet.keySet()) {
      if (!firstTable) {
        sb.append("//");
      }
      firstTable = false;
      sb.append(tableName).append("/").append(String.join(",", mobRefSet.get(tableName)));
    }
    return Bytes.toBytes(sb.toString());
  }

  /**
   * Deserialize the set of referenced mob hfiles from store file metadata.
   * @param bytes compatibly serialized data. can not be null
   * @return a builder for a setmultimap of original table to hfile names. will build an empty
   *         multimap if there are no values.
   * @throws IllegalStateException if there are values but no table name
   */
  public static ImmutableSetMultimap.Builder<TableName, String> deserializeMobFileRefs(byte[] bytes)
    throws IllegalStateException {
    ImmutableSetMultimap.Builder<TableName, String> map = ImmutableSetMultimap.builder();
    if (bytes.length > 1) {
      // TODO avoid turning the tablename pieces in to strings.
      String s = Bytes.toString(bytes);
      for (String tableEnc : s.split("//")) {
        // Everything before the first '/' is the table name, the rest is the list of refs.
        final int delim = tableEnc.indexOf('/');
        if (delim <= 0) {
          throw new IllegalStateException("MOB reference data does not match expected encoding: "
            + "no table name included before list of mob refs.");
        }
        TableName table = TableName.valueOf(tableEnc.substring(0, delim));
        String[] refs = tableEnc.substring(delim + 1).split(",");
        map.putAll(table, refs);
      }
    } else if (LOG.isDebugEnabled() && !Arrays.equals(HStoreFile.NULL_VALUE, bytes)) {
      // array length 1 should be the NULL_VALUE.
      LOG.debug(
        "Serialized MOB file refs array was treated as the placeholder 'no entries' but"
          + " didn't have the expected placeholder byte. expected={} and actual={}",
        Arrays.toString(HStoreFile.NULL_VALUE), Arrays.toString(bytes));
    }
    return map;
  }

}