/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.ByteBufferExtendedCell;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.MetaCellComparator;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.crypto.Encryption;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable;
import org.apache.hadoop.hbase.security.EncryptionUtil;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.Writable;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Common functionality needed by all versions of {@link HFile} writers.
 */
@InterfaceAudience.Private
public class HFileWriterImpl implements HFile.Writer {
  private static final Logger LOG = LoggerFactory.getLogger(HFileWriterImpl.class);

  private static final long UNSET = -1;

  /** if this feature is enabled, preCalculate encoded data size before real encoding happens */
  public static final String UNIFIED_ENCODED_BLOCKSIZE_RATIO =
    "hbase.writer.unified.encoded.blocksize.ratio";

  /** Block size limit after encoding, used to unify encoded block Cache entry size */
  private final int encodedBlockSizeLimit;

  /** The Cell previously appended. Becomes the last cell in the file. */
  protected Cell lastCell = null;

  /** FileSystem stream to write into. */
  protected FSDataOutputStream outputStream;

  /** True if we opened the <code>outputStream</code> (and so will close it). */
  protected final boolean closeOutputStream;

  /** A "file info" block: a key-value map of file-wide metadata. */
  protected HFileInfo fileInfo = new HFileInfo();

  /** Total # of key/value entries, i.e. how many times add() was called. */
  protected long entryCount = 0;

  /** Used for calculating the average key length. */
  protected long totalKeyLength = 0;

  /** Used for calculating the average value length. */
  protected long totalValueLength = 0;

  /** Total uncompressed bytes, maybe calculate a compression ratio later. */
  protected long totalUncompressedBytes = 0;

  /** Meta block names. */
  protected List<byte[]> metaNames = new ArrayList<>();

  /** {@link Writable}s representing meta block data. */
  protected List<Writable> metaData = new ArrayList<>();

  /**
   * First cell in a block. This reference should be short-lived since we write hfiles in a burst.
   */
  protected Cell firstCellInBlock = null;

  /** May be null if we were passed a stream. */
  protected final Path path;

  /** Cache configuration for caching data on write. */
  protected final CacheConfig cacheConf;

  /**
   * Name for this object used when logging or in toString. Is either the result of a toString on
   * stream or else name of passed file Path.
   */
  protected final String name;

  /**
   * The data block encoding which will be used. {@link NoOpDataBlockEncoder#INSTANCE} if there is
   * no encoding.
   */
  protected final HFileDataBlockEncoder blockEncoder;

  protected final HFileContext hFileContext;

  private int maxTagsLength = 0;

  /** KeyValue version in FileInfo */
  public static final byte[] KEY_VALUE_VERSION = Bytes.toBytes("KEY_VALUE_VERSION");

  /** Version for KeyValue which includes memstore timestamp */
  public static final int KEY_VALUE_VER_WITH_MEMSTORE = 1;

  /** Inline block writers for multi-level block index and compound Blooms. */
  private List<InlineBlockWriter> inlineBlockWriters = new ArrayList<>();

  /** block writer */
  protected HFileBlock.Writer blockWriter;

  private HFileBlockIndex.BlockIndexWriter dataBlockIndexWriter;
  private HFileBlockIndex.BlockIndexWriter metaBlockIndexWriter;

  /** The offset of the first data block or UNSET (-1) if the file is empty. */
  private long firstDataBlockOffset = UNSET;

  /** The offset of the last data block or UNSET (-1) if the file is empty. */
  protected long lastDataBlockOffset = UNSET;

  /**
   * The last (stop) Cell of the previous data block. This reference should be short-lived since we
   * write hfiles in a burst.
   */
  private Cell lastCellOfPreviousBlock = null;

  /** Additional data items to be written to the "load-on-open" section. */
  private List<BlockWritable> additionalLoadOnOpenData = new ArrayList<>();

  protected long maxMemstoreTS = 0;

  public HFileWriterImpl(final Configuration conf, CacheConfig cacheConf, Path path,
    FSDataOutputStream outputStream, HFileContext fileContext) {
    this.outputStream = outputStream;
    this.path = path;
    this.name = path != null ? path.getName() : outputStream.toString();
    this.hFileContext = fileContext;
    DataBlockEncoding encoding = hFileContext.getDataBlockEncoding();
    if (encoding != DataBlockEncoding.NONE) {
      this.blockEncoder = new HFileDataBlockEncoderImpl(encoding);
    } else {
      this.blockEncoder = NoOpDataBlockEncoder.INSTANCE;
    }
    closeOutputStream = path != null;
    this.cacheConf = cacheConf;
    float encodeBlockSizeRatio = conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 1f);
    this.encodedBlockSizeLimit = (int) (hFileContext.getBlocksize() * encodeBlockSizeRatio);
    finishInit(conf);
    if (LOG.isTraceEnabled()) {
      LOG.trace("Writer" + (path != null ? " for " + path : "") + " initialized with cacheConf: "
        + cacheConf + " fileContext: " + fileContext);
    }
  }

  /**
   * Add to the file info. All added key/value pairs can be obtained using
   * {@link HFile.Reader#getHFileInfo()}.
   * @param k Key
   * @param v Value
   * @throws IOException in case the key or the value is invalid
   */
  @Override
  public void appendFileInfo(final byte[] k, final byte[] v) throws IOException {
    fileInfo.append(k, v, true);
  }

  /**
   * Sets the file info offset in the trailer, finishes up populating fields in the file info, and
   * writes the file info into the given data output. The reason the data output is not always
   * {@link #outputStream} is that we store file info as a block in version 2.
   * @param trailer fixed file trailer
   * @param out     the data output to write the file info to
   */
  protected final void writeFileInfo(FixedFileTrailer trailer, DataOutputStream out)
    throws IOException {
    trailer.setFileInfoOffset(outputStream.getPos());
    finishFileInfo();
    long startTime = EnvironmentEdgeManager.currentTime();
    fileInfo.write(out);
    HFile.updateWriteLatency(EnvironmentEdgeManager.currentTime() - startTime);
  }

  public long getPos() throws IOException {
    return outputStream.getPos();
  }

  /**
   * Checks that the given Cell's key does not violate the key order.
   * @param cell Cell whose key to check.
   * @return true if the key is a duplicate
   * @throws IOException if the key or the key order is wrong
   */
  protected boolean checkKey(final Cell cell) throws IOException {
    boolean isDuplicateKey = false;

    if (cell == null) {
      throw new IOException("Key cannot be null or empty");
    }
    if (lastCell != null) {
      int keyComp = PrivateCellUtil.compareKeyIgnoresMvcc(this.hFileContext.getCellComparator(),
        lastCell, cell);
      if (keyComp > 0) {
        String message = getLexicalErrorMessage(cell);
        throw new IOException(message);
      } else if (keyComp == 0) {
        isDuplicateKey = true;
      }
    }
    return isDuplicateKey;
  }

  private String getLexicalErrorMessage(Cell cell) {
    StringBuilder sb = new StringBuilder();
    sb.append("Added a key not lexically larger than previous. Current cell = ");
    sb.append(cell);
    sb.append(", lastCell = ");
    sb.append(lastCell);
    // file context includes HFile path and optionally table and CF of file being written
    sb.append(", fileContext=");
    sb.append(hFileContext);
    return sb.toString();
  }

  /** Checks the given value for validity. */
  protected void checkValue(final byte[] value, final int offset, final int length)
    throws IOException {
    if (value == null) {
      throw new IOException("Value cannot be null");
    }
  }

  /** Returns Path or null if we were passed a stream rather than a Path. */
  @Override
  public Path getPath() {
    return path;
  }

  @Override
  public String toString() {
    return "writer=" + (path != null ? path.toString() : null) + ", name=" + name + ", compression="
      + hFileContext.getCompression().getName();
  }

  public static Compression.Algorithm compressionByName(String algoName) {
    if (algoName == null) {
      return HFile.DEFAULT_COMPRESSION_ALGORITHM;
    }
    return Compression.getCompressionAlgorithmByName(algoName);
  }

  /** A helper method to create HFile output streams in constructors */
  protected static FSDataOutputStream createOutputStream(Configuration conf, FileSystem fs,
    Path path, InetSocketAddress[] favoredNodes) throws IOException {
    FsPermission perms = CommonFSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
    return FSUtils.create(conf, fs, path, perms, favoredNodes);
  }

  /** Additional initialization steps */
  protected void finishInit(final Configuration conf) {
    if (blockWriter != null) {
      throw new IllegalStateException("finishInit called twice");
    }
    blockWriter =
      new HFileBlock.Writer(conf, blockEncoder, hFileContext, cacheConf.getByteBuffAllocator());
    // Data block index writer
    boolean cacheIndexesOnWrite = cacheConf.shouldCacheIndexesOnWrite();
    dataBlockIndexWriter = new HFileBlockIndex.BlockIndexWriter(blockWriter,
      cacheIndexesOnWrite ? cacheConf : null, cacheIndexesOnWrite ? name : null);
    dataBlockIndexWriter.setMaxChunkSize(HFileBlockIndex.getMaxChunkSize(conf));
    dataBlockIndexWriter.setMinIndexNumEntries(HFileBlockIndex.getMinIndexNumEntries(conf));
    inlineBlockWriters.add(dataBlockIndexWriter);

    // Meta data block index writer
    metaBlockIndexWriter = new HFileBlockIndex.BlockIndexWriter();
    LOG.trace("Initialized with {}", cacheConf);
  }

  /**
   * At a block boundary, write all the inline blocks and open a new block.
   */
  protected void checkBlockBoundary() throws IOException {
    // For an encoder like prefixTree, the encoded size is not available, so we have to compare
    // both the encoded size and the unencoded size against the blocksize limit.
    if (
      blockWriter.encodedBlockSizeWritten() >= encodedBlockSizeLimit
        || blockWriter.blockSizeWritten() >= hFileContext.getBlocksize()
    ) {
      finishBlock();
      writeInlineBlocks(false);
      newBlock();
    }
  }

  /** Clean up the data block that is currently being written. */
  private void finishBlock() throws IOException {
    if (!blockWriter.isWriting() || blockWriter.blockSizeWritten() == 0) {
      return;
    }

    // Update the first data block offset if UNSET; used when scanning.
    if (firstDataBlockOffset == UNSET) {
      firstDataBlockOffset = outputStream.getPos();
    }
    // Update the last data block offset each time through here.
    lastDataBlockOffset = outputStream.getPos();
    blockWriter.writeHeaderAndData(outputStream);
    int onDiskSize = blockWriter.getOnDiskSizeWithHeader();
    Cell indexEntry =
      getMidpoint(this.hFileContext.getCellComparator(), lastCellOfPreviousBlock, firstCellInBlock);
    dataBlockIndexWriter.addEntry(PrivateCellUtil.getCellKeySerializedAsKeyValueKey(indexEntry),
      lastDataBlockOffset, onDiskSize);
    totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader();
    if (cacheConf.shouldCacheDataOnWrite()) {
      doCacheOnWrite(lastDataBlockOffset);
    }
  }

  /**
   * Try to return a Cell that falls between <code>left</code> and <code>right</code> but that is
   * shorter; i.e. takes up less space. This trick is used when building the HFile block index.
   * It's an optimization. It does not always work. In that case we'll just return the
   * <code>right</code> cell.
   * @return A cell that sorts between <code>left</code> and <code>right</code>.
   */
  public static Cell getMidpoint(final CellComparator comparator, final Cell left,
    final Cell right) {
    if (right == null) {
      throw new IllegalArgumentException("right cell can not be null");
    }
    if (left == null) {
      return right;
    }
    // If Cells from meta table, don't mess around. meta table Cells have schema
    // (table,startrow,hash) so can't be treated as plain byte arrays. Just skip
    // out without trying to do this optimization.
    if (comparator instanceof MetaCellComparator) {
      return right;
    }
    byte[] midRow;
    boolean bufferBacked =
      left instanceof ByteBufferExtendedCell && right instanceof ByteBufferExtendedCell;
    if (bufferBacked) {
      midRow = getMinimumMidpointArray(((ByteBufferExtendedCell) left).getRowByteBuffer(),
        ((ByteBufferExtendedCell) left).getRowPosition(), left.getRowLength(),
        ((ByteBufferExtendedCell) right).getRowByteBuffer(),
        ((ByteBufferExtendedCell) right).getRowPosition(), right.getRowLength());
    } else {
      midRow = getMinimumMidpointArray(left.getRowArray(), left.getRowOffset(), left.getRowLength(),
        right.getRowArray(), right.getRowOffset(), right.getRowLength());
    }
    if (midRow != null) {
      return PrivateCellUtil.createFirstOnRow(midRow);
    }
    // Rows are the same. Compare on families.
    if (bufferBacked) {
      midRow = getMinimumMidpointArray(((ByteBufferExtendedCell) left).getFamilyByteBuffer(),
        ((ByteBufferExtendedCell) left).getFamilyPosition(), left.getFamilyLength(),
        ((ByteBufferExtendedCell) right).getFamilyByteBuffer(),
        ((ByteBufferExtendedCell) right).getFamilyPosition(), right.getFamilyLength());
    } else {
      midRow = getMinimumMidpointArray(left.getFamilyArray(), left.getFamilyOffset(),
        left.getFamilyLength(), right.getFamilyArray(), right.getFamilyOffset(),
        right.getFamilyLength());
    }
    if (midRow != null) {
      return PrivateCellUtil.createFirstOnRowFamily(right, midRow, 0, midRow.length);
    }
    // Families are the same. Compare on qualifiers.
    if (bufferBacked) {
      midRow = getMinimumMidpointArray(((ByteBufferExtendedCell) left).getQualifierByteBuffer(),
        ((ByteBufferExtendedCell) left).getQualifierPosition(), left.getQualifierLength(),
        ((ByteBufferExtendedCell) right).getQualifierByteBuffer(),
        ((ByteBufferExtendedCell) right).getQualifierPosition(), right.getQualifierLength());
    } else {
      midRow = getMinimumMidpointArray(left.getQualifierArray(), left.getQualifierOffset(),
        left.getQualifierLength(), right.getQualifierArray(), right.getQualifierOffset(),
        right.getQualifierLength());
    }
    if (midRow != null) {
      return PrivateCellUtil.createFirstOnRowCol(right, midRow, 0, midRow.length);
    }
    // No opportunity for optimization. Just return the right key.
    return right;
  }

  /**
   * Try to get a byte array that falls between left and right and is as short as possible in
   * lexicographical order.
   * @return a new array that is between left and right and minimally sized, else null if
   *         left == right.
   */
  private static byte[] getMinimumMidpointArray(final byte[] leftArray, final int leftOffset,
    final int leftLength, final byte[] rightArray, final int rightOffset, final int rightLength) {
    int minLength = leftLength < rightLength ? leftLength : rightLength;
    int diffIdx = 0;
    for (; diffIdx < minLength; diffIdx++) {
      byte leftByte = leftArray[leftOffset + diffIdx];
      byte rightByte = rightArray[rightOffset + diffIdx];
      if ((leftByte & 0xff) > (rightByte & 0xff)) {
        throw new IllegalArgumentException("Left byte array sorts after right row; left="
          + Bytes.toStringBinary(leftArray, leftOffset, leftLength) + ", right="
          + Bytes.toStringBinary(rightArray, rightOffset, rightLength));
      } else if (leftByte != rightByte) {
        break;
      }
    }
    if (diffIdx == minLength) {
      if (leftLength > rightLength) {
        // right is prefix of left
        throw new IllegalArgumentException("Left byte array sorts after right row; left="
          + Bytes.toStringBinary(leftArray, leftOffset, leftLength) + ", right="
          + Bytes.toStringBinary(rightArray, rightOffset, rightLength));
      } else if (leftLength < rightLength) {
        // left is prefix of right.
        byte[] minimumMidpointArray = new byte[minLength + 1];
        System.arraycopy(rightArray, rightOffset, minimumMidpointArray, 0, minLength + 1);
        minimumMidpointArray[minLength] = 0x00;
        return minimumMidpointArray;
      } else {
        // left == right
        return null;
      }
    }
    // Note that left[diffIdx] can never be equal to 0xff since left < right
    byte[] minimumMidpointArray = new byte[diffIdx + 1];
    System.arraycopy(leftArray, leftOffset, minimumMidpointArray, 0, diffIdx + 1);
    minimumMidpointArray[diffIdx] = (byte) (minimumMidpointArray[diffIdx] + 1);
    return minimumMidpointArray;
  }

  /**
   * Try to create a new byte array that falls between left and right and is as short as possible
   * in lexicographical order.
   * @return a new array that is between left and right and minimally sized, else null if
   *         left == right.
   */
  private static byte[] getMinimumMidpointArray(ByteBuffer left, int leftOffset, int leftLength,
    ByteBuffer right, int rightOffset, int rightLength) {
    int minLength = leftLength < rightLength ? leftLength : rightLength;
    int diffIdx = 0;
    for (; diffIdx < minLength; diffIdx++) {
      int leftByte = ByteBufferUtils.toByte(left, leftOffset + diffIdx);
      int rightByte = ByteBufferUtils.toByte(right, rightOffset + diffIdx);
      if ((leftByte & 0xff) > (rightByte & 0xff)) {
        throw new IllegalArgumentException("Left byte array sorts after right row; left="
          + ByteBufferUtils.toStringBinary(left, leftOffset, leftLength) + ", right="
          + ByteBufferUtils.toStringBinary(right, rightOffset, rightLength));
      } else if (leftByte != rightByte) {
        break;
      }
    }
    if (diffIdx == minLength) {
      if (leftLength > rightLength) {
        // right is prefix of left
        throw new IllegalArgumentException("Left byte array sorts after right row; left="
          + ByteBufferUtils.toStringBinary(left, leftOffset, leftLength) + ", right="
          + ByteBufferUtils.toStringBinary(right, rightOffset, rightLength));
      } else if (leftLength < rightLength) {
        // left is prefix of right.
        byte[] minimumMidpointArray = new byte[minLength + 1];
        ByteBufferUtils.copyFromBufferToArray(minimumMidpointArray, right, rightOffset, 0,
          minLength + 1);
        minimumMidpointArray[minLength] = 0x00;
        return minimumMidpointArray;
      } else {
        // left == right
        return null;
      }
    }
    // Note that left[diffIdx] can never be equal to 0xff since left < right
    byte[] minimumMidpointArray = new byte[diffIdx + 1];
    ByteBufferUtils.copyFromBufferToArray(minimumMidpointArray, left, leftOffset, 0, diffIdx + 1);
    minimumMidpointArray[diffIdx] = (byte) (minimumMidpointArray[diffIdx] + 1);
    return minimumMidpointArray;
  }

  /** Gives inline block writers an opportunity to contribute blocks. */
  private void writeInlineBlocks(boolean closing) throws IOException {
    for (InlineBlockWriter ibw : inlineBlockWriters) {
      while (ibw.shouldWriteBlock(closing)) {
        long offset = outputStream.getPos();
        boolean cacheThisBlock = ibw.getCacheOnWrite();
        ibw.writeInlineBlock(blockWriter.startWriting(ibw.getInlineBlockType()));
        blockWriter.writeHeaderAndData(outputStream);
        ibw.blockWritten(offset, blockWriter.getOnDiskSizeWithHeader(),
          blockWriter.getUncompressedSizeWithoutHeader());
        totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader();

        if (cacheThisBlock) {
          doCacheOnWrite(offset);
        }
      }
    }
  }

  /**
   * Caches the last written HFile block.
   * @param offset the offset of the block we want to cache. Used to determine the cache key.
   */
  private void doCacheOnWrite(long offset) {
    cacheConf.getBlockCache().ifPresent(cache -> {
      HFileBlock cacheFormatBlock = blockWriter.getBlockForCaching(cacheConf);
      try {
        cache.cacheBlock(new BlockCacheKey(name, offset, true, cacheFormatBlock.getBlockType()),
          cacheFormatBlock);
      } finally {
        // refCnt will automatically increase when the block is added to the cache,
        // see RAMCache#putIfAbsent
        cacheFormatBlock.release();
      }
    });
  }

  /**
   * Ready a new block for writing.
   */
  protected void newBlock() throws IOException {
    // This is where the next block begins.
    blockWriter.startWriting(BlockType.DATA);
    firstCellInBlock = null;
    if (lastCell != null) {
      lastCellOfPreviousBlock = lastCell;
    }
  }

  /**
   * Add a meta block to the end of the file. Call before close(). Metadata blocks are expensive.
   * Fill one with a bunch of serialized data rather than do a metadata block per metadata instance.
   * If metadata is small, consider adding it to the file info using
   * {@link #appendFileInfo(byte[], byte[])}.
   * @param metaBlockName name of the block
   * @param content       will call readFields to get data later (DO NOT REUSE)
   */
  @Override
  public void appendMetaBlock(String metaBlockName, Writable content) {
    byte[] key = Bytes.toBytes(metaBlockName);
    int i;
    for (i = 0; i < metaNames.size(); ++i) {
      // stop when the current key is greater than our own
      byte[] cur = metaNames.get(i);
      if (Bytes.BYTES_RAWCOMPARATOR.compare(cur, 0, cur.length, key, 0, key.length) > 0) {
        break;
      }
    }
    metaNames.add(i, key);
    metaData.add(i, content);
  }

  @Override
  public void close() throws IOException {
    if (outputStream == null) {
      return;
    }
    // Save data block encoder metadata in the file info.
    blockEncoder.saveMetadata(this);
    // Write out the end of the data blocks, then write the meta data blocks,
    // followed by fileinfo, data block index and meta block index.

    finishBlock();
    writeInlineBlocks(true);

    FixedFileTrailer trailer = new FixedFileTrailer(getMajorVersion(), getMinorVersion());

    // Write out the metadata blocks if any.
    if (!metaNames.isEmpty()) {
      for (int i = 0; i < metaNames.size(); ++i) {
        // store the beginning offset
        long offset = outputStream.getPos();
        // write the metadata content
        DataOutputStream dos = blockWriter.startWriting(BlockType.META);
        metaData.get(i).write(dos);

        blockWriter.writeHeaderAndData(outputStream);
        totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader();

        // Add the new meta block to the meta index.
        metaBlockIndexWriter.addEntry(metaNames.get(i), offset,
          blockWriter.getOnDiskSizeWithHeader());
      }
    }

    // Load-on-open section.

    // Data block index.
    //
    // In version 2, this section of the file starts with the root level data
    // block index. We call a function that writes intermediate-level blocks
    // first, then root level, and returns the offset of the root level block
    // index.

    long rootIndexOffset = dataBlockIndexWriter.writeIndexBlocks(outputStream);
    trailer.setLoadOnOpenOffset(rootIndexOffset);

    // Meta block index.
    metaBlockIndexWriter.writeSingleLevelIndex(blockWriter.startWriting(BlockType.ROOT_INDEX),
      "meta");
    blockWriter.writeHeaderAndData(outputStream);
    totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader();

    if (this.hFileContext.isIncludesMvcc()) {
      appendFileInfo(MAX_MEMSTORE_TS_KEY, Bytes.toBytes(maxMemstoreTS));
      appendFileInfo(KEY_VALUE_VERSION, Bytes.toBytes(KEY_VALUE_VER_WITH_MEMSTORE));
    }

    // File info
    writeFileInfo(trailer, blockWriter.startWriting(BlockType.FILE_INFO));
    blockWriter.writeHeaderAndData(outputStream);
    totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader();

    // Load-on-open data supplied by higher levels, e.g. Bloom filters.
    for (BlockWritable w : additionalLoadOnOpenData) {
      blockWriter.writeBlock(w, outputStream);
      totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader();
    }

    // Now finish off the trailer.
    trailer.setNumDataIndexLevels(dataBlockIndexWriter.getNumLevels());
    trailer.setUncompressedDataIndexSize(dataBlockIndexWriter.getTotalUncompressedSize());
    trailer.setFirstDataBlockOffset(firstDataBlockOffset);
    trailer.setLastDataBlockOffset(lastDataBlockOffset);
    trailer.setComparatorClass(this.hFileContext.getCellComparator().getClass());
    trailer.setDataIndexCount(dataBlockIndexWriter.getNumRootEntries());

    finishClose(trailer);

    blockWriter.release();
  }

  @Override
  public void addInlineBlockWriter(InlineBlockWriter ibw) {
    inlineBlockWriters.add(ibw);
  }

  @Override
  public void addGeneralBloomFilter(final BloomFilterWriter bfw) {
    this.addBloomFilter(bfw, BlockType.GENERAL_BLOOM_META);
  }

  @Override
  public void addDeleteFamilyBloomFilter(final BloomFilterWriter bfw) {
    this.addBloomFilter(bfw, BlockType.DELETE_FAMILY_BLOOM_META);
  }

  private void addBloomFilter(final BloomFilterWriter bfw, final BlockType blockType) {
    if (bfw.getKeyCount() <= 0) {
      return;
    }

    if (
      blockType != BlockType.GENERAL_BLOOM_META && blockType != BlockType.DELETE_FAMILY_BLOOM_META
    ) {
      throw new RuntimeException("Block Type: " + blockType.toString() + " is not supported");
    }
    additionalLoadOnOpenData.add(new BlockWritable() {
      @Override
      public BlockType getBlockType() {
        return blockType;
      }

      @Override
      public void writeToBlock(DataOutput out) throws IOException {
        bfw.getMetaWriter().write(out);
        Writable dataWriter = bfw.getDataWriter();
        if (dataWriter != null) {
          dataWriter.write(out);
        }
      }
    });
  }

  @Override
  public HFileContext getFileContext() {
    return hFileContext;
  }

  /**
   * Add key/value to file. Keys must be added in an order that agrees with the Comparator passed
   * on construction.
   * @param cell Cell to add. Cannot be empty nor null.
   */
  @Override
  public void append(final Cell cell) throws IOException {
    // checkKey uses comparator to check we are writing in order.
    boolean dupKey = checkKey(cell);
    if (!dupKey) {
      checkBlockBoundary();
    }

    if (!blockWriter.isWriting()) {
      newBlock();
    }

    blockWriter.write(cell);

    totalKeyLength += PrivateCellUtil.estimatedSerializedSizeOfKey(cell);
    totalValueLength += cell.getValueLength();

    // Are we the first key in this block?
    if (firstCellInBlock == null) {
      // If cell is big, block will be closed and this firstCellInBlock reference will only last
      // a short while.
      firstCellInBlock = cell;
    }

    // TODO: What if cell is 10MB and we write infrequently? We hold on to cell here indefinitely?
    lastCell = cell;
    entryCount++;
    this.maxMemstoreTS = Math.max(this.maxMemstoreTS, cell.getSequenceId());
    int tagsLength = cell.getTagsLength();
    if (tagsLength > this.maxTagsLength) {
      this.maxTagsLength = tagsLength;
    }
  }

  @Override
  public void beforeShipped() throws IOException {
    this.blockWriter.beforeShipped();
    // Add clone methods for every cell
    if (this.lastCell != null) {
      this.lastCell = KeyValueUtil.toNewKeyCell(this.lastCell);
    }
    if (this.firstCellInBlock != null) {
      this.firstCellInBlock = KeyValueUtil.toNewKeyCell(this.firstCellInBlock);
    }
    if (this.lastCellOfPreviousBlock != null) {
      this.lastCellOfPreviousBlock = KeyValueUtil.toNewKeyCell(this.lastCellOfPreviousBlock);
    }
  }

  public Cell getLastCell() {
    return lastCell;
  }

  protected void finishFileInfo() throws IOException {
    if (lastCell != null) {
      // Make a copy. The copy is stuffed into our fileinfo map. Needs a clean
      // byte buffer. Won't take a tuple.
      byte[] lastKey = PrivateCellUtil.getCellKeySerializedAsKeyValueKey(this.lastCell);
      fileInfo.append(HFileInfo.LASTKEY, lastKey, false);
    }

    // Average key length.
    int avgKeyLen = entryCount == 0 ? 0 : (int) (totalKeyLength / entryCount);
    fileInfo.append(HFileInfo.AVG_KEY_LEN, Bytes.toBytes(avgKeyLen), false);
    fileInfo.append(HFileInfo.CREATE_TIME_TS, Bytes.toBytes(hFileContext.getFileCreateTime()),
      false);

    // Average value length.
    int avgValueLen = entryCount == 0 ? 0 : (int) (totalValueLength / entryCount);
    fileInfo.append(HFileInfo.AVG_VALUE_LEN, Bytes.toBytes(avgValueLen), false);
    if (hFileContext.isIncludesTags()) {
      // When tags are not being written in this file, MAX_TAGS_LEN is excluded
      // from the FileInfo
      fileInfo.append(HFileInfo.MAX_TAGS_LEN, Bytes.toBytes(this.maxTagsLength), false);
      boolean tagsCompressed = (hFileContext.getDataBlockEncoding() != DataBlockEncoding.NONE)
        && hFileContext.isCompressTags();
      fileInfo.append(HFileInfo.TAGS_COMPRESSED, Bytes.toBytes(tagsCompressed), false);
    }
  }

  protected int getMajorVersion() {
    return 3;
  }

  protected int getMinorVersion() {
    return HFileReaderImpl.MAX_MINOR_VERSION;
  }

  protected void finishClose(FixedFileTrailer trailer) throws IOException {
    // Write out encryption metadata before finalizing if we have a valid crypto context
    Encryption.Context cryptoContext = hFileContext.getEncryptionContext();
    if (cryptoContext != Encryption.Context.NONE) {
      // Wrap the context's key and write it as the encryption metadata, the wrapper includes
      // all information needed for decryption
      trailer.setEncryptionKey(EncryptionUtil.wrapKey(
        cryptoContext.getConf(), cryptoContext.getConf()
          .get(HConstants.CRYPTO_MASTERKEY_NAME_CONF_KEY, User.getCurrent().getShortName()),
        cryptoContext.getKey()));
    }
    // Now we can finish the close
    trailer.setMetaIndexCount(metaNames.size());
    trailer.setTotalUncompressedBytes(totalUncompressedBytes + trailer.getTrailerSize());
    trailer.setEntryCount(entryCount);
    trailer.setCompressionCodec(hFileContext.getCompression());

    long startTime = EnvironmentEdgeManager.currentTime();
    trailer.serialize(outputStream);
    HFile.updateWriteLatency(EnvironmentEdgeManager.currentTime() - startTime);

    if (closeOutputStream) {
      outputStream.close();
      outputStream = null;
    }
  }
}