/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.SequenceInputStream;
import java.security.Key;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.crypto.Cipher;
import org.apache.hadoop.hbase.io.crypto.Encryption;
import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
import org.apache.hadoop.hbase.security.EncryptionUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.BytesBytesPair;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HFileProtos;

/**
 * Metadata Map of attributes for HFile written out as HFile Trailer. Created by the Writer and
 * added to the tail of the file just before close. Metadata includes core attributes such as the
 * last key seen, the comparator used writing the file, etc. Clients can add their own attributes
 * via {@link #append(byte[], byte[], boolean)} and they'll be persisted and available at read
 * time. The Reader creates the HFileInfo on open by reading the tail of the HFile. Parsing the
 * HFile trailer also creates a {@link HFileContext}, a read-only data structure that includes the
 * bulk of the HFileInfo plus extras that are safe to pass around when working on HFiles.
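 * <p>
 * A minimal, illustrative sketch of populating and reading back custom attributes (the key name
 * below is arbitrary, not one HBase defines):
 *
 * <pre>{@code
 * HFileInfo info = new HFileInfo();
 * // Custom keys must not start with the reserved "hfile." prefix when checkPrefix is true.
 * info.append(Bytes.toBytes("MY_CUSTOM_KEY"), Bytes.toBytes("my-value"), true);
 * byte[] value = info.get(Bytes.toBytes("MY_CUSTOM_KEY"));
 * }</pre>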
 * @see HFileContext
 */
@InterfaceAudience.Private
public class HFileInfo implements SortedMap<byte[], byte[]> {

  private static final Logger LOG = LoggerFactory.getLogger(HFileInfo.class);

  static final String RESERVED_PREFIX = "hfile.";
  static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX);
  static final byte[] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
  static final byte[] AVG_KEY_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
  static final byte[] AVG_VALUE_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
  static final byte[] CREATE_TIME_TS = Bytes.toBytes(RESERVED_PREFIX + "CREATE_TIME_TS");
  static final byte[] TAGS_COMPRESSED = Bytes.toBytes(RESERVED_PREFIX + "TAGS_COMPRESSED");
  static final byte[] KEY_OF_BIGGEST_CELL = Bytes.toBytes(RESERVED_PREFIX + "KEY_OF_BIGGEST_CELL");
  static final byte[] LEN_OF_BIGGEST_CELL = Bytes.toBytes(RESERVED_PREFIX + "LEN_OF_BIGGEST_CELL");
  public static final byte[] MAX_TAGS_LEN = Bytes.toBytes(RESERVED_PREFIX + "MAX_TAGS_LEN");
  private final SortedMap<byte[], byte[]> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);

  /**
   * We can read files whose major version is v2 IFF their minor version is at least 3.
   */
  private static final int MIN_V2_MINOR_VERSION_WITH_PB = 3;

  /** Maximum minor version supported by this HFile format */
  // We went to version 2 when we moved to pb'ing fileinfo and the trailer on
  // the file. This version can read Writables version 1.
  static final int MAX_MINOR_VERSION = 3;

  /** Last key in the file. Filled in when we read in the file info */
  private Cell lastKeyCell = null;
  /** Average key length read from file info */
  private int avgKeyLen = -1;
  /** Average value length read from file info */
  private int avgValueLen = -1;
  /** Biggest Cell in the file, key only. Filled in when we read in the file info */
  private Cell biggestCell = null;
  /** Length of the biggest Cell */
  private long lenOfBiggestCell = -1;
  private boolean includesMemstoreTS = false;
  private boolean decodeMemstoreTS = false;

  /**
   * Blocks read from the load-on-open section, excluding data root index, meta index, and file
   * info.
   */
  private List<HFileBlock> loadOnOpenBlocks = new ArrayList<>();

  /**
   * The iterator tracks all blocks in the load-on-open section. Since we now use
   * {@link org.apache.hadoop.hbase.io.ByteBuffAllocator} to manage the ByteBuffers backing the
   * blocks, we must make sure all of those ByteBuffers are deallocated in the end.
   */
  private HFileBlock.BlockIterator blockIter;

  private HFileBlockIndex.CellBasedKeyBlockIndexReader dataIndexReader;
  private HFileBlockIndex.ByteArrayKeyBlockIndexReader metaIndexReader;

  private FixedFileTrailer trailer;
  private HFileContext hfileContext;

  public HFileInfo() {
    super();
  }

  public HFileInfo(ReaderContext context, Configuration conf) throws IOException {
    this.initTrailerAndContext(context, conf);
  }

  /**
   * Append the given key/value pair to the file info, optionally checking the key prefix.
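   * <p>
   * Illustrative behavior of the prefix check (the key name is arbitrary):
   *
   * <pre>{@code
   * // Rejected: the key uses the reserved "hfile." prefix and checkPrefix is true.
   * info.append(Bytes.toBytes("hfile.MY_KEY"), Bytes.toBytes("v"), true); // throws IOException
   * // Accepted: prefix checking disabled, as done internally for the reserved keys.
   * info.append(Bytes.toBytes("hfile.MY_KEY"), Bytes.toBytes("v"), false);
   * }</pre>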
   * @param k           key to add
   * @param v           value to add
   * @param checkPrefix whether to check that the provided key does not start with the reserved
   *                    prefix
   * @return this file info object
   * @throws IOException          if the key or value is invalid
   * @throws NullPointerException if {@code key} or {@code value} is {@code null}
   */
  public HFileInfo append(final byte[] k, final byte[] v, final boolean checkPrefix)
    throws IOException {
    Objects.requireNonNull(k, "key cannot be null");
    Objects.requireNonNull(v, "value cannot be null");

    if (checkPrefix && isReservedFileInfoKey(k)) {
      throw new IOException("Keys with a " + HFileInfo.RESERVED_PREFIX + " prefix are reserved");
    }
    put(k, v);
    return this;
  }

  /** Return true if the given file info key is reserved for internal use. */
  public static boolean isReservedFileInfoKey(byte[] key) {
    return Bytes.startsWith(key, HFileInfo.RESERVED_PREFIX_BYTES);
  }

  @Override
  public void clear() {
    this.map.clear();
  }

  @Override
  public Comparator<? super byte[]> comparator() {
    return map.comparator();
  }

  @Override
  public boolean containsKey(Object key) {
    return map.containsKey(key);
  }

  @Override
  public boolean containsValue(Object value) {
    return map.containsValue(value);
  }

  @Override
  public Set<java.util.Map.Entry<byte[], byte[]>> entrySet() {
    return map.entrySet();
  }

  @Override
  public boolean equals(Object o) {
    return map.equals(o);
  }

  @Override
  public byte[] firstKey() {
    return map.firstKey();
  }

  @Override
  public byte[] get(Object key) {
    return map.get(key);
  }

  @Override
  public int hashCode() {
    return map.hashCode();
  }

  @Override
  public SortedMap<byte[], byte[]> headMap(byte[] toKey) {
    return this.map.headMap(toKey);
  }

  @Override
  public boolean isEmpty() {
    return map.isEmpty();
  }

  @Override
  public Set<byte[]> keySet() {
    return map.keySet();
  }

  @Override
  public byte[] lastKey() {
    return map.lastKey();
  }

  @Override
  public byte[] put(byte[] key, byte[] value) {
    return this.map.put(key, value);
  }

  @Override
  public void putAll(Map<? extends byte[], ? extends byte[]> m) {
    this.map.putAll(m);
  }

  @Override
  public byte[] remove(Object key) {
    return this.map.remove(key);
  }

  @Override
  public int size() {
    return map.size();
  }

  @Override
  public SortedMap<byte[], byte[]> subMap(byte[] fromKey, byte[] toKey) {
    return this.map.subMap(fromKey, toKey);
  }

  @Override
  public SortedMap<byte[], byte[]> tailMap(byte[] fromKey) {
    return this.map.tailMap(fromKey);
  }

  @Override
  public Collection<byte[]> values() {
    return map.values();
  }

  /**
   * Write out this instance on the passed in <code>out</code> stream. We write it as a protobuf.
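   * <p>
   * A rough round-trip sketch (illustrative only; these package-private methods are normally
   * driven by the HFile writer and reader, not called directly):
   *
   * <pre>{@code
   * ByteArrayOutputStream baos = new ByteArrayOutputStream();
   * info.write(new DataOutputStream(baos));
   * HFileInfo copy = new HFileInfo();
   * copy.read(new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));
   * }</pre>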
   * @see #read(DataInputStream)
   */
  void write(final DataOutputStream out) throws IOException {
    HFileProtos.FileInfoProto.Builder builder = HFileProtos.FileInfoProto.newBuilder();
    for (Map.Entry<byte[], byte[]> e : this.map.entrySet()) {
      HBaseProtos.BytesBytesPair.Builder bbpBuilder = HBaseProtos.BytesBytesPair.newBuilder();
      bbpBuilder.setFirst(UnsafeByteOperations.unsafeWrap(e.getKey()));
      bbpBuilder.setSecond(UnsafeByteOperations.unsafeWrap(e.getValue()));
      builder.addMapEntry(bbpBuilder.build());
    }
    out.write(ProtobufMagic.PB_MAGIC);
    builder.build().writeDelimitedTo(out);
  }

  /**
   * Populate this instance with what we find on the passed in <code>in</code> stream. Can
   * deserialize protobuf or the old Writables format.
   * @see #write(DataOutputStream)
   */
  void read(final DataInputStream in) throws IOException {
    // This code is tested over in TestHFileReaderV1 where we read an old hfile w/ this new code.
    int pblen = ProtobufUtil.lengthOfPBMagic();
    byte[] pbuf = new byte[pblen];
    if (in.markSupported()) {
      in.mark(pblen);
    }
    int read = in.read(pbuf);
    if (read != pblen) {
      throw new IOException("read=" + read + ", wanted=" + pblen);
    }
    if (ProtobufUtil.isPBMagicPrefix(pbuf)) {
      parsePB(HFileProtos.FileInfoProto.parseDelimitedFrom(in));
    } else {
      if (in.markSupported()) {
        in.reset();
        parseWritable(in);
      } else {
        // We cannot use BufferedInputStream, it consumes more than we read from the underlying IS
        ByteArrayInputStream bais = new ByteArrayInputStream(pbuf);
        SequenceInputStream sis = new SequenceInputStream(bais, in); // Concatenate input streams
        // TODO: Am I leaking anything here wrapping the passed in stream? We are not calling
        // close on the wrapped streams but they should be let go after we leave this context?
        // I see that we keep a reference to the passed in inputstream but since we no longer
        // have a reference to this after we leave, we should be ok.
        parseWritable(new DataInputStream(sis));
      }
    }
  }

  /**
   * Now parse the old Writable format. It was a list of Map entries. Each map entry was a key and
   * a value of a byte []. The old map format had a byte before each value that held a code
   * indicating the value's type. We know it was always a byte [] in an hfile, so below we just
   * read and ignore it.
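   * <p>
   * Roughly, the legacy layout is (a sketch, matching the reads below):
   *
   * <pre>
   *   int entryCount
   *   repeated entryCount times:
   *     vint keyLength, key bytes      (Bytes.readByteArray)
   *     byte value-type code (ignored)
   *     vint valueLength, value bytes  (Bytes.readByteArray)
   * </pre>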
   */
  void parseWritable(final DataInputStream in) throws IOException {
    // First clear the map.
    // Otherwise we will just accumulate entries every time this method is called.
    this.map.clear();
    // Read the number of entries in the map
    int entries = in.readInt();
    // Then read each key/value pair
    for (int i = 0; i < entries; i++) {
      byte[] key = Bytes.readByteArray(in);
      // We used to read a byte that encoded the class type.
      // Read and ignore it because it is always byte [] in hfile
      in.readByte();
      byte[] value = Bytes.readByteArray(in);
      this.map.put(key, value);
    }
  }

  /**
   * Fill our map with content of the pb we read off disk
   * @param fip protobuf message to read
   */
  void parsePB(final HFileProtos.FileInfoProto fip) {
    this.map.clear();
    for (BytesBytesPair pair : fip.getMapEntryList()) {
      this.map.put(pair.getFirst().toByteArray(), pair.getSecond().toByteArray());
    }
  }

  public void initTrailerAndContext(ReaderContext context, Configuration conf) throws IOException {
    try {
      boolean isHBaseChecksum = context.getInputStreamWrapper().shouldUseHBaseChecksum();
      trailer = FixedFileTrailer.readFromStream(
        context.getInputStreamWrapper().getStream(isHBaseChecksum), context.getFileSize());
      Path path = context.getFilePath();
      checkFileVersion(path);
      this.hfileContext = createHFileContext(path, trailer, conf);
      context.getInputStreamWrapper().unbuffer();
    } catch (Throwable t) {
      IOUtils.closeQuietly(context.getInputStreamWrapper(),
        e -> LOG.warn("failed to close input stream wrapper", e));
      throw new CorruptHFileException(
        "Problem reading HFile Trailer from file " + context.getFilePath(), t);
    }
  }

  /**
   * Should be called after {@link #initTrailerAndContext(ReaderContext, Configuration)}. Reads
   * the load-on-open section (root data index, meta index, file info) and wires the resulting
   * index readers and block encoders into the passed reader.
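   * <p>
   * A sketch of the expected open sequence (assuming a {@code ReaderContext}, a
   * {@code Configuration} and an {@link HFile.Reader} are already at hand):
   *
   * <pre>{@code
   * HFileInfo fileInfo = new HFileInfo(readerContext, conf); // reads trailer, creates context
   * fileInfo.initMetaAndIndex(reader); // reads the load-on-open indexes and file info
   * }</pre>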
   */
  public void initMetaAndIndex(HFile.Reader reader) throws IOException {
    ReaderContext context = reader.getContext();
    try {
      HFileBlock.FSReader blockReader = reader.getUncachedBlockReader();
      // Initialize a block iterator and parse the load-on-open blocks below.
      blockIter = blockReader.blockRange(trailer.getLoadOnOpenDataOffset(),
        context.getFileSize() - trailer.getTrailerSize());
      // Data index. We also read statistics about the block index written after
      // the root level.
      HFileBlock dataBlockRootIndex = blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX);
      HFileBlock metaBlockIndex = blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX);
      loadMetaInfo(blockIter, hfileContext);

      HFileIndexBlockEncoder indexBlockEncoder =
        HFileIndexBlockEncoderImpl.createFromFileInfo(this);
      this.dataIndexReader = new HFileBlockIndex.CellBasedKeyBlockIndexReaderV2(
        trailer.createComparator(), trailer.getNumDataIndexLevels(), indexBlockEncoder);
      dataIndexReader.readMultiLevelIndexRoot(dataBlockRootIndex, trailer.getDataIndexCount());
      reader.setDataBlockIndexReader(dataIndexReader);
      // Meta index.
      this.metaIndexReader = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
      metaIndexReader.readRootIndex(metaBlockIndex, trailer.getMetaIndexCount());
      reader.setMetaBlockIndexReader(metaIndexReader);

      reader.setDataBlockEncoder(HFileDataBlockEncoderImpl.createFromFileInfo(this));
      // Load-On-Open info
      HFileBlock b;
      while ((b = blockIter.nextBlock()) != null) {
        loadOnOpenBlocks.add(b);
      }
      // Done with the open-phase reads; release the input stream's buffered resources.
      context.getInputStreamWrapper().unbuffer();
    } catch (Throwable t) {
      IOUtils.closeQuietly(context.getInputStreamWrapper(),
        e -> LOG.warn("failed to close input stream wrapper", e));
      throw new CorruptHFileException(
        "Problem reading data index and meta index from file " + context.getFilePath(), t);
    }
  }

  private HFileContext createHFileContext(Path path, FixedFileTrailer trailer, Configuration conf)
    throws IOException {
    HFileContextBuilder builder = new HFileContextBuilder().withHBaseCheckSum(true)
      .withHFileName(path.getName()).withCompression(trailer.getCompressionCodec())
      .withCellComparator(FixedFileTrailer.createComparator(trailer.getComparatorClassName()));
    // Check for any key material available
    byte[] keyBytes = trailer.getEncryptionKey();
    if (keyBytes != null) {
      Encryption.Context cryptoContext = Encryption.newContext(conf);
      Key key = EncryptionUtil.unwrapKey(conf, keyBytes);
      // Use the algorithm the key wants
      Cipher cipher = Encryption.getCipher(conf, key.getAlgorithm());
      if (cipher == null) {
        throw new IOException(
          "Cipher '" + key.getAlgorithm() + "' is not available" + ", path=" + path);
      }
      cryptoContext.setCipher(cipher);
      cryptoContext.setKey(key);
      builder.withEncryptionContext(cryptoContext);
    }
    HFileContext context = builder.build();
    return context;
  }

  private void loadMetaInfo(HFileBlock.BlockIterator blockIter, HFileContext hfileContext)
    throws IOException {
    read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
    byte[] creationTimeBytes = get(HFileInfo.CREATE_TIME_TS);
    hfileContext.setFileCreateTime(creationTimeBytes == null ? 0 : Bytes.toLong(creationTimeBytes));
    byte[] tmp = get(HFileInfo.MAX_TAGS_LEN);
    // If max tag length is absent from the HFile, tags were not written to the file at all.
    if (tmp != null) {
      hfileContext.setIncludesTags(true);
      tmp = get(HFileInfo.TAGS_COMPRESSED);
      if (tmp != null && Bytes.toBoolean(tmp)) {
        hfileContext.setCompressTags(true);
      }
    }
    // parse meta info
    if (get(HFileInfo.LASTKEY) != null) {
      lastKeyCell = new KeyValue.KeyOnlyKeyValue(get(HFileInfo.LASTKEY));
    }
    if (get(HFileInfo.KEY_OF_BIGGEST_CELL) != null) {
      biggestCell = new KeyValue.KeyOnlyKeyValue(get(HFileInfo.KEY_OF_BIGGEST_CELL));
      lenOfBiggestCell = Bytes.toLong(get(HFileInfo.LEN_OF_BIGGEST_CELL));
    }
    avgKeyLen = Bytes.toInt(get(HFileInfo.AVG_KEY_LEN));
    avgValueLen = Bytes.toInt(get(HFileInfo.AVG_VALUE_LEN));
    byte[] keyValueFormatVersion = get(HFileWriterImpl.KEY_VALUE_VERSION);
    includesMemstoreTS = keyValueFormatVersion != null
      && Bytes.toInt(keyValueFormatVersion) == HFileWriterImpl.KEY_VALUE_VER_WITH_MEMSTORE;
    hfileContext.setIncludesMvcc(includesMemstoreTS);
    if (includesMemstoreTS) {
      decodeMemstoreTS = Bytes.toLong(get(HFileWriterImpl.MAX_MEMSTORE_TS_KEY)) > 0;
    }
  }

  /**
   * File version check is a little sloppy. We read v3 files but can also read v2 files if their
   * content has been pb'd; files written with 0.98.
   */
  private void checkFileVersion(Path path) {
    int majorVersion = trailer.getMajorVersion();
    if (majorVersion == getMajorVersion()) {
      return;
    }
    int minorVersion = trailer.getMinorVersion();
    if (majorVersion == 2 && minorVersion >= MIN_V2_MINOR_VERSION_WITH_PB) {
      return;
    }
    // We can read v3 or v2 versions of hfile.
    throw new IllegalArgumentException("Invalid HFile version: major=" + trailer.getMajorVersion()
      + ", minor=" + trailer.getMinorVersion() + ": expected at least major=2 and minor="
      + MIN_V2_MINOR_VERSION_WITH_PB + ", path=" + path);
  }

  public void close() {
    if (blockIter != null) {
      blockIter.freeBlocks();
    }
  }

  public int getMajorVersion() {
    return 3;
  }

  public void setTrailer(FixedFileTrailer trailer) {
    this.trailer = trailer;
  }

  public FixedFileTrailer getTrailer() {
    return this.trailer;
  }

  public HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader() {
    return this.dataIndexReader;
  }

  public HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader() {
    return this.metaIndexReader;
  }

  public HFileContext getHFileContext() {
    return this.hfileContext;
  }

  public List<HFileBlock> getLoadOnOpenBlocks() {
    return loadOnOpenBlocks;
  }

  public Cell getLastKeyCell() {
    return lastKeyCell;
  }

  public int getAvgKeyLen() {
    return avgKeyLen;
  }

  public int getAvgValueLen() {
    return avgValueLen;
  }

  public String getKeyOfBiggestCell() {
    return CellUtil.toString(biggestCell, false);
  }

  public long getLenOfBiggestCell() {
    return lenOfBiggestCell;
  }

  public boolean shouldIncludeMemStoreTS() {
    return includesMemstoreTS;
  }

  public boolean isDecodeMemstoreTS() {
    return decodeMemstoreTS;
  }
}