View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.io.hfile;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.ByteArrayOutputStream;
23  import java.io.DataInput;
24  import java.io.DataInputStream;
25  import java.io.DataOutputStream;
26  import java.io.IOException;
27  import java.nio.ByteBuffer;
28  
29  import org.apache.hadoop.hbase.util.ByteStringer;
30  import org.apache.hadoop.hbase.classification.InterfaceAudience;
31  import org.apache.hadoop.fs.FSDataInputStream;
32  import org.apache.hadoop.hbase.KeyValue;
33  import org.apache.hadoop.hbase.KeyValue.KVComparator;
34  import org.apache.hadoop.hbase.io.compress.Compression;
35  import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
36  import org.apache.hadoop.hbase.util.Bytes;
37  
38  /**
39   * The {@link HFile} has a fixed trailer which contains offsets to other
40   * variable parts of the file. Also includes basic metadata on this file. The
41   * trailer size is fixed within a given {@link HFile} format version only, but
42   * we always store the version number as the last four-byte integer of the file.
43   * The version number itself is split into two portions, a major 
44   * version and a minor version. 
45   * The last three bytes of a file is the major
46   * version and a single preceding byte is the minor number. The major version
47   * determines which readers/writers to use to read/write a hfile while a minor
48   * version determines smaller changes in hfile format that do not need a new
49   * reader/writer type.
50   */
51  @InterfaceAudience.Private
52  public class FixedFileTrailer {
53  
  /**
   * We store the comparator class name as a fixed-length field in the trailer.
   * This is the width used when the name is read back from the writable
   * (pre-protobuf) trailer layout.
   */
  private static final int MAX_COMPARATOR_NAME_LENGTH = 128;

  /**
   * Offset to the fileinfo data, a small block of vitals. Necessary in v1 but
   * only potentially useful for pretty-printing in v2.
   */
  private long fileInfoOffset;

  /**
   * In version 1, the offset to the data block index. Starting from version 2,
   * the meaning of this field is the offset to the section of the file that
   * should be loaded at the time the file is being opened, and as of the time
   * of writing, this happens to be the offset of the file info section.
   */
  private long loadOnOpenDataOffset;

  /** The number of entries in the root data index. */
  private int dataIndexCount;

  /** Total uncompressed size of all blocks of the data index. */
  private long uncompressedDataIndexSize;

  /** The number of entries in the meta index. */
  private int metaIndexCount;

  /** The total uncompressed size of keys/values stored in the file. */
  private long totalUncompressedBytes;

  /**
   * The number of key/value pairs in the file. This field was int in version 1,
   * but is now long.
   */
  private long entryCount;

  /** The compression codec used for all blocks; defaults to no compression. */
  private Compression.Algorithm compressionCodec = Compression.Algorithm.NONE;

  /**
   * The number of levels in the potentially multi-level data index. Used from
   * version 2 onwards.
   */
  private int numDataIndexLevels;

  /** The offset of the first data block. */
  private long firstDataBlockOffset;

  /**
   * It is guaranteed that no key/value data blocks start after this offset in
   * the file.
   */
  private long lastDataBlockOffset;

  /**
   * Comparator class name recorded in the trailer. Initialised to the legacy
   * key comparator name reported by {@link KeyValue#COMPARATOR}.
   */
  private String comparatorClassName = KeyValue.COMPARATOR.getLegacyKeyComparatorName();

  /** The encryption key; {@code null} when none has been set. */
  private byte[] encryptionKey;

  /** The {@link HFile} format major version. */
  private final int majorVersion;

  /** The {@link HFile} format minor version. */
  private final int minorVersion;
120 
121   FixedFileTrailer(int majorVersion, int minorVersion) {
122     this.majorVersion = majorVersion;
123     this.minorVersion = minorVersion;
124     HFile.checkFormatVersion(majorVersion);
125   }
126 
127   private static int[] computeTrailerSizeByVersion() {
128     int versionToSize[] = new int[HFile.MAX_FORMAT_VERSION + 1];
129     // We support only 2 major versions now. ie. V2, V3
130     versionToSize[2] = 212;
131     for (int version = 3; version <= HFile.MAX_FORMAT_VERSION; version++) {
132       // Max FFT size for V3 and above is taken as 4KB for future enhancements
133       // if any.
134       // Unless the trailer size exceeds 4K this can continue
135       versionToSize[version] = 1024 * 4;
136     }
137     return versionToSize;
138   }
139 
140   private static int getMaxTrailerSize() {
141     int maxSize = 0;
142     for (int version = HFile.MIN_FORMAT_VERSION;
143          version <= HFile.MAX_FORMAT_VERSION;
144          ++version)
145       maxSize = Math.max(getTrailerSize(version), maxSize);
146     return maxSize;
147   }
148 
149   private static final int TRAILER_SIZE[] = computeTrailerSizeByVersion();
150   private static final int MAX_TRAILER_SIZE = getMaxTrailerSize();
151 
152   private static final int NOT_PB_SIZE = BlockType.MAGIC_LENGTH + Bytes.SIZEOF_INT;
153 
154   static int getTrailerSize(int version) {
155     return TRAILER_SIZE[version];
156   }
157 
158   public int getTrailerSize() {
159     return getTrailerSize(majorVersion);
160   }
161 
162   /**
163    * Write the trailer to a data stream. We support writing version 1 for
164    * testing and for determining version 1 trailer size. It is also easy to see
165    * what fields changed in version 2.
166    *
167    * @param outputStream
168    * @throws IOException
169    */
170   void serialize(DataOutputStream outputStream) throws IOException {
171     HFile.checkFormatVersion(majorVersion);
172 
173     ByteArrayOutputStream baos = new ByteArrayOutputStream();
174     DataOutputStream baosDos = new DataOutputStream(baos);
175 
176     BlockType.TRAILER.write(baosDos);
177     serializeAsPB(baosDos);
178 
179     // The last 4 bytes of the file encode the major and minor version universally
180     baosDos.writeInt(materializeVersion(majorVersion, minorVersion));
181 
182     baos.writeTo(outputStream);
183   }
184 
185   /**
186    * Write trailer data as protobuf
187    * @param outputStream
188    * @throws IOException
189    */
190   void serializeAsPB(DataOutputStream output) throws IOException {
191     ByteArrayOutputStream baos = new ByteArrayOutputStream();
192     HFileProtos.FileTrailerProto.Builder builder = HFileProtos.FileTrailerProto.newBuilder()
193       .setFileInfoOffset(fileInfoOffset)
194       .setLoadOnOpenDataOffset(loadOnOpenDataOffset)
195       .setUncompressedDataIndexSize(uncompressedDataIndexSize)
196       .setTotalUncompressedBytes(totalUncompressedBytes)
197       .setDataIndexCount(dataIndexCount)
198       .setMetaIndexCount(metaIndexCount)
199       .setEntryCount(entryCount)
200       .setNumDataIndexLevels(numDataIndexLevels)
201       .setFirstDataBlockOffset(firstDataBlockOffset)
202       .setLastDataBlockOffset(lastDataBlockOffset)
203       // TODO this is a classname encoded into an  HFile's trailer. We are going to need to have 
204       // some compat code here.
205       .setComparatorClassName(comparatorClassName)
206       .setCompressionCodec(compressionCodec.ordinal());
207     if (encryptionKey != null) {
208       builder.setEncryptionKey(ByteStringer.wrap(encryptionKey));
209     }
210     // We need this extra copy unfortunately to determine the final size of the
211     // delimited output, see use of baos.size() below.
212     builder.build().writeDelimitedTo(baos);
213     baos.writeTo(output);
214     // Pad to make up the difference between variable PB encoding length and the
215     // length when encoded as writable under earlier V2 formats. Failure to pad
216     // properly or if the PB encoding is too big would mean the trailer wont be read
217     // in properly by HFile.
218     int padding = getTrailerSize() - NOT_PB_SIZE - baos.size();
219     if (padding < 0) {
220       throw new IOException("Pbuf encoding size exceeded fixed trailer size limit");
221     }
222     for (int i = 0; i < padding; i++) {
223       output.write(0);
224     }
225   }
226 
227   /**
228    * Deserialize the fixed file trailer from the given stream. The version needs
229    * to already be specified. Make sure this is consistent with
230    * {@link #serialize(DataOutputStream)}.
231    *
232    * @param inputStream
233    * @throws IOException
234    */
235   void deserialize(DataInputStream inputStream) throws IOException {
236     HFile.checkFormatVersion(majorVersion);
237 
238     BlockType.TRAILER.readAndCheck(inputStream);
239 
240     if (majorVersion > 2
241         || (majorVersion == 2 && minorVersion >= HFileReaderV2.PBUF_TRAILER_MINOR_VERSION)) {
242       deserializeFromPB(inputStream);
243     } else {
244       deserializeFromWritable(inputStream);
245     }
246 
247     // The last 4 bytes of the file encode the major and minor version universally
248     int version = inputStream.readInt();
249     expectMajorVersion(extractMajorVersion(version));
250     expectMinorVersion(extractMinorVersion(version));
251   }
252 
253   /**
254    * Deserialize the file trailer as protobuf
255    * @param inputStream
256    * @throws IOException
257    */
258   void deserializeFromPB(DataInputStream inputStream) throws IOException {
259     // read PB and skip padding
260     int start = inputStream.available();
261     HFileProtos.FileTrailerProto trailerProto =
262         HFileProtos.FileTrailerProto.PARSER.parseDelimitedFrom(inputStream);
263     int size = start - inputStream.available();
264     inputStream.skip(getTrailerSize() - NOT_PB_SIZE - size);
265 
266     // process the PB
267     if (trailerProto.hasFileInfoOffset()) {
268       fileInfoOffset = trailerProto.getFileInfoOffset();
269     }
270     if (trailerProto.hasLoadOnOpenDataOffset()) {
271       loadOnOpenDataOffset = trailerProto.getLoadOnOpenDataOffset();
272     }
273     if (trailerProto.hasUncompressedDataIndexSize()) {
274       uncompressedDataIndexSize = trailerProto.getUncompressedDataIndexSize();
275     }
276     if (trailerProto.hasTotalUncompressedBytes()) {
277       totalUncompressedBytes = trailerProto.getTotalUncompressedBytes();
278     }
279     if (trailerProto.hasDataIndexCount()) {
280       dataIndexCount = trailerProto.getDataIndexCount();
281     }
282     if (trailerProto.hasMetaIndexCount()) {
283       metaIndexCount = trailerProto.getMetaIndexCount();
284     }
285     if (trailerProto.hasEntryCount()) {
286       entryCount = trailerProto.getEntryCount();
287     }
288     if (trailerProto.hasNumDataIndexLevels()) {
289       numDataIndexLevels = trailerProto.getNumDataIndexLevels();
290     }
291     if (trailerProto.hasFirstDataBlockOffset()) {
292       firstDataBlockOffset = trailerProto.getFirstDataBlockOffset();
293     }
294     if (trailerProto.hasLastDataBlockOffset()) {
295       lastDataBlockOffset = trailerProto.getLastDataBlockOffset();
296     }
297     if (trailerProto.hasComparatorClassName()) {
298       // TODO this is a classname encoded into an  HFile's trailer. We are going to need to have 
299       // some compat code here.
300       setComparatorClass(getComparatorClass(trailerProto.getComparatorClassName()));
301     }
302     if (trailerProto.hasCompressionCodec()) {
303       compressionCodec = Compression.Algorithm.values()[trailerProto.getCompressionCodec()];
304     } else {
305       compressionCodec = Compression.Algorithm.NONE;
306     }
307     if (trailerProto.hasEncryptionKey()) {
308       encryptionKey = trailerProto.getEncryptionKey().toByteArray();
309     }
310   }
311 
312   /**
313    * Deserialize the file trailer as writable data
314    * @param input
315    * @throws IOException
316    */
317   void deserializeFromWritable(DataInput input) throws IOException {
318     fileInfoOffset = input.readLong();
319     loadOnOpenDataOffset = input.readLong();
320     dataIndexCount = input.readInt();
321     uncompressedDataIndexSize = input.readLong();
322     metaIndexCount = input.readInt();
323 
324     totalUncompressedBytes = input.readLong();
325     entryCount = input.readLong();
326     compressionCodec = Compression.Algorithm.values()[input.readInt()];
327     numDataIndexLevels = input.readInt();
328     firstDataBlockOffset = input.readLong();
329     lastDataBlockOffset = input.readLong();
330     // TODO this is a classname encoded into an  HFile's trailer. We are going to need to have 
331     // some compat code here.
332     setComparatorClass(getComparatorClass(Bytes.readStringFixedSize(input,
333         MAX_COMPARATOR_NAME_LENGTH)));
334   }
335   
336   private void append(StringBuilder sb, String s) {
337     if (sb.length() > 0)
338       sb.append(", ");
339     sb.append(s);
340   }
341 
342   @Override
343   public String toString() {
344     StringBuilder sb = new StringBuilder();
345     append(sb, "fileinfoOffset=" + fileInfoOffset);
346     append(sb, "loadOnOpenDataOffset=" + loadOnOpenDataOffset);
347     append(sb, "dataIndexCount=" + dataIndexCount);
348     append(sb, "metaIndexCount=" + metaIndexCount);
349     append(sb, "totalUncomressedBytes=" + totalUncompressedBytes);
350     append(sb, "entryCount=" + entryCount);
351     append(sb, "compressionCodec=" + compressionCodec);
352     append(sb, "uncompressedDataIndexSize=" + uncompressedDataIndexSize);
353     append(sb, "numDataIndexLevels=" + numDataIndexLevels);
354     append(sb, "firstDataBlockOffset=" + firstDataBlockOffset);
355     append(sb, "lastDataBlockOffset=" + lastDataBlockOffset);
356     append(sb, "comparatorClassName=" + comparatorClassName);
357     if (majorVersion >= 3) {
358       append(sb, "encryptionKey=" + (encryptionKey != null ? "PRESENT" : "NONE"));
359     }
360     append(sb, "majorVersion=" + majorVersion);
361     append(sb, "minorVersion=" + minorVersion);
362 
363     return sb.toString();
364   }
365 
366   /**
367    * Reads a file trailer from the given file.
368    *
369    * @param istream the input stream with the ability to seek. Does not have to
370    *          be buffered, as only one read operation is made.
371    * @param fileSize the file size. Can be obtained using
372    *          {@link org.apache.hadoop.fs.FileSystem#getFileStatus(
373    *          org.apache.hadoop.fs.Path)}.
374    * @return the fixed file trailer read
375    * @throws IOException if failed to read from the underlying stream, or the
376    *           trailer is corrupted, or the version of the trailer is
377    *           unsupported
378    */
379   public static FixedFileTrailer readFromStream(FSDataInputStream istream,
380       long fileSize) throws IOException {
381     int bufferSize = MAX_TRAILER_SIZE;
382     long seekPoint = fileSize - bufferSize;
383     if (seekPoint < 0) {
384       // It is hard to imagine such a small HFile.
385       seekPoint = 0;
386       bufferSize = (int) fileSize;
387     }
388 
389     istream.seek(seekPoint);
390     ByteBuffer buf = ByteBuffer.allocate(bufferSize);
391     istream.readFully(buf.array(), buf.arrayOffset(),
392         buf.arrayOffset() + buf.limit());
393 
394     // Read the version from the last int of the file.
395     buf.position(buf.limit() - Bytes.SIZEOF_INT);
396     int version = buf.getInt();
397 
398     // Extract the major and minor versions.
399     int majorVersion = extractMajorVersion(version);
400     int minorVersion = extractMinorVersion(version);
401 
402     HFile.checkFormatVersion(majorVersion); // throws IAE if invalid
403 
404     int trailerSize = getTrailerSize(majorVersion);
405 
406     FixedFileTrailer fft = new FixedFileTrailer(majorVersion, minorVersion);
407     fft.deserialize(new DataInputStream(new ByteArrayInputStream(buf.array(),
408         buf.arrayOffset() + bufferSize - trailerSize, trailerSize)));
409     return fft;
410   }
411 
412   public void expectMajorVersion(int expected) {
413     if (majorVersion != expected) {
414       throw new IllegalArgumentException("Invalid HFile major version: "
415           + majorVersion 
416           + " (expected: " + expected + ")");
417     }
418   }
419 
420   public void expectMinorVersion(int expected) {
421     if (minorVersion != expected) {
422       throw new IllegalArgumentException("Invalid HFile minor version: "
423           + minorVersion + " (expected: " + expected + ")");
424     }
425   }
426 
427   public void expectAtLeastMajorVersion(int lowerBound) {
428     if (majorVersion < lowerBound) {
429       throw new IllegalArgumentException("Invalid HFile major version: "
430           + majorVersion
431           + " (expected: " + lowerBound + " or higher).");
432     }
433   }
434 
435   public long getFileInfoOffset() {
436     return fileInfoOffset;
437   }
438 
439   public void setFileInfoOffset(long fileInfoOffset) {
440     this.fileInfoOffset = fileInfoOffset;
441   }
442 
443   public long getLoadOnOpenDataOffset() {
444     return loadOnOpenDataOffset;
445   }
446 
447   public void setLoadOnOpenOffset(long loadOnOpenDataOffset) {
448     this.loadOnOpenDataOffset = loadOnOpenDataOffset;
449   }
450 
451   public int getDataIndexCount() {
452     return dataIndexCount;
453   }
454 
455   public void setDataIndexCount(int dataIndexCount) {
456     this.dataIndexCount = dataIndexCount;
457   }
458 
459   public int getMetaIndexCount() {
460     return metaIndexCount;
461   }
462 
463   public void setMetaIndexCount(int metaIndexCount) {
464     this.metaIndexCount = metaIndexCount;
465   }
466 
467   public long getTotalUncompressedBytes() {
468     return totalUncompressedBytes;
469   }
470 
471   public void setTotalUncompressedBytes(long totalUncompressedBytes) {
472     this.totalUncompressedBytes = totalUncompressedBytes;
473   }
474 
475   public long getEntryCount() {
476     return entryCount;
477   }
478 
479   public void setEntryCount(long newEntryCount) {
480     entryCount = newEntryCount;
481   }
482 
483   public Compression.Algorithm getCompressionCodec() {
484     return compressionCodec;
485   }
486 
487   public void setCompressionCodec(Compression.Algorithm compressionCodec) {
488     this.compressionCodec = compressionCodec;
489   }
490 
491   public int getNumDataIndexLevels() {
492     expectAtLeastMajorVersion(2);
493     return numDataIndexLevels;
494   }
495 
496   public void setNumDataIndexLevels(int numDataIndexLevels) {
497     expectAtLeastMajorVersion(2);
498     this.numDataIndexLevels = numDataIndexLevels;
499   }
500 
501   public long getLastDataBlockOffset() {
502     expectAtLeastMajorVersion(2);
503     return lastDataBlockOffset;
504   }
505 
506   public void setLastDataBlockOffset(long lastDataBlockOffset) {
507     expectAtLeastMajorVersion(2);
508     this.lastDataBlockOffset = lastDataBlockOffset;
509   }
510 
511   public long getFirstDataBlockOffset() {
512     expectAtLeastMajorVersion(2);
513     return firstDataBlockOffset;
514   }
515 
516   public void setFirstDataBlockOffset(long firstDataBlockOffset) {
517     expectAtLeastMajorVersion(2);
518     this.firstDataBlockOffset = firstDataBlockOffset;
519   }
520 
521   public String getComparatorClassName() {
522     return comparatorClassName;
523   }
524 
525   /**
526    * Returns the major version of this HFile format
527    */
528   public int getMajorVersion() {
529     return majorVersion;
530   }
531 
532   /**
533    * Returns the minor version of this HFile format
534    */
535   public int getMinorVersion() {
536     return minorVersion;
537   }
538 
539   public void setComparatorClass(Class<? extends KVComparator> klass) {
540     // Is the comparator instantiable?
541     try {
542       KVComparator comp = klass.newInstance();
543 
544       // HFile V2 legacy comparator class names.
545       if (KeyValue.COMPARATOR.getClass().equals(klass)) {
546         comparatorClassName = KeyValue.COMPARATOR.getLegacyKeyComparatorName();
547       } else if (KeyValue.META_COMPARATOR.getClass().equals(klass)) {
548         comparatorClassName = KeyValue.META_COMPARATOR.getLegacyKeyComparatorName();
549       } else if (KeyValue.RAW_COMPARATOR.getClass().equals(klass)) {
550         comparatorClassName = KeyValue.RAW_COMPARATOR.getLegacyKeyComparatorName();
551       } else {
552         // if the name wasn't one of the legacy names, maybe its a legit new kind of comparator.
553         comparatorClassName = klass.getName();
554       }
555 
556     } catch (Exception e) {
557       throw new RuntimeException("Comparator class " + klass.getName() +
558         " is not instantiable", e);
559     }
560 
561   }
562 
563   @SuppressWarnings("unchecked")
564   private static Class<? extends KVComparator> getComparatorClass(
565       String comparatorClassName) throws IOException {
566     try {
567       // HFile V2 legacy comparator class names.
568       if (comparatorClassName.equals(KeyValue.COMPARATOR.getLegacyKeyComparatorName())) {
569         comparatorClassName = KeyValue.COMPARATOR.getClass().getName();
570       } else if (comparatorClassName.equals(KeyValue.META_COMPARATOR.getLegacyKeyComparatorName())) {
571         comparatorClassName = KeyValue.META_COMPARATOR.getClass().getName();
572       } else if (comparatorClassName.equals(KeyValue.RAW_COMPARATOR.getLegacyKeyComparatorName())) {
573         comparatorClassName = KeyValue.RAW_COMPARATOR.getClass().getName();
574       }
575 
576       // if the name wasn't one of the legacy names, maybe its a legit new kind of comparator.
577 
578       return (Class<? extends KVComparator>)
579           Class.forName(comparatorClassName);
580     } catch (ClassNotFoundException ex) {
581       throw new IOException(ex);
582     }
583   }
584 
585   public static KVComparator createComparator(
586       String comparatorClassName) throws IOException {
587     try {
588       return getComparatorClass(comparatorClassName).newInstance();
589     } catch (InstantiationException e) {
590       throw new IOException("Comparator class " + comparatorClassName +
591         " is not instantiable", e);
592     } catch (IllegalAccessException e) {
593       throw new IOException("Comparator class " + comparatorClassName +
594         " is not instantiable", e);
595     }
596   }
597 
598   KVComparator createComparator() throws IOException {
599     expectAtLeastMajorVersion(2);
600     return createComparator(comparatorClassName);
601   }
602 
603   public long getUncompressedDataIndexSize() {
604     return uncompressedDataIndexSize;
605   }
606 
607   public void setUncompressedDataIndexSize(
608       long uncompressedDataIndexSize) {
609     expectAtLeastMajorVersion(2);
610     this.uncompressedDataIndexSize = uncompressedDataIndexSize;
611   }
612 
613   public byte[] getEncryptionKey() {
614     expectAtLeastMajorVersion(3);
615     return encryptionKey;
616   }
617 
618   public void setEncryptionKey(byte[] keyBytes) {
619     this.encryptionKey = keyBytes;
620   }
621 
622   /**
623    * Extracts the major version for a 4-byte serialized version data.
624    * The major version is the 3 least significant bytes
625    */
626   private static int extractMajorVersion(int serializedVersion) {
627     return (serializedVersion & 0x00ffffff);
628   }
629 
630   /**
631    * Extracts the minor version for a 4-byte serialized version data.
632    * The major version are the 3 the most significant bytes
633    */
634   private static int extractMinorVersion(int serializedVersion) {
635     return (serializedVersion >>> 24);
636   }
637 
638   /**
639    * Create a 4 byte serialized version number by combining the
640    * minor and major version numbers.
641    */
642   static int materializeVersion(int majorVersion, int minorVersion) {
643     return ((majorVersion & 0x00ffffff) | (minorVersion << 24));
644   }
645 }