1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.io.hfile;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.ByteArrayOutputStream;
23  import java.io.DataInput;
24  import java.io.DataInputStream;
25  import java.io.DataOutputStream;
26  import java.io.IOException;
27  import java.nio.ByteBuffer;
28  
29  import org.apache.hadoop.hbase.util.ByteStringer;
30  import org.apache.hadoop.hbase.classification.InterfaceAudience;
31  import org.apache.hadoop.fs.FSDataInputStream;
32  import org.apache.hadoop.hbase.KeyValue;
33  import org.apache.hadoop.hbase.KeyValue.KVComparator;
34  import org.apache.hadoop.hbase.io.compress.Compression;
35  import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
36  import org.apache.hadoop.hbase.util.Bytes;
37  
38  
39  /**
40   * The {@link HFile} has a fixed trailer which contains offsets to other
41   * variable parts of the file. It also includes basic metadata on this file.
42   * The trailer size is fixed within a given {@link HFile} format version
43   * only, but we always store the version number as the last four-byte
44   * integer of the file. The version number itself is split into two
45   * portions, a major version and a minor version.
46   * The last three bytes of the file hold the major
47   * version and the single preceding byte holds the minor version. The major
48   * version determines which readers/writers to use to read/write an HFile,
49   * while the minor version distinguishes smaller changes in the HFile format
50   * that do not need a new reader/writer type.
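     *
     * <p>A rough sketch of the version encoding, mirroring the version helpers
     * at the bottom of this class: the serialized version integer packs the
     * minor version into its most significant byte and the major version into
     * its three least significant bytes.
     * <pre>{@code
     *   int serialized = (majorVersion & 0x00ffffff) | (minorVersion << 24);
     *   int major = serialized & 0x00ffffff;
     *   int minor = serialized >>> 24;
     * }</pre>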
51   */
52  @InterfaceAudience.Private
53  public class FixedFileTrailer {
54  
55    /**
56     * We store the comparator class name as a fixed-length field in the trailer.
57     */
58    private static final int MAX_COMPARATOR_NAME_LENGTH = 128;
59  
60    /**
61     * Offset to the fileinfo data, a small block of vitals. Necessary in v1 but
62     * only potentially useful for pretty-printing in v2.
63     */
64    private long fileInfoOffset;
65  
66    /**
67     * In version 1, the offset to the data block index. Starting from version 2,
68     * the meaning of this field is the offset to the section of the file that
69     * should be loaded at the time the file is being opened, and as of the time
70     * of writing, this happens to be the offset of the file info section.
71     */
72    private long loadOnOpenDataOffset;
73  
74    /** The number of entries in the root data index. */
75    private int dataIndexCount;
76  
77    /** Total uncompressed size of all blocks of the data index */
78    private long uncompressedDataIndexSize;
79  
80    /** The number of entries in the meta index */
81    private int metaIndexCount;
82  
83    /** The total uncompressed size of keys/values stored in the file. */
84    private long totalUncompressedBytes;
85  
86    /**
87     * The number of key/value pairs in the file. This field was int in version 1,
88     * but is now long.
89     */
90    private long entryCount;
91  
92    /** The compression codec used for all blocks. */
93    private Compression.Algorithm compressionCodec = Compression.Algorithm.NONE;
94  
95    /**
96     * The number of levels in the potentially multi-level data index. Used from
97     * version 2 onwards.
98     */
99    private int numDataIndexLevels;
100 
101   /** The offset of the first data block. */
102   private long firstDataBlockOffset;
103 
104   /**
105    * It is guaranteed that no key/value data blocks start after this offset in
106    * the file.
107    */
108   private long lastDataBlockOffset;
109 
110   /** Raw key comparator class name in version 3 */
111   // We could write the actual class name from 2.0 onwards and handle BC
112   private String comparatorClassName = KeyValue.COMPARATOR.getClass().getName();
113 
114   /** The encryption key */
115   private byte[] encryptionKey;
116 
117   /** The {@link HFile} format major version. */
118   private final int majorVersion;
119 
120   /** The {@link HFile} format minor version. */
121   private final int minorVersion;
122 
123   FixedFileTrailer(int majorVersion, int minorVersion) {
124     this.majorVersion = majorVersion;
125     this.minorVersion = minorVersion;
126     HFile.checkFormatVersion(majorVersion);
127   }
128 
129   private static int[] computeTrailerSizeByVersion() {
130     int[] versionToSize = new int[HFile.MAX_FORMAT_VERSION + 1];
131     // We support only two major versions now, i.e. V2 and V3.
132     versionToSize[2] = 212;
133     for (int version = 3; version <= HFile.MAX_FORMAT_VERSION; version++) {
134       // The max trailer size for V3 and above is fixed at 4 KB to leave room
135       // for future enhancements, if any; this holds as long as the trailer
136       // never needs to exceed 4 KB.
137       versionToSize[version] = 1024 * 4;
138     }
139     return versionToSize;
140   }
141 
142   private static int getMaxTrailerSize() {
143     int maxSize = 0;
144     for (int version = HFile.MIN_FORMAT_VERSION;
145          version <= HFile.MAX_FORMAT_VERSION;
146          ++version)
147       maxSize = Math.max(getTrailerSize(version), maxSize);
148     return maxSize;
149   }
150 
151   private static final int[] TRAILER_SIZE = computeTrailerSizeByVersion();
152   private static final int MAX_TRAILER_SIZE = getMaxTrailerSize();
153 
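      // Size of the trailer parts that are not protobuf-encoded: the trailer
      // block magic plus the trailing 4-byte version integer.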
154   private static final int NOT_PB_SIZE = BlockType.MAGIC_LENGTH + Bytes.SIZEOF_INT;
155 
156   static int getTrailerSize(int version) {
157     return TRAILER_SIZE[version];
158   }
159 
160   public int getTrailerSize() {
161     return getTrailerSize(majorVersion);
162   }
163 
164   /**
165    * Write the trailer to a data stream: the trailer block magic, the
166    * protobuf-encoded trailer fields (padded to the fixed trailer size), and
167    * finally the combined major/minor version integer.
168    *
169    * @param outputStream the stream to write the trailer to
170    * @throws IOException if writing to the stream fails
171    */
172   void serialize(DataOutputStream outputStream) throws IOException {
173     HFile.checkFormatVersion(majorVersion);
174 
175     ByteArrayOutputStream baos = new ByteArrayOutputStream();
176     DataOutputStream baosDos = new DataOutputStream(baos);
177 
178     BlockType.TRAILER.write(baosDos);
179     serializeAsPB(baosDos);
180 
181     // The last 4 bytes of the file encode the major and minor version universally
182     baosDos.writeInt(materializeVersion(majorVersion, minorVersion));
183 
184     baos.writeTo(outputStream);
185   }
186 
187   /**
188    * Write the trailer fields as a delimited protobuf message, padded to the fixed trailer size.
189    * @param output the stream to write to
190    * @throws IOException if the protobuf encoding exceeds the fixed trailer size or the write fails
191    */
192   void serializeAsPB(DataOutputStream output) throws IOException {
193     ByteArrayOutputStream baos = new ByteArrayOutputStream();
194     HFileProtos.FileTrailerProto.Builder builder = HFileProtos.FileTrailerProto.newBuilder()
195       .setFileInfoOffset(fileInfoOffset)
196       .setLoadOnOpenDataOffset(loadOnOpenDataOffset)
197       .setUncompressedDataIndexSize(uncompressedDataIndexSize)
198       .setTotalUncompressedBytes(totalUncompressedBytes)
199       .setDataIndexCount(dataIndexCount)
200       .setMetaIndexCount(metaIndexCount)
201       .setEntryCount(entryCount)
202       .setNumDataIndexLevels(numDataIndexLevels)
203       .setFirstDataBlockOffset(firstDataBlockOffset)
204       .setLastDataBlockOffset(lastDataBlockOffset)
205       // TODO this is a classname encoded into an HFile's trailer. We are going to need to have
206       // some compat code here.
207       .setComparatorClassName(comparatorClassName)
208       .setCompressionCodec(compressionCodec.ordinal());
209     if (encryptionKey != null) {
210       builder.setEncryptionKey(ByteStringer.wrap(encryptionKey));
211     }
212     // We need this extra copy unfortunately to determine the final size of the
213     // delimited output, see use of baos.size() below.
214     builder.build().writeDelimitedTo(baos);
215     baos.writeTo(output);
216     // Pad to make up the difference between the variable PB encoding length and
217     // the length when encoded as a writable under earlier V2 formats. Failing to
218     // pad properly, or a PB encoding that is too big, would mean the trailer won't
219     // be read back in properly by HFile.
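        // For example, with the 4 KB fixed trailer used by v3 and above, the delimited
        // protobuf payload may occupy at most getTrailerSize() - NOT_PB_SIZE bytes; the
        // remainder below is written out as zero padding.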
220     int padding = getTrailerSize() - NOT_PB_SIZE - baos.size();
221     if (padding < 0) {
222       throw new IOException("Pbuf encoding size exceeded fixed trailer size limit");
223     }
224     for (int i = 0; i < padding; i++) {
225       output.write(0);
226     }
227   }
228 
229   /**
230    * Deserialize the fixed file trailer from the given stream. The version needs
231    * to already be specified. Make sure this is consistent with
232    * {@link #serialize(DataOutputStream)}.
233    *
234    * @param inputStream the stream to read the trailer from
235    * @throws IOException if reading fails or the trailer contents are invalid
236    */
237   void deserialize(DataInputStream inputStream) throws IOException {
238     HFile.checkFormatVersion(majorVersion);
239 
240     BlockType.TRAILER.readAndCheck(inputStream);
241 
242     if (majorVersion > 2
243         || (majorVersion == 2 && minorVersion >= HFileReaderImpl.PBUF_TRAILER_MINOR_VERSION)) {
244       deserializeFromPB(inputStream);
245     } else {
246       deserializeFromWritable(inputStream);
247     }
248 
249     // The last 4 bytes of the file encode the major and minor version universally
250     int version = inputStream.readInt();
251     expectMajorVersion(extractMajorVersion(version));
252     expectMinorVersion(extractMinorVersion(version));
253   }
254 
255   /**
256    * Deserialize the file trailer as protobuf
257    * @param inputStream the stream to read the protobuf-encoded trailer from
258    * @throws IOException if reading or parsing fails
259    */
260   void deserializeFromPB(DataInputStream inputStream) throws IOException {
261     // read PB and skip padding
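        // available() is sampled before and after parsing to measure how many bytes the
        // delimited protobuf consumed; this relies on the stream being the in-memory
        // ByteArrayInputStream that readFromStream passes in, where available() is exact.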
262     int start = inputStream.available();
263     HFileProtos.FileTrailerProto trailerProto =
264         HFileProtos.FileTrailerProto.PARSER.parseDelimitedFrom(inputStream);
265     int size = start - inputStream.available();
266     inputStream.skip(getTrailerSize() - NOT_PB_SIZE - size);
267 
268     // process the PB
269     if (trailerProto.hasFileInfoOffset()) {
270       fileInfoOffset = trailerProto.getFileInfoOffset();
271     }
272     if (trailerProto.hasLoadOnOpenDataOffset()) {
273       loadOnOpenDataOffset = trailerProto.getLoadOnOpenDataOffset();
274     }
275     if (trailerProto.hasUncompressedDataIndexSize()) {
276       uncompressedDataIndexSize = trailerProto.getUncompressedDataIndexSize();
277     }
278     if (trailerProto.hasTotalUncompressedBytes()) {
279       totalUncompressedBytes = trailerProto.getTotalUncompressedBytes();
280     }
281     if (trailerProto.hasDataIndexCount()) {
282       dataIndexCount = trailerProto.getDataIndexCount();
283     }
284     if (trailerProto.hasMetaIndexCount()) {
285       metaIndexCount = trailerProto.getMetaIndexCount();
286     }
287     if (trailerProto.hasEntryCount()) {
288       entryCount = trailerProto.getEntryCount();
289     }
290     if (trailerProto.hasNumDataIndexLevels()) {
291       numDataIndexLevels = trailerProto.getNumDataIndexLevels();
292     }
293     if (trailerProto.hasFirstDataBlockOffset()) {
294       firstDataBlockOffset = trailerProto.getFirstDataBlockOffset();
295     }
296     if (trailerProto.hasLastDataBlockOffset()) {
297       lastDataBlockOffset = trailerProto.getLastDataBlockOffset();
298     }
299     if (trailerProto.hasComparatorClassName()) {
300       // TODO this is a classname encoded into an HFile's trailer. We are going to need to have
301       // some compat code here.
302       setComparatorClass(getComparatorClass(trailerProto.getComparatorClassName()));
303     }
304     if (trailerProto.hasCompressionCodec()) {
305       compressionCodec = Compression.Algorithm.values()[trailerProto.getCompressionCodec()];
306     } else {
307       compressionCodec = Compression.Algorithm.NONE;
308     }
309     if (trailerProto.hasEncryptionKey()) {
310       encryptionKey = trailerProto.getEncryptionKey().toByteArray();
311     }
312   }
313 
314   /**
315    * Deserialize the file trailer as writable data
316    * @param input the data input to read the trailer fields from
317    * @throws IOException if reading fails
318    */
319   void deserializeFromWritable(DataInput input) throws IOException {
320     fileInfoOffset = input.readLong();
321     loadOnOpenDataOffset = input.readLong();
322     dataIndexCount = input.readInt();
323     uncompressedDataIndexSize = input.readLong();
324     metaIndexCount = input.readInt();
325 
326     totalUncompressedBytes = input.readLong();
327     entryCount = input.readLong();
328     compressionCodec = Compression.Algorithm.values()[input.readInt()];
329     numDataIndexLevels = input.readInt();
330     firstDataBlockOffset = input.readLong();
331     lastDataBlockOffset = input.readLong();
332     // TODO this is a classname encoded into an HFile's trailer. We are going to need to have
333     // some compat code here.
334     setComparatorClass(getComparatorClass(Bytes.readStringFixedSize(input,
335         MAX_COMPARATOR_NAME_LENGTH)));
336   }
337   
338   private void append(StringBuilder sb, String s) {
339     if (sb.length() > 0)
340       sb.append(", ");
341     sb.append(s);
342   }
343 
344   @Override
345   public String toString() {
346     StringBuilder sb = new StringBuilder();
347     append(sb, "fileInfoOffset=" + fileInfoOffset);
348     append(sb, "loadOnOpenDataOffset=" + loadOnOpenDataOffset);
349     append(sb, "dataIndexCount=" + dataIndexCount);
350     append(sb, "metaIndexCount=" + metaIndexCount);
351     append(sb, "totalUncompressedBytes=" + totalUncompressedBytes);
352     append(sb, "entryCount=" + entryCount);
353     append(sb, "compressionCodec=" + compressionCodec);
354     append(sb, "uncompressedDataIndexSize=" + uncompressedDataIndexSize);
355     append(sb, "numDataIndexLevels=" + numDataIndexLevels);
356     append(sb, "firstDataBlockOffset=" + firstDataBlockOffset);
357     append(sb, "lastDataBlockOffset=" + lastDataBlockOffset);
358     append(sb, "comparatorClassName=" + comparatorClassName);
359     if (majorVersion >= 3) {
360       append(sb, "encryptionKey=" + (encryptionKey != null ? "PRESENT" : "NONE"));
361     }
362     append(sb, "majorVersion=" + majorVersion);
363     append(sb, "minorVersion=" + minorVersion);
364 
365     return sb.toString();
366   }
367 
368   /**
369    * Reads a file trailer from the given file.
370    *
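       * <p>A minimal usage sketch (assuming {@code fs} is an already-open
       * {@link org.apache.hadoop.fs.FileSystem} and {@code path} points at an HFile):
       * <pre>{@code
       *   long fileSize = fs.getFileStatus(path).getLen();
       *   try (FSDataInputStream in = fs.open(path)) {
       *     FixedFileTrailer trailer = FixedFileTrailer.readFromStream(in, fileSize);
       *     System.out.println(trailer);
       *   }
       * }</pre>
       *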
371    * @param istream the input stream with the ability to seek. Does not have to
372    *          be buffered, as only one read operation is made.
373    * @param fileSize the file size. Can be obtained using
374    *          {@link org.apache.hadoop.fs.FileSystem#getFileStatus(
375    *          org.apache.hadoop.fs.Path)}.
376    * @return the fixed file trailer read
377    * @throws IOException if reading from the underlying stream fails, the
378    *           trailer is corrupted, or the version of the trailer is
379    *           unsupported
380    */
381   public static FixedFileTrailer readFromStream(FSDataInputStream istream,
382       long fileSize) throws IOException {
383     int bufferSize = MAX_TRAILER_SIZE;
384     long seekPoint = fileSize - bufferSize;
385     if (seekPoint < 0) {
386       // It is hard to imagine such a small HFile.
387       seekPoint = 0;
388       bufferSize = (int) fileSize;
389     }
390 
391     istream.seek(seekPoint);
392     ByteBuffer buf = ByteBuffer.allocate(bufferSize);
393     istream.readFully(buf.array(), buf.arrayOffset(),
394         buf.arrayOffset() + buf.limit());
395 
396     // Read the version from the last int of the file.
397     buf.position(buf.limit() - Bytes.SIZEOF_INT);
398     int version = buf.getInt();
399 
400     // Extract the major and minor versions.
401     int majorVersion = extractMajorVersion(version);
402     int minorVersion = extractMinorVersion(version);
403 
404     HFile.checkFormatVersion(majorVersion); // throws IAE if invalid
405 
406     int trailerSize = getTrailerSize(majorVersion);
407 
408     FixedFileTrailer fft = new FixedFileTrailer(majorVersion, minorVersion);
409     fft.deserialize(new DataInputStream(new ByteArrayInputStream(buf.array(),
410         buf.arrayOffset() + bufferSize - trailerSize, trailerSize)));
411     return fft;
412   }
413 
414   public void expectMajorVersion(int expected) {
415     if (majorVersion != expected) {
416       throw new IllegalArgumentException("Invalid HFile major version: "
417           + majorVersion 
418           + " (expected: " + expected + ")");
419     }
420   }
421 
422   public void expectMinorVersion(int expected) {
423     if (minorVersion != expected) {
424       throw new IllegalArgumentException("Invalid HFile minor version: "
425           + minorVersion + " (expected: " + expected + ")");
426     }
427   }
428 
429   public void expectAtLeastMajorVersion(int lowerBound) {
430     if (majorVersion < lowerBound) {
431       throw new IllegalArgumentException("Invalid HFile major version: "
432           + majorVersion
433           + " (expected: " + lowerBound + " or higher).");
434     }
435   }
436 
437   public long getFileInfoOffset() {
438     return fileInfoOffset;
439   }
440 
441   public void setFileInfoOffset(long fileInfoOffset) {
442     this.fileInfoOffset = fileInfoOffset;
443   }
444 
445   public long getLoadOnOpenDataOffset() {
446     return loadOnOpenDataOffset;
447   }
448 
449   public void setLoadOnOpenOffset(long loadOnOpenDataOffset) {
450     this.loadOnOpenDataOffset = loadOnOpenDataOffset;
451   }
452 
453   public int getDataIndexCount() {
454     return dataIndexCount;
455   }
456 
457   public void setDataIndexCount(int dataIndexCount) {
458     this.dataIndexCount = dataIndexCount;
459   }
460 
461   public int getMetaIndexCount() {
462     return metaIndexCount;
463   }
464 
465   public void setMetaIndexCount(int metaIndexCount) {
466     this.metaIndexCount = metaIndexCount;
467   }
468 
469   public long getTotalUncompressedBytes() {
470     return totalUncompressedBytes;
471   }
472 
473   public void setTotalUncompressedBytes(long totalUncompressedBytes) {
474     this.totalUncompressedBytes = totalUncompressedBytes;
475   }
476 
477   public long getEntryCount() {
478     return entryCount;
479   }
480 
481   public void setEntryCount(long newEntryCount) {
482     entryCount = newEntryCount;
483   }
484 
485   public Compression.Algorithm getCompressionCodec() {
486     return compressionCodec;
487   }
488 
489   public void setCompressionCodec(Compression.Algorithm compressionCodec) {
490     this.compressionCodec = compressionCodec;
491   }
492 
493   public int getNumDataIndexLevels() {
494     expectAtLeastMajorVersion(2);
495     return numDataIndexLevels;
496   }
497 
498   public void setNumDataIndexLevels(int numDataIndexLevels) {
499     expectAtLeastMajorVersion(2);
500     this.numDataIndexLevels = numDataIndexLevels;
501   }
502 
503   public long getLastDataBlockOffset() {
504     expectAtLeastMajorVersion(2);
505     return lastDataBlockOffset;
506   }
507 
508   public void setLastDataBlockOffset(long lastDataBlockOffset) {
509     expectAtLeastMajorVersion(2);
510     this.lastDataBlockOffset = lastDataBlockOffset;
511   }
512 
513   public long getFirstDataBlockOffset() {
514     expectAtLeastMajorVersion(2);
515     return firstDataBlockOffset;
516   }
517 
518   public void setFirstDataBlockOffset(long firstDataBlockOffset) {
519     expectAtLeastMajorVersion(2);
520     this.firstDataBlockOffset = firstDataBlockOffset;
521   }
522 
523   public String getComparatorClassName() {
524     return comparatorClassName;
525   }
526 
527   /**
528    * Returns the major version of this HFile format
529    */
530   public int getMajorVersion() {
531     return majorVersion;
532   }
533 
534   /**
535    * Returns the minor version of this HFile format
536    */
537   public int getMinorVersion() {
538     return minorVersion;
539   }
540 
541   public void setComparatorClass(Class<? extends KVComparator> klass) {
542     // Is the comparator instantiable?
543     try {
544       // If null, it should be the Bytes.BYTES_RAWCOMPARATOR
545       if (klass != null) {
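            // Instantiate once just to fail fast if the comparator cannot actually be
            // constructed; the instance itself is not kept.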
546         KVComparator comp = klass.newInstance();
547         // If the name wasn't one of the legacy names, maybe it's a legitimate new
548         // kind of comparator.
549         comparatorClassName = klass.getName();
550       }
551 
552     } catch (Exception e) {
553       throw new RuntimeException("Comparator class " + klass.getName() + " is not instantiable", e);
554     }
555   }
556 
557   @SuppressWarnings("unchecked")
558   private static Class<? extends KVComparator> getComparatorClass(
559       String comparatorClassName) throws IOException {
560     try {
561       // HFile V2 legacy comparator class names.
562       if (comparatorClassName.equals(KeyValue.COMPARATOR.getLegacyKeyComparatorName())) {
563         comparatorClassName = KeyValue.COMPARATOR.getClass().getName();
564       } else if (comparatorClassName.equals(KeyValue.META_COMPARATOR.getLegacyKeyComparatorName())) {
565         comparatorClassName = KeyValue.META_COMPARATOR.getClass().getName();
566       } else if (comparatorClassName.equals(KeyValue.RAW_COMPARATOR.getLegacyKeyComparatorName())) {
567         return null;
568       }
569 
570       // If the name wasn't one of the legacy names, maybe it's a legitimate new kind of comparator.
571       if (comparatorClassName.equals(KeyValue.RAW_COMPARATOR.getClass().getName())) {
572         // Return null for Bytes.BYTES_RAWCOMPARATOR
573         return null;
574       } else {
575         return (Class<? extends KVComparator>) Class.forName(comparatorClassName);
576       }
577     } catch (ClassNotFoundException ex) {
578       throw new IOException(ex);
579     }
580   }
581 
582   public static KVComparator createComparator(
583       String comparatorClassName) throws IOException {
584     try {
585       Class<? extends KVComparator> comparatorClass = getComparatorClass(comparatorClassName);
586       return comparatorClass != null ? comparatorClass.newInstance() : null;
587     } catch (InstantiationException e) {
588       throw new IOException("Comparator class " + comparatorClassName +
589         " is not instantiable", e);
590     } catch (IllegalAccessException e) {
591       throw new IOException("Comparator class " + comparatorClassName +
592         " is not instantiable", e);
593     }
594   }
595 
596   KVComparator createComparator() throws IOException {
597     expectAtLeastMajorVersion(2);
598     return createComparator(comparatorClassName);
599   }
600 
601   public long getUncompressedDataIndexSize() {
602     return uncompressedDataIndexSize;
603   }
604 
605   public void setUncompressedDataIndexSize(
606       long uncompressedDataIndexSize) {
607     expectAtLeastMajorVersion(2);
608     this.uncompressedDataIndexSize = uncompressedDataIndexSize;
609   }
610 
611   public byte[] getEncryptionKey() {
612     // This is a v3 feature but if reading a v2 file the encryptionKey will just be null, which
613     // is fine for this feature.
614     expectAtLeastMajorVersion(2);
615     return encryptionKey;
616   }
617 
618   public void setEncryptionKey(byte[] keyBytes) {
619     this.encryptionKey = keyBytes;
620   }
621 
622   /**
623    * Extracts the major version from a 4-byte serialized version.
624    * The major version is stored in the 3 least significant bytes.
625    */
626   private static int extractMajorVersion(int serializedVersion) {
627     return (serializedVersion & 0x00ffffff);
628   }
629 
630   /**
631    * Extracts the minor version from a 4-byte serialized version.
632    * The minor version is stored in the most significant byte.
633    */
634   private static int extractMinorVersion(int serializedVersion) {
635     return (serializedVersion >>> 24);
636   }
637 
638   /**
639    * Create a 4 byte serialized version number by combining the
640    * minor and major version numbers.
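      * For example, major version 3 with minor version 2 is serialized as
      * {@code 0x02000003}; the extract methods above recover 3 and 2 from it.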
641    */
642   static int materializeVersion(int majorVersion, int minorVersion) {
643     return ((majorVersion & 0x00ffffff) | (minorVersion << 24));
644   }
645 }