View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.hfile;
18  
19  import java.io.ByteArrayOutputStream;
20  import java.io.DataOutputStream;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  
24  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
25  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
26  import org.apache.hadoop.hbase.io.hfile.HFileBlock;
27  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
28  import org.apache.hadoop.hbase.util.Bytes;
29  import org.apache.hadoop.hbase.util.Pair;
30  
31  import com.google.common.base.Preconditions;
32  
33  /**
34   * Do different kinds of data block encoding according to column family
35   * options.
36   */
37  public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
38    private final DataBlockEncoding onDisk;
39    private final DataBlockEncoding inCache;
40  
41    public HFileDataBlockEncoderImpl(DataBlockEncoding encoding) {
42      this(encoding, encoding);
43    }
44  
45    /**
46     * Do data block encoding with specified options.
47     * @param onDisk What kind of data block encoding will be used before writing
48     *          HFileBlock to disk. This must be either the same as inCache or
49     *          {@link DataBlockEncoding#NONE}.
50     * @param inCache What kind of data block encoding will be used in block
51     *          cache.
52     */
53    public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk,
54        DataBlockEncoding inCache) {
55      this.onDisk = onDisk != null ?
56          onDisk : DataBlockEncoding.NONE;
57      this.inCache = inCache != null ?
58          inCache : DataBlockEncoding.NONE;
59      Preconditions.checkArgument(onDisk == DataBlockEncoding.NONE ||
60          onDisk == inCache, "on-disk encoding (" + onDisk + ") must be " +
61          "either the same as in-cache encoding (" + inCache + ") or " +
62          DataBlockEncoding.NONE);
63    }
64  
65    public static HFileDataBlockEncoder createFromFileInfo(
66        FileInfo fileInfo, DataBlockEncoding preferredEncodingInCache)
67        throws IOException {
68      
69      boolean hasPreferredCacheEncoding = preferredEncodingInCache != null
70          && preferredEncodingInCache != DataBlockEncoding.NONE;
71  
72      byte[] dataBlockEncodingType = fileInfo.get(DATA_BLOCK_ENCODING);
73      if (dataBlockEncodingType == null && !hasPreferredCacheEncoding) {
74        return NoOpDataBlockEncoder.INSTANCE;
75      }
76  
77      DataBlockEncoding onDisk;
78      if (dataBlockEncodingType == null) {
79        onDisk = DataBlockEncoding.NONE;
80      }else {
81        String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType);
82        try {
83          onDisk = DataBlockEncoding.valueOf(dataBlockEncodingStr);
84        } catch (IllegalArgumentException ex) {
85          throw new IOException("Invalid data block encoding type in file info: "
86              + dataBlockEncodingStr, ex);
87        }
88      }
89  
90      DataBlockEncoding inCache;
91      if (onDisk == DataBlockEncoding.NONE) {
92        // This is an "in-cache-only" encoding or fully-unencoded scenario.
93        // Either way, we use the given encoding (possibly NONE) specified by
94        // the column family in cache.
95        inCache = preferredEncodingInCache;
96      } else {
97        // Leave blocks in cache encoded the same way as they are on disk.
98        // If we switch encoding type for the CF or the in-cache-only encoding
99        // flag, old files will keep their encoding both on disk and in cache,
100       // but new files will be generated with the new encoding.
101       inCache = onDisk;
102     }
103     return new HFileDataBlockEncoderImpl(onDisk, inCache);
104   }
105 
106   @Override
107   public void saveMetadata(HFile.Writer writer) throws IOException {
108     writer.appendFileInfo(DATA_BLOCK_ENCODING, onDisk.getNameInBytes());
109   }
110 
111   @Override
112   public DataBlockEncoding getEncodingOnDisk() {
113     return onDisk;
114   }
115 
116   @Override
117   public DataBlockEncoding getEncodingInCache() {
118     return inCache;
119   }
120 
121   @Override
122   public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
123     if (!useEncodedScanner(isCompaction)) {
124       return DataBlockEncoding.NONE;
125     }
126     return inCache;
127   }
128 
129   @Override
130   public HFileBlock diskToCacheFormat(HFileBlock block, boolean isCompaction) {
131     if (block.getBlockType() == BlockType.DATA) {
132       if (!useEncodedScanner(isCompaction)) {
133         // Unencoded block, and we don't want to encode in cache.
134         return block;
135       }
136       // Encode the unencoded block with the in-cache encoding.
137       return encodeDataBlock(block, inCache, block.doesIncludeMemstoreTS());
138     }
139 
140     if (block.getBlockType() == BlockType.ENCODED_DATA) {
141       if (block.getDataBlockEncodingId() == onDisk.getId()) {
142         // The block is already in the desired in-cache encoding.
143         return block;
144       }
145       // We don't want to re-encode a block in a different encoding. The HFile
146       // reader should have been instantiated in such a way that we would not
147       // have to do this.
148       throw new AssertionError("Expected on-disk data block encoding " +
149           onDisk + ", got " + block.getDataBlockEncoding());
150     }
151     return block;
152   }
153 
154   /**
155    * Precondition: a non-encoded buffer.
156    * Postcondition: on-disk encoding.
157    */
158   @Override
159   public Pair<ByteBuffer, BlockType> beforeWriteToDisk(ByteBuffer in,
160       boolean includesMemstoreTS, byte[] dummyHeader) {
161     if (onDisk == DataBlockEncoding.NONE) {
162       // there is no need to encode the block before writing it to disk
163       return new Pair<ByteBuffer, BlockType>(in, BlockType.DATA);
164     }
165 
166     ByteBuffer encodedBuffer = encodeBufferToHFileBlockBuffer(in,
167         onDisk, includesMemstoreTS, dummyHeader);
168     return new Pair<ByteBuffer, BlockType>(encodedBuffer,
169         BlockType.ENCODED_DATA);
170   }
171 
172   @Override
173   public boolean useEncodedScanner(boolean isCompaction) {
174     if (isCompaction && onDisk == DataBlockEncoding.NONE) {
175       return false;
176     }
177     return inCache != DataBlockEncoding.NONE;
178   }
179 
180   private ByteBuffer encodeBufferToHFileBlockBuffer(ByteBuffer in,
181       DataBlockEncoding algo, boolean includesMemstoreTS,
182       byte[] dummyHeader) {
183     ByteArrayOutputStream encodedStream = new ByteArrayOutputStream();
184     DataOutputStream dataOut = new DataOutputStream(encodedStream);
185     DataBlockEncoder encoder = algo.getEncoder();
186     try {
187       encodedStream.write(dummyHeader);
188       algo.writeIdInBytes(dataOut);
189       encoder.compressKeyValues(dataOut, in,
190           includesMemstoreTS);
191     } catch (IOException e) {
192       throw new RuntimeException(String.format("Bug in data block encoder " +
193           "'%s', it probably requested too much data", algo.toString()), e);
194     }
195     return ByteBuffer.wrap(encodedStream.toByteArray());
196   }
197 
198   private HFileBlock encodeDataBlock(HFileBlock block,
199       DataBlockEncoding algo, boolean includesMemstoreTS) {
200     ByteBuffer compressedBuffer = encodeBufferToHFileBlockBuffer(
201         block.getBufferWithoutHeader(), algo, includesMemstoreTS,
202         block.getDummyHeaderForVersion());
203     int sizeWithoutHeader = compressedBuffer.limit() - block.headerSize();
204     HFileBlock encodedBlock = new HFileBlock(BlockType.ENCODED_DATA,
205         block.getOnDiskSizeWithoutHeader(),
206         sizeWithoutHeader, block.getPrevBlockOffset(),
207         compressedBuffer, HFileBlock.FILL_HEADER, block.getOffset(),
208         includesMemstoreTS, block.getMinorVersion(),
209         block.getBytesPerChecksum(), block.getChecksumType(),
210         block.getOnDiskDataSizeWithHeader());
211     block.passSchemaMetricsTo(encodedBlock);
212     return encodedBlock;
213   }
214 
215   @Override
216   public String toString() {
217     return getClass().getSimpleName() + "(onDisk=" + onDisk + ", inCache=" +
218         inCache + ")";
219   }
220 
221 }