/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.io.encoding;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Iterator;

import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.io.compress.Compressor;

/**
 * Encapsulates a data block compressed using a particular encoding algorithm.
 * Useful for testing and benchmarking.
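 * <p>
 * A minimal usage sketch (the concrete {@link DataBlockEncoder} instance and
 * the KeyValue contents below are illustrative assumptions, not mandated by
 * this class; {@code Bytes} is {@code org.apache.hadoop.hbase.util.Bytes}):
 * <pre>
 * DataBlockEncoder encoder = ...; // e.g. a prefix or diff delta encoder
 * EncodedDataBlock block = new EncodedDataBlock(encoder, false);
 * block.addKv(new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("f"),
 *     Bytes.toBytes("q"), Bytes.toBytes("value")));
 * int encodedSize = block.getSize(); // bytes needed for the encoded form
 * </pre>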
 */
public class EncodedDataBlock {
  private static final int BUFFER_SIZE = 4 * 1024;
  protected DataBlockEncoder dataBlockEncoder;
  ByteArrayOutputStream uncompressedOutputStream;
  ByteBuffer uncompressedBuffer;
  // Cached result of doCompressData(); invalidated whenever a KeyValue is added.
  private byte[] cacheCompressData;
  private ByteArrayOutputStream compressedStream = new ByteArrayOutputStream();
  private boolean includesMemstoreTS;

  /**
   * Create a buffer which will be encoded using dataBlockEncoder.
   * @param dataBlockEncoder Algorithm used for compression.
   * @param includesMemstoreTS Whether each KeyValue is followed by a memstore
   *          timestamp.
   */
  public EncodedDataBlock(DataBlockEncoder dataBlockEncoder,
      boolean includesMemstoreTS) {
    this.dataBlockEncoder = dataBlockEncoder;
    this.includesMemstoreTS = includesMemstoreTS;
    uncompressedOutputStream = new ByteArrayOutputStream(BUFFER_SIZE);
  }

  /**
   * Add a KeyValue to the buffer; it will be compressed lazily, on demand.
   * @param kv Item to be added.
   */
  public void addKv(KeyValue kv) {
    // Any previously cached compressed form is now stale.
    cacheCompressData = null;
    uncompressedOutputStream.write(
        kv.getBuffer(), kv.getOffset(), kv.getLength());
  }

  /**
   * Provides an iterator over the KeyValues decoded back from the compressed
   * (encoded) data.
   * @return Forward sequential iterator.
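   * <p>
   * A sketch of iterating over the decoded KeyValues (assumes some KeyValues
   * were first added via {@link #addKv}):
   * <pre>
   * Iterator&lt;KeyValue&gt; it = block.getIterator();
   * while (it.hasNext()) {
   *   KeyValue kv = it.next(); // decoded back from the compressed block
   * }
   * </pre>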
   */
  public Iterator<KeyValue> getIterator() {
    final int uncompressedSize = uncompressedOutputStream.size();
    final ByteArrayInputStream bais = new ByteArrayInputStream(
        getCompressedData());
    final DataInputStream dis = new DataInputStream(bais);

    return new Iterator<KeyValue>() {
      private ByteBuffer decompressedData = null;

      @Override
      public boolean hasNext() {
        if (decompressedData == null) {
          return uncompressedSize > 0;
        }
        return decompressedData.hasRemaining();
      }

      @Override
      public KeyValue next() {
        if (decompressedData == null) {
          try {
            decompressedData = dataBlockEncoder.uncompressKeyValues(
                dis, includesMemstoreTS);
          } catch (IOException e) {
            throw new RuntimeException("Problem with data block encoder, " +
                "most likely it requested more bytes than are available.", e);
          }
          decompressedData.rewind();
        }

        int offset = decompressedData.position();
        KeyValue kv = new KeyValue(decompressedData.array(), offset);
        decompressedData.position(offset + kv.getLength());

        return kv;
      }

      @Override
      public void remove() {
        throw new NotImplementedException("remove() is not supported!");
      }

      @Override
      public String toString() {
        return "Iterator of: " + dataBlockEncoder.getClass().getName();
      }

    };
  }

  /**
   * Find the size of the minimal buffer that could store the compressed data.
   * @return Size in bytes of the compressed data.
   */
  public int getSize() {
    return getCompressedData().length;
  }

  /**
   * Find the size of the data after it has been compressed with the given
   * algorithm.
   * @param compressor Algorithm used for compression.
   * @param buffer Array to be compressed.
   * @param offset Offset to the beginning of the data.
   * @param length Length of the data to be compressed.
   * @return Size of the compressed data in bytes.
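   * <p>
   * A sketch, assuming a {@code Compressor} borrowed from Hadoop's
   * {@code CodecPool} ({@code codec} and {@code data} are illustrative
   * assumptions):
   * <pre>
   * Compressor compressor = CodecPool.getCompressor(codec);
   * try {
   *   int size = EncodedDataBlock.checkCompressedSize(
   *       compressor, data, 0, data.length);
   * } finally {
   *   CodecPool.returnCompressor(compressor);
   * }
   * </pre>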
   */
  public static int checkCompressedSize(Compressor compressor, byte[] buffer,
      int offset, int length) {
    // The output buffer could be of any positive size; we only count bytes.
    byte[] compressedBuffer = new byte[buffer.length];
    compressor.setInput(buffer, offset, length);
    compressor.finish();
    int currentPos = 0;
    while (!compressor.finished()) {
      try {
        // We don't keep the compressed data;
        // we just want to calculate the number of bytes.
        currentPos += compressor.compress(compressedBuffer, 0,
            compressedBuffer.length);
      } catch (IOException e) {
        throw new RuntimeException(
            "Compressor failed to read data. It is likely a problem with " +
            compressor.getClass().getName(), e);
      }
    }
    return currentPos;
  }

  /**
   * Estimate the size after a second stage of compression (e.g. LZO) applied
   * on top of the block encoding.
   * @param compressor Algorithm which will be used for compression.
   * @return Size after the second stage of compression.
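   * <p>
   * Sketch (with {@code compressor} obtained as in the static overload above;
   * reset it before reuse):
   * <pre>
   * compressor.reset();
   * int sizeAfterSecondStage = block.checkCompressedSize(compressor);
   * </pre>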
   */
  public int checkCompressedSize(Compressor compressor) {
    // Encode first, then run the encoded bytes through the compressor.
    byte[] compressedBytes = getCompressedData();
    return checkCompressedSize(compressor, compressedBytes, 0,
        compressedBytes.length);
  }

  private byte[] getCompressedData() {
    // Return the cached result if it is still valid.
    if (cacheCompressData != null) {
      return cacheCompressData;
    }
    cacheCompressData = doCompressData();

    return cacheCompressData;
  }

  private ByteBuffer getUncompressedBuffer() {
    if (uncompressedBuffer == null ||
        uncompressedBuffer.limit() < uncompressedOutputStream.size()) {
      uncompressedBuffer = ByteBuffer.wrap(
          uncompressedOutputStream.toByteArray());
    }
    return uncompressedBuffer;
  }

  /**
   * Do the compression.
   * @return Compressed byte buffer.
   */
  public byte[] doCompressData() {
    compressedStream.reset();
    DataOutputStream dataOut = new DataOutputStream(compressedStream);
    try {
      this.dataBlockEncoder.compressKeyValues(
          dataOut, getUncompressedBuffer(), includesMemstoreTS);
    } catch (IOException e) {
      throw new RuntimeException(String.format(
          "Bug in encoding part of algorithm %s. " +
          "Probably it requested more bytes than are available.",
          toString()), e);
    }
    return compressedStream.toByteArray();
  }

  @Override
  public String toString() {
    return dataBlockEncoder.toString();
  }

  /**
   * Get the uncompressed buffer.
   * @return The buffer.
   */
  public byte[] getRawKeyValues() {
    return uncompressedOutputStream.toByteArray();
  }
}