001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.encoding;
019
020import java.nio.ByteBuffer;
021import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue;
022import org.apache.hadoop.hbase.Cell;
023import org.apache.hadoop.hbase.CellComparator;
024import org.apache.hadoop.hbase.CellUtil;
025import org.apache.hadoop.hbase.HConstants;
026import org.apache.hadoop.hbase.KeyValue;
027import org.apache.hadoop.hbase.PrivateCellUtil;
028import org.apache.hadoop.hbase.SizeCachedByteBufferKeyValue;
029import org.apache.hadoop.hbase.SizeCachedKeyValue;
030import org.apache.hadoop.hbase.SizeCachedNoTagsByteBufferKeyValue;
031import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue;
032import org.apache.hadoop.hbase.io.encoding.AbstractDataBlockEncoder.AbstractEncodedSeeker;
033import org.apache.hadoop.hbase.nio.ByteBuff;
034import org.apache.hadoop.hbase.util.ByteBufferUtils;
035import org.apache.hadoop.hbase.util.Bytes;
036import org.apache.hadoop.hbase.util.ObjectIntPair;
037import org.apache.yetus.audience.InterfaceAudience;
038
039@InterfaceAudience.Private
040public class RowIndexSeekerV1 extends AbstractEncodedSeeker {
041
042  // A temp pair object which will be reused by ByteBuff#asSubByteBuffer calls. This avoids too
043  // many object creations.
044  protected final ObjectIntPair<ByteBuffer> tmpPair = new ObjectIntPair<>();
045
046  private ByteBuff currentBuffer;
047  private SeekerState current = new SeekerState(); // always valid
048  private SeekerState previous = new SeekerState(); // may not be valid
049
050  private int rowNumber;
051  private ByteBuff rowOffsets = null;
052  private final CellComparator cellComparator;
053
054  public RowIndexSeekerV1(HFileBlockDecodingContext decodingCtx) {
055    super(decodingCtx);
056    this.cellComparator = decodingCtx.getHFileContext().getCellComparator();
057  }
058
059  @Override
060  public void setCurrentBuffer(ByteBuff buffer) {
061    int onDiskSize = buffer.getInt(buffer.limit() - Bytes.SIZEOF_INT);
062
063    // Data part
064    ByteBuff dup = buffer.duplicate();
065    dup.position(buffer.position());
066    dup.limit(buffer.position() + onDiskSize);
067    currentBuffer = dup.slice();
068    current.currentBuffer = currentBuffer;
069    buffer.skip(onDiskSize);
070
071    // Row offset
072    rowNumber = buffer.getInt();
073    int totalRowOffsetsLength = Bytes.SIZEOF_INT * rowNumber;
074    ByteBuff rowDup = buffer.duplicate();
075    rowDup.position(buffer.position());
076    rowDup.limit(buffer.position() + totalRowOffsetsLength);
077    rowOffsets = rowDup.slice();
078
079    decodeFirst();
080  }
081
082  @Override
083  public Cell getKey() {
084    if (current.keyBuffer.hasArray()) {
085      return new KeyValue.KeyOnlyKeyValue(current.keyBuffer.array(),
086        current.keyBuffer.arrayOffset() + current.keyBuffer.position(), current.keyLength);
087    } else {
088      byte[] key = new byte[current.keyLength];
089      ByteBufferUtils.copyFromBufferToArray(key, current.keyBuffer, current.keyBuffer.position(), 0,
090        current.keyLength);
091      return new KeyValue.KeyOnlyKeyValue(key, 0, current.keyLength);
092    }
093  }
094
095  @Override
096  public ByteBuffer getValueShallowCopy() {
097    currentBuffer.asSubByteBuffer(current.valueOffset, current.valueLength, tmpPair);
098    ByteBuffer dup = tmpPair.getFirst().duplicate();
099    dup.position(tmpPair.getSecond());
100    dup.limit(tmpPair.getSecond() + current.valueLength);
101    return dup.slice();
102  }
103
104  @Override
105  public Cell getCell() {
106    return current.toCell();
107  }
108
109  @Override
110  public void rewind() {
111    currentBuffer.rewind();
112    decodeFirst();
113  }
114
115  @Override
116  public boolean next() {
117    if (!currentBuffer.hasRemaining()) {
118      return false;
119    }
120    decodeNext();
121    previous.invalidate();
122    return true;
123  }
124
125  private int binarySearch(Cell seekCell, boolean seekBefore) {
126    int low = 0;
127    int high = rowNumber - 1;
128    int mid = low + ((high - low) >> 1);
129    int comp = 0;
130    while (low <= high) {
131      mid = low + ((high - low) >> 1);
132      comp = this.cellComparator.compareRows(getRow(mid), seekCell);
133      if (comp < 0) {
134        low = mid + 1;
135      } else if (comp > 0) {
136        high = mid - 1;
137      } else {
138        // key found
139        if (seekBefore) {
140          return mid - 1;
141        } else {
142          return mid;
143        }
144      }
145    }
146    // key not found.
147    if (comp > 0) {
148      return mid - 1;
149    } else {
150      return mid;
151    }
152  }
153
154  private ByteBuffer getRow(int index) {
155    int offset = rowOffsets.getIntAfterPosition(index * Bytes.SIZEOF_INT);
156    ByteBuff block = currentBuffer.duplicate();
157    block.position(offset + Bytes.SIZEOF_LONG);
158    short rowLen = block.getShort();
159    block.asSubByteBuffer(block.position(), rowLen, tmpPair);
160    ByteBuffer row = tmpPair.getFirst();
161    row.position(tmpPair.getSecond()).limit(tmpPair.getSecond() + rowLen);
162    return row;
163  }
164
165  @Override
166  public int seekToKeyInBlock(Cell seekCell, boolean seekBefore) {
167    previous.invalidate();
168    int index = binarySearch(seekCell, seekBefore);
169    if (index < 0) {
170      return HConstants.INDEX_KEY_MAGIC; // using optimized index key
171    } else {
172      int offset = rowOffsets.getIntAfterPosition(index * Bytes.SIZEOF_INT);
173      if (offset != 0) {
174        decodeAtPosition(offset);
175      }
176    }
177    do {
178      int comp =
179        PrivateCellUtil.compareKeyIgnoresMvcc(this.cellComparator, seekCell, current.currentKey);
180      if (comp == 0) { // exact match
181        if (seekBefore) {
182          if (!previous.isValid()) {
183            // The caller (seekBefore) has to ensure that we are not at the
184            // first key in the block.
185            throw new IllegalStateException(
186              "Cannot seekBefore if " + "positioned at the first key in the block: key="
187                + Bytes.toStringBinary(seekCell.getRowArray()));
188          }
189          moveToPrevious();
190          return 1;
191        }
192        return 0;
193      }
194
195      if (comp < 0) { // already too large, check previous
196        if (previous.isValid()) {
197          moveToPrevious();
198        } else {
199          return HConstants.INDEX_KEY_MAGIC; // using optimized index key
200        }
201        return 1;
202      }
203
204      // move to next, if more data is available
205      if (currentBuffer.hasRemaining()) {
206        previous.copyFromNext(current);
207        decodeNext();
208      } else {
209        break;
210      }
211    } while (true);
212
213    // we hit the end of the block, not an exact match
214    return 1;
215  }
216
217  private void moveToPrevious() {
218    if (!previous.isValid()) {
219      throw new IllegalStateException("Can move back only once and not in first key in the block.");
220    }
221
222    SeekerState tmp = previous;
223    previous = current;
224    current = tmp;
225
226    // move after last key value
227    currentBuffer.position(current.nextKvOffset);
228    previous.invalidate();
229  }
230
231  @Override
232  public int compareKey(CellComparator comparator, Cell key) {
233    return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, key, current.currentKey);
234  }
235
236  protected void decodeFirst() {
237    decodeNext();
238    previous.invalidate();
239  }
240
241  protected void decodeAtPosition(int position) {
242    currentBuffer.position(position);
243    decodeNext();
244    previous.invalidate();
245  }
246
247  protected void decodeNext() {
248    current.startOffset = currentBuffer.position();
249    long ll = currentBuffer.getLongAfterPosition(0);
250    // Read top half as an int of key length and bottom int as value length
251    current.keyLength = (int) (ll >> Integer.SIZE);
252    current.valueLength = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
253    currentBuffer.skip(Bytes.SIZEOF_LONG);
254    // key part
255    currentBuffer.asSubByteBuffer(currentBuffer.position(), current.keyLength, tmpPair);
256    ByteBuffer key = tmpPair.getFirst().duplicate();
257    key.position(tmpPair.getSecond()).limit(tmpPair.getSecond() + current.keyLength);
258    current.keyBuffer = key;
259    currentBuffer.skip(current.keyLength);
260    // value part
261    current.valueOffset = currentBuffer.position();
262    currentBuffer.skip(current.valueLength);
263    if (includesTags()) {
264      decodeTags();
265    }
266    if (includesMvcc()) {
267      current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer);
268    } else {
269      current.memstoreTS = 0;
270    }
271    current.nextKvOffset = currentBuffer.position();
272    current.currentKey.setKey(current.keyBuffer, tmpPair.getSecond(), current.keyLength);
273  }
274
275  protected void decodeTags() {
276    current.tagsLength = currentBuffer.getShortAfterPosition(0);
277    currentBuffer.skip(Bytes.SIZEOF_SHORT);
278    current.tagsOffset = currentBuffer.position();
279    currentBuffer.skip(current.tagsLength);
280  }
281
282  private class SeekerState {
283    /**
284     * The size of a (key length, value length) tuple that prefixes each entry in a data block.
285     */
286    public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;
287
288    protected ByteBuff currentBuffer;
289    protected int startOffset = -1;
290    protected int valueOffset = -1;
291    protected int keyLength;
292    protected int valueLength;
293    protected int tagsLength = 0;
294    protected int tagsOffset = -1;
295
296    protected ByteBuffer keyBuffer = null;
297    protected long memstoreTS;
298    protected int nextKvOffset;
299    // buffer backed keyonlyKV
300    private ByteBufferKeyOnlyKeyValue currentKey = new ByteBufferKeyOnlyKeyValue();
301
302    protected boolean isValid() {
303      return valueOffset != -1;
304    }
305
306    protected void invalidate() {
307      valueOffset = -1;
308      currentKey = new ByteBufferKeyOnlyKeyValue();
309      currentBuffer = null;
310    }
311
312    /**
313     * Copy the state from the next one into this instance (the previous state placeholder). Used to
314     * save the previous state when we are advancing the seeker to the next key/value.
315     */
316    protected void copyFromNext(SeekerState nextState) {
317      keyBuffer = nextState.keyBuffer;
318      currentKey.setKey(nextState.keyBuffer,
319        nextState.currentKey.getRowPosition() - Bytes.SIZEOF_SHORT, nextState.keyLength);
320
321      startOffset = nextState.startOffset;
322      valueOffset = nextState.valueOffset;
323      keyLength = nextState.keyLength;
324      valueLength = nextState.valueLength;
325      nextKvOffset = nextState.nextKvOffset;
326      memstoreTS = nextState.memstoreTS;
327      currentBuffer = nextState.currentBuffer;
328      tagsOffset = nextState.tagsOffset;
329      tagsLength = nextState.tagsLength;
330    }
331
332    @Override
333    public String toString() {
334      return CellUtil.getCellKeyAsString(toCell());
335    }
336
337    protected int getCellBufSize() {
338      int kvBufSize = KEY_VALUE_LEN_SIZE + keyLength + valueLength;
339      if (includesTags() && tagsLength > 0) {
340        kvBufSize += Bytes.SIZEOF_SHORT + tagsLength;
341      }
342      return kvBufSize;
343    }
344
345    public Cell toCell() {
346      Cell ret;
347      int cellBufSize = getCellBufSize();
348      long seqId = 0L;
349      if (includesMvcc()) {
350        seqId = memstoreTS;
351      }
352      if (currentBuffer.hasArray()) {
353        // TODO : reduce the varieties of KV here. Check if based on a boolean
354        // we can handle the 'no tags' case.
355        if (tagsLength > 0) {
356          // TODO : getRow len here.
357          ret = new SizeCachedKeyValue(currentBuffer.array(),
358            currentBuffer.arrayOffset() + startOffset, cellBufSize, seqId, keyLength);
359        } else {
360          ret = new SizeCachedNoTagsKeyValue(currentBuffer.array(),
361            currentBuffer.arrayOffset() + startOffset, cellBufSize, seqId, keyLength);
362        }
363      } else {
364        currentBuffer.asSubByteBuffer(startOffset, cellBufSize, tmpPair);
365        ByteBuffer buf = tmpPair.getFirst();
366        if (buf.isDirect()) {
367          // TODO : getRow len here.
368          ret = tagsLength > 0
369            ? new SizeCachedByteBufferKeyValue(buf, tmpPair.getSecond(), cellBufSize, seqId,
370              keyLength)
371            : new SizeCachedNoTagsByteBufferKeyValue(buf, tmpPair.getSecond(), cellBufSize, seqId,
372              keyLength);
373        } else {
374          if (tagsLength > 0) {
375            ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset() + tmpPair.getSecond(),
376              cellBufSize, seqId, keyLength);
377          } else {
378            ret = new SizeCachedNoTagsKeyValue(buf.array(), buf.arrayOffset() + tmpPair.getSecond(),
379              cellBufSize, seqId, keyLength);
380          }
381        }
382      }
383      return ret;
384    }
385  }
386}