001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.encoding; 019 020import java.nio.ByteBuffer; 021import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue; 022import org.apache.hadoop.hbase.Cell; 023import org.apache.hadoop.hbase.CellComparator; 024import org.apache.hadoop.hbase.CellUtil; 025import org.apache.hadoop.hbase.ExtendedCell; 026import org.apache.hadoop.hbase.HConstants; 027import org.apache.hadoop.hbase.KeyValue; 028import org.apache.hadoop.hbase.PrivateCellUtil; 029import org.apache.hadoop.hbase.SizeCachedByteBufferKeyValue; 030import org.apache.hadoop.hbase.SizeCachedKeyValue; 031import org.apache.hadoop.hbase.SizeCachedNoTagsByteBufferKeyValue; 032import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue; 033import org.apache.hadoop.hbase.io.encoding.AbstractDataBlockEncoder.AbstractEncodedSeeker; 034import org.apache.hadoop.hbase.nio.ByteBuff; 035import org.apache.hadoop.hbase.util.ByteBufferUtils; 036import org.apache.hadoop.hbase.util.Bytes; 037import org.apache.hadoop.hbase.util.ObjectIntPair; 038import org.apache.yetus.audience.InterfaceAudience; 039 040@InterfaceAudience.Private 041public class RowIndexSeekerV1 extends AbstractEncodedSeeker { 042 043 // A temp pair object which will be reused by ByteBuff#asSubByteBuffer calls. This avoids too 044 // many object creations. 045 protected final ObjectIntPair<ByteBuffer> tmpPair = new ObjectIntPair<>(); 046 047 private ByteBuff currentBuffer; 048 private SeekerState current = new SeekerState(); // always valid 049 private SeekerState previous = new SeekerState(); // may not be valid 050 051 private int rowNumber; 052 private ByteBuff rowOffsets = null; 053 private final CellComparator cellComparator; 054 055 public RowIndexSeekerV1(HFileBlockDecodingContext decodingCtx) { 056 super(decodingCtx); 057 this.cellComparator = decodingCtx.getHFileContext().getCellComparator(); 058 } 059 060 @Override 061 public void setCurrentBuffer(ByteBuff buffer) { 062 int onDiskSize = buffer.getInt(buffer.limit() - Bytes.SIZEOF_INT); 063 064 // Data part 065 ByteBuff dup = buffer.duplicate(); 066 dup.position(buffer.position()); 067 dup.limit(buffer.position() + onDiskSize); 068 currentBuffer = dup.slice(); 069 current.currentBuffer = currentBuffer; 070 buffer.skip(onDiskSize); 071 072 // Row offset 073 rowNumber = buffer.getInt(); 074 int totalRowOffsetsLength = Bytes.SIZEOF_INT * rowNumber; 075 ByteBuff rowDup = buffer.duplicate(); 076 rowDup.position(buffer.position()); 077 rowDup.limit(buffer.position() + totalRowOffsetsLength); 078 rowOffsets = rowDup.slice(); 079 080 decodeFirst(); 081 } 082 083 @Override 084 @SuppressWarnings("ByteBufferBackingArray") 085 public ExtendedCell getKey() { 086 if (current.keyBuffer.hasArray()) { 087 return new KeyValue.KeyOnlyKeyValue(current.keyBuffer.array(), 088 current.keyBuffer.arrayOffset() + current.keyOffset, current.keyLength); 089 } else { 090 final byte[] key = new byte[current.keyLength]; 091 ByteBufferUtils.copyFromBufferToArray(key, current.keyBuffer, current.keyOffset, 0, 092 current.keyLength); 093 return new KeyValue.KeyOnlyKeyValue(key, 0, current.keyLength); 094 } 095 } 096 097 @Override 098 public ByteBuffer getValueShallowCopy() { 099 currentBuffer.asSubByteBuffer(current.valueOffset, current.valueLength, tmpPair); 100 ByteBuffer dup = tmpPair.getFirst().duplicate(); 101 dup.position(tmpPair.getSecond()); 102 dup.limit(tmpPair.getSecond() + current.valueLength); 103 return dup.slice(); 104 } 105 106 @Override 107 public ExtendedCell getCell() { 108 return current.toCell(); 109 } 110 111 @Override 112 public void rewind() { 113 currentBuffer.rewind(); 114 decodeFirst(); 115 } 116 117 @Override 118 public boolean next() { 119 if (!currentBuffer.hasRemaining()) { 120 return false; 121 } 122 decodeNext(); 123 previous.invalidate(); 124 return true; 125 } 126 127 private int binarySearch(Cell seekCell, boolean seekBefore) { 128 int low = 0; 129 int high = rowNumber - 1; 130 int mid = low + ((high - low) >> 1); 131 int comp = 0; 132 while (low <= high) { 133 mid = low + ((high - low) >> 1); 134 comp = this.cellComparator.compareRows(getRow(mid), seekCell); 135 if (comp < 0) { 136 low = mid + 1; 137 } else if (comp > 0) { 138 high = mid - 1; 139 } else { 140 // key found 141 if (seekBefore) { 142 return mid - 1; 143 } else { 144 return mid; 145 } 146 } 147 } 148 // key not found. 149 if (comp > 0) { 150 return mid - 1; 151 } else { 152 return mid; 153 } 154 } 155 156 private ByteBuffer getRow(int index) { 157 int offset = rowOffsets.getIntAfterPosition(index * Bytes.SIZEOF_INT); 158 ByteBuff block = currentBuffer.duplicate(); 159 block.position(offset + Bytes.SIZEOF_LONG); 160 short rowLen = block.getShort(); 161 block.asSubByteBuffer(block.position(), rowLen, tmpPair); 162 ByteBuffer row = tmpPair.getFirst(); 163 row.position(tmpPair.getSecond()).limit(tmpPair.getSecond() + rowLen); 164 return row; 165 } 166 167 @Override 168 public int seekToKeyInBlock(ExtendedCell seekCell, boolean seekBefore) { 169 previous.invalidate(); 170 int index = binarySearch(seekCell, seekBefore); 171 if (index < 0) { 172 return HConstants.INDEX_KEY_MAGIC; // using optimized index key 173 } else { 174 int offset = rowOffsets.getIntAfterPosition(index * Bytes.SIZEOF_INT); 175 if (offset != 0) { 176 decodeAtPosition(offset); 177 } 178 } 179 do { 180 int comp = 181 PrivateCellUtil.compareKeyIgnoresMvcc(this.cellComparator, seekCell, current.currentKey); 182 if (comp == 0) { // exact match 183 if (seekBefore) { 184 if (!previous.isValid()) { 185 // The caller (seekBefore) has to ensure that we are not at the 186 // first key in the block. 187 throw new IllegalStateException( 188 "Cannot seekBefore if " + "positioned at the first key in the block: key=" 189 + Bytes.toStringBinary(seekCell.getRowArray())); 190 } 191 moveToPrevious(); 192 return 1; 193 } 194 return 0; 195 } 196 197 if (comp < 0) { // already too large, check previous 198 if (previous.isValid()) { 199 moveToPrevious(); 200 } else { 201 return HConstants.INDEX_KEY_MAGIC; // using optimized index key 202 } 203 return 1; 204 } 205 206 // move to next, if more data is available 207 if (currentBuffer.hasRemaining()) { 208 previous.copyFromNext(current); 209 decodeNext(); 210 } else { 211 break; 212 } 213 } while (true); 214 215 // we hit the end of the block, not an exact match 216 return 1; 217 } 218 219 private void moveToPrevious() { 220 if (!previous.isValid()) { 221 throw new IllegalStateException("Can move back only once and not in first key in the block."); 222 } 223 224 SeekerState tmp = previous; 225 previous = current; 226 current = tmp; 227 228 // move after last key value 229 currentBuffer.position(current.nextKvOffset); 230 previous.invalidate(); 231 } 232 233 @Override 234 public int compareKey(CellComparator comparator, ExtendedCell key) { 235 return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, key, current.currentKey); 236 } 237 238 protected void decodeFirst() { 239 decodeNext(); 240 previous.invalidate(); 241 } 242 243 protected void decodeAtPosition(int position) { 244 currentBuffer.position(position); 245 decodeNext(); 246 previous.invalidate(); 247 } 248 249 protected void decodeNext() { 250 current.startOffset = currentBuffer.position(); 251 long ll = currentBuffer.getLongAfterPosition(0); 252 // Read top half as an int of key length and bottom int as value length 253 current.keyLength = (int) (ll >> Integer.SIZE); 254 current.valueLength = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll); 255 currentBuffer.skip(Bytes.SIZEOF_LONG); 256 // key part 257 currentBuffer.asSubByteBuffer(currentBuffer.position(), current.keyLength, tmpPair); 258 current.keyBuffer = tmpPair.getFirst(); 259 current.keyOffset = tmpPair.getSecond(); 260 currentBuffer.skip(current.keyLength); 261 // value part 262 current.valueOffset = currentBuffer.position(); 263 currentBuffer.skip(current.valueLength); 264 if (includesTags()) { 265 decodeTags(); 266 } 267 if (includesMvcc()) { 268 current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer); 269 } else { 270 current.memstoreTS = 0; 271 } 272 current.nextKvOffset = currentBuffer.position(); 273 current.currentKey.setKey(current.keyBuffer, current.keyOffset, current.keyLength); 274 } 275 276 protected void decodeTags() { 277 current.tagsLength = currentBuffer.getShortAfterPosition(0); 278 currentBuffer.skip(Bytes.SIZEOF_SHORT); 279 currentBuffer.skip(current.tagsLength); 280 } 281 282 private class SeekerState { 283 /** 284 * The size of a (key length, value length) tuple that prefixes each entry in a data block. 285 */ 286 public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT; 287 288 // RowIndexSeekerV1 reads one cell at a time from a ByteBuff and uses SeekerState's fields to 289 // record the structure of the cell within the ByteBuff. 290 291 // The source of bytes that our cell is backed by 292 protected ByteBuff currentBuffer; 293 // Row structure starts at startOffset 294 protected int startOffset = -1; 295 // Key starts at keyOffset 296 protected int keyOffset = -1; 297 // Key ends at keyOffset + keyLength 298 protected int keyLength; 299 // Value starts at valueOffset 300 protected int valueOffset = -1; 301 // Value ends at valueOffset + valueLength 302 protected int valueLength; 303 // Tags start after values and end after tagsLength 304 protected int tagsLength = 0; 305 306 // A ByteBuffer version of currentBuffer that we use to access the key. position and limit 307 // are not adjusted so you must use keyOffset and keyLength to know where in this ByteBuffer to 308 // read. 309 protected ByteBuffer keyBuffer = null; 310 // seqId of the cell being read 311 protected long memstoreTS; 312 // Start of the next row structure in currentBuffer 313 protected int nextKvOffset; 314 // Buffer backed keyonlyKV, cheaply reset and re-used as necessary to avoid allocations. 315 // Fed to a comparator in RowIndexSeekerV1#binarySearch(). 316 private final ByteBufferKeyOnlyKeyValue currentKey = new ByteBufferKeyOnlyKeyValue(); 317 318 protected boolean isValid() { 319 return valueOffset != -1; 320 } 321 322 protected void invalidate() { 323 valueOffset = -1; 324 currentKey.clear(); 325 currentBuffer = null; 326 } 327 328 /** 329 * Copy the state from the next one into this instance (the previous state placeholder). Used to 330 * save the previous state when we are advancing the seeker to the next key/value. 331 */ 332 protected void copyFromNext(SeekerState nextState) { 333 keyBuffer = nextState.keyBuffer; 334 currentKey.setKey(nextState.keyBuffer, 335 nextState.currentKey.getRowPosition() - Bytes.SIZEOF_SHORT, nextState.keyLength); 336 337 startOffset = nextState.startOffset; 338 keyOffset = nextState.keyOffset; 339 valueOffset = nextState.valueOffset; 340 keyLength = nextState.keyLength; 341 valueLength = nextState.valueLength; 342 nextKvOffset = nextState.nextKvOffset; 343 memstoreTS = nextState.memstoreTS; 344 currentBuffer = nextState.currentBuffer; 345 tagsLength = nextState.tagsLength; 346 } 347 348 @Override 349 public String toString() { 350 return CellUtil.getCellKeyAsString(toCell()); 351 } 352 353 protected int getCellBufSize() { 354 int kvBufSize = KEY_VALUE_LEN_SIZE + keyLength + valueLength; 355 if (includesTags() && tagsLength > 0) { 356 kvBufSize += Bytes.SIZEOF_SHORT + tagsLength; 357 } 358 return kvBufSize; 359 } 360 361 public ExtendedCell toCell() { 362 ExtendedCell ret; 363 int cellBufSize = getCellBufSize(); 364 long seqId = 0L; 365 if (includesMvcc()) { 366 seqId = memstoreTS; 367 } 368 if (currentBuffer.hasArray()) { 369 // TODO : reduce the varieties of KV here. Check if based on a boolean 370 // we can handle the 'no tags' case. 371 if (tagsLength > 0) { 372 // TODO : getRow len here. 373 ret = new SizeCachedKeyValue(currentBuffer.array(), 374 currentBuffer.arrayOffset() + startOffset, cellBufSize, seqId, keyLength); 375 } else { 376 ret = new SizeCachedNoTagsKeyValue(currentBuffer.array(), 377 currentBuffer.arrayOffset() + startOffset, cellBufSize, seqId, keyLength); 378 } 379 } else { 380 currentBuffer.asSubByteBuffer(startOffset, cellBufSize, tmpPair); 381 ByteBuffer buf = tmpPair.getFirst(); 382 if (buf.isDirect()) { 383 // TODO : getRow len here. 384 ret = tagsLength > 0 385 ? new SizeCachedByteBufferKeyValue(buf, tmpPair.getSecond(), cellBufSize, seqId, 386 keyLength) 387 : new SizeCachedNoTagsByteBufferKeyValue(buf, tmpPair.getSecond(), cellBufSize, seqId, 388 keyLength); 389 } else { 390 if (tagsLength > 0) { 391 ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset() + tmpPair.getSecond(), 392 cellBufSize, seqId, keyLength); 393 } else { 394 ret = new SizeCachedNoTagsKeyValue(buf.array(), buf.arrayOffset() + tmpPair.getSecond(), 395 cellBufSize, seqId, keyLength); 396 } 397 } 398 } 399 return ret; 400 } 401 } 402}