001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.encoding; 019 020import java.nio.ByteBuffer; 021import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue; 022import org.apache.hadoop.hbase.Cell; 023import org.apache.hadoop.hbase.CellComparator; 024import org.apache.hadoop.hbase.CellUtil; 025import org.apache.hadoop.hbase.HConstants; 026import org.apache.hadoop.hbase.KeyValue; 027import org.apache.hadoop.hbase.PrivateCellUtil; 028import org.apache.hadoop.hbase.SizeCachedByteBufferKeyValue; 029import org.apache.hadoop.hbase.SizeCachedKeyValue; 030import org.apache.hadoop.hbase.SizeCachedNoTagsByteBufferKeyValue; 031import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue; 032import org.apache.hadoop.hbase.io.encoding.AbstractDataBlockEncoder.AbstractEncodedSeeker; 033import org.apache.hadoop.hbase.nio.ByteBuff; 034import org.apache.hadoop.hbase.util.ByteBufferUtils; 035import org.apache.hadoop.hbase.util.Bytes; 036import org.apache.hadoop.hbase.util.ObjectIntPair; 037import org.apache.yetus.audience.InterfaceAudience; 038 039@InterfaceAudience.Private 040public class RowIndexSeekerV1 extends AbstractEncodedSeeker { 041 042 // A temp pair object which will be reused by ByteBuff#asSubByteBuffer calls. This avoids too 043 // many object creations. 044 protected final ObjectIntPair<ByteBuffer> tmpPair = new ObjectIntPair<>(); 045 046 private ByteBuff currentBuffer; 047 private SeekerState current = new SeekerState(); // always valid 048 private SeekerState previous = new SeekerState(); // may not be valid 049 050 private int rowNumber; 051 private ByteBuff rowOffsets = null; 052 private final CellComparator cellComparator; 053 054 public RowIndexSeekerV1(HFileBlockDecodingContext decodingCtx) { 055 super(decodingCtx); 056 this.cellComparator = decodingCtx.getHFileContext().getCellComparator(); 057 } 058 059 @Override 060 public void setCurrentBuffer(ByteBuff buffer) { 061 int onDiskSize = buffer.getInt(buffer.limit() - Bytes.SIZEOF_INT); 062 063 // Data part 064 ByteBuff dup = buffer.duplicate(); 065 dup.position(buffer.position()); 066 dup.limit(buffer.position() + onDiskSize); 067 currentBuffer = dup.slice(); 068 current.currentBuffer = currentBuffer; 069 buffer.skip(onDiskSize); 070 071 // Row offset 072 rowNumber = buffer.getInt(); 073 int totalRowOffsetsLength = Bytes.SIZEOF_INT * rowNumber; 074 ByteBuff rowDup = buffer.duplicate(); 075 rowDup.position(buffer.position()); 076 rowDup.limit(buffer.position() + totalRowOffsetsLength); 077 rowOffsets = rowDup.slice(); 078 079 decodeFirst(); 080 } 081 082 @Override 083 public Cell getKey() { 084 if (current.keyBuffer.hasArray()) { 085 return new KeyValue.KeyOnlyKeyValue(current.keyBuffer.array(), 086 current.keyBuffer.arrayOffset() + current.keyBuffer.position(), current.keyLength); 087 } else { 088 byte[] key = new byte[current.keyLength]; 089 ByteBufferUtils.copyFromBufferToArray(key, current.keyBuffer, current.keyBuffer.position(), 0, 090 current.keyLength); 091 return new KeyValue.KeyOnlyKeyValue(key, 0, current.keyLength); 092 } 093 } 094 095 @Override 096 public ByteBuffer getValueShallowCopy() { 097 currentBuffer.asSubByteBuffer(current.valueOffset, current.valueLength, tmpPair); 098 ByteBuffer dup = tmpPair.getFirst().duplicate(); 099 dup.position(tmpPair.getSecond()); 100 dup.limit(tmpPair.getSecond() + current.valueLength); 101 return dup.slice(); 102 } 103 104 @Override 105 public Cell getCell() { 106 return current.toCell(); 107 } 108 109 @Override 110 public void rewind() { 111 currentBuffer.rewind(); 112 decodeFirst(); 113 } 114 115 @Override 116 public boolean next() { 117 if (!currentBuffer.hasRemaining()) { 118 return false; 119 } 120 decodeNext(); 121 previous.invalidate(); 122 return true; 123 } 124 125 private int binarySearch(Cell seekCell, boolean seekBefore) { 126 int low = 0; 127 int high = rowNumber - 1; 128 int mid = low + ((high - low) >> 1); 129 int comp = 0; 130 while (low <= high) { 131 mid = low + ((high - low) >> 1); 132 comp = this.cellComparator.compareRows(getRow(mid), seekCell); 133 if (comp < 0) { 134 low = mid + 1; 135 } else if (comp > 0) { 136 high = mid - 1; 137 } else { 138 // key found 139 if (seekBefore) { 140 return mid - 1; 141 } else { 142 return mid; 143 } 144 } 145 } 146 // key not found. 147 if (comp > 0) { 148 return mid - 1; 149 } else { 150 return mid; 151 } 152 } 153 154 private ByteBuffer getRow(int index) { 155 int offset = rowOffsets.getIntAfterPosition(index * Bytes.SIZEOF_INT); 156 ByteBuff block = currentBuffer.duplicate(); 157 block.position(offset + Bytes.SIZEOF_LONG); 158 short rowLen = block.getShort(); 159 block.asSubByteBuffer(block.position(), rowLen, tmpPair); 160 ByteBuffer row = tmpPair.getFirst(); 161 row.position(tmpPair.getSecond()).limit(tmpPair.getSecond() + rowLen); 162 return row; 163 } 164 165 @Override 166 public int seekToKeyInBlock(Cell seekCell, boolean seekBefore) { 167 previous.invalidate(); 168 int index = binarySearch(seekCell, seekBefore); 169 if (index < 0) { 170 return HConstants.INDEX_KEY_MAGIC; // using optimized index key 171 } else { 172 int offset = rowOffsets.getIntAfterPosition(index * Bytes.SIZEOF_INT); 173 if (offset != 0) { 174 decodeAtPosition(offset); 175 } 176 } 177 do { 178 int comp = 179 PrivateCellUtil.compareKeyIgnoresMvcc(this.cellComparator, seekCell, current.currentKey); 180 if (comp == 0) { // exact match 181 if (seekBefore) { 182 if (!previous.isValid()) { 183 // The caller (seekBefore) has to ensure that we are not at the 184 // first key in the block. 185 throw new IllegalStateException( 186 "Cannot seekBefore if " + "positioned at the first key in the block: key=" 187 + Bytes.toStringBinary(seekCell.getRowArray())); 188 } 189 moveToPrevious(); 190 return 1; 191 } 192 return 0; 193 } 194 195 if (comp < 0) { // already too large, check previous 196 if (previous.isValid()) { 197 moveToPrevious(); 198 } else { 199 return HConstants.INDEX_KEY_MAGIC; // using optimized index key 200 } 201 return 1; 202 } 203 204 // move to next, if more data is available 205 if (currentBuffer.hasRemaining()) { 206 previous.copyFromNext(current); 207 decodeNext(); 208 } else { 209 break; 210 } 211 } while (true); 212 213 // we hit the end of the block, not an exact match 214 return 1; 215 } 216 217 private void moveToPrevious() { 218 if (!previous.isValid()) { 219 throw new IllegalStateException("Can move back only once and not in first key in the block."); 220 } 221 222 SeekerState tmp = previous; 223 previous = current; 224 current = tmp; 225 226 // move after last key value 227 currentBuffer.position(current.nextKvOffset); 228 previous.invalidate(); 229 } 230 231 @Override 232 public int compareKey(CellComparator comparator, Cell key) { 233 return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, key, current.currentKey); 234 } 235 236 protected void decodeFirst() { 237 decodeNext(); 238 previous.invalidate(); 239 } 240 241 protected void decodeAtPosition(int position) { 242 currentBuffer.position(position); 243 decodeNext(); 244 previous.invalidate(); 245 } 246 247 protected void decodeNext() { 248 current.startOffset = currentBuffer.position(); 249 long ll = currentBuffer.getLongAfterPosition(0); 250 // Read top half as an int of key length and bottom int as value length 251 current.keyLength = (int) (ll >> Integer.SIZE); 252 current.valueLength = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll); 253 currentBuffer.skip(Bytes.SIZEOF_LONG); 254 // key part 255 currentBuffer.asSubByteBuffer(currentBuffer.position(), current.keyLength, tmpPair); 256 ByteBuffer key = tmpPair.getFirst().duplicate(); 257 key.position(tmpPair.getSecond()).limit(tmpPair.getSecond() + current.keyLength); 258 current.keyBuffer = key; 259 currentBuffer.skip(current.keyLength); 260 // value part 261 current.valueOffset = currentBuffer.position(); 262 currentBuffer.skip(current.valueLength); 263 if (includesTags()) { 264 decodeTags(); 265 } 266 if (includesMvcc()) { 267 current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer); 268 } else { 269 current.memstoreTS = 0; 270 } 271 current.nextKvOffset = currentBuffer.position(); 272 current.currentKey.setKey(current.keyBuffer, tmpPair.getSecond(), current.keyLength); 273 } 274 275 protected void decodeTags() { 276 current.tagsLength = currentBuffer.getShortAfterPosition(0); 277 currentBuffer.skip(Bytes.SIZEOF_SHORT); 278 current.tagsOffset = currentBuffer.position(); 279 currentBuffer.skip(current.tagsLength); 280 } 281 282 private class SeekerState { 283 /** 284 * The size of a (key length, value length) tuple that prefixes each entry in a data block. 285 */ 286 public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT; 287 288 protected ByteBuff currentBuffer; 289 protected int startOffset = -1; 290 protected int valueOffset = -1; 291 protected int keyLength; 292 protected int valueLength; 293 protected int tagsLength = 0; 294 protected int tagsOffset = -1; 295 296 protected ByteBuffer keyBuffer = null; 297 protected long memstoreTS; 298 protected int nextKvOffset; 299 // buffer backed keyonlyKV 300 private ByteBufferKeyOnlyKeyValue currentKey = new ByteBufferKeyOnlyKeyValue(); 301 302 protected boolean isValid() { 303 return valueOffset != -1; 304 } 305 306 protected void invalidate() { 307 valueOffset = -1; 308 currentKey = new ByteBufferKeyOnlyKeyValue(); 309 currentBuffer = null; 310 } 311 312 /** 313 * Copy the state from the next one into this instance (the previous state placeholder). Used to 314 * save the previous state when we are advancing the seeker to the next key/value. 315 */ 316 protected void copyFromNext(SeekerState nextState) { 317 keyBuffer = nextState.keyBuffer; 318 currentKey.setKey(nextState.keyBuffer, 319 nextState.currentKey.getRowPosition() - Bytes.SIZEOF_SHORT, nextState.keyLength); 320 321 startOffset = nextState.startOffset; 322 valueOffset = nextState.valueOffset; 323 keyLength = nextState.keyLength; 324 valueLength = nextState.valueLength; 325 nextKvOffset = nextState.nextKvOffset; 326 memstoreTS = nextState.memstoreTS; 327 currentBuffer = nextState.currentBuffer; 328 tagsOffset = nextState.tagsOffset; 329 tagsLength = nextState.tagsLength; 330 } 331 332 @Override 333 public String toString() { 334 return CellUtil.getCellKeyAsString(toCell()); 335 } 336 337 protected int getCellBufSize() { 338 int kvBufSize = KEY_VALUE_LEN_SIZE + keyLength + valueLength; 339 if (includesTags() && tagsLength > 0) { 340 kvBufSize += Bytes.SIZEOF_SHORT + tagsLength; 341 } 342 return kvBufSize; 343 } 344 345 public Cell toCell() { 346 Cell ret; 347 int cellBufSize = getCellBufSize(); 348 long seqId = 0L; 349 if (includesMvcc()) { 350 seqId = memstoreTS; 351 } 352 if (currentBuffer.hasArray()) { 353 // TODO : reduce the varieties of KV here. Check if based on a boolean 354 // we can handle the 'no tags' case. 355 if (tagsLength > 0) { 356 // TODO : getRow len here. 357 ret = new SizeCachedKeyValue(currentBuffer.array(), 358 currentBuffer.arrayOffset() + startOffset, cellBufSize, seqId, keyLength); 359 } else { 360 ret = new SizeCachedNoTagsKeyValue(currentBuffer.array(), 361 currentBuffer.arrayOffset() + startOffset, cellBufSize, seqId, keyLength); 362 } 363 } else { 364 currentBuffer.asSubByteBuffer(startOffset, cellBufSize, tmpPair); 365 ByteBuffer buf = tmpPair.getFirst(); 366 if (buf.isDirect()) { 367 // TODO : getRow len here. 368 ret = tagsLength > 0 369 ? new SizeCachedByteBufferKeyValue(buf, tmpPair.getSecond(), cellBufSize, seqId, 370 keyLength) 371 : new SizeCachedNoTagsByteBufferKeyValue(buf, tmpPair.getSecond(), cellBufSize, seqId, 372 keyLength); 373 } else { 374 if (tagsLength > 0) { 375 ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset() + tmpPair.getSecond(), 376 cellBufSize, seqId, keyLength); 377 } else { 378 ret = new SizeCachedNoTagsKeyValue(buf.array(), buf.arrayOffset() + tmpPair.getSecond(), 379 cellBufSize, seqId, keyLength); 380 } 381 } 382 } 383 return ret; 384 } 385 } 386}