001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.IOException; 021import java.util.Iterator; 022import java.util.SortedSet; 023import org.apache.commons.lang3.NotImplementedException; 024import org.apache.hadoop.fs.Path; 025import org.apache.hadoop.hbase.Cell; 026import org.apache.hadoop.hbase.PrivateCellUtil; 027import org.apache.hadoop.hbase.client.Scan; 028import org.apache.yetus.audience.InterfaceAudience; 029 030/** 031 * A scanner of a single memstore segment. 032 */ 033@InterfaceAudience.Private 034public class SegmentScanner implements KeyValueScanner { 035 036 // the observed structure 037 protected final Segment segment; 038 // the highest relevant MVCC 039 private long readPoint; 040 // the current iterator that can be reinitialized by 041 // seek(), backwardSeek(), or reseek() 042 protected Iterator<Cell> iter; 043 // the pre-calculated cell to be returned by peek() 044 protected Cell current = null; 045 // or next() 046 // A flag represents whether could stop skipping KeyValues for MVCC 047 // if have encountered the next row. Only used for reversed scan 048 private boolean stopSkippingKVsIfNextRow = false; 049 // Stop skipping KeyValues for MVCC if finish this row. Only used for reversed scan 050 private Cell stopSkippingKVsRow; 051 // last iterated KVs by seek (to restore the iterator state after reseek) 052 private Cell last = null; 053 054 // flag to indicate if this scanner is closed 055 protected boolean closed = false; 056 057 /** 058 * Scanners are ordered from 0 (oldest) to newest in increasing order. 059 */ 060 protected SegmentScanner(Segment segment, long readPoint) { 061 this.segment = segment; 062 this.readPoint = readPoint; 063 // increase the reference count so the underlying structure will not be de-allocated 064 this.segment.incScannerCount(); 065 iter = segment.iterator(); 066 // the initialization of the current is required for working with heap of SegmentScanners 067 updateCurrent(); 068 if (current == null) { 069 // nothing to fetch from this scanner 070 close(); 071 } 072 } 073 074 /** 075 * Look at the next Cell in this scanner, but do not iterate the scanner 076 * @return the currently observed Cell 077 */ 078 @Override 079 public Cell peek() { // sanity check, the current should be always valid 080 if (closed) { 081 return null; 082 } 083 if (current != null && current.getSequenceId() > readPoint) { 084 throw new RuntimeException("current is invalid: read point is " + readPoint + ", " 085 + "while current sequence id is " + current.getSequenceId()); 086 } 087 return current; 088 } 089 090 /** 091 * Return the next Cell in this scanner, iterating the scanner 092 * @return the next Cell or null if end of scanner 093 */ 094 @Override 095 public Cell next() throws IOException { 096 if (closed) { 097 return null; 098 } 099 Cell oldCurrent = current; 100 updateCurrent(); // update the currently observed Cell 101 return oldCurrent; 102 } 103 104 /** 105 * Seek the scanner at or after the specified Cell. 106 * @param cell seek value 107 * @return true if scanner has values left, false if end of scanner 108 */ 109 @Override 110 public boolean seek(Cell cell) throws IOException { 111 if (closed) { 112 return false; 113 } 114 if (cell == null) { 115 close(); 116 return false; 117 } 118 // restart the iterator from new key 119 iter = getIterator(cell); 120 // last is going to be reinitialized in the next getNext() call 121 last = null; 122 updateCurrent(); 123 return (current != null); 124 } 125 126 protected Iterator<Cell> getIterator(Cell cell) { 127 return segment.tailSet(cell).iterator(); 128 } 129 130 /** 131 * Reseek the scanner at or after the specified KeyValue. This method is guaranteed to seek at or 132 * after the required key only if the key comes after the current position of the scanner. Should 133 * not be used to seek to a key which may come before the current position. 134 * @param cell seek value (should be non-null) 135 * @return true if scanner has values left, false if end of scanner 136 */ 137 @Override 138 public boolean reseek(Cell cell) throws IOException { 139 if (closed) { 140 return false; 141 } 142 /* 143 * See HBASE-4195 & HBASE-3855 & HBASE-6591 for the background on this implementation. This code 144 * is executed concurrently with flush and puts, without locks. The ideal implementation for 145 * performance would use the sub skip list implicitly pointed by the iterator. Unfortunately the 146 * Java API does not offer a method to get it. So we remember the last keys we iterated to and 147 * restore the reseeked set to at least that point. 148 */ 149 iter = getIterator(getHighest(cell, last)); 150 updateCurrent(); 151 return (current != null); 152 } 153 154 /** 155 * Seek the scanner at or before the row of specified Cell, it firstly tries to seek the scanner 156 * at or after the specified Cell, return if peek KeyValue of scanner has the same row with 157 * specified Cell, otherwise seek the scanner at the first Cell of the row which is the previous 158 * row of specified KeyValue 159 * @param key seek Cell 160 * @return true if the scanner is at the valid KeyValue, false if such Cell does not exist 161 */ 162 @Override 163 public boolean backwardSeek(Cell key) throws IOException { 164 if (closed) { 165 return false; 166 } 167 seek(key); // seek forward then go backward 168 if (peek() == null || segment.compareRows(peek(), key) > 0) { 169 return seekToPreviousRow(key); 170 } 171 return true; 172 } 173 174 /** 175 * Seek the scanner at the first Cell of the row which is the previous row of specified key 176 * @param cell seek value 177 * @return true if the scanner at the first valid Cell of previous row, false if not existing such 178 * Cell 179 */ 180 @Override 181 public boolean seekToPreviousRow(Cell cell) throws IOException { 182 if (closed) { 183 return false; 184 } 185 boolean keepSeeking; 186 Cell key = cell; 187 do { 188 Cell firstKeyOnRow = PrivateCellUtil.createFirstOnRow(key); 189 SortedSet<Cell> cellHead = segment.headSet(firstKeyOnRow); 190 Cell lastCellBeforeRow = cellHead.isEmpty() ? null : cellHead.last(); 191 if (lastCellBeforeRow == null) { 192 current = null; 193 return false; 194 } 195 Cell firstKeyOnPreviousRow = PrivateCellUtil.createFirstOnRow(lastCellBeforeRow); 196 this.stopSkippingKVsIfNextRow = true; 197 this.stopSkippingKVsRow = firstKeyOnPreviousRow; 198 seek(firstKeyOnPreviousRow); 199 this.stopSkippingKVsIfNextRow = false; 200 if ( 201 peek() == null || segment.getComparator().compareRows(peek(), firstKeyOnPreviousRow) > 0 202 ) { 203 keepSeeking = true; 204 key = firstKeyOnPreviousRow; 205 continue; 206 } else { 207 keepSeeking = false; 208 } 209 } while (keepSeeking); 210 return true; 211 } 212 213 /** 214 * Seek the scanner at the first KeyValue of last row 215 * @return true if scanner has values left, false if the underlying data is empty 216 */ 217 @Override 218 public boolean seekToLastRow() throws IOException { 219 if (closed) { 220 return false; 221 } 222 Cell higherCell = segment.isEmpty() ? null : segment.last(); 223 if (higherCell == null) { 224 return false; 225 } 226 227 Cell firstCellOnLastRow = PrivateCellUtil.createFirstOnRow(higherCell); 228 229 if (seek(firstCellOnLastRow)) { 230 return true; 231 } else { 232 return seekToPreviousRow(higherCell); 233 } 234 } 235 236 /** 237 * Close the KeyValue scanner. 238 */ 239 @Override 240 public void close() { 241 if (closed) { 242 return; 243 } 244 getSegment().decScannerCount(); 245 closed = true; 246 } 247 248 /** 249 * This functionality should be resolved in the higher level which is MemStoreScanner, currently 250 * returns true as default. Doesn't throw IllegalStateException in order not to change the 251 * signature of the overridden method 252 */ 253 @Override 254 public boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS) { 255 return getSegment().shouldSeek(scan.getColumnFamilyTimeRange().getOrDefault( 256 store.getColumnFamilyDescriptor().getName(), scan.getTimeRange()), oldestUnexpiredTS); 257 } 258 259 @Override 260 public boolean requestSeek(Cell c, boolean forward, boolean useBloom) throws IOException { 261 return NonLazyKeyValueScanner.doRealSeek(this, c, forward); 262 } 263 264 /** 265 * This scanner is working solely on the in-memory MemStore and doesn't work on store files, 266 * MutableCellSetSegmentScanner always does the seek, therefore always returning true. 267 */ 268 @Override 269 public boolean realSeekDone() { 270 return true; 271 } 272 273 /** 274 * This function should be never called on scanners that always do real seek operations (i.e. most 275 * of the scanners and also this one). The easiest way to achieve this is to call 276 * {@link #realSeekDone()} first. 277 */ 278 @Override 279 public void enforceSeek() throws IOException { 280 throw new NotImplementedException("enforceSeek cannot be called on a SegmentScanner"); 281 } 282 283 /** Returns true if this is a file scanner. Otherwise a memory scanner is assumed. */ 284 @Override 285 public boolean isFileScanner() { 286 return false; 287 } 288 289 @Override 290 public Path getFilePath() { 291 return null; 292 } 293 294 /** 295 * @return the next key in the index (the key to seek to the next block) if known, or null 296 * otherwise Not relevant for in-memory scanner 297 */ 298 @Override 299 public Cell getNextIndexedKey() { 300 return null; 301 } 302 303 /** 304 * Called after a batch of rows scanned (RPC) and set to be returned to client. Any in between 305 * cleanup can be done here. Nothing to be done for MutableCellSetSegmentScanner. 306 */ 307 @Override 308 public void shipped() throws IOException { 309 // do nothing 310 } 311 312 // debug method 313 @Override 314 public String toString() { 315 String res = "Store segment scanner of type " + this.getClass().getName() + "; "; 316 res += "Scanner order " + getScannerOrder() + "; "; 317 res += getSegment().toString(); 318 return res; 319 } 320 321 /********************* Private Methods **********************/ 322 323 private Segment getSegment() { 324 return segment; 325 } 326 327 /** 328 * Private internal method for iterating over the segment, skipping the cells with irrelevant MVCC 329 */ 330 protected void updateCurrent() { 331 Cell next = null; 332 333 try { 334 while (iter.hasNext()) { 335 next = iter.next(); 336 if (next.getSequenceId() <= this.readPoint) { 337 current = next; 338 return;// skip irrelevant versions 339 } 340 // for backwardSeek() stay in the boundaries of a single row 341 if (stopSkippingKVsIfNextRow && segment.compareRows(next, stopSkippingKVsRow) > 0) { 342 current = null; 343 return; 344 } 345 } // end of while 346 347 current = null; // nothing found 348 } finally { 349 if (next != null) { 350 // in all cases, remember the last KV we iterated to, needed for reseek() 351 last = next; 352 } 353 } 354 } 355 356 /** 357 * Private internal method that returns the higher of the two key values, or null if they are both 358 * null 359 */ 360 private Cell getHighest(Cell first, Cell second) { 361 if (first == null && second == null) { 362 return null; 363 } 364 if (first != null && second != null) { 365 int compare = segment.compare(first, second); 366 return (compare > 0 ? first : second); 367 } 368 return (first != null ? first : second); 369 } 370}