001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.io.IOException;
021import java.util.Iterator;
022import java.util.SortedSet;
023import java.util.function.IntConsumer;
024import org.apache.commons.lang3.NotImplementedException;
025import org.apache.hadoop.fs.Path;
026import org.apache.hadoop.hbase.Cell;
027import org.apache.hadoop.hbase.ExtendedCell;
028import org.apache.hadoop.hbase.PrivateCellUtil;
029import org.apache.hadoop.hbase.client.Scan;
030import org.apache.yetus.audience.InterfaceAudience;
031
032/**
033 * A scanner of a single memstore segment.
034 */
035@InterfaceAudience.Private
036public class SegmentScanner implements KeyValueScanner {
037
038  // the observed structure
039  protected final Segment segment;
040  // the highest relevant MVCC
041  private long readPoint;
042  // the current iterator that can be reinitialized by
043  // seek(), backwardSeek(), or reseek()
044  protected Iterator<ExtendedCell> iter;
045  // the pre-calculated cell to be returned by peek()
046  protected ExtendedCell current = null;
047  // or next()
048  // A flag represents whether could stop skipping KeyValues for MVCC
049  // if have encountered the next row. Only used for reversed scan
050  private boolean stopSkippingKVsIfNextRow = false;
051  // Stop skipping KeyValues for MVCC if finish this row. Only used for reversed scan
052  private ExtendedCell stopSkippingKVsRow;
053  // last iterated KVs by seek (to restore the iterator state after reseek)
054  private ExtendedCell last = null;
055
056  // flag to indicate if this scanner is closed
057  protected boolean closed = false;
058
059  /**
060   * Scanners are ordered from 0 (oldest) to newest in increasing order.
061   */
062  protected SegmentScanner(Segment segment, long readPoint) {
063    this.segment = segment;
064    this.readPoint = readPoint;
065    // increase the reference count so the underlying structure will not be de-allocated
066    this.segment.incScannerCount();
067    iter = segment.iterator();
068    // the initialization of the current is required for working with heap of SegmentScanners
069    updateCurrent();
070    if (current == null) {
071      // nothing to fetch from this scanner
072      close();
073    }
074  }
075
076  /**
077   * Look at the next Cell in this scanner, but do not iterate the scanner
078   * @return the currently observed Cell
079   */
080  @Override
081  public ExtendedCell peek() { // sanity check, the current should be always valid
082    if (closed) {
083      return null;
084    }
085    if (current != null && current.getSequenceId() > readPoint) {
086      throw new RuntimeException("current is invalid: read point is " + readPoint + ", "
087        + "while current sequence id is " + current.getSequenceId());
088    }
089    return current;
090  }
091
092  /**
093   * Return the next Cell in this scanner, iterating the scanner
094   * @return the next Cell or null if end of scanner
095   */
096  @Override
097  public ExtendedCell next() throws IOException {
098    if (closed) {
099      return null;
100    }
101    ExtendedCell oldCurrent = current;
102    updateCurrent(); // update the currently observed Cell
103    return oldCurrent;
104  }
105
106  /**
107   * Seek the scanner at or after the specified Cell.
108   * @param cell seek value
109   * @return true if scanner has values left, false if end of scanner
110   */
111  @Override
112  public boolean seek(ExtendedCell cell) throws IOException {
113    if (closed) {
114      return false;
115    }
116    if (cell == null) {
117      close();
118      return false;
119    }
120    // restart the iterator from new key
121    iter = getIterator(cell);
122    // last is going to be reinitialized in the next getNext() call
123    last = null;
124    updateCurrent();
125    return (current != null);
126  }
127
128  protected Iterator<ExtendedCell> getIterator(ExtendedCell cell) {
129    return segment.tailSet(cell).iterator();
130  }
131
132  /**
133   * Reseek the scanner at or after the specified KeyValue. This method is guaranteed to seek at or
134   * after the required key only if the key comes after the current position of the scanner. Should
135   * not be used to seek to a key which may come before the current position.
136   * @param cell seek value (should be non-null)
137   * @return true if scanner has values left, false if end of scanner
138   */
139  @Override
140  public boolean reseek(ExtendedCell cell) throws IOException {
141    if (closed) {
142      return false;
143    }
144    /*
145     * See HBASE-4195 & HBASE-3855 & HBASE-6591 for the background on this implementation. This code
146     * is executed concurrently with flush and puts, without locks. The ideal implementation for
147     * performance would use the sub skip list implicitly pointed by the iterator. Unfortunately the
148     * Java API does not offer a method to get it. So we remember the last keys we iterated to and
149     * restore the reseeked set to at least that point.
150     */
151    iter = getIterator(getHighest(cell, last));
152    updateCurrent();
153    return (current != null);
154  }
155
156  /**
157   * Seek the scanner at or before the row of specified Cell, it firstly tries to seek the scanner
158   * at or after the specified Cell, return if peek KeyValue of scanner has the same row with
159   * specified Cell, otherwise seek the scanner at the first Cell of the row which is the previous
160   * row of specified KeyValue
161   * @param key seek Cell
162   * @return true if the scanner is at the valid KeyValue, false if such Cell does not exist
163   */
164  @Override
165  public boolean backwardSeek(ExtendedCell key) throws IOException {
166    if (closed) {
167      return false;
168    }
169    seek(key); // seek forward then go backward
170    if (peek() == null || segment.compareRows(peek(), key) > 0) {
171      return seekToPreviousRow(key);
172    }
173    return true;
174  }
175
176  /**
177   * Seek the scanner at the first Cell of the row which is the previous row of specified key
178   * @param cell seek value
179   * @return true if the scanner at the first valid Cell of previous row, false if not existing such
180   *         Cell
181   */
182  @Override
183  public boolean seekToPreviousRow(ExtendedCell cell) throws IOException {
184    if (closed) {
185      return false;
186    }
187    boolean keepSeeking;
188    Cell key = cell;
189    do {
190      ExtendedCell firstKeyOnRow = PrivateCellUtil.createFirstOnRow(key);
191      SortedSet<ExtendedCell> cellHead = segment.headSet(firstKeyOnRow);
192      Cell lastCellBeforeRow = cellHead.isEmpty() ? null : cellHead.last();
193      if (lastCellBeforeRow == null) {
194        current = null;
195        return false;
196      }
197      ExtendedCell firstKeyOnPreviousRow = PrivateCellUtil.createFirstOnRow(lastCellBeforeRow);
198      this.stopSkippingKVsIfNextRow = true;
199      this.stopSkippingKVsRow = firstKeyOnPreviousRow;
200      seek(firstKeyOnPreviousRow);
201      this.stopSkippingKVsIfNextRow = false;
202      if (
203        peek() == null || segment.getComparator().compareRows(peek(), firstKeyOnPreviousRow) > 0
204      ) {
205        keepSeeking = true;
206        key = firstKeyOnPreviousRow;
207        continue;
208      } else {
209        keepSeeking = false;
210      }
211    } while (keepSeeking);
212    return true;
213  }
214
215  /**
216   * Seek the scanner at the first KeyValue of last row
217   * @return true if scanner has values left, false if the underlying data is empty
218   */
219  @Override
220  public boolean seekToLastRow() throws IOException {
221    if (closed) {
222      return false;
223    }
224    ExtendedCell higherCell = segment.isEmpty() ? null : segment.last();
225    if (higherCell == null) {
226      return false;
227    }
228
229    ExtendedCell firstCellOnLastRow = PrivateCellUtil.createFirstOnRow(higherCell);
230
231    if (seek(firstCellOnLastRow)) {
232      return true;
233    } else {
234      return seekToPreviousRow(higherCell);
235    }
236  }
237
238  /**
239   * Close the KeyValue scanner.
240   */
241  @Override
242  public void close() {
243    if (closed) {
244      return;
245    }
246    getSegment().decScannerCount();
247    closed = true;
248  }
249
250  /**
251   * This functionality should be resolved in the higher level which is MemStoreScanner, currently
252   * returns true as default. Doesn't throw IllegalStateException in order not to change the
253   * signature of the overridden method
254   */
255  @Override
256  public boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS) {
257    return getSegment().shouldSeek(scan.getColumnFamilyTimeRange().getOrDefault(
258      store.getColumnFamilyDescriptor().getName(), scan.getTimeRange()), oldestUnexpiredTS);
259  }
260
261  @Override
262  public boolean requestSeek(ExtendedCell c, boolean forward, boolean useBloom) throws IOException {
263    return NonLazyKeyValueScanner.doRealSeek(this, c, forward);
264  }
265
266  /**
267   * This scanner is working solely on the in-memory MemStore and doesn't work on store files,
268   * MutableCellSetSegmentScanner always does the seek, therefore always returning true.
269   */
270  @Override
271  public boolean realSeekDone() {
272    return true;
273  }
274
275  /**
276   * This function should be never called on scanners that always do real seek operations (i.e. most
277   * of the scanners and also this one). The easiest way to achieve this is to call
278   * {@link #realSeekDone()} first.
279   */
280  @Override
281  public void enforceSeek() throws IOException {
282    throw new NotImplementedException("enforceSeek cannot be called on a SegmentScanner");
283  }
284
285  /** Returns true if this is a file scanner. Otherwise a memory scanner is assumed. */
286  @Override
287  public boolean isFileScanner() {
288    return false;
289  }
290
291  @Override
292  public void recordBlockSize(IntConsumer blockSizeConsumer) {
293    // do nothing
294  }
295
296  @Override
297  public Path getFilePath() {
298    return null;
299  }
300
301  /**
302   * @return the next key in the index (the key to seek to the next block) if known, or null
303   *         otherwise Not relevant for in-memory scanner
304   */
305  @Override
306  public ExtendedCell getNextIndexedKey() {
307    return null;
308  }
309
310  /**
311   * Called after a batch of rows scanned (RPC) and set to be returned to client. Any in between
312   * cleanup can be done here. Nothing to be done for MutableCellSetSegmentScanner.
313   */
314  @Override
315  public void shipped() throws IOException {
316    // do nothing
317  }
318
319  // debug method
320  @Override
321  public String toString() {
322    String res = "Store segment scanner of type " + this.getClass().getName() + "; ";
323    res += "Scanner order " + getScannerOrder() + "; ";
324    res += getSegment().toString();
325    return res;
326  }
327
328  /********************* Private Methods **********************/
329
330  private Segment getSegment() {
331    return segment;
332  }
333
334  /**
335   * Private internal method for iterating over the segment, skipping the cells with irrelevant MVCC
336   */
337  protected void updateCurrent() {
338    ExtendedCell next = null;
339
340    try {
341      while (iter.hasNext()) {
342        next = iter.next();
343        if (next.getSequenceId() <= this.readPoint) {
344          current = next;
345          return;// skip irrelevant versions
346        }
347        // for backwardSeek() stay in the boundaries of a single row
348        if (stopSkippingKVsIfNextRow && segment.compareRows(next, stopSkippingKVsRow) > 0) {
349          current = null;
350          return;
351        }
352      } // end of while
353
354      current = null; // nothing found
355    } finally {
356      if (next != null) {
357        // in all cases, remember the last KV we iterated to, needed for reseek()
358        last = next;
359      }
360    }
361  }
362
363  /**
364   * Private internal method that returns the higher of the two key values, or null if they are both
365   * null
366   */
367  private ExtendedCell getHighest(ExtendedCell first, ExtendedCell second) {
368    if (first == null && second == null) {
369      return null;
370    }
371    if (first != null && second != null) {
372      int compare = segment.compare(first, second);
373      return (compare > 0 ? first : second);
374    }
375    return (first != null ? first : second);
376  }
377}