001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.util.List;
021import org.apache.hadoop.hbase.Cell;
022import org.apache.hadoop.hbase.HBaseInterfaceAudience;
023import org.apache.hadoop.hbase.client.metrics.ServerSideScanMetrics;
024import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
025import org.apache.yetus.audience.InterfaceAudience;
026import org.apache.yetus.audience.InterfaceStability;
027
028/**
029 * ScannerContext instances encapsulate limit tracking AND progress towards those limits during
030 * invocations of {@link InternalScanner#next(java.util.List)} and
031 * {@link RegionScanner#next(java.util.List)}.
032 * <p>
033 * A ScannerContext instance should be updated periodically throughout execution whenever progress
034 * towards a limit has been made. Each limit can be checked via the appropriate checkLimit method.
035 * <p>
036 * Once a limit has been reached, the scan will stop. The invoker of
037 * {@link InternalScanner#next(java.util.List)} or {@link RegionScanner#next(java.util.List)} can
038 * use the appropriate check*Limit methods to see exactly which limits have been reached.
 * Alternatively, {@link #checkAnyLimitReached(LimitScope)} is provided to see if ANY limit was
 * reached.
041 * <p>
042 * {@link NoLimitScannerContext#NO_LIMIT} is an immutable static definition that can be used
043 * whenever a {@link ScannerContext} is needed but limits do not need to be enforced.
044 * <p>
045 * NOTE: It is important that this class only ever expose setter methods that can be safely skipped
046 * when limits should be NOT enforced. This is because of the necessary immutability of the class
047 * {@link NoLimitScannerContext}. If a setter cannot be safely skipped, the immutable nature of
048 * {@link NoLimitScannerContext} will lead to incorrect behavior.
049 */
050@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC)
051@InterfaceStability.Evolving
052public class ScannerContext {
053
054  LimitFields limits;
055  /**
056   * A different set of progress fields. Only include batch, dataSize and heapSize. Compare to
057   * LimitFields, ProgressFields doesn't contain time field. As we save a deadline in LimitFields,
058   * so use {@link EnvironmentEdgeManager.currentTime()} directly when check time limit.
059   */
060  ProgressFields progress;
061
062  /**
063   * The state of the scanner after the invocation of {@link InternalScanner#next(java.util.List)}
064   * or {@link RegionScanner#next(java.util.List)}.
065   */
066  NextState scannerState;
067  private static final NextState DEFAULT_STATE = NextState.MORE_VALUES;
068
069  /**
070   * Used as an indication to invocations of {@link InternalScanner#next(java.util.List)} and
071   * {@link RegionScanner#next(java.util.List)} that, if true, the progress tracked within this
072   * {@link ScannerContext} instance should be considered while evaluating the limits. Useful for
073   * enforcing a set of limits across multiple calls (i.e. the limit may not be reached in a single
074   * invocation, but any progress made should be considered in future invocations)
075   * <p>
076   * Defaulting this value to false means that, by default, any tracked progress will be wiped clean
077   * on invocations to {@link InternalScanner#next(java.util.List)} and
078   * {@link RegionScanner#next(java.util.List)} and the call will be treated as though no progress
079   * has been made towards the limits so far.
080   * <p>
081   * This is an important mechanism. Users of Internal/Region scanners expect that they can define
082   * some limits and then repeatedly invoke {@link InternalScanner#next(List)} or
083   * {@link RegionScanner#next(List)} where each invocation respects these limits separately.
084   * <p>
085   * For example:
086   *
087   * <pre>
088   *  {@code
089   * ScannerContext context = new ScannerContext.newBuilder().setBatchLimit(5).build();
090   * RegionScanner scanner = ...
091   * List<Cell> results = new ArrayList<Cell>();
092   * while(scanner.next(results, context)) {
093   *   // Do something with a batch of 5 cells
094   * }
095   * }
096   * </pre>
097   *
098   * However, in the case of RPCs, the server wants to be able to define a set of limits for a
099   * particular RPC request and have those limits respected across multiple invocations. This means
100   * that the progress made towards the limits in earlier calls will be saved and considered in
101   * future invocations
102   */
103  boolean keepProgress;
104  private static boolean DEFAULT_KEEP_PROGRESS = false;
105
106  private Cell lastPeekedCell = null;
107
108  // Set this to true will have the same behavior with reaching the time limit.
109  // This is used when you want to make the current RSRpcService.scan returns immediately. For
110  // example, when we want to switch from pread to stream, we can only do it after the rpc call is
111  // returned.
112  private boolean returnImmediately;
113
114  /**
115   * Tracks the relevant server side metrics during scans. null when metrics should not be tracked
116   */
117  final ServerSideScanMetrics metrics;
118
119  ScannerContext(boolean keepProgress, LimitFields limitsToCopy, boolean trackMetrics) {
120    this.limits = new LimitFields();
121    if (limitsToCopy != null) {
122      this.limits.copy(limitsToCopy);
123    }
124
125    // Progress fields are initialized to 0
126    progress = new ProgressFields(0, 0, 0);
127
128    this.keepProgress = keepProgress;
129    this.scannerState = DEFAULT_STATE;
130    this.metrics = trackMetrics ? new ServerSideScanMetrics() : null;
131  }
132
133  public boolean isTrackingMetrics() {
134    return this.metrics != null;
135  }
136
137  /**
138   * Get the metrics instance. Should only be called after a call to {@link #isTrackingMetrics()}
139   * has been made to confirm that metrics are indeed being tracked.
140   * @return {@link ServerSideScanMetrics} instance that is tracking metrics for this scan
141   */
142  public ServerSideScanMetrics getMetrics() {
143    assert isTrackingMetrics();
144    return this.metrics;
145  }
146
147  /**
148   * @return true if the progress tracked so far in this instance will be considered during an
149   *         invocation of {@link InternalScanner#next(java.util.List)} or
150   *         {@link RegionScanner#next(java.util.List)}. false when the progress tracked so far
151   *         should not be considered and should instead be wiped away via {@link #clearProgress()}
152   */
153  boolean getKeepProgress() {
154    return keepProgress;
155  }
156
157  void setKeepProgress(boolean keepProgress) {
158    this.keepProgress = keepProgress;
159  }
160
161  /**
162   * Progress towards the batch limit has been made. Increment internal tracking of batch progress
163   */
164  void incrementBatchProgress(int batch) {
165    int currentBatch = progress.getBatch();
166    progress.setBatch(currentBatch + batch);
167  }
168
169  /**
170   * Progress towards the size limit has been made. Increment internal tracking of size progress
171   */
172  void incrementSizeProgress(long dataSize, long heapSize) {
173    long curDataSize = progress.getDataSize();
174    progress.setDataSize(curDataSize + dataSize);
175    long curHeapSize = progress.getHeapSize();
176    progress.setHeapSize(curHeapSize + heapSize);
177  }
178
179  int getBatchProgress() {
180    return progress.getBatch();
181  }
182
183  long getDataSizeProgress() {
184    return progress.getDataSize();
185  }
186
187  long getHeapSizeProgress() {
188    return progress.getHeapSize();
189  }
190
191  void setProgress(int batchProgress, long sizeProgress, long heapSizeProgress) {
192    setBatchProgress(batchProgress);
193    setSizeProgress(sizeProgress, heapSizeProgress);
194  }
195
196  void setSizeProgress(long dataSizeProgress, long heapSizeProgress) {
197    progress.setDataSize(dataSizeProgress);
198    progress.setHeapSize(heapSizeProgress);
199  }
200
201  void setBatchProgress(int batchProgress) {
202    progress.setBatch(batchProgress);
203  }
204
205  /**
206   * Clear away any progress that has been made so far. All progress fields are reset to initial
207   * values
208   */
209  void clearProgress() {
210    progress.setFields(0, 0, 0);
211  }
212
213  /**
214   * Note that this is not a typical setter. This setter returns the {@link NextState} that was
215   * passed in so that methods can be invoked against the new state. Furthermore, this pattern
216   * allows the {@link NoLimitScannerContext} to cleanly override this setter and simply return the
217   * new state, thus preserving the immutability of {@link NoLimitScannerContext} n * @return The
218   * state that was passed in.
219   */
220  NextState setScannerState(NextState state) {
221    if (!NextState.isValidState(state)) {
222      throw new IllegalArgumentException("Cannot set to invalid state: " + state);
223    }
224
225    this.scannerState = state;
226    return state;
227  }
228
229  /**
230   * @return true when we have more cells for the current row. This usually because we have reached
231   *         a limit in the middle of a row
232   */
233  boolean mayHaveMoreCellsInRow() {
234    return scannerState == NextState.SIZE_LIMIT_REACHED_MID_ROW
235      || scannerState == NextState.TIME_LIMIT_REACHED_MID_ROW
236      || scannerState == NextState.BATCH_LIMIT_REACHED;
237  }
238
239  /**
240   * n * @return true if the batch limit can be enforced in the checker's scope
241   */
242  boolean hasBatchLimit(LimitScope checkerScope) {
243    return limits.canEnforceBatchLimitFromScope(checkerScope) && limits.getBatch() > 0;
244  }
245
246  /**
247   * n * @return true if the size limit can be enforced in the checker's scope
248   */
249  boolean hasSizeLimit(LimitScope checkerScope) {
250    return limits.canEnforceSizeLimitFromScope(checkerScope)
251      && (limits.getDataSize() > 0 || limits.getHeapSize() > 0);
252  }
253
254  /**
255   * n * @return true if the time limit can be enforced in the checker's scope
256   */
257  boolean hasTimeLimit(LimitScope checkerScope) {
258    return limits.canEnforceTimeLimitFromScope(checkerScope)
259      && (limits.getTime() > 0 || returnImmediately);
260  }
261
262  /**
263   * n * @return true if any limit can be enforced within the checker's scope
264   */
265  boolean hasAnyLimit(LimitScope checkerScope) {
266    return hasBatchLimit(checkerScope) || hasSizeLimit(checkerScope) || hasTimeLimit(checkerScope);
267  }
268
269  /**
270   * @param scope The scope in which the size limit will be enforced
271   */
272  void setSizeLimitScope(LimitScope scope) {
273    limits.setSizeScope(scope);
274  }
275
276  /**
277   * @param scope The scope in which the time limit will be enforced
278   */
279  void setTimeLimitScope(LimitScope scope) {
280    limits.setTimeScope(scope);
281  }
282
283  int getBatchLimit() {
284    return limits.getBatch();
285  }
286
287  long getDataSizeLimit() {
288    return limits.getDataSize();
289  }
290
291  long getTimeLimit() {
292    return limits.getTime();
293  }
294
295  /**
296   * @param checkerScope The scope that the limit is being checked from
297   * @return true when the limit is enforceable from the checker's scope and it has been reached
298   */
299  boolean checkBatchLimit(LimitScope checkerScope) {
300    return hasBatchLimit(checkerScope) && progress.getBatch() >= limits.getBatch();
301  }
302
303  /**
304   * @param checkerScope The scope that the limit is being checked from
305   * @return true when the limit is enforceable from the checker's scope and it has been reached
306   */
307  boolean checkSizeLimit(LimitScope checkerScope) {
308    return hasSizeLimit(checkerScope) && (progress.getDataSize() >= limits.getDataSize()
309      || progress.getHeapSize() >= limits.getHeapSize());
310  }
311
312  /**
313   * @param checkerScope The scope that the limit is being checked from. The time limit is always
314   *                     checked against {@link EnvironmentEdgeManager.currentTime}
315   * @return true when the limit is enforceable from the checker's scope and it has been reached
316   */
317  boolean checkTimeLimit(LimitScope checkerScope) {
318    return hasTimeLimit(checkerScope)
319      && (returnImmediately || EnvironmentEdgeManager.currentTime() >= limits.getTime());
320  }
321
322  /**
323   * @param checkerScope The scope that the limits are being checked from
324   * @return true when some limit is enforceable from the checker's scope and it has been reached
325   */
326  boolean checkAnyLimitReached(LimitScope checkerScope) {
327    return checkSizeLimit(checkerScope) || checkBatchLimit(checkerScope)
328      || checkTimeLimit(checkerScope);
329  }
330
331  Cell getLastPeekedCell() {
332    return lastPeekedCell;
333  }
334
335  void setLastPeekedCell(Cell lastPeekedCell) {
336    this.lastPeekedCell = lastPeekedCell;
337  }
338
339  void returnImmediately() {
340    this.returnImmediately = true;
341  }
342
343  @Override
344  public String toString() {
345    StringBuilder sb = new StringBuilder();
346    sb.append("{");
347
348    sb.append("limits:");
349    sb.append(limits);
350
351    sb.append(", progress:");
352    sb.append(progress);
353
354    sb.append(", keepProgress:");
355    sb.append(keepProgress);
356
357    sb.append(", state:");
358    sb.append(scannerState);
359
360    sb.append("}");
361    return sb.toString();
362  }
363
364  public static Builder newBuilder() {
365    return new Builder();
366  }
367
368  public static Builder newBuilder(boolean keepProgress) {
369    return new Builder(keepProgress);
370  }
371
372  public static final class Builder {
373    boolean keepProgress = DEFAULT_KEEP_PROGRESS;
374    boolean trackMetrics = false;
375    LimitFields limits = new LimitFields();
376
377    private Builder() {
378    }
379
380    private Builder(boolean keepProgress) {
381      this.keepProgress = keepProgress;
382    }
383
384    public Builder setKeepProgress(boolean keepProgress) {
385      this.keepProgress = keepProgress;
386      return this;
387    }
388
389    public Builder setTrackMetrics(boolean trackMetrics) {
390      this.trackMetrics = trackMetrics;
391      return this;
392    }
393
394    public Builder setSizeLimit(LimitScope sizeScope, long dataSizeLimit, long heapSizeLimit) {
395      limits.setDataSize(dataSizeLimit);
396      limits.setHeapSize(heapSizeLimit);
397      limits.setSizeScope(sizeScope);
398      return this;
399    }
400
401    public Builder setTimeLimit(LimitScope timeScope, long timeLimit) {
402      limits.setTime(timeLimit);
403      limits.setTimeScope(timeScope);
404      return this;
405    }
406
407    public Builder setBatchLimit(int batchLimit) {
408      limits.setBatch(batchLimit);
409      return this;
410    }
411
412    public ScannerContext build() {
413      return new ScannerContext(keepProgress, limits, trackMetrics);
414    }
415  }
416
417  /**
418   * The possible states a scanner may be in following a call to {@link InternalScanner#next(List)}
419   */
420  public enum NextState {
421    MORE_VALUES(true, false),
422    NO_MORE_VALUES(false, false),
423    SIZE_LIMIT_REACHED(true, true),
424
425    /**
426     * Special case of size limit reached to indicate that the size limit was reached in the middle
427     * of a row and thus a partial results was formed
428     */
429    SIZE_LIMIT_REACHED_MID_ROW(true, true),
430    TIME_LIMIT_REACHED(true, true),
431
432    /**
433     * Special case of time limit reached to indicate that the time limit was reached in the middle
434     * of a row and thus a partial results was formed
435     */
436    TIME_LIMIT_REACHED_MID_ROW(true, true),
437    BATCH_LIMIT_REACHED(true, true);
438
439    private final boolean moreValues;
440    private final boolean limitReached;
441
442    private NextState(boolean moreValues, boolean limitReached) {
443      this.moreValues = moreValues;
444      this.limitReached = limitReached;
445    }
446
447    /**
448     * @return true when the state indicates that more values may follow those that have been
449     *         returned
450     */
451    public boolean hasMoreValues() {
452      return this.moreValues;
453    }
454
455    /** Returns true when the state indicates that a limit has been reached and scan should stop */
456    public boolean limitReached() {
457      return this.limitReached;
458    }
459
460    public static boolean isValidState(NextState state) {
461      return state != null;
462    }
463
464    public static boolean hasMoreValues(NextState state) {
465      return isValidState(state) && state.hasMoreValues();
466    }
467  }
468
469  /**
470   * The various scopes where a limit can be enforced. Used to differentiate when a limit should be
471   * enforced or not.
472   */
473  public enum LimitScope {
474    /**
475     * Enforcing a limit between rows means that the limit will not be considered until all the
476     * cells for a particular row have been retrieved
477     */
478    BETWEEN_ROWS(0),
479
480    /**
481     * Enforcing a limit between cells means that the limit will be considered after each full cell
482     * has been retrieved
483     */
484    BETWEEN_CELLS(1);
485
486    /**
487     * When enforcing a limit, we must check that the scope is appropriate for enforcement.
488     * <p>
489     * To communicate this concept, each scope has a depth. A limit will be enforced if the depth of
490     * the checker's scope is less than or equal to the limit's scope. This means that when checking
491     * limits, the checker must know their own scope (i.e. are they checking the limits between
492     * rows, between cells, etc...)
493     */
494    final int depth;
495
496    LimitScope(int depth) {
497      this.depth = depth;
498    }
499
500    final int depth() {
501      return depth;
502    }
503
504    /**
505     * @param checkerScope The scope in which the limit is being checked
506     * @return true when the checker is in a scope that indicates the limit can be enforced. Limits
507     *         can be enforced from "higher or equal" scopes (i.e. the checker's scope is at a
508     *         lesser depth than the limit)
509     */
510    boolean canEnforceLimitFromScope(LimitScope checkerScope) {
511      return checkerScope != null && checkerScope.depth() <= depth;
512    }
513  }
514
515  /**
516   * The different fields that can be used as limits in calls to
517   * {@link InternalScanner#next(java.util.List)} and {@link RegionScanner#next(java.util.List)}
518   */
519  private static class LimitFields {
520    /**
521     * Default values of the limit fields. Defined such that if a field does NOT change from its
522     * default, it will not be enforced
523     */
524    private static int DEFAULT_BATCH = -1;
525    private static long DEFAULT_SIZE = -1L;
526    private static long DEFAULT_TIME = -1L;
527
528    /**
529     * Default scope that is assigned to a limit if a scope is not specified.
530     */
531    private static final LimitScope DEFAULT_SCOPE = LimitScope.BETWEEN_ROWS;
532
533    // The batch limit will always be enforced between cells, thus, there isn't a field to hold the
534    // batch scope
535    int batch = DEFAULT_BATCH;
536
537    LimitScope sizeScope = DEFAULT_SCOPE;
538    // The sum of cell data sizes(key + value). The Cell data might be in on heap or off heap area.
539    long dataSize = DEFAULT_SIZE;
540    // The sum of heap space occupied by all tracked cells. This includes Cell POJO's overhead as
541    // such AND data cells of Cells which are in on heap area.
542    long heapSize = DEFAULT_SIZE;
543
544    LimitScope timeScope = DEFAULT_SCOPE;
545    long time = DEFAULT_TIME;
546
547    /**
548     * Fields keep their default values.
549     */
550    LimitFields() {
551    }
552
553    void copy(LimitFields limitsToCopy) {
554      if (limitsToCopy != null) {
555        setFields(limitsToCopy.getBatch(), limitsToCopy.getSizeScope(), limitsToCopy.getDataSize(),
556          limitsToCopy.getHeapSize(), limitsToCopy.getTimeScope(), limitsToCopy.getTime());
557      }
558    }
559
560    /**
561     * Set all fields together.
562     */
563    void setFields(int batch, LimitScope sizeScope, long dataSize, long heapSize,
564      LimitScope timeScope, long time) {
565      setBatch(batch);
566      setSizeScope(sizeScope);
567      setDataSize(dataSize);
568      setHeapSize(heapSize);
569      setTimeScope(timeScope);
570      setTime(time);
571    }
572
573    int getBatch() {
574      return this.batch;
575    }
576
577    void setBatch(int batch) {
578      this.batch = batch;
579    }
580
581    /**
582     * n * @return true when the limit can be enforced from the scope of the checker
583     */
584    boolean canEnforceBatchLimitFromScope(LimitScope checkerScope) {
585      return LimitScope.BETWEEN_CELLS.canEnforceLimitFromScope(checkerScope);
586    }
587
588    long getDataSize() {
589      return this.dataSize;
590    }
591
592    long getHeapSize() {
593      return this.heapSize;
594    }
595
596    void setDataSize(long dataSize) {
597      this.dataSize = dataSize;
598    }
599
600    void setHeapSize(long heapSize) {
601      this.heapSize = heapSize;
602    }
603
604    /** Returns {@link LimitScope} indicating scope in which the size limit is enforced */
605    LimitScope getSizeScope() {
606      return this.sizeScope;
607    }
608
609    /**
610     * Change the scope in which the size limit is enforced
611     */
612    void setSizeScope(LimitScope scope) {
613      this.sizeScope = scope;
614    }
615
616    /**
617     * n * @return true when the limit can be enforced from the scope of the checker
618     */
619    boolean canEnforceSizeLimitFromScope(LimitScope checkerScope) {
620      return this.sizeScope.canEnforceLimitFromScope(checkerScope);
621    }
622
623    long getTime() {
624      return this.time;
625    }
626
627    void setTime(long time) {
628      this.time = time;
629    }
630
631    /** Returns {@link LimitScope} indicating scope in which the time limit is enforced */
632    LimitScope getTimeScope() {
633      return this.timeScope;
634    }
635
636    /**
637     * Change the scope in which the time limit is enforced
638     */
639    void setTimeScope(LimitScope scope) {
640      this.timeScope = scope;
641    }
642
643    /**
644     * n * @return true when the limit can be enforced from the scope of the checker
645     */
646    boolean canEnforceTimeLimitFromScope(LimitScope checkerScope) {
647      return this.timeScope.canEnforceLimitFromScope(checkerScope);
648    }
649
650    @Override
651    public String toString() {
652      StringBuilder sb = new StringBuilder();
653      sb.append("{");
654
655      sb.append("batch:");
656      sb.append(batch);
657
658      sb.append(", dataSize:");
659      sb.append(dataSize);
660
661      sb.append(", heapSize:");
662      sb.append(heapSize);
663
664      sb.append(", sizeScope:");
665      sb.append(sizeScope);
666
667      sb.append(", time:");
668      sb.append(time);
669
670      sb.append(", timeScope:");
671      sb.append(timeScope);
672
673      sb.append("}");
674      return sb.toString();
675    }
676  }
677
678  private static class ProgressFields {
679
680    private static int DEFAULT_BATCH = -1;
681    private static long DEFAULT_SIZE = -1L;
682
683    // The batch limit will always be enforced between cells, thus, there isn't a field to hold the
684    // batch scope
685    int batch = DEFAULT_BATCH;
686
687    // The sum of cell data sizes(key + value). The Cell data might be in on heap or off heap area.
688    long dataSize = DEFAULT_SIZE;
689    // The sum of heap space occupied by all tracked cells. This includes Cell POJO's overhead as
690    // such AND data cells of Cells which are in on heap area.
691    long heapSize = DEFAULT_SIZE;
692
693    ProgressFields(int batch, long size, long heapSize) {
694      setFields(batch, size, heapSize);
695    }
696
697    /**
698     * Set all fields together.
699     */
700    void setFields(int batch, long dataSize, long heapSize) {
701      setBatch(batch);
702      setDataSize(dataSize);
703      setHeapSize(heapSize);
704    }
705
706    int getBatch() {
707      return this.batch;
708    }
709
710    void setBatch(int batch) {
711      this.batch = batch;
712    }
713
714    long getDataSize() {
715      return this.dataSize;
716    }
717
718    long getHeapSize() {
719      return this.heapSize;
720    }
721
722    void setDataSize(long dataSize) {
723      this.dataSize = dataSize;
724    }
725
726    void setHeapSize(long heapSize) {
727      this.heapSize = heapSize;
728    }
729
730    @Override
731    public String toString() {
732      StringBuilder sb = new StringBuilder();
733      sb.append("{");
734
735      sb.append("batch:");
736      sb.append(batch);
737
738      sb.append(", dataSize:");
739      sb.append(dataSize);
740
741      sb.append(", heapSize:");
742      sb.append(heapSize);
743
744      sb.append("}");
745      return sb.toString();
746    }
747  }
748}