001/*
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.client;
021
022import java.io.IOException;
023import java.util.ArrayList;
024import java.util.HashMap;
025import java.util.List;
026import java.util.Map;
027import java.util.NavigableSet;
028import java.util.TreeMap;
029import java.util.TreeSet;
030import org.apache.hadoop.hbase.HConstants;
031import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
032import org.apache.hadoop.hbase.filter.Filter;
033import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
034import org.apache.hadoop.hbase.io.TimeRange;
035import org.apache.hadoop.hbase.security.access.Permission;
036import org.apache.hadoop.hbase.security.visibility.Authorizations;
037import org.apache.hadoop.hbase.util.Bytes;
038import org.apache.yetus.audience.InterfaceAudience;
039import org.slf4j.Logger;
040import org.slf4j.LoggerFactory;
041
042/**
043 * Used to perform Scan operations.
044 * <p>
045 * All operations are identical to {@link Get} with the exception of instantiation. Rather than
046 * specifying a single row, an optional startRow and stopRow may be defined. If rows are not
047 * specified, the Scanner will iterate over all rows.
048 * <p>
049 * To get all columns from all rows of a Table, create an instance with no constraints; use the
050 * {@link #Scan()} constructor. To constrain the scan to specific column families, call
051 * {@link #addFamily(byte[]) addFamily} for each family to retrieve on your Scan instance.
052 * <p>
053 * To get specific columns, call {@link #addColumn(byte[], byte[]) addColumn} for each column to
054 * retrieve.
055 * <p>
056 * To only retrieve columns within a specific range of version timestamps, call
057 * {@link #setTimeRange(long, long) setTimeRange}.
058 * <p>
 * To only retrieve columns with a specific timestamp, call
 * {@link #setTimestamp(long) setTimestamp}.
061 * <p>
062 * To limit the number of versions of each column to be returned, call {@link #setMaxVersions(int)
063 * setMaxVersions}.
064 * <p>
065 * To limit the maximum number of values returned for each call to next(), call
066 * {@link #setBatch(int) setBatch}.
067 * <p>
068 * To add a filter, call {@link #setFilter(org.apache.hadoop.hbase.filter.Filter) setFilter}.
069 * <p>
 * Small scans are deprecated since 2.0.0. Instead, the Scan object now has a
 * {@link #setLimit(int)} method which tells the RS how many rows we want. Once the number of
 * returned rows reaches the limit, the RS closes the RegionScanner automatically. The new
 * implementation also fetches data while opening the scanner, so a scan operation can still be
 * finished in one rpc call. In addition, there is a {@link #setReadType(ReadType)} method which
 * can be used to tell the RS to use pread explicitly.
076 * <p>
077 * Expert: To explicitly disable server-side block caching for this scan, execute
078 * {@link #setCacheBlocks(boolean)}.
079 * <p>
 * <em>Note:</em> Usage alters Scan instances. Internally, attributes are updated as the Scan runs
 * and, if enabled, metrics accumulate in the Scan instance. Be aware of this when you clone a
 * Scan instance or reuse a previously created Scan instance; it is safer to create a Scan
 * instance per usage.
084 */
085@InterfaceAudience.Public
086public class Scan extends Query {
087  private static final Logger LOG = LoggerFactory.getLogger(Scan.class);
088
089  private static final String RAW_ATTR = "_raw_";
090
091  private byte[] startRow = HConstants.EMPTY_START_ROW;
092  private boolean includeStartRow = true;
093  private byte[] stopRow  = HConstants.EMPTY_END_ROW;
094  private boolean includeStopRow = false;
095  private int maxVersions = 1;
096  private int batch = -1;
097
098  /**
   * Partial {@link Result}s are {@link Result}s that must be combined to form a complete
   * {@link Result}.
100   * The {@link Result}s had to be returned in fragments (i.e. as partials) because the size of the
101   * cells in the row exceeded max result size on the server. Typically partial results will be
102   * combined client side into complete results before being delivered to the caller. However, if
103   * this flag is set, the caller is indicating that they do not mind seeing partial results (i.e.
104   * they understand that the results returned from the Scanner may only represent part of a
105   * particular row). In such a case, any attempt to combine the partials into a complete result on
106   * the client side will be skipped, and the caller will be able to see the exact results returned
107   * from the server.
108   */
109  private boolean allowPartialResults = false;
110
111  private int storeLimit = -1;
112  private int storeOffset = 0;
113
114  private static final String SCAN_ATTRIBUTES_METRICS_ENABLE = "scan.attributes.metrics.enable";
115
116  // If an application wants to use multiple scans over different tables each scan must
117  // define this attribute with the appropriate table name by calling
118  // scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName))
119  static public final String SCAN_ATTRIBUTES_TABLE_NAME = "scan.attributes.table.name";
120
121  /**
122   * -1 means no caching specified and the value of {@link HConstants#HBASE_CLIENT_SCANNER_CACHING}
123   * (default to {@link HConstants#DEFAULT_HBASE_CLIENT_SCANNER_CACHING}) will be used
124   */
125  private int caching = -1;
126  private long maxResultSize = -1;
127  private boolean cacheBlocks = true;
128  private boolean reversed = false;
129  private TimeRange tr = TimeRange.allTime();
130  private Map<byte [], NavigableSet<byte []>> familyMap =
131    new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);
132  private Boolean asyncPrefetch = null;
133
134  /**
135   * Parameter name for client scanner sync/async prefetch toggle.
   * When using async scanner, prefetching data from the server is done in the background.
137   * The parameter currently won't have any effect in the case that the user has set
138   * Scan#setSmall or Scan#setReversed
139   */
140  public static final String HBASE_CLIENT_SCANNER_ASYNC_PREFETCH =
141      "hbase.client.scanner.async.prefetch";
142
143  /**
144   * Default value of {@link #HBASE_CLIENT_SCANNER_ASYNC_PREFETCH}.
145   */
146  public static final boolean DEFAULT_HBASE_CLIENT_SCANNER_ASYNC_PREFETCH = false;
147
148  /**
   * Set it to true for a small scan to get better performance. A small scan should use pread,
   * while a big scan can use seek + read. Seek + read is fast but can cause two problems: (1)
   * resource contention and (2) too much network io. See "[89-fb] Using pread for non-compaction
   * read request", https://issues.apache.org/jira/browse/HBASE-7266. On the other hand, if it is
   * set to true, we do openScanner, next and closeScanner in one RPC call, which means better
   * performance for a small scan [HBASE-9488]. Generally, if the scan range is within one data
   * block (64KB), it can be considered a small scan.
156   */
157  private boolean small = false;
158
159  /**
   * The mvcc read point to use when opening a scanner. Remember to clear it after switching
   * regions, as the mvcc is only valid within region scope.
162   */
163  private long mvccReadPoint = -1L;
164
165  /**
166   * The number of rows we want for this scan. We will terminate the scan if the number of return
167   * rows reaches this value.
168   */
169  private int limit = -1;
170
171  /**
172   * Control whether to use pread at server side.
173   */
174  private ReadType readType = ReadType.DEFAULT;
175
176  private boolean needCursorResult = false;
177
178  /**
179   * Create a Scan operation across all rows.
180   */
181  public Scan() {}
182
183  /**
184   * @deprecated since 2.0.0 and will be removed in 3.0.0. Use
185   *   {@code new Scan().withStartRow(startRow).setFilter(filter)} instead.
186   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17320">HBASE-17320</a>
187   */
188  @Deprecated
189  public Scan(byte[] startRow, Filter filter) {
190    this(startRow);
191    this.filter = filter;
192  }
193
194  /**
195   * Create a Scan operation starting at the specified row.
196   * <p>
197   * If the specified row does not exist, the Scanner will start from the next closest row after the
198   * specified row.
199   * @param startRow row to start scanner at or after
200   * @deprecated since 2.0.0 and will be removed in 3.0.0. Use
201   *   {@code new Scan().withStartRow(startRow)} instead.
202   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17320">HBASE-17320</a>
203   */
204  @Deprecated
205  public Scan(byte[] startRow) {
206    setStartRow(startRow);
207  }
208
209  /**
210   * Create a Scan operation for the range of rows specified.
211   * @param startRow row to start scanner at or after (inclusive)
212   * @param stopRow row to stop scanner before (exclusive)
213   * @deprecated since 2.0.0 and will be removed in 3.0.0. Use
214   *   {@code new Scan().withStartRow(startRow).withStopRow(stopRow)} instead.
215   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17320">HBASE-17320</a>
216   */
217  @Deprecated
218  public Scan(byte[] startRow, byte[] stopRow) {
219    setStartRow(startRow);
220    setStopRow(stopRow);
221  }
222
223  /**
224   * Creates a new instance of this class while copying all values.
225   *
226   * @param scan  The scan instance to copy from.
227   * @throws IOException When copying the values fails.
228   */
229  public Scan(Scan scan) throws IOException {
230    startRow = scan.getStartRow();
231    includeStartRow = scan.includeStartRow();
232    stopRow  = scan.getStopRow();
233    includeStopRow = scan.includeStopRow();
234    maxVersions = scan.getMaxVersions();
235    batch = scan.getBatch();
236    storeLimit = scan.getMaxResultsPerColumnFamily();
237    storeOffset = scan.getRowOffsetPerColumnFamily();
238    caching = scan.getCaching();
239    maxResultSize = scan.getMaxResultSize();
240    cacheBlocks = scan.getCacheBlocks();
241    filter = scan.getFilter(); // clone?
242    loadColumnFamiliesOnDemand = scan.getLoadColumnFamiliesOnDemandValue();
243    consistency = scan.getConsistency();
244    this.setIsolationLevel(scan.getIsolationLevel());
245    reversed = scan.isReversed();
246    asyncPrefetch = scan.isAsyncPrefetch();
247    small = scan.isSmall();
248    allowPartialResults = scan.getAllowPartialResults();
249    tr = scan.getTimeRange(); // TimeRange is immutable
250    Map<byte[], NavigableSet<byte[]>> fams = scan.getFamilyMap();
251    for (Map.Entry<byte[],NavigableSet<byte[]>> entry : fams.entrySet()) {
252      byte [] fam = entry.getKey();
253      NavigableSet<byte[]> cols = entry.getValue();
254      if (cols != null && cols.size() > 0) {
255        for (byte[] col : cols) {
256          addColumn(fam, col);
257        }
258      } else {
259        addFamily(fam);
260      }
261    }
262    for (Map.Entry<String, byte[]> attr : scan.getAttributesMap().entrySet()) {
263      setAttribute(attr.getKey(), attr.getValue());
264    }
265    for (Map.Entry<byte[], TimeRange> entry : scan.getColumnFamilyTimeRange().entrySet()) {
266      TimeRange tr = entry.getValue();
267      setColumnFamilyTimeRange(entry.getKey(), tr.getMin(), tr.getMax());
268    }
269    this.mvccReadPoint = scan.getMvccReadPoint();
270    this.limit = scan.getLimit();
271    this.needCursorResult = scan.isNeedCursorResult();
272    setPriority(scan.getPriority());
273    readType = scan.getReadType();
274    super.setReplicaId(scan.getReplicaId());
275  }
276
277  /**
278   * Builds a scan object with the same specs as get.
279   * @param get get to model scan after
280   */
281  public Scan(Get get) {
282    this.startRow = get.getRow();
283    this.includeStartRow = true;
284    this.stopRow = get.getRow();
285    this.includeStopRow = true;
286    this.filter = get.getFilter();
287    this.cacheBlocks = get.getCacheBlocks();
288    this.maxVersions = get.getMaxVersions();
289    this.storeLimit = get.getMaxResultsPerColumnFamily();
290    this.storeOffset = get.getRowOffsetPerColumnFamily();
291    this.tr = get.getTimeRange();
292    this.familyMap = get.getFamilyMap();
293    this.asyncPrefetch = false;
294    this.consistency = get.getConsistency();
295    this.setIsolationLevel(get.getIsolationLevel());
296    this.loadColumnFamiliesOnDemand = get.getLoadColumnFamiliesOnDemandValue();
297    for (Map.Entry<String, byte[]> attr : get.getAttributesMap().entrySet()) {
298      setAttribute(attr.getKey(), attr.getValue());
299    }
300    for (Map.Entry<byte[], TimeRange> entry : get.getColumnFamilyTimeRange().entrySet()) {
301      TimeRange tr = entry.getValue();
302      setColumnFamilyTimeRange(entry.getKey(), tr.getMin(), tr.getMax());
303    }
304    this.mvccReadPoint = -1L;
305    setPriority(get.getPriority());
306    super.setReplicaId(get.getReplicaId());
307  }
308
309  public boolean isGetScan() {
310    return includeStartRow && includeStopRow
311        && ClientUtil.areScanStartRowAndStopRowEqual(this.startRow, this.stopRow);
312  }
313
314  /**
315   * Get all columns from the specified family.
316   * <p>
317   * Overrides previous calls to addColumn for this family.
318   * @param family family name
319   * @return this
320   */
321  public Scan addFamily(byte [] family) {
322    familyMap.remove(family);
323    familyMap.put(family, null);
324    return this;
325  }
326
327  /**
328   * Get the column from the specified family with the specified qualifier.
329   * <p>
330   * Overrides previous calls to addFamily for this family.
331   * @param family family name
332   * @param qualifier column qualifier
333   * @return this
334   */
335  public Scan addColumn(byte [] family, byte [] qualifier) {
336    NavigableSet<byte []> set = familyMap.get(family);
337    if(set == null) {
338      set = new TreeSet<>(Bytes.BYTES_COMPARATOR);
339      familyMap.put(family, set);
340    }
341    if (qualifier == null) {
342      qualifier = HConstants.EMPTY_BYTE_ARRAY;
343    }
344    set.add(qualifier);
345    return this;
346  }
347
348  /**
349   * Get versions of columns only within the specified timestamp range,
350   * [minStamp, maxStamp).  Note, default maximum versions to return is 1.  If
351   * your time range spans more than one version and you want all versions
352   * returned, up the number of versions beyond the default.
353   * @param minStamp minimum timestamp value, inclusive
354   * @param maxStamp maximum timestamp value, exclusive
355   * @see #setMaxVersions()
356   * @see #setMaxVersions(int)
357   * @return this
358   */
359  public Scan setTimeRange(long minStamp, long maxStamp) throws IOException {
360    tr = new TimeRange(minStamp, maxStamp);
361    return this;
362  }
363
364  /**
365   * Get versions of columns with the specified timestamp. Note, default maximum
366   * versions to return is 1.  If your time range spans more than one version
367   * and you want all versions returned, up the number of versions beyond the
368   * defaut.
369   * @param timestamp version timestamp
370   * @see #setMaxVersions()
371   * @see #setMaxVersions(int)
372   * @return this
373   * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0.
374   *             Use {@link #setTimestamp(long)} instead
375   */
376  @Deprecated
377  public Scan setTimeStamp(long timestamp)
378  throws IOException {
379    return this.setTimestamp(timestamp);
380  }
381
382  /**
383   * Get versions of columns with the specified timestamp. Note, default maximum
384   * versions to return is 1.  If your time range spans more than one version
385   * and you want all versions returned, up the number of versions beyond the
386   * defaut.
387   * @param timestamp version timestamp
388   * @see #setMaxVersions()
389   * @see #setMaxVersions(int)
390   * @return this
391   */
392  public Scan setTimestamp(long timestamp) {
393    try {
394      tr = new TimeRange(timestamp, timestamp + 1);
395    } catch(Exception e) {
396      // This should never happen, unless integer overflow or something extremely wrong...
397      LOG.error("TimeRange failed, likely caused by integer overflow. ", e);
398      throw e;
399    }
400
401    return this;
402  }
403
404  @Override public Scan setColumnFamilyTimeRange(byte[] cf, long minStamp, long maxStamp) {
405    return (Scan) super.setColumnFamilyTimeRange(cf, minStamp, maxStamp);
406  }
407
408  /**
409   * Set the start row of the scan.
410   * <p>
411   * If the specified row does not exist, the Scanner will start from the next closest row after the
412   * specified row.
413   * @param startRow row to start scanner at or after
414   * @return this
415   * @throws IllegalArgumentException if startRow does not meet criteria for a row key (when length
416   *           exceeds {@link HConstants#MAX_ROW_LENGTH})
417   * @deprecated since 2.0.0 and will be removed in 3.0.0. Use {@link #withStartRow(byte[])}
418   *   instead. This method may change the inclusive of the stop row to keep compatible with the old
419   *   behavior.
420   * @see #withStartRow(byte[])
421   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17320">HBASE-17320</a>
422   */
423  @Deprecated
424  public Scan setStartRow(byte[] startRow) {
425    withStartRow(startRow);
426    if (ClientUtil.areScanStartRowAndStopRowEqual(this.startRow, this.stopRow)) {
427      // for keeping the old behavior that a scan with the same start and stop row is a get scan.
428      this.includeStopRow = true;
429    }
430    return this;
431  }
432
433  /**
434   * Set the start row of the scan.
435   * <p>
436   * If the specified row does not exist, the Scanner will start from the next closest row after the
437   * specified row.
438   * @param startRow row to start scanner at or after
439   * @return this
440   * @throws IllegalArgumentException if startRow does not meet criteria for a row key (when length
441   *           exceeds {@link HConstants#MAX_ROW_LENGTH})
442   */
443  public Scan withStartRow(byte[] startRow) {
444    return withStartRow(startRow, true);
445  }
446
447  /**
448   * Set the start row of the scan.
449   * <p>
450   * If the specified row does not exist, or the {@code inclusive} is {@code false}, the Scanner
451   * will start from the next closest row after the specified row.
452   * @param startRow row to start scanner at or after
453   * @param inclusive whether we should include the start row when scan
454   * @return this
455   * @throws IllegalArgumentException if startRow does not meet criteria for a row key (when length
456   *           exceeds {@link HConstants#MAX_ROW_LENGTH})
457   */
458  public Scan withStartRow(byte[] startRow, boolean inclusive) {
459    if (Bytes.len(startRow) > HConstants.MAX_ROW_LENGTH) {
460      throw new IllegalArgumentException("startRow's length must be less than or equal to "
461          + HConstants.MAX_ROW_LENGTH + " to meet the criteria" + " for a row key.");
462    }
463    this.startRow = startRow;
464    this.includeStartRow = inclusive;
465    return this;
466  }
467
468  /**
469   * Set the stop row of the scan.
470   * <p>
471   * The scan will include rows that are lexicographically less than the provided stopRow.
472   * <p>
473   * <b>Note:</b> When doing a filter for a rowKey <u>Prefix</u> use
474   * {@link #setRowPrefixFilter(byte[])}. The 'trailing 0' will not yield the desired result.
475   * </p>
476   * @param stopRow row to end at (exclusive)
477   * @return this
478   * @throws IllegalArgumentException if stopRow does not meet criteria for a row key (when length
479   *           exceeds {@link HConstants#MAX_ROW_LENGTH})
480   * @deprecated since 2.0.0 and will be removed in 3.0.0. Use {@link #withStopRow(byte[])} instead.
481   *   This method may change the inclusive of the stop row to keep compatible with the old
482   *   behavior.
483   * @see #withStopRow(byte[])
484   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17320">HBASE-17320</a>
485   */
486  @Deprecated
487  public Scan setStopRow(byte[] stopRow) {
488    withStopRow(stopRow);
489    if (ClientUtil.areScanStartRowAndStopRowEqual(this.startRow, this.stopRow)) {
490      // for keeping the old behavior that a scan with the same start and stop row is a get scan.
491      this.includeStopRow = true;
492    }
493    return this;
494  }
495
496  /**
497   * Set the stop row of the scan.
498   * <p>
499   * The scan will include rows that are lexicographically less than the provided stopRow.
500   * <p>
501   * <b>Note:</b> When doing a filter for a rowKey <u>Prefix</u> use
502   * {@link #setRowPrefixFilter(byte[])}. The 'trailing 0' will not yield the desired result.
503   * </p>
504   * @param stopRow row to end at (exclusive)
505   * @return this
506   * @throws IllegalArgumentException if stopRow does not meet criteria for a row key (when length
507   *           exceeds {@link HConstants#MAX_ROW_LENGTH})
508   */
509  public Scan withStopRow(byte[] stopRow) {
510    return withStopRow(stopRow, false);
511  }
512
513  /**
514   * Set the stop row of the scan.
515   * <p>
516   * The scan will include rows that are lexicographically less than (or equal to if
517   * {@code inclusive} is {@code true}) the provided stopRow.
518   * @param stopRow row to end at
519   * @param inclusive whether we should include the stop row when scan
520   * @return this
521   * @throws IllegalArgumentException if stopRow does not meet criteria for a row key (when length
522   *           exceeds {@link HConstants#MAX_ROW_LENGTH})
523   */
524  public Scan withStopRow(byte[] stopRow, boolean inclusive) {
525    if (Bytes.len(stopRow) > HConstants.MAX_ROW_LENGTH) {
526      throw new IllegalArgumentException("stopRow's length must be less than or equal to "
527          + HConstants.MAX_ROW_LENGTH + " to meet the criteria" + " for a row key.");
528    }
529    this.stopRow = stopRow;
530    this.includeStopRow = inclusive;
531    return this;
532  }
533
534  /**
535   * <p>Set a filter (using stopRow and startRow) so the result set only contains rows where the
536   * rowKey starts with the specified prefix.</p>
537   * <p>This is a utility method that converts the desired rowPrefix into the appropriate values
538   * for the startRow and stopRow to achieve the desired result.</p>
539   * <p>This can safely be used in combination with setFilter.</p>
540   * <p><b>NOTE: Doing a {@link #setStartRow(byte[])} and/or {@link #setStopRow(byte[])}
541   * after this method will yield undefined results.</b></p>
542   * @param rowPrefix the prefix all rows must start with. (Set <i>null</i> to remove the filter.)
543   * @return this
544   */
545  public Scan setRowPrefixFilter(byte[] rowPrefix) {
546    if (rowPrefix == null) {
547      setStartRow(HConstants.EMPTY_START_ROW);
548      setStopRow(HConstants.EMPTY_END_ROW);
549    } else {
550      this.setStartRow(rowPrefix);
551      this.setStopRow(ClientUtil.calculateTheClosestNextRowKeyForPrefix(rowPrefix));
552    }
553    return this;
554  }
555
556  /**
557   * Get all available versions.
558   * @return this
559   * @deprecated since 2.0.0 and will be removed in 3.0.0. It is easy to misunderstand with column
560   *   family's max versions, so use {@link #readAllVersions()} instead.
561   * @see #readAllVersions()
562   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17125">HBASE-17125</a>
563   */
564  @Deprecated
565  public Scan setMaxVersions() {
566    return readAllVersions();
567  }
568
569  /**
570   * Get up to the specified number of versions of each column.
571   * @param maxVersions maximum versions for each column
572   * @return this
573   * @deprecated since 2.0.0 and will be removed in 3.0.0. It is easy to misunderstand with column
574   *   family's max versions, so use {@link #readVersions(int)} instead.
575   * @see #readVersions(int)
576   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17125">HBASE-17125</a>
577   */
578  @Deprecated
579  public Scan setMaxVersions(int maxVersions) {
580    return readVersions(maxVersions);
581  }
582
583  /**
584   * Get all available versions.
585   * @return this
586   */
587  public Scan readAllVersions() {
588    this.maxVersions = Integer.MAX_VALUE;
589    return this;
590  }
591
592  /**
593   * Get up to the specified number of versions of each column.
594   * @param versions specified number of versions for each column
595   * @return this
596   */
597  public Scan readVersions(int versions) {
598    this.maxVersions = versions;
599    return this;
600  }
601
602  /**
603   * Set the maximum number of cells to return for each call to next(). Callers should be aware
604   * that this is not equivalent to calling {@link #setAllowPartialResults(boolean)}.
605   * If you don't allow partial results, the number of cells in each Result must equal to your
606   * batch setting unless it is the last Result for current row. So this method is helpful in paging
607   * queries. If you just want to prevent OOM at client, use setAllowPartialResults(true) is better.
608   * @param batch the maximum number of values
609   * @see Result#mayHaveMoreCellsInRow()
610   */
611  public Scan setBatch(int batch) {
612    if (this.hasFilter() && this.filter.hasFilterRow()) {
613      throw new IncompatibleFilterException(
614        "Cannot set batch on a scan using a filter" +
615        " that returns true for filter.hasFilterRow");
616    }
617    this.batch = batch;
618    return this;
619  }
620
621  /**
622   * Set the maximum number of values to return per row per Column Family
623   * @param limit the maximum number of values returned / row / CF
624   */
625  public Scan setMaxResultsPerColumnFamily(int limit) {
626    this.storeLimit = limit;
627    return this;
628  }
629
630  /**
631   * Set offset for the row per Column Family.
632   * @param offset is the number of kvs that will be skipped.
633   */
634  public Scan setRowOffsetPerColumnFamily(int offset) {
635    this.storeOffset = offset;
636    return this;
637  }
638
639  /**
640   * Set the number of rows for caching that will be passed to scanners.
641   * If not set, the Configuration setting {@link HConstants#HBASE_CLIENT_SCANNER_CACHING} will
642   * apply.
643   * Higher caching values will enable faster scanners but will use more memory.
644   * @param caching the number of rows for caching
645   */
646  public Scan setCaching(int caching) {
647    this.caching = caching;
648    return this;
649  }
650
651  /**
652   * @return the maximum result size in bytes. See {@link #setMaxResultSize(long)}
653   */
654  public long getMaxResultSize() {
655    return maxResultSize;
656  }
657
658  /**
659   * Set the maximum result size. The default is -1; this means that no specific
660   * maximum result size will be set for this scan, and the global configured
661   * value will be used instead. (Defaults to unlimited).
662   *
663   * @param maxResultSize The maximum result size in bytes.
664   */
665  public Scan setMaxResultSize(long maxResultSize) {
666    this.maxResultSize = maxResultSize;
667    return this;
668  }
669
670  @Override
671  public Scan setFilter(Filter filter) {
672    super.setFilter(filter);
673    return this;
674  }
675
676  /**
677   * Setting the familyMap
678   * @param familyMap map of family to qualifier
679   * @return this
680   */
681  public Scan setFamilyMap(Map<byte [], NavigableSet<byte []>> familyMap) {
682    this.familyMap = familyMap;
683    return this;
684  }
685
686  /**
687   * Getting the familyMap
688   * @return familyMap
689   */
690  public Map<byte [], NavigableSet<byte []>> getFamilyMap() {
691    return this.familyMap;
692  }
693
694  /**
695   * @return the number of families in familyMap
696   */
697  public int numFamilies() {
698    if(hasFamilies()) {
699      return this.familyMap.size();
700    }
701    return 0;
702  }
703
704  /**
705   * @return true if familyMap is non empty, false otherwise
706   */
707  public boolean hasFamilies() {
708    return !this.familyMap.isEmpty();
709  }
710
711  /**
712   * @return the keys of the familyMap
713   */
714  public byte[][] getFamilies() {
715    if(hasFamilies()) {
716      return this.familyMap.keySet().toArray(new byte[0][0]);
717    }
718    return null;
719  }
720
721  /**
722   * @return the startrow
723   */
724  public byte [] getStartRow() {
725    return this.startRow;
726  }
727
728  /**
729   * @return if we should include start row when scan
730   */
731  public boolean includeStartRow() {
732    return includeStartRow;
733  }
734
735  /**
736   * @return the stoprow
737   */
738  public byte[] getStopRow() {
739    return this.stopRow;
740  }
741
742  /**
743   * @return if we should include stop row when scan
744   */
745  public boolean includeStopRow() {
746    return includeStopRow;
747  }
748
749  /**
750   * @return the max number of versions to fetch
751   */
752  public int getMaxVersions() {
753    return this.maxVersions;
754  }
755
756  /**
757   * @return maximum number of values to return for a single call to next()
758   */
759  public int getBatch() {
760    return this.batch;
761  }
762
763  /**
764   * @return maximum number of values to return per row per CF
765   */
766  public int getMaxResultsPerColumnFamily() {
767    return this.storeLimit;
768  }
769
770  /**
771   * Method for retrieving the scan's offset per row per column
772   * family (#kvs to be skipped)
773   * @return row offset
774   */
775  public int getRowOffsetPerColumnFamily() {
776    return this.storeOffset;
777  }
778
779  /**
780   * @return caching the number of rows fetched when calling next on a scanner
781   */
782  public int getCaching() {
783    return this.caching;
784  }
785
786  /**
787   * @return TimeRange
788   */
789  public TimeRange getTimeRange() {
790    return this.tr;
791  }
792
793  /**
794   * @return RowFilter
795   */
796  @Override
797  public Filter getFilter() {
798    return filter;
799  }
800
801  /**
802   * @return true is a filter has been specified, false if not
803   */
804  public boolean hasFilter() {
805    return filter != null;
806  }
807
808  /**
809   * Set whether blocks should be cached for this Scan.
810   * <p>
811   * This is true by default.  When true, default settings of the table and
812   * family are used (this will never override caching blocks if the block
813   * cache is disabled for that family or entirely).
814   *
815   * @param cacheBlocks if false, default settings are overridden and blocks
816   * will not be cached
817   */
818  public Scan setCacheBlocks(boolean cacheBlocks) {
819    this.cacheBlocks = cacheBlocks;
820    return this;
821  }
822
823  /**
824   * Get whether blocks should be cached for this Scan.
825   * @return true if default caching should be used, false if blocks should not
826   * be cached
827   */
828  public boolean getCacheBlocks() {
829    return cacheBlocks;
830  }
831
832  /**
833   * Set whether this scan is a reversed one
834   * <p>
835   * This is false by default which means forward(normal) scan.
836   *
837   * @param reversed if true, scan will be backward order
838   * @return this
839   */
840  public Scan setReversed(boolean reversed) {
841    this.reversed = reversed;
842    return this;
843  }
844
845  /**
846   * Get whether this scan is a reversed one.
847   * @return true if backward scan, false if forward(default) scan
848   */
849  public boolean isReversed() {
850    return reversed;
851  }
852
853  /**
854   * Setting whether the caller wants to see the partial results when server returns
855   * less-than-expected cells. It is helpful while scanning a huge row to prevent OOM at client.
856   * By default this value is false and the complete results will be assembled client side
857   * before being delivered to the caller.
858   * @param allowPartialResults
859   * @return this
860   * @see Result#mayHaveMoreCellsInRow()
861   * @see #setBatch(int)
862   */
863  public Scan setAllowPartialResults(final boolean allowPartialResults) {
864    this.allowPartialResults = allowPartialResults;
865    return this;
866  }
867
868  /**
869   * @return true when the constructor of this scan understands that the results they will see may
870   *         only represent a partial portion of a row. The entire row would be retrieved by
871   *         subsequent calls to {@link ResultScanner#next()}
872   */
873  public boolean getAllowPartialResults() {
874    return allowPartialResults;
875  }
876
  /**
   * Covariant override: forwards {@code value} to the superclass setter and casts the result to
   * {@link Scan} so that fluent call chains keep the Scan type.
   * @param value passed through unchanged to the superclass implementation
   * @return the superclass result, cast to Scan
   */
  @Override
  public Scan setLoadColumnFamiliesOnDemand(boolean value) {
    return (Scan) super.setLoadColumnFamiliesOnDemand(value);
  }
881
882  /**
883   * Compile the table and column family (i.e. schema) information
884   * into a String. Useful for parsing and aggregation by debugging,
885   * logging, and administration tools.
886   * @return Map
887   */
888  @Override
889  public Map<String, Object> getFingerprint() {
890    Map<String, Object> map = new HashMap<>();
891    List<String> families = new ArrayList<>();
892    if(this.familyMap.isEmpty()) {
893      map.put("families", "ALL");
894      return map;
895    } else {
896      map.put("families", families);
897    }
898    for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
899        this.familyMap.entrySet()) {
900      families.add(Bytes.toStringBinary(entry.getKey()));
901    }
902    return map;
903  }
904
905  /**
906   * Compile the details beyond the scope of getFingerprint (row, columns,
907   * timestamps, etc.) into a Map along with the fingerprinted information.
908   * Useful for debugging, logging, and administration tools.
909   * @param maxCols a limit on the number of columns output prior to truncation
910   * @return Map
911   */
912  @Override
913  public Map<String, Object> toMap(int maxCols) {
914    // start with the fingerpring map and build on top of it
915    Map<String, Object> map = getFingerprint();
916    // map from families to column list replaces fingerprint's list of families
917    Map<String, List<String>> familyColumns = new HashMap<>();
918    map.put("families", familyColumns);
919    // add scalar information first
920    map.put("startRow", Bytes.toStringBinary(this.startRow));
921    map.put("stopRow", Bytes.toStringBinary(this.stopRow));
922    map.put("maxVersions", this.maxVersions);
923    map.put("batch", this.batch);
924    map.put("caching", this.caching);
925    map.put("maxResultSize", this.maxResultSize);
926    map.put("cacheBlocks", this.cacheBlocks);
927    map.put("loadColumnFamiliesOnDemand", this.loadColumnFamiliesOnDemand);
928    List<Long> timeRange = new ArrayList<>(2);
929    timeRange.add(this.tr.getMin());
930    timeRange.add(this.tr.getMax());
931    map.put("timeRange", timeRange);
932    int colCount = 0;
933    // iterate through affected families and list out up to maxCols columns
934    for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
935      this.familyMap.entrySet()) {
936      List<String> columns = new ArrayList<>();
937      familyColumns.put(Bytes.toStringBinary(entry.getKey()), columns);
938      if(entry.getValue() == null) {
939        colCount++;
940        --maxCols;
941        columns.add("ALL");
942      } else {
943        colCount += entry.getValue().size();
944        if (maxCols <= 0) {
945          continue;
946        }
947        for (byte [] column : entry.getValue()) {
948          if (--maxCols <= 0) {
949            continue;
950          }
951          columns.add(Bytes.toStringBinary(column));
952        }
953      }
954    }
955    map.put("totalColumns", colCount);
956    if (this.filter != null) {
957      map.put("filter", this.filter.toString());
958    }
959    // add the id if set
960    if (getId() != null) {
961      map.put("id", getId());
962    }
963    return map;
964  }
965
966  /**
967   * Enable/disable "raw" mode for this scan.
968   * If "raw" is enabled the scan will return all
969   * delete marker and deleted rows that have not
970   * been collected, yet.
971   * This is mostly useful for Scan on column families
972   * that have KEEP_DELETED_ROWS enabled.
973   * It is an error to specify any column when "raw" is set.
974   * @param raw True/False to enable/disable "raw" mode.
975   */
976  public Scan setRaw(boolean raw) {
977    setAttribute(RAW_ATTR, Bytes.toBytes(raw));
978    return this;
979  }
980
981  /**
982   * @return True if this Scan is in "raw" mode.
983   */
984  public boolean isRaw() {
985    byte[] attr = getAttribute(RAW_ATTR);
986    return attr == null ? false : Bytes.toBoolean(attr);
987  }
988
989  /**
990   * Set whether this scan is a small scan
991   * <p>
992   * Small scan should use pread and big scan can use seek + read seek + read is fast but can cause
993   * two problem (1) resource contention (2) cause too much network io [89-fb] Using pread for
994   * non-compaction read request https://issues.apache.org/jira/browse/HBASE-7266 On the other hand,
995   * if setting it true, we would do openScanner,next,closeScanner in one RPC call. It means the
996   * better performance for small scan. [HBASE-9488]. Generally, if the scan range is within one
997   * data block(64KB), it could be considered as a small scan.
998   * @param small
999   * @deprecated since 2.0.0 and will be removed in 3.0.0. Use {@link #setLimit(int)} and
1000   *   {@link #setReadType(ReadType)} instead. And for the one rpc optimization, now we will also
1001   *   fetch data when openScanner, and if the number of rows reaches the limit then we will close
1002   *   the scanner automatically which means we will fall back to one rpc.
1003   * @see #setLimit(int)
1004   * @see #setReadType(ReadType)
1005   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17045">HBASE-17045</a>
1006   */
1007  @Deprecated
1008  public Scan setSmall(boolean small) {
1009    this.small = small;
1010    this.readType = ReadType.PREAD;
1011    return this;
1012  }
1013
1014  /**
1015   * Get whether this scan is a small scan
1016   * @return true if small scan
1017   * @deprecated since 2.0.0 and will be removed in 3.0.0. See the comment of
1018   *   {@link #setSmall(boolean)}
1019   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17045">HBASE-17045</a>
1020   */
1021  @Deprecated
1022  public boolean isSmall() {
1023    return small;
1024  }
1025
  /**
   * Covariant override: forwards the attribute to the superclass and casts the result to
   * {@link Scan} so that fluent call chains keep the Scan type.
   * @param name attribute name, passed through unchanged
   * @param value attribute value, passed through unchanged
   * @return the superclass result, cast to Scan
   */
  @Override
  public Scan setAttribute(String name, byte[] value) {
    return (Scan) super.setAttribute(name, value);
  }
1030
  /**
   * Covariant override: forwards {@code id} to the superclass setter and casts the result to
   * {@link Scan} so that fluent call chains keep the Scan type.
   * @param id passed through unchanged to the superclass implementation
   * @return the superclass result, cast to Scan
   */
  @Override
  public Scan setId(String id) {
    return (Scan) super.setId(id);
  }
1035
  /**
   * Covariant override: forwards {@code authorizations} to the superclass setter and casts the
   * result to {@link Scan} so that fluent call chains keep the Scan type.
   * @param authorizations passed through unchanged to the superclass implementation
   * @return the superclass result, cast to Scan
   */
  @Override
  public Scan setAuthorizations(Authorizations authorizations) {
    return (Scan) super.setAuthorizations(authorizations);
  }
1040
  /**
   * Covariant override: forwards the user-to-permission map to the superclass setter and casts
   * the result to {@link Scan} so that fluent call chains keep the Scan type.
   * @param perms map of permissions keyed by a String, passed through unchanged
   * @return the superclass result, cast to Scan
   */
  @Override
  public Scan setACL(Map<String, Permission> perms) {
    return (Scan) super.setACL(perms);
  }
1045
  /**
   * Covariant override: forwards a single user/permission pair to the superclass setter and
   * casts the result to {@link Scan} so that fluent call chains keep the Scan type.
   * @param user passed through unchanged to the superclass implementation
   * @param perms passed through unchanged to the superclass implementation
   * @return the superclass result, cast to Scan
   */
  @Override
  public Scan setACL(String user, Permission perms) {
    return (Scan) super.setACL(user, perms);
  }
1050
  /**
   * Covariant override: forwards {@code consistency} to the superclass setter and casts the
   * result to {@link Scan} so that fluent call chains keep the Scan type.
   * @param consistency passed through unchanged to the superclass implementation
   * @return the superclass result, cast to Scan
   */
  @Override
  public Scan setConsistency(Consistency consistency) {
    return (Scan) super.setConsistency(consistency);
  }
1055
  /**
   * Covariant override: forwards the replica id to the superclass setter and casts the result
   * to {@link Scan} so that fluent call chains keep the Scan type.
   * @param Id passed through unchanged to the superclass implementation
   * @return the superclass result, cast to Scan
   */
  @Override
  public Scan setReplicaId(int Id) {
    return (Scan) super.setReplicaId(Id);
  }
1060
  /**
   * Covariant override: forwards {@code level} to the superclass setter and casts the result to
   * {@link Scan} so that fluent call chains keep the Scan type.
   * @param level passed through unchanged to the superclass implementation
   * @return the superclass result, cast to Scan
   */
  @Override
  public Scan setIsolationLevel(IsolationLevel level) {
    return (Scan) super.setIsolationLevel(level);
  }
1065
  /**
   * Covariant override: forwards {@code priority} to the superclass setter and casts the result
   * to {@link Scan} so that fluent call chains keep the Scan type.
   * @param priority passed through unchanged to the superclass implementation
   * @return the superclass result, cast to Scan
   */
  @Override
  public Scan setPriority(int priority) {
    return (Scan) super.setPriority(priority);
  }
1070
1071  /**
1072   * Enable collection of {@link ScanMetrics}. For advanced users.
1073   * @param enabled Set to true to enable accumulating scan metrics
1074   */
1075  public Scan setScanMetricsEnabled(final boolean enabled) {
1076    setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.valueOf(enabled)));
1077    return this;
1078  }
1079
1080  /**
1081   * @return True if collection of scan metrics is enabled. For advanced users.
1082   */
1083  public boolean isScanMetricsEnabled() {
1084    byte[] attr = getAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE);
1085    return attr == null ? false : Bytes.toBoolean(attr);
1086  }
1087
1088  public Boolean isAsyncPrefetch() {
1089    return asyncPrefetch;
1090  }
1091
1092  /**
1093   * @deprecated Since 3.0.0, will be removed in 4.0.0. After building sync client upon async
1094   *             client, the implementation is always 'async prefetch', so this flag is useless now.
1095   */
1096  @Deprecated
1097  public Scan setAsyncPrefetch(boolean asyncPrefetch) {
1098    this.asyncPrefetch = asyncPrefetch;
1099    return this;
1100  }
1101
1102  /**
1103   * @return the limit of rows for this scan
1104   */
1105  public int getLimit() {
1106    return limit;
1107  }
1108
1109  /**
1110   * Set the limit of rows for this scan. We will terminate the scan if the number of returned rows
1111   * reaches this value.
1112   * <p>
1113   * This condition will be tested at last, after all other conditions such as stopRow, filter, etc.
1114   * @param limit the limit of rows for this scan
1115   * @return this
1116   */
1117  public Scan setLimit(int limit) {
1118    this.limit = limit;
1119    return this;
1120  }
1121
1122  /**
1123   * Call this when you only want to get one row. It will set {@code limit} to {@code 1}, and also
1124   * set {@code readType} to {@link ReadType#PREAD}.
1125   * @return this
1126   */
1127  public Scan setOneRowLimit() {
1128    return setLimit(1).setReadType(ReadType.PREAD);
1129  }
1130
1131  @InterfaceAudience.Public
1132  public enum ReadType {
1133    DEFAULT, STREAM, PREAD
1134  }
1135
1136  /**
1137   * @return the read type for this scan
1138   */
1139  public ReadType getReadType() {
1140    return readType;
1141  }
1142
1143  /**
1144   * Set the read type for this scan.
1145   * <p>
1146   * Notice that we may choose to use pread even if you specific {@link ReadType#STREAM} here. For
1147   * example, we will always use pread if this is a get scan.
1148   * @return this
1149   */
1150  public Scan setReadType(ReadType readType) {
1151    this.readType = readType;
1152    return this;
1153  }
1154
1155  /**
1156   * Get the mvcc read point used to open a scanner.
1157   */
1158  long getMvccReadPoint() {
1159    return mvccReadPoint;
1160  }
1161
1162  /**
1163   * Set the mvcc read point used to open a scanner.
1164   */
1165  Scan setMvccReadPoint(long mvccReadPoint) {
1166    this.mvccReadPoint = mvccReadPoint;
1167    return this;
1168  }
1169
1170  /**
1171   * Set the mvcc read point to -1 which means do not use it.
1172   */
1173  Scan resetMvccReadPoint() {
1174    return setMvccReadPoint(-1L);
1175  }
1176
1177  /**
1178   * When the server is slow or we scan a table with many deleted data or we use a sparse filter,
1179   * the server will response heartbeat to prevent timeout. However the scanner will return a Result
1180   * only when client can do it. So if there are many heartbeats, the blocking time on
1181   * ResultScanner#next() may be very long, which is not friendly to online services.
1182   *
1183   * Set this to true then you can get a special Result whose #isCursor() returns true and is not
1184   * contains any real data. It only tells you where the server has scanned. You can call next
1185   * to continue scanning or open a new scanner with this row key as start row whenever you want.
1186   *
1187   * Users can get a cursor when and only when there is a response from the server but we can not
1188   * return a Result to users, for example, this response is a heartbeat or there are partial cells
1189   * but users do not allow partial result.
1190   *
1191   * Now the cursor is in row level which means the special Result will only contains a row key.
1192   * {@link Result#isCursor()}
1193   * {@link Result#getCursor()}
1194   * {@link Cursor}
1195   */
1196  public Scan setNeedCursorResult(boolean needCursorResult) {
1197    this.needCursorResult = needCursorResult;
1198    return this;
1199  }
1200
1201  public boolean isNeedCursorResult() {
1202    return needCursorResult;
1203  }
1204
1205  /**
1206   * Create a new Scan with a cursor. It only set the position information like start row key.
1207   * The others (like cfs, stop row, limit) should still be filled in by the user.
1208   * {@link Result#isCursor()}
1209   * {@link Result#getCursor()}
1210   * {@link Cursor}
1211   */
1212  public static Scan createScanFromCursor(Cursor cursor) {
1213    return new Scan().withStartRow(cursor.getRow());
1214  }
1215}