View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.Closeable;
22  import java.io.IOException;
23  import java.util.List;
24  
25  import org.apache.hadoop.hbase.Cell;
26  import org.apache.hadoop.hbase.classification.InterfaceAudience;
27  
28  /**
29   * Internal scanners differ from client-side scanners in that they operate on
30   * HStoreKeys and byte[] instead of RowResults. This is because they are
31   * actually close to how the data is physically stored, and therefore it is more
32   * convenient to interact with them that way. It is also much easier to merge
33   * the results across SortedMaps than RowResults.
34   *
35   * <p>Additionally, we need to be able to determine if the scanner is doing
36   * wildcard column matches (when only a column family is specified or if a
37   * column regex is specified) or if multiple members of the same column family
38   * were specified. If so, we need to ignore the timestamp to ensure that we get
39   * all the family members, as they may have been last updated at different
40   * times.
41   */
42  @InterfaceAudience.Private
43  public interface InternalScanner extends Closeable {
44    /**
45     * This class encapsulates all the meaningful state information that we would like the know about
46     * after a call to {@link InternalScanner#next(List)}. While this is not an enum, a restriction on
47     * the possible states is implied through the exposed {@link #makeState(State)} method.
48     */
49    public static class NextState {
50      /**
51       * The possible states we want to restrict ourselves to. This enum is not sufficient to
52       * encapsulate all of the state information since some of the fields of the state must be
53       * dynamic (e.g. resultSize).
54       */
55      public enum State {
56        MORE_VALUES(true),
57        NO_MORE_VALUES(false),
58        SIZE_LIMIT_REACHED(true),
59        BATCH_LIMIT_REACHED(true);
60  
61        private boolean moreValues;
62  
63        private State(final boolean moreValues) {
64          this.moreValues = moreValues;
65        }
66  
67        /**
68         * @return true when the state indicates that more values may follow those that have been
69         *         returned
70         */
71        public boolean hasMoreValues() {
72          return this.moreValues;
73        }
74      }
75  
76      /**
77       * state variables
78       */
79      private final State state;
80      private long resultSize;
81  
82      /**
83       * Value to use for resultSize when the size has not been calculated. Must be a negative number
84       * so that {@link NextState#hasResultSizeEstimate()} returns false.
85       */
86      private static final long DEFAULT_RESULT_SIZE = -1;
87  
88      private NextState(State state, long resultSize) {
89        this.state = state;
90        this.resultSize = resultSize;
91      }
92  
93      /**
94       * @param state
95       * @return An instance of {@link NextState} where the size of the results returned from the call
96       *         to {@link InternalScanner#next(List)} is unknown. It it the responsibility of the
97       *         caller of {@link InternalScanner#next(List)} to calculate the result size if needed
98       */
99      public static NextState makeState(final State state) {
100       return makeState(state, DEFAULT_RESULT_SIZE);
101     }
102 
103     /**
104      * @param state
105      * @param resultSize
106      * @return An instance of {@link NextState} where the size of the values returned from the call
107      *         to {@link InternalScanner#next(List)} is known. The caller can avoid recalculating
108      *         the result size by using the cached value retrievable via {@link #getResultSize()}
109      */
110     public static NextState makeState(final State state, long resultSize) {
111       switch (state) {
112       case MORE_VALUES:
113         return createMoreValuesState(resultSize);
114       case NO_MORE_VALUES:
115         return createNoMoreValuesState(resultSize);
116       case BATCH_LIMIT_REACHED:
117         return createBatchLimitReachedState(resultSize);
118       case SIZE_LIMIT_REACHED:
119         return createSizeLimitReachedState(resultSize);
120       default:
121         // If the state is not recognized, default to no more value state
122         return createNoMoreValuesState(resultSize);
123       }
124     }
125 
126     /**
127      * Convenience method for creating a state that indicates that more values can be scanned
128      * @param resultSize estimate of the size (heap size) of the values returned from the call to
129      *          {@link InternalScanner#next(List)}
130      */
131     private static NextState createMoreValuesState(long resultSize) {
132       return new NextState(State.MORE_VALUES, resultSize);
133     }
134 
135     /**
136      * Convenience method for creating a state that indicates that no more values can be scanned.
137      * @param resultSize estimate of the size (heap size) of the values returned from the call to
138      *          {@link InternalScanner#next(List)}
139      */
140     private static NextState createNoMoreValuesState(long resultSize) {
141       return new NextState(State.NO_MORE_VALUES, resultSize);
142     }
143 
144     /**
145      * Convenience method for creating a state that indicates that the scan stopped because the
146      * batch limit was exceeded
147      * @param resultSize estimate of the size (heap size) of the values returned from the call to
148      *          {@link InternalScanner#next(List)}
149      */
150     private static NextState createBatchLimitReachedState(long resultSize) {
151       return new NextState(State.BATCH_LIMIT_REACHED, resultSize);
152     }
153 
154     /**
155      * Convenience method for creating a state that indicates that the scan stopped due to the size
156      * limit
157      * @param resultSize estimate of the size (heap size) of the values returned from the call to
158      *          {@link InternalScanner#next(List)}
159      */
160     private static NextState createSizeLimitReachedState(long resultSize) {
161       return new NextState(State.SIZE_LIMIT_REACHED, resultSize);
162     }
163 
164     /**
165      * @return true when the scanner has more values to be scanned following the values returned by
166      *         the call to {@link InternalScanner#next(List)}
167      */
168     public boolean hasMoreValues() {
169       return this.state.hasMoreValues();
170     }
171 
172     /**
173      * @return true when the scanner had to stop scanning because it reached the batch limit
174      */
175     public boolean batchLimitReached() {
176       return this.state == State.BATCH_LIMIT_REACHED;
177     }
178 
179     /**
180      * @return true when the scanner had to stop scanning because it reached the size limit
181      */
182     public boolean sizeLimitReached() {
183       return this.state == State.SIZE_LIMIT_REACHED;
184     }
185 
186     /**
187      * @return The size (heap size) of the values that were returned from the call to
188      *         {@link InternalScanner#next(List)}. This value should only be used if
189      *         {@link #hasResultSizeEstimate()} returns true.
190      */
191     public long getResultSize() {
192       return resultSize;
193     }
194 
195     /**
196      * @return true when an estimate for the size of the values returned by
197      *         {@link InternalScanner#next(List)} was provided. If false, it is the responsibility
198      *         of the caller to calculate the result size
199      */
200     public boolean hasResultSizeEstimate() {
201       return resultSize >= 0;
202     }
203 
204     /**
205      * Helper method to centralize all checks as to whether or not the state is valid.
206      * @param state
207      * @return true when the state is valid
208      */
209     public static boolean isValidState(NextState state) {
210       return state != null;
211     }
212 
213     /**
214      * @param state
215      * @return true when the state is non null and indicates that more values exist
216      */
217     public static boolean hasMoreValues(NextState state) {
218       return state != null && state.hasMoreValues();
219     }
220   }
221 
222   /**
223    * Grab the next row's worth of values.
224    * @param results return output array
225    * @return state where {@link NextState#hasMoreValues()} is true if more rows exist after this
226    *         one, false if scanner is done
227    * @throws IOException e
228    */
229   NextState next(List<Cell> results) throws IOException;
230 
231   /**
232    * Grab the next row's worth of values with a limit on the number of values to return.
233    * @param result return output array
234    * @param limit limit on row count to get
235    * @return state where {@link NextState#hasMoreValues()} is true if more rows exist after this
236    *         one, false if scanner is done
237    * @throws IOException e
238    */
239   NextState next(List<Cell> result, int limit) throws IOException;
240 
241   /**
242    * Grab the next row's worth of values with a limit on the number of values to return as well as a
243    * restriction on the size of the list of values that are returned.
244    * @param result return output array
245    * @param limit limit on row count to get
246    * @param remainingResultSize limit on the size of the result being returned
247    * @return state where {@link NextState#hasMoreValues()} is true if more rows exist after this
248    *         one, false if scanner is done
249    * @throws IOException e
250    */
251   NextState next(List<Cell> result, int limit, long remainingResultSize) throws IOException;
252 
253   /**
254    * Closes the scanner and releases any resources it has allocated
255    * @throws IOException
256    */
257   void close() throws IOException;
258 }