View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.filter;
21  
22  import java.io.IOException;
23  import java.util.List;
24  
25  import org.apache.hadoop.hbase.Cell;
26  import org.apache.hadoop.hbase.classification.InterfaceAudience;
27  import org.apache.hadoop.hbase.classification.InterfaceStability;
28  import org.apache.hadoop.hbase.exceptions.DeserializationException;
29  
30  /**
31   * Interface for row and column filters directly applied within the regionserver.
32   *
33   * A filter can expect the following call sequence:
34   * <ul>
35   *   <li> {@link #reset()} : reset the filter state before filtering a new row. </li>
36   *   <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
37   *   <li> {@link #filterRowKey(Cell)}: true means drop this row; false means include.</li>
38   *   <li> {@link #filterKeyValue(Cell)}: decides whether to include or exclude this Cell.
39   *        See {@link ReturnCode}. </li>
40   *   <li> {@link #transformCell(Cell)}: if the Cell is included, let the filter transform the
41   *        Cell. </li>
42   *   <li> {@link #filterRowCells(List)}: allows direct modification of the final list to be submitted
43   *   <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
44   *        filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
45   * </ul>
46   *
47   * Filter instances are created one per region/scan.  This abstract class replaces
48   * the old RowFilterInterface.
49   *
50   * When implementing your own filters, consider inheriting {@link FilterBase} to help
51   * you reduce boilerplate.
52   *
53   * @see FilterBase
54   */
55  @InterfaceAudience.Public
56  @InterfaceStability.Stable
57  public abstract class Filter {
58    protected transient boolean reversed;
59    /**
60     * Reset the state of the filter between rows.
61     * 
62     * Concrete implementers can signal a failure condition in their code by throwing an
63     * {@link IOException}.
64     * 
65     * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
66     */
67    abstract public void reset() throws IOException;
68  
69    /**
70     * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
71     * false, each KeyValue in the row will be passed to {@link #filterKeyValue(Cell)} below.
72     * 
73     * Concrete implementers can signal a failure condition in their code by throwing an
74     * {@link IOException}.
75     * 
76     * @param buffer buffer containing row key
77     * @param offset offset into buffer where row key starts
78     * @param length length of the row key
79     * @return true, remove entire row, false, include the row (maybe).
80     * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
81     * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0.
82     *             Instead use {@link #filterRowKey(Cell)}
83     */
84    @Deprecated
85    abstract public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException;
86  
87    /**
88     * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
89     * false, each KeyValue in the row will be passed to {@link #filterKeyValue(Cell)} below.
90     * If {@link #filterAllRemaining()} returns true, then {@link #filterRowKey(Cell)} should
91     * also return true.
92     *
93     * Concrete implementers can signal a failure condition in their code by throwing an
94     * {@link IOException}.
95     *
96     * @param firstRowCell The first cell coming in the new row
97     * @return true, remove entire row, false, include the row (maybe).
98     * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
99     */
100   abstract public boolean filterRowKey(Cell firstRowCell) throws IOException;
101
102   /**
103    * If this returns true, the scan will terminate.
104    * 
105    * Concrete implementers can signal a failure condition in their code by throwing an
106    * {@link IOException}.
107    * 
108    * @return true to end scan, false to continue.
109    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
110    */
111   abstract public boolean filterAllRemaining() throws IOException;
112
113   /**
114    * A way to filter based on the column family, column qualifier and/or the column value. Return
115    * code is described below. This allows filters to filter only certain number of columns, then
116    * terminate without matching ever column.
117    * 
118    * If filterRowKey returns true, filterKeyValue needs to be consistent with it.
119    * 
120    * filterKeyValue can assume that filterRowKey has already been called for the row.
121    * 
122    * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
123    * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
124    * for the next row.
125    * 
126    * Concrete implementers can signal a failure condition in their code by throwing an
127    * {@link IOException}.
128    * 
129    * @param v the Cell in question
130    * @return code as described below
131    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
132    * @see Filter.ReturnCode
133    */
134   abstract public ReturnCode filterKeyValue(final Cell v) throws IOException;
135
136   /**
137    * Give the filter a chance to transform the passed KeyValue. If the Cell is changed a new
138    * Cell object must be returned.
139    * 
140    * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
141    *      The transformed KeyValue is what is eventually returned to the client. Most filters will
142    *      return the passed KeyValue unchanged.
143    * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transformCell(Cell) for an example of a
144    *      transformation.
145    * 
146    *      Concrete implementers can signal a failure condition in their code by throwing an
147    *      {@link IOException}.
148    * 
149    * @param v the KeyValue in question
150    * @return the changed KeyValue
151    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
152    */
153   abstract public Cell transformCell(final Cell v) throws IOException;
154
155   /**
156    * Return codes for filterValue().
157    */
158   @InterfaceAudience.Public
159   @InterfaceStability.Stable
160   public enum ReturnCode {
161     /**
162      * Include the Cell
163      */
164     INCLUDE,
165     /**
166      * Include the Cell and seek to the next column skipping older versions.
167      */
168     INCLUDE_AND_NEXT_COL,
169     /**
170      * Skip this Cell
171      */
172     SKIP,
173     /**
174      * Skip this column. Go to the next column in this row.
175      */
176     NEXT_COL,
177     /**
178      * Done with columns, skip to next row. Note that filterRow() will
179      * still be called.
180      */
181     NEXT_ROW,
182     /**
183      * Seek to next key which is given as hint by the filter.
184      */
185     SEEK_NEXT_USING_HINT,
186     /**
187      * Include KeyValue and done with row, seek to next.
188      */
189     INCLUDE_AND_SEEK_NEXT_ROW,
190 }
191
192   /**
193    * Chance to alter the list of Cells to be submitted. Modifications to the list will carry on
194    * 
195    * Concrete implementers can signal a failure condition in their code by throwing an
196    * {@link IOException}.
197    * 
198    * @param kvs the list of Cells to be filtered
199    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
200    */
201   abstract public void filterRowCells(List<Cell> kvs) throws IOException;
202
203   /**
204    * Primarily used to check for conflicts with scans(such as scans that do not read a full row at a
205    * time).
206    * 
207    * @return True if this filter actively uses filterRowCells(List) or filterRow().
208    */
209   abstract public boolean hasFilterRow();
210
211   /**
212    * Last chance to veto row based on previous {@link #filterKeyValue(Cell)} calls. The filter
213    * needs to retain state then return a particular value for this call if they wish to exclude a
214    * row if a certain column is missing (for example).
215    * 
216    * Concrete implementers can signal a failure condition in their code by throwing an
217    * {@link IOException}.
218    * 
219    * @return true to exclude row, false to include row.
220    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
221    */
222   abstract public boolean filterRow() throws IOException;
223
224   /**
225    * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
226    * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
227    * QueryMatcher would call this function to find out which key it must next seek to.
228    * 
229    * Concrete implementers can signal a failure condition in their code by throwing an
230    * {@link IOException}.
231    * 
232    * @return KeyValue which must be next seeked. return null if the filter is not sure which key to
233    *         seek to next.
234    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
235    */
236   abstract public Cell getNextCellHint(final Cell currentCell) throws IOException;
237
238   /**
239    * Check that given column family is essential for filter to check row. Most filters always return
240    * true here. But some could have more sophisticated logic which could significantly reduce
241    * scanning process by not even touching columns until we are 100% sure that it's data is needed
242    * in result.
243    * 
244    * Concrete implementers can signal a failure condition in their code by throwing an
245    * {@link IOException}.
246    * 
247    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
248    */
249   abstract public boolean isFamilyEssential(byte[] name) throws IOException;
250
251   /**
252    * TODO: JAVADOC
253    * 
254    * Concrete implementers can signal a failure condition in their code by throwing an
255    * {@link IOException}.
256    * 
257    * @return The filter serialized using pb
258    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
259    */
260   abstract public byte[] toByteArray() throws IOException;
261
262   /**
263    * 
264    * Concrete implementers can signal a failure condition in their code by throwing an
265    * {@link IOException}.
266    * 
267    * @param pbBytes A pb serialized {@link Filter} instance
268    * @return An instance of {@link Filter} made from <code>bytes</code>
269    * @throws DeserializationException
270    * @see #toByteArray
271    */
272   public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException {
273     throw new DeserializationException(
274       "parseFrom called on base Filter, but should be called on derived type");
275   }
276
277   /**
278    * Concrete implementers can signal a failure condition in their code by throwing an
279    * {@link IOException}.
280    * 
281    * @param other
282    * @return true if and only if the fields of the filter that are serialized are equal to the
283    *         corresponding fields in other. Used for testing.
284    * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
285    */
286   abstract boolean areSerializedFieldsEqual(Filter other);
287
288   /**
289    * alter the reversed scan flag
290    * @param reversed flag
291    */
292   public void setReversed(boolean reversed) {
293     this.reversed = reversed;
294   }
295
296   public boolean isReversed() {
297     return this.reversed;
298   }
299 }