001/*
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.filter;
021
022import java.io.IOException;
023import java.util.List;
024
025import org.apache.hadoop.hbase.Cell;
026import org.apache.yetus.audience.InterfaceAudience;
027import org.apache.hadoop.hbase.exceptions.DeserializationException;
028
029/**
030 * Interface for row and column filters directly applied within the regionserver.
031 *
032 * A filter can expect the following call sequence:
033 * <ul>
034 *   <li> {@link #reset()} : reset the filter state before filtering a new row. </li>
035 *   <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
036 *   <li> {@link #filterRowKey(Cell)}: true means drop this row; false means include.</li>
037 *   <li> {@link #filterCell(Cell)}: decides whether to include or exclude this Cell.
038 *        See {@link ReturnCode}. </li>
039 *   <li> {@link #transformCell(Cell)}: if the Cell is included, let the filter transform the
040 *        Cell. </li>
041 *   <li> {@link #filterRowCells(List)}: allows direct modification of the final list to be submitted
042 *   <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
043 *        filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
044 * </ul>
045 *
046 * Filter instances are created one per region/scan.  This abstract class replaces
047 * the old RowFilterInterface.
048 *
049 * When implementing your own filters, consider inheriting {@link FilterBase} to help
050 * you reduce boilerplate.
051 *
052 * @see FilterBase
053 */
054@InterfaceAudience.Public
055public abstract class Filter {
056  protected transient boolean reversed;
057  /**
058   * Reset the state of the filter between rows.
059   * 
060   * Concrete implementers can signal a failure condition in their code by throwing an
061   * {@link IOException}.
062   * 
063   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
064   */
065  abstract public void reset() throws IOException;
066
067  /**
068   * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
069   * false, each KeyValue in the row will be passed to {@link #filterCell(Cell)} below.
070   * 
071   * Concrete implementers can signal a failure condition in their code by throwing an
072   * {@link IOException}.
073   * 
074   * @param buffer buffer containing row key
075   * @param offset offset into buffer where row key starts
076   * @param length length of the row key
077   * @return true, remove entire row, false, include the row (maybe).
078   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
079   * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0.
080   *             Instead use {@link #filterRowKey(Cell)}
081   */
082  @Deprecated
083  abstract public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException;
084
085  /**
086   * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
087   * false, each KeyValue in the row will be passed to {@link #filterCell(Cell)} below.
088   * If {@link #filterAllRemaining()} returns true, then {@link #filterRowKey(Cell)} should
089   * also return true.
090   *
091   * Concrete implementers can signal a failure condition in their code by throwing an
092   * {@link IOException}.
093   *
094   * @param firstRowCell The first cell coming in the new row
095   * @return true, remove entire row, false, include the row (maybe).
096   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
097   */
098  abstract public boolean filterRowKey(Cell firstRowCell) throws IOException;
099
100  /**
101   * If this returns true, the scan will terminate.
102   * 
103   * Concrete implementers can signal a failure condition in their code by throwing an
104   * {@link IOException}.
105   * 
106   * @return true to end scan, false to continue.
107   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
108   */
109  abstract public boolean filterAllRemaining() throws IOException;
110
111  /**
112   * A way to filter based on the column family, column qualifier and/or the column value. Return
113   * code is described below. This allows filters to filter only certain number of columns, then
114   * terminate without matching ever column.
115   * 
116   * If filterRowKey returns true, filterKeyValue needs to be consistent with it.
117   * 
118   * filterKeyValue can assume that filterRowKey has already been called for the row.
119   * 
120   * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
121   * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
122   * for the next row.
123   *
124   * Concrete implementers can signal a failure condition in their code by throwing an
125   * {@link IOException}.
126   * 
127   * @param c the Cell in question
128   * @return code as described below, Filter.ReturnCode.INCLUDE by default
129   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
130   * @see Filter.ReturnCode
131   * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0.
132   *             Instead use filterCell(Cell)
133   */
134  @Deprecated
135  public ReturnCode filterKeyValue(final Cell c) throws IOException {
136    return Filter.ReturnCode.INCLUDE;
137  }
138
139  /**
140   * A way to filter based on the column family, column qualifier and/or the column value. Return
141   * code is described below. This allows filters to filter only certain number of columns, then
142   * terminate without matching ever column.
143   *
144   * If filterRowKey returns true, filterCell needs to be consistent with it.
145   *
146   * filterCell can assume that filterRowKey has already been called for the row.
147   *
148   * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
149   * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
150   * for the next row.
151   *
152   * Concrete implementers can signal a failure condition in their code by throwing an
153   * {@link IOException}.
154   *
155   * @param c the Cell in question
156   * @return code as described below
157   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
158   * @see Filter.ReturnCode
159   */
160  public ReturnCode filterCell(final Cell c) throws IOException{
161    return filterKeyValue(c);
162  }
163
164  /**
165   * Give the filter a chance to transform the passed KeyValue. If the Cell is changed a new
166   * Cell object must be returned.
167   * 
168   * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
169   *      The transformed KeyValue is what is eventually returned to the client. Most filters will
170   *      return the passed KeyValue unchanged.
171   * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transformCell(Cell) for an example of a
172   *      transformation.
173   * 
174   *      Concrete implementers can signal a failure condition in their code by throwing an
175   *      {@link IOException}.
176   * 
177   * @param v the KeyValue in question
178   * @return the changed KeyValue
179   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
180   */
181  abstract public Cell transformCell(final Cell v) throws IOException;
182
183  /**
184   * Return codes for filterValue().
185   */
186  @InterfaceAudience.Public
187  public enum ReturnCode {
188    /**
189     * Include the Cell
190     */
191    INCLUDE,
192    /**
193     * Include the Cell and seek to the next column skipping older versions.
194     */
195    INCLUDE_AND_NEXT_COL,
196    /**
197     * Skip this Cell
198     */
199    SKIP,
200    /**
201     * Skip this column. Go to the next column in this row.
202     */
203    NEXT_COL,
204    /**
205     * Seek to next row in current family. It may still pass a cell whose family is different but
206     * row is the same as previous cell to {@link #filterCell(Cell)} , even if we get a NEXT_ROW
207     * returned for previous cell. For more details see HBASE-18368. <br>
208     * Once reset() method was invoked, then we switch to the next row for all family, and you can
209     * catch the event by invoking CellUtils.matchingRows(previousCell, currentCell). <br>
210     * Note that filterRow() will still be called. <br>
211     */
212    NEXT_ROW,
213    /**
214     * Seek to next key which is given as hint by the filter.
215     */
216    SEEK_NEXT_USING_HINT,
217    /**
218     * Include KeyValue and done with row, seek to next. See NEXT_ROW.
219     */
220    INCLUDE_AND_SEEK_NEXT_ROW,
221}
222
223  /**
224   * Chance to alter the list of Cells to be submitted. Modifications to the list will carry on
225   * 
226   * Concrete implementers can signal a failure condition in their code by throwing an
227   * {@link IOException}.
228   * 
229   * @param kvs the list of Cells to be filtered
230   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
231   */
232  abstract public void filterRowCells(List<Cell> kvs) throws IOException;
233
234  /**
235   * Primarily used to check for conflicts with scans(such as scans that do not read a full row at a
236   * time).
237   * 
238   * @return True if this filter actively uses filterRowCells(List) or filterRow().
239   */
240  abstract public boolean hasFilterRow();
241
242  /**
243   * Last chance to veto row based on previous {@link #filterCell(Cell)} calls. The filter
244   * needs to retain state then return a particular value for this call if they wish to exclude a
245   * row if a certain column is missing (for example).
246   * 
247   * Concrete implementers can signal a failure condition in their code by throwing an
248   * {@link IOException}.
249   * 
250   * @return true to exclude row, false to include row.
251   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
252   */
253  abstract public boolean filterRow() throws IOException;
254
255  /**
256   * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
257   * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
258   * QueryMatcher would call this function to find out which key it must next seek to.
259   * 
260   * Concrete implementers can signal a failure condition in their code by throwing an
261   * {@link IOException}.
262   * 
263   * @return KeyValue which must be next seeked. return null if the filter is not sure which key to
264   *         seek to next.
265   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
266   */
267  abstract public Cell getNextCellHint(final Cell currentCell) throws IOException;
268
269  /**
270   * Check that given column family is essential for filter to check row. Most filters always return
271   * true here. But some could have more sophisticated logic which could significantly reduce
272   * scanning process by not even touching columns until we are 100% sure that it's data is needed
273   * in result.
274   * 
275   * Concrete implementers can signal a failure condition in their code by throwing an
276   * {@link IOException}.
277   * 
278   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
279   */
280  abstract public boolean isFamilyEssential(byte[] name) throws IOException;
281
282  /**
283   * TODO: JAVADOC
284   * 
285   * Concrete implementers can signal a failure condition in their code by throwing an
286   * {@link IOException}.
287   * 
288   * @return The filter serialized using pb
289   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
290   */
291  abstract public byte[] toByteArray() throws IOException;
292
293  /**
294   * 
295   * Concrete implementers can signal a failure condition in their code by throwing an
296   * {@link IOException}.
297   * 
298   * @param pbBytes A pb serialized {@link Filter} instance
299   * @return An instance of {@link Filter} made from <code>bytes</code>
300   * @throws DeserializationException
301   * @see #toByteArray
302   */
303  public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException {
304    throw new DeserializationException(
305      "parseFrom called on base Filter, but should be called on derived type");
306  }
307
308  /**
309   * Concrete implementers can signal a failure condition in their code by throwing an
310   * {@link IOException}.
311   * 
312   * @param other
313   * @return true if and only if the fields of the filter that are serialized are equal to the
314   *         corresponding fields in other. Used for testing.
315   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
316   */
317  abstract boolean areSerializedFieldsEqual(Filter other);
318
319  /**
320   * alter the reversed scan flag
321   * @param reversed flag
322   */
323  public void setReversed(boolean reversed) {
324    this.reversed = reversed;
325  }
326
327  public boolean isReversed() {
328    return this.reversed;
329  }
330}