001/*
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.filter;
021
022import java.io.IOException;
023import java.util.List;
024
025import org.apache.hadoop.hbase.Cell;
026import org.apache.yetus.audience.InterfaceAudience;
027import org.apache.hadoop.hbase.exceptions.DeserializationException;
028
029/**
030 * Interface for row and column filters directly applied within the regionserver.
031 *
032 * A filter can expect the following call sequence:
033 * <ul>
034 *   <li> {@link #reset()} : reset the filter state before filtering a new row. </li>
035 *   <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
036 *   <li> {@link #filterRowKey(Cell)}: true means drop this row; false means include.</li>
037 *   <li> {@link #filterCell(Cell)}: decides whether to include or exclude this Cell.
038 *        See {@link ReturnCode}. </li>
039 *   <li> {@link #transformCell(Cell)}: if the Cell is included, let the filter transform the
040 *        Cell. </li>
041 *   <li> {@link #filterRowCells(List)}: allows direct modification of the final list to be submitted
042 *   <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
043 *        filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
044 * </ul>
045 *
046 * Filter instances are created one per region/scan.  This abstract class replaces
047 * the old RowFilterInterface.
048 *
049 * When implementing your own filters, consider inheriting {@link FilterBase} to help
050 * you reduce boilerplate.
051 *
052 * @see FilterBase
053 */
054@InterfaceAudience.Public
055public abstract class Filter {
056  protected transient boolean reversed;
057  /**
058   * Reset the state of the filter between rows.
059   * 
060   * Concrete implementers can signal a failure condition in their code by throwing an
061   * {@link IOException}.
062   * 
063   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
064   */
065  abstract public void reset() throws IOException;
066
067  /**
068   * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
069   * false, each KeyValue in the row will be passed to {@link #filterCell(Cell)} below.
070   * If {@link #filterAllRemaining()} returns true, then {@link #filterRowKey(Cell)} should
071   * also return true.
072   *
073   * Concrete implementers can signal a failure condition in their code by throwing an
074   * {@link IOException}.
075   *
076   * @param firstRowCell The first cell coming in the new row
077   * @return true, remove entire row, false, include the row (maybe).
078   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
079   */
080  abstract public boolean filterRowKey(Cell firstRowCell) throws IOException;
081
082  /**
083   * If this returns true, the scan will terminate.
084   * 
085   * Concrete implementers can signal a failure condition in their code by throwing an
086   * {@link IOException}.
087   * 
088   * @return true to end scan, false to continue.
089   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
090   */
091  abstract public boolean filterAllRemaining() throws IOException;
092
093  /**
094   * A way to filter based on the column family, column qualifier and/or the column value. Return
095   * code is described below. This allows filters to filter only certain number of columns, then
096   * terminate without matching ever column.
097   *
098   * If filterRowKey returns true, filterCell needs to be consistent with it.
099   *
100   * filterCell can assume that filterRowKey has already been called for the row.
101   *
102   * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
103   * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
104   * for the next row.
105   *
106   * Concrete implementers can signal a failure condition in their code by throwing an
107   * {@link IOException}.
108   *
109   * @param c the Cell in question
110   * @return code as described below
111   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
112   * @see Filter.ReturnCode
113   */
114  public ReturnCode filterCell(final Cell c) throws IOException {
115    return ReturnCode.INCLUDE;
116  }
117
118  /**
119   * Give the filter a chance to transform the passed KeyValue. If the Cell is changed a new
120   * Cell object must be returned.
121   * 
122   * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
123   *      The transformed KeyValue is what is eventually returned to the client. Most filters will
124   *      return the passed KeyValue unchanged.
125   * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transformCell(Cell) for an example of a
126   *      transformation.
127   * 
128   *      Concrete implementers can signal a failure condition in their code by throwing an
129   *      {@link IOException}.
130   * 
131   * @param v the KeyValue in question
132   * @return the changed KeyValue
133   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
134   */
135  abstract public Cell transformCell(final Cell v) throws IOException;
136
137  /**
138   * Return codes for filterValue().
139   */
140  @InterfaceAudience.Public
141  public enum ReturnCode {
142    /**
143     * Include the Cell
144     */
145    INCLUDE,
146    /**
147     * Include the Cell and seek to the next column skipping older versions.
148     */
149    INCLUDE_AND_NEXT_COL,
150    /**
151     * Skip this Cell
152     */
153    SKIP,
154    /**
155     * Skip this column. Go to the next column in this row.
156     */
157    NEXT_COL,
158    /**
159     * Seek to next row in current family. It may still pass a cell whose family is different but
160     * row is the same as previous cell to {@link #filterCell(Cell)} , even if we get a NEXT_ROW
161     * returned for previous cell. For more details see HBASE-18368. <br>
162     * Once reset() method was invoked, then we switch to the next row for all family, and you can
163     * catch the event by invoking CellUtils.matchingRows(previousCell, currentCell). <br>
164     * Note that filterRow() will still be called. <br>
165     */
166    NEXT_ROW,
167    /**
168     * Seek to next key which is given as hint by the filter.
169     */
170    SEEK_NEXT_USING_HINT,
171    /**
172     * Include KeyValue and done with row, seek to next. See NEXT_ROW.
173     */
174    INCLUDE_AND_SEEK_NEXT_ROW,
175}
176
177  /**
178   * Chance to alter the list of Cells to be submitted. Modifications to the list will carry on
179   * 
180   * Concrete implementers can signal a failure condition in their code by throwing an
181   * {@link IOException}.
182   * 
183   * @param kvs the list of Cells to be filtered
184   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
185   */
186  abstract public void filterRowCells(List<Cell> kvs) throws IOException;
187
188  /**
189   * Primarily used to check for conflicts with scans(such as scans that do not read a full row at a
190   * time).
191   * 
192   * @return True if this filter actively uses filterRowCells(List) or filterRow().
193   */
194  abstract public boolean hasFilterRow();
195
196  /**
197   * Last chance to veto row based on previous {@link #filterCell(Cell)} calls. The filter
198   * needs to retain state then return a particular value for this call if they wish to exclude a
199   * row if a certain column is missing (for example).
200   * 
201   * Concrete implementers can signal a failure condition in their code by throwing an
202   * {@link IOException}.
203   * 
204   * @return true to exclude row, false to include row.
205   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
206   */
207  abstract public boolean filterRow() throws IOException;
208
209  /**
210   * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
211   * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
212   * QueryMatcher would call this function to find out which key it must next seek to.
213   * 
214   * Concrete implementers can signal a failure condition in their code by throwing an
215   * {@link IOException}.
216   * 
217   * @return KeyValue which must be next seeked. return null if the filter is not sure which key to
218   *         seek to next.
219   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
220   */
221  abstract public Cell getNextCellHint(final Cell currentCell) throws IOException;
222
223  /**
224   * Check that given column family is essential for filter to check row. Most filters always return
225   * true here. But some could have more sophisticated logic which could significantly reduce
226   * scanning process by not even touching columns until we are 100% sure that it's data is needed
227   * in result.
228   * 
229   * Concrete implementers can signal a failure condition in their code by throwing an
230   * {@link IOException}.
231   * 
232   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
233   */
234  abstract public boolean isFamilyEssential(byte[] name) throws IOException;
235
236  /**
237   * TODO: JAVADOC
238   * 
239   * Concrete implementers can signal a failure condition in their code by throwing an
240   * {@link IOException}.
241   * 
242   * @return The filter serialized using pb
243   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
244   */
245  abstract public byte[] toByteArray() throws IOException;
246
247  /**
248   * 
249   * Concrete implementers can signal a failure condition in their code by throwing an
250   * {@link IOException}.
251   * 
252   * @param pbBytes A pb serialized {@link Filter} instance
253   * @return An instance of {@link Filter} made from <code>bytes</code>
254   * @throws DeserializationException
255   * @see #toByteArray
256   */
257  public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException {
258    throw new DeserializationException(
259      "parseFrom called on base Filter, but should be called on derived type");
260  }
261
262  /**
263   * Concrete implementers can signal a failure condition in their code by throwing an
264   * {@link IOException}.
265   * 
266   * @param other
267   * @return true if and only if the fields of the filter that are serialized are equal to the
268   *         corresponding fields in other. Used for testing.
269   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
270   */
271  abstract boolean areSerializedFieldsEqual(Filter other);
272
273  /**
274   * alter the reversed scan flag
275   * @param reversed flag
276   */
277  public void setReversed(boolean reversed) {
278    this.reversed = reversed;
279  }
280
281  public boolean isReversed() {
282    return this.reversed;
283  }
284}