/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.yetus.audience.InterfaceAudience;
025
026/**
027 * Interface for row and column filters directly applied within the regionserver. A filter can
028 * expect the following call sequence:
029 * <ul>
030 * <li>{@link #reset()} : reset the filter state before filtering a new row.</li>
031 * <li>{@link #filterAllRemaining()}: true means row scan is over; false means keep going.</li>
032 * <li>{@link #filterRowKey(Cell)}: true means drop this row; false means include.</li>
033 * <li>{@link #filterCell(Cell)}: decides whether to include or exclude this Cell. See
034 * {@link ReturnCode}.</li>
035 * <li>{@link #transformCell(Cell)}: if the Cell is included, let the filter transform the Cell.
036 * </li>
037 * <li>{@link #filterRowCells(List)}: allows direct modification of the final list to be submitted
038 * <li>{@link #filterRow()}: last chance to drop entire row based on the sequence of filter calls.
039 * Eg: filter a row if it doesn't contain a specified column.</li>
040 * </ul>
041 * Filter instances are created one per region/scan. This abstract class replaces the old
042 * RowFilterInterface. When implementing your own filters, consider inheriting {@link FilterBase} to
043 * help you reduce boilerplate.
044 * @see FilterBase
045 */
046@InterfaceAudience.Public
047public abstract class Filter {
048  protected transient boolean reversed;
049
050  /**
051   * Reset the state of the filter between rows. Concrete implementers can signal a failure
052   * condition in their code by throwing an {@link IOException}.
053   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
054   */
055  abstract public void reset() throws IOException;
056
057  /**
058   * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
059   * false, each KeyValue in the row will be passed to {@link #filterCell(Cell)} below. Concrete
060   * implementers can signal a failure condition in their code by throwing an {@link IOException}.
061   * @param buffer buffer containing row key
062   * @param offset offset into buffer where row key starts
063   * @param length length of the row key
064   * @return true, remove entire row, false, include the row (maybe).
065   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
066   * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. Instead use
067   *             {@link #filterRowKey(Cell)}
068   */
069  @Deprecated
070  abstract public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException;
071
072  /**
073   * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
074   * false, each KeyValue in the row will be passed to {@link #filterCell(Cell)} below. If
075   * {@link #filterAllRemaining()} returns true, then {@link #filterRowKey(Cell)} should also return
076   * true. Concrete implementers can signal a failure condition in their code by throwing an
077   * {@link IOException}.
078   * @param firstRowCell The first cell coming in the new row
079   * @return true, remove entire row, false, include the row (maybe).
080   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
081   */
082  abstract public boolean filterRowKey(Cell firstRowCell) throws IOException;
083
084  /**
085   * If this returns true, the scan will terminate. Concrete implementers can signal a failure
086   * condition in their code by throwing an {@link IOException}.
087   * @return true to end scan, false to continue.
088   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
089   */
090  abstract public boolean filterAllRemaining() throws IOException;
091
092  /**
093   * A way to filter based on the column family, column qualifier and/or the column value. Return
094   * code is described below. This allows filters to filter only certain number of columns, then
095   * terminate without matching ever column. If filterRowKey returns true, filterKeyValue needs to
096   * be consistent with it. filterKeyValue can assume that filterRowKey has already been called for
097   * the row. If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
098   * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
099   * for the next row. Concrete implementers can signal a failure condition in their code by
100   * throwing an {@link IOException}.
101   * @param c the Cell in question
102   * @return code as described below, Filter.ReturnCode.INCLUDE by default
103   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
104   * @see Filter.ReturnCode
105   * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. Instead use
106   *             filterCell(Cell)
107   */
108  @Deprecated
109  public ReturnCode filterKeyValue(final Cell c) throws IOException {
110    return Filter.ReturnCode.INCLUDE;
111  }
112
113  /**
114   * A way to filter based on the column family, column qualifier and/or the column value. Return
115   * code is described below. This allows filters to filter only certain number of columns, then
116   * terminate without matching ever column. If filterRowKey returns true, filterCell needs to be
117   * consistent with it. filterCell can assume that filterRowKey has already been called for the
118   * row. If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
119   * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
120   * for the next row. Concrete implementers can signal a failure condition in their code by
121   * throwing an {@link IOException}.
122   * @param c the Cell in question
123   * @return code as described below
124   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
125   * @see Filter.ReturnCode
126   */
127  public ReturnCode filterCell(final Cell c) throws IOException {
128    return filterKeyValue(c);
129  }
130
131  /**
132   * Give the filter a chance to transform the passed KeyValue. If the Cell is changed a new Cell
133   * object must be returned.
134   * @see org.apache.hadoop.hbase.KeyValue#shallowCopy() The transformed KeyValue is what is
135   *      eventually returned to the client. Most filters will return the passed KeyValue unchanged.
136   * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transformCell(Cell) for an example of a
137   *      transformation. Concrete implementers can signal a failure condition in their code by
138   *      throwing an {@link IOException}.
139   * @param v the KeyValue in question
140   * @return the changed KeyValue
141   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
142   */
143  abstract public Cell transformCell(final Cell v) throws IOException;
144
145  /**
146   * Return codes for filterValue().
147   */
148  @InterfaceAudience.Public
149  public enum ReturnCode {
150    /**
151     * Include the Cell
152     */
153    INCLUDE,
154    /**
155     * Include the Cell and seek to the next column skipping older versions.
156     */
157    INCLUDE_AND_NEXT_COL,
158    /**
159     * Skip this Cell
160     */
161    SKIP,
162    /**
163     * Skip this column. Go to the next column in this row.
164     */
165    NEXT_COL,
166    /**
167     * Seek to next row in current family. It may still pass a cell whose family is different but
168     * row is the same as previous cell to {@link #filterCell(Cell)} , even if we get a NEXT_ROW
169     * returned for previous cell. For more details see HBASE-18368. <br>
170     * Once reset() method was invoked, then we switch to the next row for all family, and you can
171     * catch the event by invoking CellUtils.matchingRows(previousCell, currentCell). <br>
172     * Note that filterRow() will still be called. <br>
173     */
174    NEXT_ROW,
175    /**
176     * Seek to next key which is given as hint by the filter.
177     */
178    SEEK_NEXT_USING_HINT,
179    /**
180     * Include KeyValue and done with row, seek to next. See NEXT_ROW.
181     */
182    INCLUDE_AND_SEEK_NEXT_ROW,
183  }
184
185  /**
186   * Chance to alter the list of Cells to be submitted. Modifications to the list will carry on
187   * Concrete implementers can signal a failure condition in their code by throwing an
188   * {@link IOException}.
189   * @param kvs the list of Cells to be filtered
190   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
191   */
192  abstract public void filterRowCells(List<Cell> kvs) throws IOException;
193
194  /**
195   * Primarily used to check for conflicts with scans(such as scans that do not read a full row at a
196   * time).
197   * @return True if this filter actively uses filterRowCells(List) or filterRow().
198   */
199  abstract public boolean hasFilterRow();
200
201  /**
202   * Last chance to veto row based on previous {@link #filterCell(Cell)} calls. The filter needs to
203   * retain state then return a particular value for this call if they wish to exclude a row if a
204   * certain column is missing (for example). Concrete implementers can signal a failure condition
205   * in their code by throwing an {@link IOException}.
206   * @return true to exclude row, false to include row.
207   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
208   */
209  abstract public boolean filterRow() throws IOException;
210
211  /**
212   * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
213   * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
214   * QueryMatcher would call this function to find out which key it must next seek to. Concrete
215   * implementers can signal a failure condition in their code by throwing an {@link IOException}.
216   * @return KeyValue which must be next seeked. return null if the filter is not sure which key to
217   *         seek to next.
218   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
219   */
220  abstract public Cell getNextCellHint(final Cell currentCell) throws IOException;
221
222  /**
223   * Check that given column family is essential for filter to check row. Most filters always return
224   * true here. But some could have more sophisticated logic which could significantly reduce
225   * scanning process by not even touching columns until we are 100% sure that it's data is needed
226   * in result. Concrete implementers can signal a failure condition in their code by throwing an
227   * {@link IOException}.
228   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
229   */
230  abstract public boolean isFamilyEssential(byte[] name) throws IOException;
231
232  /**
233   * TODO: JAVADOC Concrete implementers can signal a failure condition in their code by throwing an
234   * {@link IOException}.
235   * @return The filter serialized using pb
236   * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
237   */
238  abstract public byte[] toByteArray() throws IOException;
239
240  /**
241   * Concrete implementers can signal a failure condition in their code by throwing an
242   * {@link IOException}.
243   * @param pbBytes A pb serialized {@link Filter} instance
244   * @return An instance of {@link Filter} made from <code>bytes</code>
245   * @throws DeserializationException if an error occurred
246   * @see #toByteArray
247   */
248  public static Filter parseFrom(final byte[] pbBytes) throws DeserializationException {
249    throw new DeserializationException(
250      "parseFrom called on base Filter, but should be called on derived type");
251  }
252
253  /**
254   * Concrete implementers can signal a failure condition in their code by throwing an
255   * {@link IOException}. n * @return true if and only if the fields of the filter that are
256   * serialized are equal to the corresponding fields in other. Used for testing.
257   */
258  abstract boolean areSerializedFieldsEqual(Filter other);
259
260  /**
261   * alter the reversed scan flag
262   * @param reversed flag
263   */
264  public void setReversed(boolean reversed) {
265    this.reversed = reversed;
266  }
267
268  public boolean isReversed() {
269    return this.reversed;
270  }
271}