001/*
002 * Copyright 2010 The Apache Software Foundation
003 *
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package org.apache.hadoop.hbase.filter;
022
023import org.apache.hadoop.hbase.KeyValue;
024import org.apache.hadoop.io.Writable;
025
026import java.util.List;
027
028/**
029 * Interface for row and column filters directly applied within the regionserver.
030 *
031 * A filter can expect the following call sequence:
032 * <ul>
033 *   <li> {@link #reset()} : reset the filter state before filtering a new row. </li>
034 *   <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
035 *   <li> {@link #filterRowKey(byte[],int,int)}: true means drop this row; false means include.</li>
036 *   <li> {@link #filterKeyValue(KeyValue)}: decides whether to include or exclude this KeyValue.
037 *        See {@link ReturnCode}. </li>
038 *   <li> {@link #transform(KeyValue)}: if the KeyValue is included, let the filter transform the
039 *        KeyValue. </li>
040 *   <li> {@link #filterRow(List)}: allows direct modification of the final list to be submitted
041 *   <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
042 *        filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
043 * </ul>
044 *
045 * Filter instances are created one per region/scan.  This interface replaces
046 * the old RowFilterInterface.
047 *
048 * When implementing your own filters, consider inheriting {@link FilterBase} to help
049 * you reduce boilerplate.
050 * 
051 * @see FilterBase
052 */
053public interface Filter extends Writable {
054  /**
055   * Reset the state of the filter between rows.
056   */
057  public void reset();
058
059  /**
060   * Filters a row based on the row key. If this returns true, the entire
061   * row will be excluded.  If false, each KeyValue in the row will be
062   * passed to {@link #filterKeyValue(KeyValue)} below.
063   *
064   * @param buffer buffer containing row key
065   * @param offset offset into buffer where row key starts
066   * @param length length of the row key
067   * @return true, remove entire row, false, include the row (maybe).
068   */
069  public boolean filterRowKey(byte [] buffer, int offset, int length);
070
071  /**
072   * If this returns true, the scan will terminate.
073   *
074   * @return true to end scan, false to continue.
075   */
076  public boolean filterAllRemaining();
077
078  /**
079   * A way to filter based on the column family, column qualifier and/or the
080   * column value. Return code is described below.  This allows filters to
081   * filter only certain number of columns, then terminate without matching ever
082   * column.
083   *
084   * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
085   * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called
086   * just in case the caller calls for the next row.
087   *
088   * @param v the KeyValue in question
089   * @return code as described below
090   * @see Filter.ReturnCode
091   */
092  public ReturnCode filterKeyValue(final KeyValue v);
093
094  /**
095   * Give the filter a chance to transform the passed KeyValue.
096   * If the KeyValue is changed a new KeyValue object must be returned.
097   * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
098   *
099   * The transformed KeyValue is what is eventually returned to the
100   * client. Most filters will return the passed KeyValue unchanged.
101   * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transform(KeyValue)
102   * for an example of a transformation.
103   *
104   * @param v the KeyValue in question
105   * @return the changed KeyValue
106   */
107  public KeyValue transform(final KeyValue v);
108
109  /**
110   * Return codes for filterValue().
111   */
112  public enum ReturnCode {
113    /**
114     * Include the KeyValue
115     */
116    INCLUDE,
117    /**
118     * Include the KeyValue and seek to the next column skipping older versions.
119     */
120    INCLUDE_AND_NEXT_COL,
121    /**
122     * Skip this KeyValue
123     */
124    SKIP,
125    /**
126     * Skip this column. Go to the next column in this row.
127     */
128    NEXT_COL,
129    /**
130     * Done with columns, skip to next row. Note that filterRow() will
131     * still be called.
132     */
133    NEXT_ROW,
134    /**
135     * Seek to next key which is given as hint by the filter.
136     */
137    SEEK_NEXT_USING_HINT,
138}
139
140  /**
141   * Chance to alter the list of keyvalues to be submitted.
142   * Modifications to the list will carry on
143   * @param kvs the list of keyvalues to be filtered
144   */
145  public void filterRow(List<KeyValue> kvs);
146
147  /**
148   * @return True if this filter actively uses filterRow(List).
149   * Primarily used to check for conflicts with scans(such as scans
150   * that do not read a full row at a time)
151   */
152  public boolean hasFilterRow();
153
154  /**
155   * Last chance to veto row based on previous {@link #filterKeyValue(KeyValue)}
156   * calls. The filter needs to retain state then return a particular value for
157   * this call if they wish to exclude a row if a certain column is missing
158   * (for example).
159   * @return true to exclude row, false to include row.
160   */
161  public boolean filterRow();
162
163  /**
164   * If the filter returns the match code SEEK_NEXT_USING_HINT, then
165   * it should also tell which is the next key it must seek to.
166   * After receiving the match code SEEK_NEXT_USING_HINT, the QueryMatcher would
167   * call this function to find out which key it must next seek to.
168   * @return KeyValue which must be next seeked. return null if the filter is
169   * not sure which key to seek to next.
170   */
171  public KeyValue getNextKeyHint(final KeyValue currentKV);
172}