View Javadoc

1   /*
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.filter;
22  
23  import org.apache.hadoop.hbase.KeyValue;
24  import org.apache.hadoop.io.Writable;
25  
26  import java.util.List;
27  
28  /**
29   * Interface for row and column filters directly applied within the regionserver.
30   *
31   * A filter can expect the following call sequence:
32   * <ul>
33   *   <li> {@link #reset()} : reset the filter state before filtering a new row. </li>
34   *   <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
35   *   <li> {@link #filterRowKey(byte[],int,int)}: true means drop this row; false means include.</li>
36   *   <li> {@link #filterKeyValue(KeyValue)}: decides whether to include or exclude this KeyValue.
37   *        See {@link ReturnCode}. </li>
38   *   <li> {@link #transform(KeyValue)}: if the KeyValue is included, let the filter transform the
39   *        KeyValue. </li>
40   *   <li> {@link #filterRow(List)}: allows direct modification of the final list to be submitted
41   *   <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
42   *        filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
43   * </ul>
44   *
45   * Filter instances are created one per region/scan.  This interface replaces
46   * the old RowFilterInterface.
47   *
48   * When implementing your own filters, consider inheriting {@link FilterBase} to help
49   * you reduce boilerplate.
50   * 
51   * @see FilterBase
52   */
53  public interface Filter extends Writable {
54    /**
55     * Reset the state of the filter between rows.
56     */
57    public void reset();
58  
59    /**
60     * Filters a row based on the row key. If this returns true, the entire
61     * row will be excluded.  If false, each KeyValue in the row will be
62     * passed to {@link #filterKeyValue(KeyValue)} below.
63     *
64     * @param buffer buffer containing row key
65     * @param offset offset into buffer where row key starts
66     * @param length length of the row key
67     * @return true, remove entire row, false, include the row (maybe).
68     */
69    public boolean filterRowKey(byte [] buffer, int offset, int length);
70  
71    /**
72     * If this returns true, the scan will terminate.
73     *
74     * @return true to end scan, false to continue.
75     */
76    public boolean filterAllRemaining();
77  
78    /**
79     * A way to filter based on the column family, column qualifier and/or the
80     * column value. Return code is described below.  This allows filters to
81     * filter only certain number of columns, then terminate without matching ever
82     * column.
83     *
84     * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
85     * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called
86     * just in case the caller calls for the next row.
87     *
88     * @param v the KeyValue in question
89     * @return code as described below
90     * @see Filter.ReturnCode
91     */
92    public ReturnCode filterKeyValue(final KeyValue v);
93  
94    /**
95     * Give the filter a chance to transform the passed KeyValue.
96     * If the KeyValue is changed a new KeyValue object must be returned.
97     * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
98     *
99     * The transformed KeyValue is what is eventually returned to the
100    * client. Most filters will return the passed KeyValue unchanged.
101    * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transform(KeyValue)
102    * for an example of a transformation.
103    *
104    * @param v the KeyValue in question
105    * @return the changed KeyValue
106    */
107   public KeyValue transform(final KeyValue v);
108 
109   /**
110    * Return codes for filterValue().
111    */
112   public enum ReturnCode {
113     /**
114      * Include the KeyValue
115      */
116     INCLUDE,
117     /**
118      * Include the KeyValue and seek to the next column skipping older versions.
119      */
120     INCLUDE_AND_NEXT_COL,
121     /**
122      * Skip this KeyValue
123      */
124     SKIP,
125     /**
126      * Skip this column. Go to the next column in this row.
127      */
128     NEXT_COL,
129     /**
130      * Done with columns, skip to next row. Note that filterRow() will
131      * still be called.
132      */
133     NEXT_ROW,
134     /**
135      * Seek to next key which is given as hint by the filter.
136      */
137     SEEK_NEXT_USING_HINT,
138 }
139 
140   /**
141    * Chance to alter the list of keyvalues to be submitted.
142    * Modifications to the list will carry on
143    * @param kvs the list of keyvalues to be filtered
144    */
145   public void filterRow(List<KeyValue> kvs);
146 
147   /**
148    * @return True if this filter actively uses filterRow(List).
149    * Primarily used to check for conflicts with scans(such as scans
150    * that do not read a full row at a time)
151    */
152   public boolean hasFilterRow();
153 
154   /**
155    * Last chance to veto row based on previous {@link #filterKeyValue(KeyValue)}
156    * calls. The filter needs to retain state then return a particular value for
157    * this call if they wish to exclude a row if a certain column is missing
158    * (for example).
159    * @return true to exclude row, false to include row.
160    */
161   public boolean filterRow();
162 
163   /**
164    * If the filter returns the match code SEEK_NEXT_USING_HINT, then
165    * it should also tell which is the next key it must seek to.
166    * After receiving the match code SEEK_NEXT_USING_HINT, the QueryMatcher would
167    * call this function to find out which key it must next seek to.
168    * @return KeyValue which must be next seeked. return null if the filter is
169    * not sure which key to seek to next.
170    */
171   public KeyValue getNextKeyHint(final KeyValue currentKV);
172 }