/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * Interface for row and column filters directly applied within the regionserver. A filter can
 * expect the following call sequence:
 * <ul>
 * <li>{@link #reset()}: reset the filter state before filtering a new row.</li>
 * <li>{@link #filterAllRemaining()}: true means row scan is over; false means keep going.</li>
 * <li>{@link #filterRowKey(Cell)}: true means drop this row; false means include.</li>
 * <li>{@link #filterCell(Cell)}: decides whether to include or exclude this Cell. See
 * {@link ReturnCode}.</li>
 * <li>{@link #transformCell(Cell)}: if the Cell is included, let the filter transform the Cell.
 * </li>
 * <li>{@link #filterRowCells(List)}: allows direct modification of the final list to be
 * submitted.</li>
 * <li>{@link #filterRow()}: last chance to drop entire row based on the sequence of filter calls.
 * Eg: filter a row if it doesn't contain a specified column.</li>
 * </ul>
 * Filter instances are created one per region/scan. This abstract class replaces the old
 * RowFilterInterface. When implementing your own filters, consider inheriting {@link FilterBase} to
 * help you reduce boilerplate.
 * @see FilterBase
 */
@InterfaceAudience.Public
public abstract class Filter {
  /** Reversed-scan flag; written by {@link #setReversed(boolean)}. */
  protected transient boolean reversed;

  /**
   * Reset the state of the filter between rows. Concrete implementers can signal a failure
   * condition in their code by throwing an {@link IOException}.
   * @throws IOException if an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract void reset() throws IOException;

  /**
   * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
   * false, each Cell in the row will be passed to {@link #filterCell(Cell)} below. Concrete
   * implementers can signal a failure condition in their code by throwing an {@link IOException}.
   * @param buffer buffer containing row key
   * @param offset offset into buffer where row key starts
   * @param length length of the row key
   * @return true, remove entire row, false, include the row (maybe).
   * @throws IOException if an I/O or a filter-specific failure needs to be signaled.
   * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. Instead use
   *             {@link #filterRowKey(Cell)}
   */
  @Deprecated
  public abstract boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException;

  /**
   * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
   * false, each Cell in the row will be passed to {@link #filterCell(Cell)} below. If
   * {@link #filterAllRemaining()} returns true, then {@link #filterRowKey(Cell)} should also return
   * true. Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   * @param firstRowCell The first cell coming in the new row
   * @return true, remove entire row, false, include the row (maybe).
   * @throws IOException if an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract boolean filterRowKey(Cell firstRowCell) throws IOException;

  /**
   * If this returns true, the scan will terminate. Concrete implementers can signal a failure
   * condition in their code by throwing an {@link IOException}.
   * @return true to end scan, false to continue.
   * @throws IOException if an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract boolean filterAllRemaining() throws IOException;

  /**
   * A way to filter based on the column family, column qualifier and/or the column value. Return
   * code is described below. This allows filters to filter only certain number of columns, then
   * terminate without matching every column. If filterRowKey returns true, filterKeyValue needs to
   * be consistent with it. filterKeyValue can assume that filterRowKey has already been called for
   * the row. If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
   * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
   * for the next row. Concrete implementers can signal a failure condition in their code by
   * throwing an {@link IOException}.
   * @param c the Cell in question
   * @return code as described below, Filter.ReturnCode.INCLUDE by default
   * @throws IOException if an I/O or a filter-specific failure needs to be signaled.
   * @see Filter.ReturnCode
   * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. Instead use
   *             {@link #filterCell(Cell)}
   */
  @Deprecated
  public ReturnCode filterKeyValue(final Cell c) throws IOException {
    return Filter.ReturnCode.INCLUDE;
  }

  /**
   * A way to filter based on the column family, column qualifier and/or the column value. Return
   * code is described below. This allows filters to filter only certain number of columns, then
   * terminate without matching every column. If filterRowKey returns true, filterCell needs to be
   * consistent with it. filterCell can assume that filterRowKey has already been called for the
   * row. If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
   * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
   * for the next row. Concrete implementers can signal a failure condition in their code by
   * throwing an {@link IOException}.
   * <p>
   * This base implementation delegates to the deprecated {@link #filterKeyValue(Cell)} so that
   * filters which only override the old method keep working.
   * @param c the Cell in question
   * @return code as described below
   * @throws IOException if an I/O or a filter-specific failure needs to be signaled.
   * @see Filter.ReturnCode
   */
  public ReturnCode filterCell(final Cell c) throws IOException {
    return filterKeyValue(c);
  }

  /**
   * Give the filter a chance to transform the passed Cell. If the Cell is changed a new Cell
   * object must be returned. The transformed Cell is what is eventually returned to the client.
   * Most filters will return the passed Cell unchanged. See
   * {@link org.apache.hadoop.hbase.filter.KeyOnlyFilter#transformCell(Cell)} for an example of a
   * transformation. Concrete implementers can signal a failure condition in their code by
   * throwing an {@link IOException}.
   * @param v the Cell in question
   * @return the changed Cell
   * @throws IOException if an I/O or a filter-specific failure needs to be signaled.
   * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
   */
  public abstract Cell transformCell(final Cell v) throws IOException;

  /**
   * Return codes for {@link Filter#filterCell(Cell)}.
   */
  @InterfaceAudience.Public
  public enum ReturnCode {
    /**
     * Include the Cell
     */
    INCLUDE,
    /**
     * Include the Cell and seek to the next column skipping older versions.
     */
    INCLUDE_AND_NEXT_COL,
    /**
     * Skip this Cell
     */
    SKIP,
    /**
     * Skip this column. Go to the next column in this row.
     */
    NEXT_COL,
    /**
     * Seek to next row in current family. It may still pass a cell whose family is different but
     * row is the same as previous cell to {@link Filter#filterCell(Cell)}, even if we get a
     * NEXT_ROW returned for previous cell. For more details see HBASE-18368. <br>
     * Once reset() method was invoked, then we switch to the next row for all family, and you can
     * catch the event by invoking CellUtil.matchingRows(previousCell, currentCell). <br>
     * Note that filterRow() will still be called. <br>
     */
    NEXT_ROW,
    /**
     * Seek to next key which is given as hint by the filter.
     */
    SEEK_NEXT_USING_HINT,
    /**
     * Include Cell and done with row, seek to next. See NEXT_ROW.
     */
    INCLUDE_AND_SEEK_NEXT_ROW,
  }

  /**
   * Chance to alter the list of Cells to be submitted. Modifications to the list will carry on.
   * Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   * @param kvs the list of Cells to be filtered
   * @throws IOException if an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract void filterRowCells(List<Cell> kvs) throws IOException;

  /**
   * Primarily used to check for conflicts with scans (such as scans that do not read a full row at
   * a time).
   * @return True if this filter actively uses filterRowCells(List) or filterRow().
   */
  public abstract boolean hasFilterRow();

  /**
   * Last chance to veto row based on previous {@link #filterCell(Cell)} calls. The filter needs to
   * retain state then return a particular value for this call if they wish to exclude a row if a
   * certain column is missing (for example). Concrete implementers can signal a failure condition
   * in their code by throwing an {@link IOException}.
   * @return true to exclude row, false to include row.
   * @throws IOException if an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract boolean filterRow() throws IOException;

  /**
   * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
   * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
   * QueryMatcher would call this function to find out which key it must next seek to. Concrete
   * implementers can signal a failure condition in their code by throwing an {@link IOException}.
   * @param currentCell the current Cell
   * @return Cell which must be sought next; null if the filter is not sure which key to seek to
   *         next.
   * @throws IOException if an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract Cell getNextCellHint(final Cell currentCell) throws IOException;

  /**
   * Check that given column family is essential for filter to check row. Most filters always return
   * true here. But some could have more sophisticated logic which could significantly reduce
   * scanning process by not even touching columns until we are 100% sure that its data is needed
   * in result. Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   * @param name the column family name to check
   * @return true if the given column family is essential for this filter to check a row
   * @throws IOException if an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract boolean isFamilyEssential(byte[] name) throws IOException;

  /**
   * Serializes this filter to a byte array. Concrete implementers can signal a failure condition
   * in their code by throwing an {@link IOException}.
   * @return The filter serialized using pb
   * @throws IOException if an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract byte[] toByteArray() throws IOException;

  /**
   * Placeholder for the deserialization entry point that derived filter types provide. This base
   * implementation never returns normally: it always throws, because deserialization has to be
   * requested on a derived type.
   * @param pbBytes A pb serialized {@link Filter} instance
   * @return An instance of {@link Filter} made from <code>pbBytes</code>
   * @throws DeserializationException always, when called on the base {@link Filter} class
   * @see #toByteArray
   */
  public static Filter parseFrom(final byte[] pbBytes) throws DeserializationException {
    throw new DeserializationException(
      "parseFrom called on base Filter, but should be called on derived type");
  }

  /**
   * Compares the serialized state of this filter with that of another filter. Used for testing.
   * @param other the filter to compare against
   * @return true if and only if the fields of the filter that are serialized are equal to the
   *         corresponding fields in other.
   */
  abstract boolean areSerializedFieldsEqual(Filter other);

  /**
   * Alter the reversed scan flag.
   * @param reversed the new value of the flag
   */
  public void setReversed(boolean reversed) {
    this.reversed = reversed;
  }

  /**
   * Returns the reversed scan flag.
   * @return the value last passed to {@link #setReversed(boolean)}, or false if it was never set
   */
  public boolean isReversed() {
    return this.reversed;
  }
}