/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.filter;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * Base type for row and column filters that are applied directly within the regionserver.
 *
 * <p>A filter can expect the following call sequence:
 * <ul>
 *   <li>{@link #reset()}: reset the filter state before filtering a new row.</li>
 *   <li>{@link #filterAllRemaining()}: true means the row scan is over; false means keep
 *       going.</li>
 *   <li>{@link #filterRowKey(Cell)}: true means drop this row; false means include.</li>
 *   <li>{@link #filterCell(Cell)}: decides whether to include or exclude this Cell. See
 *       {@link ReturnCode}.</li>
 *   <li>{@link #transformCell(Cell)}: if the Cell is included, let the filter transform the
 *       Cell.</li>
 *   <li>{@link #filterRowCells(List)}: allows direct modification of the final list to be
 *       submitted.</li>
 *   <li>{@link #filterRow()}: last chance to drop the entire row based on the sequence of filter
 *       calls, e.g. filter a row if it doesn't contain a specified column.</li>
 * </ul>
 *
 * <p>Filter instances are created one per region/scan. This abstract class replaces the old
 * RowFilterInterface.
 *
 * <p>When implementing your own filters, consider inheriting {@link FilterBase} to help you
 * reduce boilerplate.
 *
 * @see FilterBase
 */
@InterfaceAudience.Public
public abstract class Filter {

  /** Reversed-scan flag; written by {@link #setReversed(boolean)}. */
  protected transient boolean reversed;

  /**
   * Resets the state of the filter between rows.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract void reset() throws IOException;

  /**
   * Filters a row based on the row key. If this returns true, the entire row will be excluded.
   * If false, each KeyValue in the row will be passed to {@link #filterCell(Cell)}.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @param buffer buffer containing the row key
   * @param offset offset into buffer where the row key starts
   * @param length length of the row key
   * @return true to remove the entire row, false to include the row (maybe).
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0.
   *             Instead use {@link #filterRowKey(Cell)}
   */
  @Deprecated
  public abstract boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException;

  /**
   * Filters a row based on the row key. If this returns true, the entire row will be excluded.
   * If false, each KeyValue in the row will be passed to {@link #filterCell(Cell)}. If
   * {@link #filterAllRemaining()} returns true, then this method should also return true.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @param firstRowCell the first cell coming in the new row
   * @return true to remove the entire row, false to include the row (maybe).
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract boolean filterRowKey(Cell firstRowCell) throws IOException;

  /**
   * Tells the caller whether the scan should terminate. If this returns true, the scan will
   * terminate.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @return true to end the scan, false to continue.
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract boolean filterAllRemaining() throws IOException;

  /**
   * A way to filter based on the column family, column qualifier and/or the column value. The
   * possible results are described by {@link ReturnCode}. This allows filters to filter only a
   * certain number of columns, then terminate without matching every column.
   *
   * <p>If filterRowKey returns true, filterKeyValue needs to be consistent with it.
   *
   * <p>filterKeyValue can assume that filterRowKey has already been called for the row.
   *
   * <p>If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
   * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called, just in case the caller
   * calls for the next row.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @param c the Cell in question
   * @return a {@link ReturnCode}; this base implementation always returns
   *         {@link ReturnCode#INCLUDE}
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   * @see Filter.ReturnCode
   * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0.
   *             Instead use {@link #filterCell(Cell)}
   */
  @Deprecated
  public ReturnCode filterKeyValue(final Cell c) throws IOException {
    return ReturnCode.INCLUDE;
  }

  /**
   * A way to filter based on the column family, column qualifier and/or the column value. The
   * possible results are described by {@link ReturnCode}. This allows filters to filter only a
   * certain number of columns, then terminate without matching every column.
   *
   * <p>If filterRowKey returns true, filterCell needs to be consistent with it.
   *
   * <p>filterCell can assume that filterRowKey has already been called for the row.
   *
   * <p>If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
   * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called, just in case the caller
   * calls for the next row.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * <p>This base implementation delegates to the deprecated {@link #filterKeyValue(Cell)}.
   *
   * @param c the Cell in question
   * @return a {@link ReturnCode} describing what to do with the Cell
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   * @see Filter.ReturnCode
   */
  public ReturnCode filterCell(final Cell c) throws IOException {
    return filterKeyValue(c);
  }

  /**
   * Gives the filter a chance to transform the passed KeyValue. If the Cell is changed, a new
   * Cell object must be returned.
   *
   * <p>The transformed KeyValue is what is eventually returned to the client. Most filters will
   * return the passed KeyValue unchanged. See
   * {@code org.apache.hadoop.hbase.filter.KeyOnlyFilter#transformCell(Cell)} for an example of a
   * transformation.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @param v the KeyValue in question
   * @return the changed KeyValue
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
   */
  public abstract Cell transformCell(final Cell v) throws IOException;

  /**
   * Return codes for {@link Filter#filterCell(Cell)}.
   */
  @InterfaceAudience.Public
  public enum ReturnCode {
    /**
     * Include the Cell.
     */
    INCLUDE,
    /**
     * Include the Cell and seek to the next column, skipping older versions.
     */
    INCLUDE_AND_NEXT_COL,
    /**
     * Skip this Cell.
     */
    SKIP,
    /**
     * Skip this column. Go to the next column in this row.
     */
    NEXT_COL,
    /**
     * Seek to the next row in the current family. A cell whose family is different but whose row
     * is the same as the previous cell may still be passed to {@link Filter#filterCell(Cell)},
     * even if NEXT_ROW was returned for the previous cell. For more details see HBASE-18368.
     * <br>
     * Once the reset() method has been invoked, the scan switches to the next row for all
     * families; you can catch the event by comparing rows, e.g.
     * {@code CellUtil.matchingRows(previousCell, currentCell)}. <br>
     * Note that filterRow() will still be called. <br>
     */
    NEXT_ROW,
    /**
     * Seek to the next key, which is given as a hint by the filter.
     */
    SEEK_NEXT_USING_HINT,
    /**
     * Include the KeyValue and be done with the row; seek to the next. See NEXT_ROW.
     */
    INCLUDE_AND_SEEK_NEXT_ROW,
  }

  /**
   * Chance to alter the list of Cells to be submitted. Modifications made to the list carry over
   * to what is submitted.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @param kvs the list of Cells to be filtered
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract void filterRowCells(List<Cell> kvs) throws IOException;

  /**
   * Primarily used to check for conflicts with scans (such as scans that do not read a full row
   * at a time).
   *
   * @return true if this filter actively uses filterRowCells(List) or filterRow().
   */
  public abstract boolean hasFilterRow();

  /**
   * Last chance to veto a row based on previous {@link #filterCell(Cell)} calls. The filter needs
   * to retain state and then return a particular value for this call if it wishes to exclude a
   * row, for example because a certain column is missing.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @return true to exclude the row, false to include the row.
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract boolean filterRow() throws IOException;

  /**
   * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
   * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
   * QueryMatcher would call this function to find out which key it must next seek to.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @param currentCell the current Cell
   * @return the KeyValue which must be sought next; null if the filter is not sure which key to
   *         seek to next.
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract Cell getNextCellHint(final Cell currentCell) throws IOException;

  /**
   * Checks whether the given column family is essential for the filter to check a row. Most
   * filters always return true here, but some could have more sophisticated logic which could
   * significantly reduce the scanning process by not even touching columns until we are 100% sure
   * that their data is needed in the result.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @param name the column family to check
   * @return true if the column family is essential for this filter
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract boolean isFamilyEssential(byte[] name) throws IOException;

  /**
   * Serializes this filter.
   *
   * <p>Concrete implementers can signal a failure condition in their code by throwing an
   * {@link IOException}.
   *
   * @return the filter serialized using pb
   * @throws IOException in case an I/O or a filter-specific failure needs to be signaled.
   */
  public abstract byte[] toByteArray() throws IOException;

  /**
   * Placeholder for the deserialization entry point. This base implementation never returns a
   * filter: it unconditionally throws, because parseFrom should be called on the derived type.
   *
   * @param pbBytes a pb serialized {@link Filter} instance
   * @return an instance of {@link Filter} made from <code>pbBytes</code>
   * @throws DeserializationException always, when invoked on this base class
   * @see #toByteArray
   */
  public static Filter parseFrom(final byte[] pbBytes) throws DeserializationException {
    throw new DeserializationException(
      "parseFrom called on base Filter, but should be called on derived type");
  }

  /**
   * Compares the serialized state of this filter with another one. Used for testing.
   *
   * @param other the filter to compare against
   * @return true if and only if the fields of the filter that are serialized are equal to the
   *         corresponding fields in other.
   */
  abstract boolean areSerializedFieldsEqual(Filter other);

  /**
   * Alters the reversed scan flag.
   *
   * @param reversed the new value of the flag
   */
  public void setReversed(boolean reversed) {
    this.reversed = reversed;
  }

  /**
   * Returns the reversed scan flag.
   *
   * @return the value last stored by {@link #setReversed(boolean)}, or false if it was never set
   */
  public boolean isReversed() {
    return reversed;
  }
}