001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.filter; 019 020import java.io.IOException; 021import java.util.List; 022import org.apache.hadoop.hbase.Cell; 023import org.apache.hadoop.hbase.exceptions.DeserializationException; 024import org.apache.yetus.audience.InterfaceAudience; 025 026/** 027 * Interface for row and column filters directly applied within the regionserver. A filter can 028 * expect the following call sequence: 029 * <ul> 030 * <li>{@link #reset()} : reset the filter state before filtering a new row.</li> 031 * <li>{@link #filterAllRemaining()}: true means row scan is over; false means keep going.</li> 032 * <li>{@link #filterRowKey(Cell)}: true means drop this row; false means include.</li> 033 * <li>{@link #filterCell(Cell)}: decides whether to include or exclude this Cell. See 034 * {@link ReturnCode}.</li> 035 * <li>{@link #transformCell(Cell)}: if the Cell is included, let the filter transform the Cell. 036 * </li> 037 * <li>{@link #filterRowCells(List)}: allows direct modification of the final list to be submitted 038 * <li>{@link #filterRow()}: last chance to drop entire row based on the sequence of filter calls. 039 * Eg: filter a row if it doesn't contain a specified column.</li> 040 * </ul> 041 * Filter instances are created one per region/scan. This abstract class replaces the old 042 * RowFilterInterface. When implementing your own filters, consider inheriting {@link FilterBase} to 043 * help you reduce boilerplate. 044 * @see FilterBase 045 */ 046@InterfaceAudience.Public 047public abstract class Filter { 048 protected transient boolean reversed; 049 050 /** 051 * Reset the state of the filter between rows. Concrete implementers can signal a failure 052 * condition in their code by throwing an {@link IOException}. 053 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 054 */ 055 abstract public void reset() throws IOException; 056 057 /** 058 * Filters a row based on the row key. If this returns true, the entire row will be excluded. If 059 * false, each KeyValue in the row will be passed to {@link #filterCell(Cell)} below. If 060 * {@link #filterAllRemaining()} returns true, then {@link #filterRowKey(Cell)} should also return 061 * true. Concrete implementers can signal a failure condition in their code by throwing an 062 * {@link IOException}. 063 * @param firstRowCell The first cell coming in the new row 064 * @return true, remove entire row, false, include the row (maybe). 065 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 066 */ 067 abstract public boolean filterRowKey(Cell firstRowCell) throws IOException; 068 069 /** 070 * If this returns true, the scan will terminate. Concrete implementers can signal a failure 071 * condition in their code by throwing an {@link IOException}. 072 * @return true to end scan, false to continue. 073 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 074 */ 075 abstract public boolean filterAllRemaining() throws IOException; 076 077 /** 078 * A way to filter based on the column family, column qualifier and/or the column value. Return 079 * code is described below. This allows filters to filter only certain number of columns, then 080 * terminate without matching ever column. If filterRowKey returns true, filterCell needs to be 081 * consistent with it. filterCell can assume that filterRowKey has already been called for the 082 * row. If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return 083 * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls 084 * for the next row. Concrete implementers can signal a failure condition in their code by 085 * throwing an {@link IOException}. 086 * @param c the Cell in question 087 * @return code as described below 088 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 089 * @see Filter.ReturnCode 090 */ 091 public ReturnCode filterCell(final Cell c) throws IOException { 092 return ReturnCode.INCLUDE; 093 } 094 095 /** 096 * Give the filter a chance to transform the passed Cell. If the Cell is changed a new Cell object 097 * must be returned. 098 * <p/> 099 * <strong>NOTICE:</strong> Filter will be evaluate at server side so the returned {@link Cell} 100 * must be an {@link org.apache.hadoop.hbase.ExtendedCell}, although it is marked as IA.Private. 101 * @see org.apache.hadoop.hbase.KeyValue#shallowCopy() The transformed KeyValue is what is 102 * eventually returned to the client. Most filters will return the passed KeyValue unchanged. 103 * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transformCell(Cell) for an example of a 104 * transformation. Concrete implementers can signal a failure condition in their code by 105 * throwing an {@link IOException}. 106 * @param v the Cell in question 107 * @return the changed Cell 108 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 109 */ 110 abstract public Cell transformCell(final Cell v) throws IOException; 111 112 /** 113 * Return codes for filterValue(). 114 */ 115 @InterfaceAudience.Public 116 public enum ReturnCode { 117 /** 118 * Include the Cell 119 */ 120 INCLUDE, 121 /** 122 * Include the Cell and seek to the next column skipping older versions. 123 */ 124 INCLUDE_AND_NEXT_COL, 125 /** 126 * Skip this Cell 127 */ 128 SKIP, 129 /** 130 * Skip this column. Go to the next column in this row. 131 */ 132 NEXT_COL, 133 /** 134 * Seek to next row in current family. It may still pass a cell whose family is different but 135 * row is the same as previous cell to {@link #filterCell(Cell)} , even if we get a NEXT_ROW 136 * returned for previous cell. For more details see HBASE-18368. <br> 137 * Once reset() method was invoked, then we switch to the next row for all family, and you can 138 * catch the event by invoking CellUtils.matchingRows(previousCell, currentCell). <br> 139 * Note that filterRow() will still be called. <br> 140 */ 141 NEXT_ROW, 142 /** 143 * Seek to next key which is given as hint by the filter. 144 */ 145 SEEK_NEXT_USING_HINT, 146 /** 147 * Include KeyValue and done with row, seek to next. See NEXT_ROW. 148 */ 149 INCLUDE_AND_SEEK_NEXT_ROW, 150 } 151 152 /** 153 * Chance to alter the list of Cells to be submitted. Modifications to the list will carry on 154 * Concrete implementers can signal a failure condition in their code by throwing an 155 * {@link IOException}. 156 * @param kvs the list of Cells to be filtered 157 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 158 */ 159 abstract public void filterRowCells(List<Cell> kvs) throws IOException; 160 161 /** 162 * Primarily used to check for conflicts with scans(such as scans that do not read a full row at a 163 * time). 164 * @return True if this filter actively uses filterRowCells(List) or filterRow(). 165 */ 166 abstract public boolean hasFilterRow(); 167 168 /** 169 * Last chance to veto row based on previous {@link #filterCell(Cell)} calls. The filter needs to 170 * retain state then return a particular value for this call if they wish to exclude a row if a 171 * certain column is missing (for example). Concrete implementers can signal a failure condition 172 * in their code by throwing an {@link IOException}. 173 * @return true to exclude row, false to include row. 174 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 175 */ 176 abstract public boolean filterRow() throws IOException; 177 178 /** 179 * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is 180 * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the 181 * QueryMatcher would call this function to find out which key it must next seek to. Concrete 182 * implementers can signal a failure condition in their code by throwing an {@link IOException}. 183 * <strong>NOTICE:</strong> Filter will be evaluate at server side so the returned {@link Cell} 184 * must be an {@link org.apache.hadoop.hbase.ExtendedCell}, although it is marked as IA.Private. 185 * @return KeyValue which must be next seeked. return null if the filter is not sure which key to 186 * seek to next. 187 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 188 */ 189 abstract public Cell getNextCellHint(final Cell currentCell) throws IOException; 190 191 /** 192 * Check that given column family is essential for filter to check row. Most filters always return 193 * true here. But some could have more sophisticated logic which could significantly reduce 194 * scanning process by not even touching columns until we are 100% sure that it's data is needed 195 * in result. Concrete implementers can signal a failure condition in their code by throwing an 196 * {@link IOException}. 197 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 198 */ 199 abstract public boolean isFamilyEssential(byte[] name) throws IOException; 200 201 /** 202 * TODO: JAVADOC Concrete implementers can signal a failure condition in their code by throwing an 203 * {@link IOException}. 204 * @return The filter serialized using pb 205 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 206 */ 207 abstract public byte[] toByteArray() throws IOException; 208 209 /** 210 * Concrete implementers can signal a failure condition in their code by throwing an 211 * {@link IOException}. 212 * @param pbBytes A pb serialized {@link Filter} instance 213 * @return An instance of {@link Filter} made from <code>bytes</code> 214 * @throws DeserializationException if an error occurred 215 * @see #toByteArray 216 */ 217 public static Filter parseFrom(final byte[] pbBytes) throws DeserializationException { 218 throw new DeserializationException( 219 "parseFrom called on base Filter, but should be called on derived type"); 220 } 221 222 /** 223 * Concrete implementers can signal a failure condition in their code by throwing an 224 * {@link IOException}. 225 * @return true if and only if the fields of the filter that are serialized are equal to the 226 * corresponding fields in other. Used for testing. 227 */ 228 abstract boolean areSerializedFieldsEqual(Filter other); 229 230 /** 231 * alter the reversed scan flag 232 * @param reversed flag 233 */ 234 public void setReversed(boolean reversed) { 235 this.reversed = reversed; 236 } 237 238 public boolean isReversed() { 239 return this.reversed; 240 } 241}