001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.filter; 019 020import java.io.IOException; 021import java.util.List; 022import org.apache.hadoop.hbase.Cell; 023import org.apache.hadoop.hbase.exceptions.DeserializationException; 024import org.apache.yetus.audience.InterfaceAudience; 025 026/** 027 * Interface for row and column filters directly applied within the regionserver. A filter can 028 * expect the following call sequence: 029 * <ul> 030 * <li>{@link #reset()} : reset the filter state before filtering a new row.</li> 031 * <li>{@link #filterAllRemaining()}: true means row scan is over; false means keep going.</li> 032 * <li>{@link #filterRowKey(Cell)}: true means drop this row; false means include.</li> 033 * <li>{@link #filterCell(Cell)}: decides whether to include or exclude this Cell. See 034 * {@link ReturnCode}.</li> 035 * <li>{@link #transformCell(Cell)}: if the Cell is included, let the filter transform the Cell. 036 * </li> 037 * <li>{@link #filterRowCells(List)}: allows direct modification of the final list to be submitted 038 * <li>{@link #filterRow()}: last chance to drop entire row based on the sequence of filter calls. 039 * Eg: filter a row if it doesn't contain a specified column.</li> 040 * </ul> 041 * Filter instances are created one per region/scan. This abstract class replaces the old 042 * RowFilterInterface. When implementing your own filters, consider inheriting {@link FilterBase} to 043 * help you reduce boilerplate. 044 * @see FilterBase 045 */ 046@InterfaceAudience.Public 047public abstract class Filter { 048 protected transient boolean reversed; 049 050 /** 051 * Reset the state of the filter between rows. Concrete implementers can signal a failure 052 * condition in their code by throwing an {@link IOException}. 053 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 054 */ 055 abstract public void reset() throws IOException; 056 057 /** 058 * Filters a row based on the row key. If this returns true, the entire row will be excluded. If 059 * false, each KeyValue in the row will be passed to {@link #filterCell(Cell)} below. If 060 * {@link #filterAllRemaining()} returns true, then {@link #filterRowKey(Cell)} should also return 061 * true. Concrete implementers can signal a failure condition in their code by throwing an 062 * {@link IOException}. 063 * @param firstRowCell The first cell coming in the new row 064 * @return true, remove entire row, false, include the row (maybe). 065 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 066 */ 067 abstract public boolean filterRowKey(Cell firstRowCell) throws IOException; 068 069 /** 070 * If this returns true, the scan will terminate. Concrete implementers can signal a failure 071 * condition in their code by throwing an {@link IOException}. 072 * @return true to end scan, false to continue. 073 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 074 */ 075 abstract public boolean filterAllRemaining() throws IOException; 076 077 /** 078 * A way to filter based on the column family, column qualifier and/or the column value. Return 079 * code is described below. This allows filters to filter only certain number of columns, then 080 * terminate without matching ever column. If filterRowKey returns true, filterCell needs to be 081 * consistent with it. filterCell can assume that filterRowKey has already been called for the 082 * row. If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return 083 * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls 084 * for the next row. Concrete implementers can signal a failure condition in their code by 085 * throwing an {@link IOException}. 086 * @param c the Cell in question 087 * @return code as described below 088 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 089 * @see Filter.ReturnCode 090 */ 091 public ReturnCode filterCell(final Cell c) throws IOException { 092 return ReturnCode.INCLUDE; 093 } 094 095 /** 096 * Give the filter a chance to transform the passed KeyValue. If the Cell is changed a new Cell 097 * object must be returned. 098 * @see org.apache.hadoop.hbase.KeyValue#shallowCopy() The transformed KeyValue is what is 099 * eventually returned to the client. Most filters will return the passed KeyValue unchanged. 100 * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transformCell(Cell) for an example of a 101 * transformation. Concrete implementers can signal a failure condition in their code by 102 * throwing an {@link IOException}. 103 * @param v the KeyValue in question 104 * @return the changed KeyValue 105 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 106 */ 107 abstract public Cell transformCell(final Cell v) throws IOException; 108 109 /** 110 * Return codes for filterValue(). 111 */ 112 @InterfaceAudience.Public 113 public enum ReturnCode { 114 /** 115 * Include the Cell 116 */ 117 INCLUDE, 118 /** 119 * Include the Cell and seek to the next column skipping older versions. 120 */ 121 INCLUDE_AND_NEXT_COL, 122 /** 123 * Skip this Cell 124 */ 125 SKIP, 126 /** 127 * Skip this column. Go to the next column in this row. 128 */ 129 NEXT_COL, 130 /** 131 * Seek to next row in current family. It may still pass a cell whose family is different but 132 * row is the same as previous cell to {@link #filterCell(Cell)} , even if we get a NEXT_ROW 133 * returned for previous cell. For more details see HBASE-18368. <br> 134 * Once reset() method was invoked, then we switch to the next row for all family, and you can 135 * catch the event by invoking CellUtils.matchingRows(previousCell, currentCell). <br> 136 * Note that filterRow() will still be called. <br> 137 */ 138 NEXT_ROW, 139 /** 140 * Seek to next key which is given as hint by the filter. 141 */ 142 SEEK_NEXT_USING_HINT, 143 /** 144 * Include KeyValue and done with row, seek to next. See NEXT_ROW. 145 */ 146 INCLUDE_AND_SEEK_NEXT_ROW, 147 } 148 149 /** 150 * Chance to alter the list of Cells to be submitted. Modifications to the list will carry on 151 * Concrete implementers can signal a failure condition in their code by throwing an 152 * {@link IOException}. 153 * @param kvs the list of Cells to be filtered 154 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 155 */ 156 abstract public void filterRowCells(List<Cell> kvs) throws IOException; 157 158 /** 159 * Primarily used to check for conflicts with scans(such as scans that do not read a full row at a 160 * time). 161 * @return True if this filter actively uses filterRowCells(List) or filterRow(). 162 */ 163 abstract public boolean hasFilterRow(); 164 165 /** 166 * Last chance to veto row based on previous {@link #filterCell(Cell)} calls. The filter needs to 167 * retain state then return a particular value for this call if they wish to exclude a row if a 168 * certain column is missing (for example). Concrete implementers can signal a failure condition 169 * in their code by throwing an {@link IOException}. 170 * @return true to exclude row, false to include row. 171 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 172 */ 173 abstract public boolean filterRow() throws IOException; 174 175 /** 176 * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is 177 * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the 178 * QueryMatcher would call this function to find out which key it must next seek to. Concrete 179 * implementers can signal a failure condition in their code by throwing an {@link IOException}. 180 * @return KeyValue which must be next seeked. return null if the filter is not sure which key to 181 * seek to next. 182 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 183 */ 184 abstract public Cell getNextCellHint(final Cell currentCell) throws IOException; 185 186 /** 187 * Check that given column family is essential for filter to check row. Most filters always return 188 * true here. But some could have more sophisticated logic which could significantly reduce 189 * scanning process by not even touching columns until we are 100% sure that it's data is needed 190 * in result. Concrete implementers can signal a failure condition in their code by throwing an 191 * {@link IOException}. 192 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 193 */ 194 abstract public boolean isFamilyEssential(byte[] name) throws IOException; 195 196 /** 197 * TODO: JAVADOC Concrete implementers can signal a failure condition in their code by throwing an 198 * {@link IOException}. 199 * @return The filter serialized using pb 200 * @throws IOException in case an I/O or an filter specific failure needs to be signaled. 201 */ 202 abstract public byte[] toByteArray() throws IOException; 203 204 /** 205 * Concrete implementers can signal a failure condition in their code by throwing an 206 * {@link IOException}. 207 * @param pbBytes A pb serialized {@link Filter} instance 208 * @return An instance of {@link Filter} made from <code>bytes</code> 209 * @throws DeserializationException if an error occurred 210 * @see #toByteArray 211 */ 212 public static Filter parseFrom(final byte[] pbBytes) throws DeserializationException { 213 throw new DeserializationException( 214 "parseFrom called on base Filter, but should be called on derived type"); 215 } 216 217 /** 218 * Concrete implementers can signal a failure condition in their code by throwing an 219 * {@link IOException}. 220 * @return true if and only if the fields of the filter that are serialized are equal to the 221 * corresponding fields in other. Used for testing. 222 */ 223 abstract boolean areSerializedFieldsEqual(Filter other); 224 225 /** 226 * alter the reversed scan flag 227 * @param reversed flag 228 */ 229 public void setReversed(boolean reversed) { 230 this.reversed = reversed; 231 } 232 233 public boolean isReversed() { 234 return this.reversed; 235 } 236}