001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.filter; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Collections; 023import java.util.List; 024import java.util.Objects; 025import org.apache.hadoop.hbase.Cell; 026import org.apache.hadoop.hbase.CellUtil; 027import org.apache.yetus.audience.InterfaceAudience; 028 029/** 030 * FilterListWithOR represents an ordered list of filters which will be evaluated with an OR 031 * operator. 032 */ 033@InterfaceAudience.Private 034public class FilterListWithOR extends FilterListBase { 035 036 /** 037 * Save previous return code and previous cell for every filter in filter list. For MUST_PASS_ONE, 038 * we use the previous return code to decide whether we should pass current cell encountered to 039 * the filter. For MUST_PASS_ALL, the two list are meaningless. 040 */ 041 private List<ReturnCode> prevFilterRCList = null; 042 private List<Cell> prevCellList = null; 043 044 public FilterListWithOR(List<Filter> filters) { 045 super(filters); 046 prevFilterRCList = new ArrayList<>(Collections.nCopies(filters.size(), null)); 047 prevCellList = new ArrayList<>(Collections.nCopies(filters.size(), null)); 048 subFiltersIncludedCell = new ArrayList<>(Collections.nCopies(filters.size(), false)); 049 } 050 051 @Override 052 public void addFilterLists(List<Filter> filters) { 053 if (checkAndGetReversed(filters, isReversed()) != isReversed()) { 054 throw new IllegalArgumentException("Filters in the list must have the same reversed flag"); 055 } 056 this.filters.addAll(filters); 057 this.subFiltersIncludedCell.addAll(Collections.nCopies(filters.size(), false)); 058 this.prevFilterRCList.addAll(Collections.nCopies(filters.size(), null)); 059 this.prevCellList.addAll(Collections.nCopies(filters.size(), null)); 060 } 061 062 @Override 063 protected String formatLogFilters(List<Filter> logFilters) { 064 return String.format("FilterList OR (%d/%d): %s", logFilters.size(), this.size(), 065 logFilters.toString()); 066 } 067 068 /** 069 * For MUST_PASS_ONE, we cannot make sure that when filter-A in filter list return NEXT_COL then 070 * the next cell passing to filterList will be the first cell in next column, because if filter-B 071 * in filter list return SKIP, then the filter list will return SKIP. In this case, we should pass 072 * the cell following the previous cell, and it's possible that the next cell has the same column 073 * as the previous cell even if filter-A has NEXT_COL returned for the previous cell. So we should 074 * save the previous cell and the return code list when checking previous cell for every filter in 075 * filter list, and verify if currentCell fit the previous return code, if fit then pass the 076 * currentCell to the corresponding filter. (HBASE-17678) <br> 077 * Note that: In StoreScanner level, NEXT_ROW will skip to the next row in current family, and in 078 * RegionScanner level, NEXT_ROW will skip to the next row in current family and switch to the 079 * next family for RegionScanner, INCLUDE_AND_NEXT_ROW is the same. so we should pass current cell 080 * to the filter, if row mismatch or row match but column family mismatch. (HBASE-18368) 081 * @see org.apache.hadoop.hbase.filter.Filter.ReturnCode 082 * @param subFilter which sub-filter to calculate the return code by using previous cell and 083 * previous return code. 084 * @param prevCell the previous cell passed to given sub-filter. 085 * @param currentCell the current cell which will pass to given sub-filter. 086 * @param prevCode the previous return code for given sub-filter. 087 * @return return code calculated by using previous cell and previous return code. null means can 088 * not decide which return code should return, so we will pass the currentCell to 089 * subFilter for getting currentCell's return code, and it won't impact the sub-filter's 090 * internal states. 091 */ 092 private ReturnCode calculateReturnCodeByPrevCellAndRC(Filter subFilter, Cell currentCell, 093 Cell prevCell, ReturnCode prevCode) throws IOException { 094 if (prevCell == null || prevCode == null) { 095 return null; 096 } 097 switch (prevCode) { 098 case INCLUDE: 099 case SKIP: 100 return null; 101 case SEEK_NEXT_USING_HINT: 102 Cell nextHintCell = subFilter.getNextCellHint(prevCell); 103 return nextHintCell != null && compareCell(currentCell, nextHintCell) < 0 104 ? ReturnCode.SEEK_NEXT_USING_HINT 105 : null; 106 case NEXT_COL: 107 case INCLUDE_AND_NEXT_COL: 108 // Once row changed, reset() will clear prevCells, so we need not to compare their rows 109 // because rows are the same here. 110 return CellUtil.matchingColumn(prevCell, currentCell) ? ReturnCode.NEXT_COL : null; 111 case NEXT_ROW: 112 case INCLUDE_AND_SEEK_NEXT_ROW: 113 // As described above, rows are definitely the same, so we only compare the family. 114 return CellUtil.matchingFamily(prevCell, currentCell) ? ReturnCode.NEXT_ROW : null; 115 default: 116 throw new IllegalStateException("Received code is not valid."); 117 } 118 } 119 120 /** 121 * FilterList with MUST_PASS_ONE choose the minimal forward step among sub-filter in filter list. 122 * Let's call it: The Minimal Step Rule. So if filter-A in filter list return INCLUDE and filter-B 123 * in filter list return INCLUDE_AND_NEXT_COL, then the filter list should return INCLUDE. For 124 * SEEK_NEXT_USING_HINT, it's more special, because we do not know how far it will forward, so we 125 * use SKIP by default.<br/> 126 * <br/> 127 * The jump step will be: 128 * 129 * <pre> 130 * INCLUDE < SKIP < INCLUDE_AND_NEXT_COL < NEXT_COL < INCLUDE_AND_SEEK_NEXT_ROW < NEXT_ROW 131 * < SEEK_NEXT_USING_HINT 132 * </pre> 133 * 134 * Here, we have the following map to describe The Minimal Step Rule. if current return code (for 135 * previous sub-filters in filter list) is <strong>ReturnCode</strong>, and current filter returns 136 * <strong>localRC</strong>, then we should return map[ReturnCode][localRC] for the merged result, 137 * according to The Minimal Step Rule.<br/> 138 * 139 * <pre> 140 * LocalCode\ReturnCode INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW SKIP NEXT_COL NEXT_ROW SEEK_NEXT_USING_HINT 141 * INCLUDE INCLUDE INCLUDE INCLUDE INCLUDE INCLUDE INCLUDE INCLUDE 142 * INCLUDE_AND_NEXT_COL INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_NEXT_COL INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_NEXT_COL INCLUDE 143 * INCLUDE_AND_SEEK_NEXT_ROW INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW INCLUDE 144 * SKIP INCLUDE INCLUDE INCLUDE SKIP SKIP SKIP SKIP 145 * NEXT_COL INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_NEXT_COL SKIP NEXT_COL NEXT_COL SKIP 146 * NEXT_ROW INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW SKIP NEXT_COL NEXT_ROW SKIP 147 * SEEK_NEXT_USING_HINT INCLUDE INCLUDE INCLUDE SKIP SKIP SKIP SEEK_NEXT_USING_HINT 148 * </pre> 149 * 150 * @param rc Return code which is calculated by previous sub-filter(s) in filter list. 151 * @param localRC Return code of the current sub-filter in filter list. 152 * @return Return code which is merged by the return code of previous sub-filter(s) and the return 153 * code of current sub-filter. 154 */ 155 private ReturnCode mergeReturnCode(ReturnCode rc, ReturnCode localRC) { 156 if (rc == null) return localRC; 157 switch (localRC) { 158 case INCLUDE: 159 return ReturnCode.INCLUDE; 160 case INCLUDE_AND_NEXT_COL: 161 if ( 162 isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT) 163 ) { 164 return ReturnCode.INCLUDE; 165 } 166 if ( 167 isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW, 168 ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW) 169 ) { 170 return ReturnCode.INCLUDE_AND_NEXT_COL; 171 } 172 break; 173 case INCLUDE_AND_SEEK_NEXT_ROW: 174 if ( 175 isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT) 176 ) { 177 return ReturnCode.INCLUDE; 178 } 179 if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.NEXT_COL)) { 180 return ReturnCode.INCLUDE_AND_NEXT_COL; 181 } 182 if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW, ReturnCode.NEXT_ROW)) { 183 return ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW; 184 } 185 break; 186 case SKIP: 187 if ( 188 isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL, 189 ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW) 190 ) { 191 return ReturnCode.INCLUDE; 192 } 193 if ( 194 isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW, 195 ReturnCode.SEEK_NEXT_USING_HINT) 196 ) { 197 return ReturnCode.SKIP; 198 } 199 break; 200 case NEXT_COL: 201 if (isInReturnCodes(rc, ReturnCode.INCLUDE)) { 202 return ReturnCode.INCLUDE; 203 } 204 if (isInReturnCodes(rc, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) { 205 return ReturnCode.NEXT_COL; 206 } 207 if ( 208 isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW) 209 ) { 210 return ReturnCode.INCLUDE_AND_NEXT_COL; 211 } 212 if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT)) { 213 return ReturnCode.SKIP; 214 } 215 break; 216 case NEXT_ROW: 217 if (isInReturnCodes(rc, ReturnCode.INCLUDE)) { 218 return ReturnCode.INCLUDE; 219 } 220 if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL)) { 221 return ReturnCode.INCLUDE_AND_NEXT_COL; 222 } 223 if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) { 224 return ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW; 225 } 226 if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT)) { 227 return ReturnCode.SKIP; 228 } 229 if (isInReturnCodes(rc, ReturnCode.NEXT_COL)) { 230 return ReturnCode.NEXT_COL; 231 } 232 if (isInReturnCodes(rc, ReturnCode.NEXT_ROW)) { 233 return ReturnCode.NEXT_ROW; 234 } 235 break; 236 case SEEK_NEXT_USING_HINT: 237 if ( 238 isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL, 239 ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW) 240 ) { 241 return ReturnCode.INCLUDE; 242 } 243 if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) { 244 return ReturnCode.SKIP; 245 } 246 if (isInReturnCodes(rc, ReturnCode.SEEK_NEXT_USING_HINT)) { 247 return ReturnCode.SEEK_NEXT_USING_HINT; 248 } 249 break; 250 } 251 throw new IllegalStateException( 252 "Received code is not valid. rc: " + rc + ", localRC: " + localRC); 253 } 254 255 private void updatePrevFilterRCList(int index, ReturnCode currentRC) { 256 prevFilterRCList.set(index, currentRC); 257 } 258 259 private void updatePrevCellList(int index, Cell currentCell, ReturnCode currentRC) { 260 if (currentCell == null || currentRC == ReturnCode.INCLUDE || currentRC == ReturnCode.SKIP) { 261 // If previous return code is INCLUDE or SKIP, we should always pass the next cell to the 262 // corresponding sub-filter(need not test calculateReturnCodeByPrevCellAndRC() method), So we 263 // need not save current cell to prevCellList for saving heap memory. 264 prevCellList.set(index, null); 265 } else { 266 prevCellList.set(index, currentCell); 267 } 268 } 269 270 @Override 271 public ReturnCode filterCell(Cell c) throws IOException { 272 if (isEmpty()) { 273 return ReturnCode.INCLUDE; 274 } 275 ReturnCode rc = null; 276 for (int i = 0, n = filters.size(); i < n; i++) { 277 Filter filter = filters.get(i); 278 subFiltersIncludedCell.set(i, false); 279 280 Cell prevCell = this.prevCellList.get(i); 281 ReturnCode prevCode = this.prevFilterRCList.get(i); 282 if (filter.filterAllRemaining()) { 283 continue; 284 } 285 ReturnCode localRC = calculateReturnCodeByPrevCellAndRC(filter, c, prevCell, prevCode); 286 if (localRC == null) { 287 // Can not get return code based on previous cell and previous return code. In other words, 288 // we should pass the current cell to this sub-filter to get the return code, and it won't 289 // impact the sub-filter's internal state. 290 localRC = filter.filterCell(c); 291 } 292 293 // Update previous return code and previous cell for filter[i]. 294 updatePrevFilterRCList(i, localRC); 295 updatePrevCellList(i, c, localRC); 296 297 rc = mergeReturnCode(rc, localRC); 298 299 // For INCLUDE* case, we need to update the transformed cell. 300 if ( 301 isInReturnCodes(localRC, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL, 302 ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW) 303 ) { 304 subFiltersIncludedCell.set(i, true); 305 } 306 } 307 // Each sub-filter in filter list got true for filterAllRemaining(), if rc is null, so we should 308 // return SKIP. 309 return rc == null ? ReturnCode.SKIP : rc; 310 } 311 312 @Override 313 public void reset() throws IOException { 314 for (int i = 0, n = filters.size(); i < n; i++) { 315 filters.get(i).reset(); 316 subFiltersIncludedCell.set(i, false); 317 prevFilterRCList.set(i, null); 318 prevCellList.set(i, null); 319 } 320 } 321 322 @Override 323 public boolean filterRowKey(Cell firstRowCell) throws IOException { 324 if (isEmpty()) { 325 return super.filterRowKey(firstRowCell); 326 } 327 boolean retVal = true; 328 for (int i = 0, n = filters.size(); i < n; i++) { 329 Filter filter = filters.get(i); 330 if (!filter.filterAllRemaining() && !filter.filterRowKey(firstRowCell)) { 331 // Can't just return false here, because there are some filters (such as PrefixFilter) which 332 // will catch the row changed event by filterRowKey(). If we return early here, those 333 // filters will have no chance to update their row state. 334 retVal = false; 335 } 336 } 337 return retVal; 338 } 339 340 @Override 341 public boolean filterAllRemaining() throws IOException { 342 if (isEmpty()) { 343 return super.filterAllRemaining(); 344 } 345 for (int i = 0, n = filters.size(); i < n; i++) { 346 if (!filters.get(i).filterAllRemaining()) { 347 return false; 348 } 349 } 350 return true; 351 } 352 353 @Override 354 public boolean filterRow() throws IOException { 355 if (isEmpty()) { 356 return super.filterRow(); 357 } 358 for (int i = 0, n = filters.size(); i < n; i++) { 359 Filter filter = filters.get(i); 360 if (!filter.filterRow()) { 361 return false; 362 } 363 } 364 return true; 365 } 366 367 @Override 368 public Cell getNextCellHint(Cell currentCell) throws IOException { 369 if (isEmpty()) { 370 return super.getNextCellHint(currentCell); 371 } 372 Cell minKeyHint = null; 373 // If any condition can pass, we need to keep the min hint 374 for (int i = 0, n = filters.size(); i < n; i++) { 375 if (filters.get(i).filterAllRemaining()) { 376 continue; 377 } 378 Cell curKeyHint = filters.get(i).getNextCellHint(currentCell); 379 if (curKeyHint == null) { 380 // If we ever don't have a hint and this is must-pass-one, then no hint 381 return null; 382 } 383 // If this is the first hint we find, set it 384 if (minKeyHint == null) { 385 minKeyHint = curKeyHint; 386 continue; 387 } 388 if (this.compareCell(minKeyHint, curKeyHint) > 0) { 389 minKeyHint = curKeyHint; 390 } 391 } 392 return minKeyHint; 393 } 394 395 @Override 396 public boolean equals(Object obj) { 397 if (this == obj) { 398 return true; 399 } 400 if (!(obj instanceof FilterListWithOR)) { 401 return false; 402 } 403 FilterListWithOR f = (FilterListWithOR) obj; 404 return this.filters.equals(f.getFilters()) && this.prevFilterRCList.equals(f.prevFilterRCList) 405 && this.prevCellList.equals(f.prevCellList); 406 } 407 408 @Override 409 public int hashCode() { 410 return Objects.hash(this.prevFilterRCList, this.prevCellList, this.filters); 411 } 412}