001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.filter; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Collections; 023import java.util.List; 024import java.util.Objects; 025import org.apache.hadoop.hbase.Cell; 026import org.apache.hadoop.hbase.CellUtil; 027import org.apache.yetus.audience.InterfaceAudience; 028 029/** 030 * FilterListWithOR represents an ordered list of filters which will be evaluated with an OR 031 * operator. 032 */ 033@InterfaceAudience.Private 034public class FilterListWithOR extends FilterListBase { 035 036 /** 037 * Save previous return code and previous cell for every filter in filter list. For MUST_PASS_ONE, 038 * we use the previous return code to decide whether we should pass current cell encountered to 039 * the filter. For MUST_PASS_ALL, the two list are meaningless. 040 */ 041 private List<ReturnCode> prevFilterRCList = null; 042 private List<Cell> prevCellList = null; 043 044 public FilterListWithOR(List<Filter> filters) { 045 super(filters); 046 prevFilterRCList = new ArrayList<>(Collections.nCopies(filters.size(), null)); 047 prevCellList = new ArrayList<>(Collections.nCopies(filters.size(), null)); 048 subFiltersIncludedCell = new ArrayList<>(Collections.nCopies(filters.size(), false)); 049 } 050 051 @Override 052 public void addFilterLists(List<Filter> filters) { 053 if (checkAndGetReversed(filters, isReversed()) != isReversed()) { 054 throw new IllegalArgumentException("Filters in the list must have the same reversed flag"); 055 } 056 this.filters.addAll(filters); 057 this.subFiltersIncludedCell.addAll(Collections.nCopies(filters.size(), false)); 058 this.prevFilterRCList.addAll(Collections.nCopies(filters.size(), null)); 059 this.prevCellList.addAll(Collections.nCopies(filters.size(), null)); 060 } 061 062 @Override 063 protected String formatLogFilters(List<Filter> logFilters) { 064 return String.format("FilterList OR (%d/%d): %s", logFilters.size(), this.size(), 065 logFilters.toString()); 066 } 067 068 /** 069 * For MUST_PASS_ONE, we cannot make sure that when filter-A in filter list return NEXT_COL then 070 * the next cell passing to filterList will be the first cell in next column, because if filter-B 071 * in filter list return SKIP, then the filter list will return SKIP. In this case, we should pass 072 * the cell following the previous cell, and it's possible that the next cell has the same column 073 * as the previous cell even if filter-A has NEXT_COL returned for the previous cell. So we should 074 * save the previous cell and the return code list when checking previous cell for every filter in 075 * filter list, and verify if currentCell fit the previous return code, if fit then pass the 076 * currentCell to the corresponding filter. (HBASE-17678) <br> 077 * Note that: In StoreScanner level, NEXT_ROW will skip to the next row in current family, and in 078 * RegionScanner level, NEXT_ROW will skip to the next row in current family and switch to the 079 * next family for RegionScanner, INCLUDE_AND_NEXT_ROW is the same. so we should pass current cell 080 * to the filter, if row mismatch or row match but column family mismatch. (HBASE-18368) 081 * @see org.apache.hadoop.hbase.filter.Filter.ReturnCode 082 * @param subFilter which sub-filter to calculate the return code by using previous cell and 083 * previous return code. 084 * @param prevCell the previous cell passed to given sub-filter. 085 * @param currentCell the current cell which will pass to given sub-filter. 086 * @param prevCode the previous return code for given sub-filter. 087 * @return return code calculated by using previous cell and previous return code. null means can 088 * not decide which return code should return, so we will pass the currentCell to 089 * subFilter for getting currentCell's return code, and it won't impact the sub-filter's 090 * internal states. 091 */ 092 private ReturnCode calculateReturnCodeByPrevCellAndRC(Filter subFilter, Cell currentCell, 093 Cell prevCell, ReturnCode prevCode) throws IOException { 094 if (prevCell == null || prevCode == null) { 095 return null; 096 } 097 switch (prevCode) { 098 case INCLUDE: 099 case SKIP: 100 return null; 101 case SEEK_NEXT_USING_HINT: 102 Cell nextHintCell = subFilter.getNextCellHint(prevCell); 103 return nextHintCell != null && compareCell(currentCell, nextHintCell) < 0 104 ? ReturnCode.SEEK_NEXT_USING_HINT 105 : null; 106 case NEXT_COL: 107 case INCLUDE_AND_NEXT_COL: 108 // Once row changed, reset() will clear prevCells, so we need not to compare their rows 109 // because rows are the same here. 110 return CellUtil.matchingColumn(prevCell, currentCell) ? ReturnCode.NEXT_COL : null; 111 case NEXT_ROW: 112 case INCLUDE_AND_SEEK_NEXT_ROW: 113 // As described above, rows are definitely the same, so we only compare the family. 114 return CellUtil.matchingFamily(prevCell, currentCell) ? ReturnCode.NEXT_ROW : null; 115 default: 116 throw new IllegalStateException("Received code is not valid."); 117 } 118 } 119 120 /** 121 * FilterList with MUST_PASS_ONE choose the minimal forward step among sub-filter in filter list. 122 * Let's call it: The Minimal Step Rule. So if filter-A in filter list return INCLUDE and filter-B 123 * in filter list return INCLUDE_AND_NEXT_COL, then the filter list should return INCLUDE. For 124 * SEEK_NEXT_USING_HINT, it's more special, because we do not know how far it will forward, so we 125 * use SKIP by default.<br/> 126 * <br/> 127 * The jump step will be: 128 * 129 * <pre> 130 * INCLUDE < SKIP < INCLUDE_AND_NEXT_COL < NEXT_COL < INCLUDE_AND_SEEK_NEXT_ROW < NEXT_ROW 131 * < SEEK_NEXT_USING_HINT 132 * </pre> 133 * 134 * Here, we have the following map to describe The Minimal Step Rule. if current return code (for 135 * previous sub-filters in filter list) is <strong>ReturnCode</strong>, and current filter returns 136 * <strong>localRC</strong>, then we should return map[ReturnCode][localRC] for the merged result, 137 * according to The Minimal Step Rule.<br/> 138 * 139 * <pre> 140 * LocalCode\ReturnCode INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW SKIP NEXT_COL NEXT_ROW SEEK_NEXT_USING_HINT 141 * INCLUDE INCLUDE INCLUDE INCLUDE INCLUDE INCLUDE INCLUDE INCLUDE 142 * INCLUDE_AND_NEXT_COL INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_NEXT_COL INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_NEXT_COL INCLUDE 143 * INCLUDE_AND_SEEK_NEXT_ROW INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW INCLUDE 144 * SKIP INCLUDE INCLUDE INCLUDE SKIP SKIP SKIP SKIP 145 * NEXT_COL INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_NEXT_COL SKIP NEXT_COL NEXT_COL SKIP 146 * NEXT_ROW INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW SKIP NEXT_COL NEXT_ROW SKIP 147 * SEEK_NEXT_USING_HINT INCLUDE INCLUDE INCLUDE SKIP SKIP SKIP SEEK_NEXT_USING_HINT 148 * </pre> 149 * 150 * @param rc Return code which is calculated by previous sub-filter(s) in filter list. 151 * @param localRC Return code of the current sub-filter in filter list. 152 * @return Return code which is merged by the return code of previous sub-filter(s) and the return 153 * code of current sub-filter. 154 */ 155 private ReturnCode mergeReturnCode(ReturnCode rc, ReturnCode localRC) { 156 if (rc == null) return localRC; 157 switch (localRC) { 158 case INCLUDE: 159 return ReturnCode.INCLUDE; 160 case INCLUDE_AND_NEXT_COL: 161 if ( 162 isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT) 163 ) { 164 return ReturnCode.INCLUDE; 165 } 166 if ( 167 isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW, 168 ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW) 169 ) { 170 return ReturnCode.INCLUDE_AND_NEXT_COL; 171 } 172 break; 173 case INCLUDE_AND_SEEK_NEXT_ROW: 174 if ( 175 isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT) 176 ) { 177 return ReturnCode.INCLUDE; 178 } 179 if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.NEXT_COL)) { 180 return ReturnCode.INCLUDE_AND_NEXT_COL; 181 } 182 if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW, ReturnCode.NEXT_ROW)) { 183 return ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW; 184 } 185 break; 186 case SKIP: 187 if ( 188 isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL, 189 ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW) 190 ) { 191 return ReturnCode.INCLUDE; 192 } 193 if ( 194 isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW, 195 ReturnCode.SEEK_NEXT_USING_HINT) 196 ) { 197 return ReturnCode.SKIP; 198 } 199 break; 200 case NEXT_COL: 201 if (isInReturnCodes(rc, ReturnCode.INCLUDE)) { 202 return ReturnCode.INCLUDE; 203 } 204 if (isInReturnCodes(rc, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) { 205 return ReturnCode.NEXT_COL; 206 } 207 if ( 208 isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW) 209 ) { 210 return ReturnCode.INCLUDE_AND_NEXT_COL; 211 } 212 if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT)) { 213 return ReturnCode.SKIP; 214 } 215 break; 216 case NEXT_ROW: 217 if (isInReturnCodes(rc, ReturnCode.INCLUDE)) { 218 return ReturnCode.INCLUDE; 219 } 220 if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL)) { 221 return ReturnCode.INCLUDE_AND_NEXT_COL; 222 } 223 if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) { 224 return ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW; 225 } 226 if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT)) { 227 return ReturnCode.SKIP; 228 } 229 if (isInReturnCodes(rc, ReturnCode.NEXT_COL)) { 230 return ReturnCode.NEXT_COL; 231 } 232 if (isInReturnCodes(rc, ReturnCode.NEXT_ROW)) { 233 return ReturnCode.NEXT_ROW; 234 } 235 break; 236 case SEEK_NEXT_USING_HINT: 237 if ( 238 isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL, 239 ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW) 240 ) { 241 return ReturnCode.INCLUDE; 242 } 243 if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) { 244 return ReturnCode.SKIP; 245 } 246 if (isInReturnCodes(rc, ReturnCode.SEEK_NEXT_USING_HINT)) { 247 return ReturnCode.SEEK_NEXT_USING_HINT; 248 } 249 break; 250 } 251 throw new IllegalStateException( 252 "Received code is not valid. rc: " + rc + ", localRC: " + localRC); 253 } 254 255 private void updatePrevFilterRCList(int index, ReturnCode currentRC) { 256 prevFilterRCList.set(index, currentRC); 257 } 258 259 private void updatePrevCellList(int index, Cell currentCell, ReturnCode currentRC) { 260 if (currentCell == null || currentRC == ReturnCode.INCLUDE || currentRC == ReturnCode.SKIP) { 261 // If previous return code is INCLUDE or SKIP, we should always pass the next cell to the 262 // corresponding sub-filter(need not test calculateReturnCodeByPrevCellAndRC() method), So we 263 // need not save current cell to prevCellList for saving heap memory. 264 prevCellList.set(index, null); 265 } else { 266 prevCellList.set(index, currentCell); 267 } 268 } 269 270 @Override 271 public ReturnCode filterCell(Cell c) throws IOException { 272 if (isEmpty()) { 273 return ReturnCode.INCLUDE; 274 } 275 ReturnCode rc = null; 276 for (int i = 0, n = filters.size(); i < n; i++) { 277 Filter filter = filters.get(i); 278 subFiltersIncludedCell.set(i, false); 279 280 Cell prevCell = this.prevCellList.get(i); 281 ReturnCode prevCode = this.prevFilterRCList.get(i); 282 if (filter.filterAllRemaining()) { 283 continue; 284 } 285 ReturnCode localRC = calculateReturnCodeByPrevCellAndRC(filter, c, prevCell, prevCode); 286 if (localRC == null) { 287 // Can not get return code based on previous cell and previous return code. In other words, 288 // we should pass the current cell to this sub-filter to get the return code, and it won't 289 // impact the sub-filter's internal state. 290 localRC = filter.filterCell(c); 291 } 292 293 // Update previous return code and previous cell for filter[i]. 294 updatePrevFilterRCList(i, localRC); 295 updatePrevCellList(i, c, localRC); 296 297 rc = mergeReturnCode(rc, localRC); 298 299 // For INCLUDE* case, we need to update the transformed cell. 300 if ( 301 isInReturnCodes(localRC, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL, 302 ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW) 303 ) { 304 subFiltersIncludedCell.set(i, true); 305 } 306 } 307 // Each sub-filter in filter list got true for filterAllRemaining(), if rc is null, so we should 308 // return SKIP. 309 return rc == null ? ReturnCode.SKIP : rc; 310 } 311 312 @Override 313 public void reset() throws IOException { 314 for (int i = 0, n = filters.size(); i < n; i++) { 315 filters.get(i).reset(); 316 subFiltersIncludedCell.set(i, false); 317 prevFilterRCList.set(i, null); 318 prevCellList.set(i, null); 319 } 320 } 321 322 @Override 323 public boolean filterRowKey(byte[] rowKey, int offset, int length) throws IOException { 324 if (isEmpty()) { 325 return super.filterRowKey(rowKey, offset, length); 326 } 327 boolean retVal = true; 328 for (int i = 0, n = filters.size(); i < n; i++) { 329 Filter filter = filters.get(i); 330 if (!filter.filterAllRemaining() && !filter.filterRowKey(rowKey, offset, length)) { 331 retVal = false; 332 } 333 } 334 return retVal; 335 } 336 337 @Override 338 public boolean filterRowKey(Cell firstRowCell) throws IOException { 339 if (isEmpty()) { 340 return super.filterRowKey(firstRowCell); 341 } 342 boolean retVal = true; 343 for (int i = 0, n = filters.size(); i < n; i++) { 344 Filter filter = filters.get(i); 345 if (!filter.filterAllRemaining() && !filter.filterRowKey(firstRowCell)) { 346 // Can't just return false here, because there are some filters (such as PrefixFilter) which 347 // will catch the row changed event by filterRowKey(). If we return early here, those 348 // filters will have no chance to update their row state. 349 retVal = false; 350 } 351 } 352 return retVal; 353 } 354 355 @Override 356 public boolean filterAllRemaining() throws IOException { 357 if (isEmpty()) { 358 return super.filterAllRemaining(); 359 } 360 for (int i = 0, n = filters.size(); i < n; i++) { 361 if (!filters.get(i).filterAllRemaining()) { 362 return false; 363 } 364 } 365 return true; 366 } 367 368 @Override 369 public boolean filterRow() throws IOException { 370 if (isEmpty()) { 371 return super.filterRow(); 372 } 373 for (int i = 0, n = filters.size(); i < n; i++) { 374 Filter filter = filters.get(i); 375 if (!filter.filterRow()) { 376 return false; 377 } 378 } 379 return true; 380 } 381 382 @Override 383 public Cell getNextCellHint(Cell currentCell) throws IOException { 384 if (isEmpty()) { 385 return super.getNextCellHint(currentCell); 386 } 387 Cell minKeyHint = null; 388 // If any condition can pass, we need to keep the min hint 389 for (int i = 0, n = filters.size(); i < n; i++) { 390 if (filters.get(i).filterAllRemaining()) { 391 continue; 392 } 393 Cell curKeyHint = filters.get(i).getNextCellHint(currentCell); 394 if (curKeyHint == null) { 395 // If we ever don't have a hint and this is must-pass-one, then no hint 396 return null; 397 } 398 // If this is the first hint we find, set it 399 if (minKeyHint == null) { 400 minKeyHint = curKeyHint; 401 continue; 402 } 403 if (this.compareCell(minKeyHint, curKeyHint) > 0) { 404 minKeyHint = curKeyHint; 405 } 406 } 407 return minKeyHint; 408 } 409 410 @Override 411 public boolean equals(Object obj) { 412 if (this == obj) { 413 return true; 414 } 415 if (!(obj instanceof FilterListWithOR)) { 416 return false; 417 } 418 FilterListWithOR f = (FilterListWithOR) obj; 419 return this.filters.equals(f.getFilters()) && this.prevFilterRCList.equals(f.prevFilterRCList) 420 && this.prevCellList.equals(f.prevCellList); 421 } 422 423 @Override 424 public int hashCode() { 425 return Objects.hash(this.prevFilterRCList, this.prevCellList, this.filters); 426 } 427}