001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019 020package org.apache.hadoop.hbase.filter; 021 022import org.apache.hadoop.hbase.Cell; 023import org.apache.hadoop.hbase.CellUtil; 024import org.apache.yetus.audience.InterfaceAudience; 025 026import java.io.IOException; 027import java.util.ArrayList; 028import java.util.Collections; 029import java.util.List; 030import java.util.Objects; 031 032/** 033 * FilterListWithOR represents an ordered list of filters which will be evaluated with an OR 034 * operator. 035 */ 036@InterfaceAudience.Private 037public class FilterListWithOR extends FilterListBase { 038 039 /** 040 * Save previous return code and previous cell for every filter in filter list. For MUST_PASS_ONE, 041 * we use the previous return code to decide whether we should pass current cell encountered to 042 * the filter. For MUST_PASS_ALL, the two list are meaningless. 043 */ 044 private List<ReturnCode> prevFilterRCList = null; 045 private List<Cell> prevCellList = null; 046 047 public FilterListWithOR(List<Filter> filters) { 048 super(filters); 049 prevFilterRCList = new ArrayList<>(Collections.nCopies(filters.size(), null)); 050 prevCellList = new ArrayList<>(Collections.nCopies(filters.size(), null)); 051 subFiltersIncludedCell = new ArrayList<>(Collections.nCopies(filters.size(), false)); 052 } 053 054 @Override 055 public void addFilterLists(List<Filter> filters) { 056 if (checkAndGetReversed(filters, isReversed()) != isReversed()) { 057 throw new IllegalArgumentException("Filters in the list must have the same reversed flag"); 058 } 059 this.filters.addAll(filters); 060 this.subFiltersIncludedCell.addAll(Collections.nCopies(filters.size(), false)); 061 this.prevFilterRCList.addAll(Collections.nCopies(filters.size(), null)); 062 this.prevCellList.addAll(Collections.nCopies(filters.size(), null)); 063 } 064 065 @Override 066 protected String formatLogFilters(List<Filter> logFilters) { 067 return String.format("FilterList OR (%d/%d): %s", logFilters.size(), this.size(), 068 logFilters.toString()); 069 } 070 071 /** 072 * For MUST_PASS_ONE, we cannot make sure that when filter-A in filter list return NEXT_COL then 073 * the next cell passing to filterList will be the first cell in next column, because if filter-B 074 * in filter list return SKIP, then the filter list will return SKIP. In this case, we should pass 075 * the cell following the previous cell, and it's possible that the next cell has the same column 076 * as the previous cell even if filter-A has NEXT_COL returned for the previous cell. So we should 077 * save the previous cell and the return code list when checking previous cell for every filter in 078 * filter list, and verify if currentCell fit the previous return code, if fit then pass the 079 * currentCell to the corresponding filter. (HBASE-17678) <br> 080 * Note that: In StoreScanner level, NEXT_ROW will skip to the next row in current family, and in 081 * RegionScanner level, NEXT_ROW will skip to the next row in current family and switch to the 082 * next family for RegionScanner, INCLUDE_AND_NEXT_ROW is the same. so we should pass current cell 083 * to the filter, if row mismatch or row match but column family mismatch. (HBASE-18368) 084 * @see org.apache.hadoop.hbase.filter.Filter.ReturnCode 085 * @param subFilter which sub-filter to calculate the return code by using previous cell and 086 * previous return code. 087 * @param prevCell the previous cell passed to given sub-filter. 088 * @param currentCell the current cell which will pass to given sub-filter. 089 * @param prevCode the previous return code for given sub-filter. 090 * @return return code calculated by using previous cell and previous return code. null means can 091 * not decide which return code should return, so we will pass the currentCell to 092 * subFilter for getting currentCell's return code, and it won't impact the sub-filter's 093 * internal states. 094 */ 095 private ReturnCode calculateReturnCodeByPrevCellAndRC(Filter subFilter, Cell currentCell, 096 Cell prevCell, ReturnCode prevCode) throws IOException { 097 if (prevCell == null || prevCode == null) { 098 return null; 099 } 100 switch (prevCode) { 101 case INCLUDE: 102 case SKIP: 103 return null; 104 case SEEK_NEXT_USING_HINT: 105 Cell nextHintCell = subFilter.getNextCellHint(prevCell); 106 return nextHintCell != null && compareCell(currentCell, nextHintCell) < 0 107 ? ReturnCode.SEEK_NEXT_USING_HINT : null; 108 case NEXT_COL: 109 case INCLUDE_AND_NEXT_COL: 110 // Once row changed, reset() will clear prevCells, so we need not to compare their rows 111 // because rows are the same here. 112 return CellUtil.matchingColumn(prevCell, currentCell) ? ReturnCode.NEXT_COL : null; 113 case NEXT_ROW: 114 case INCLUDE_AND_SEEK_NEXT_ROW: 115 // As described above, rows are definitely the same, so we only compare the family. 116 return CellUtil.matchingFamily(prevCell, currentCell) ? ReturnCode.NEXT_ROW : null; 117 default: 118 throw new IllegalStateException("Received code is not valid."); 119 } 120 } 121 122 /** 123 * FilterList with MUST_PASS_ONE choose the minimal forward step among sub-filter in filter list. 124 * Let's call it: The Minimal Step Rule. So if filter-A in filter list return INCLUDE and filter-B 125 * in filter list return INCLUDE_AND_NEXT_COL, then the filter list should return INCLUDE. For 126 * SEEK_NEXT_USING_HINT, it's more special, because we do not know how far it will forward, so we 127 * use SKIP by default.<br/> 128 * <br/> 129 * The jump step will be: 130 * 131 * <pre> 132 * INCLUDE < SKIP < INCLUDE_AND_NEXT_COL < NEXT_COL < INCLUDE_AND_SEEK_NEXT_ROW < NEXT_ROW < SEEK_NEXT_USING_HINT 133 * </pre> 134 * 135 * Here, we have the following map to describe The Minimal Step Rule. if current return code (for 136 * previous sub-filters in filter list) is <strong>ReturnCode</strong>, and current filter returns 137 * <strong>localRC</strong>, then we should return map[ReturnCode][localRC] for the merged result, 138 * according to The Minimal Step Rule.<br/> 139 * 140 * <pre> 141 * LocalCode\ReturnCode INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW SKIP NEXT_COL NEXT_ROW SEEK_NEXT_USING_HINT 142 * INCLUDE INCLUDE INCLUDE INCLUDE INCLUDE INCLUDE INCLUDE INCLUDE 143 * INCLUDE_AND_NEXT_COL INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_NEXT_COL INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_NEXT_COL INCLUDE 144 * INCLUDE_AND_SEEK_NEXT_ROW INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW INCLUDE 145 * SKIP INCLUDE INCLUDE INCLUDE SKIP SKIP SKIP SKIP 146 * NEXT_COL INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_NEXT_COL SKIP NEXT_COL NEXT_COL SKIP 147 * NEXT_ROW INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW SKIP NEXT_COL NEXT_ROW SKIP 148 * SEEK_NEXT_USING_HINT INCLUDE INCLUDE INCLUDE SKIP SKIP SKIP SEEK_NEXT_USING_HINT 149 * </pre> 150 * 151 * @param rc Return code which is calculated by previous sub-filter(s) in filter list. 152 * @param localRC Return code of the current sub-filter in filter list. 153 * @return Return code which is merged by the return code of previous sub-filter(s) and the return 154 * code of current sub-filter. 155 */ 156 private ReturnCode mergeReturnCode(ReturnCode rc, ReturnCode localRC) { 157 if (rc == null) return localRC; 158 switch (localRC) { 159 case INCLUDE: 160 return ReturnCode.INCLUDE; 161 case INCLUDE_AND_NEXT_COL: 162 if (isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.SKIP, 163 ReturnCode.SEEK_NEXT_USING_HINT)) { 164 return ReturnCode.INCLUDE; 165 } 166 if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW, 167 ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) { 168 return ReturnCode.INCLUDE_AND_NEXT_COL; 169 } 170 break; 171 case INCLUDE_AND_SEEK_NEXT_ROW: 172 if (isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.SKIP, 173 ReturnCode.SEEK_NEXT_USING_HINT)) { 174 return ReturnCode.INCLUDE; 175 } 176 if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.NEXT_COL)) { 177 return ReturnCode.INCLUDE_AND_NEXT_COL; 178 } 179 if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW, ReturnCode.NEXT_ROW)) { 180 return ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW; 181 } 182 break; 183 case SKIP: 184 if (isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL, 185 ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) { 186 return ReturnCode.INCLUDE; 187 } 188 if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW, 189 ReturnCode.SEEK_NEXT_USING_HINT)) { 190 return ReturnCode.SKIP; 191 } 192 break; 193 case NEXT_COL: 194 if (isInReturnCodes(rc, ReturnCode.INCLUDE)) { 195 return ReturnCode.INCLUDE; 196 } 197 if (isInReturnCodes(rc, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) { 198 return ReturnCode.NEXT_COL; 199 } 200 if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, 201 ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) { 202 return ReturnCode.INCLUDE_AND_NEXT_COL; 203 } 204 if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT)) { 205 return ReturnCode.SKIP; 206 } 207 break; 208 case NEXT_ROW: 209 if (isInReturnCodes(rc, ReturnCode.INCLUDE)) { 210 return ReturnCode.INCLUDE; 211 } 212 if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL)) { 213 return ReturnCode.INCLUDE_AND_NEXT_COL; 214 } 215 if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) { 216 return ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW; 217 } 218 if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT)) { 219 return ReturnCode.SKIP; 220 } 221 if (isInReturnCodes(rc, ReturnCode.NEXT_COL)) { 222 return ReturnCode.NEXT_COL; 223 } 224 if (isInReturnCodes(rc, ReturnCode.NEXT_ROW)) { 225 return ReturnCode.NEXT_ROW; 226 } 227 break; 228 case SEEK_NEXT_USING_HINT: 229 if (isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL, 230 ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) { 231 return ReturnCode.INCLUDE; 232 } 233 if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) { 234 return ReturnCode.SKIP; 235 } 236 if (isInReturnCodes(rc, ReturnCode.SEEK_NEXT_USING_HINT)) { 237 return ReturnCode.SEEK_NEXT_USING_HINT; 238 } 239 break; 240 } 241 throw new IllegalStateException( 242 "Received code is not valid. rc: " + rc + ", localRC: " + localRC); 243 } 244 245 private void updatePrevFilterRCList(int index, ReturnCode currentRC) { 246 prevFilterRCList.set(index, currentRC); 247 } 248 249 private void updatePrevCellList(int index, Cell currentCell, ReturnCode currentRC) { 250 if (currentCell == null || currentRC == ReturnCode.INCLUDE || currentRC == ReturnCode.SKIP) { 251 // If previous return code is INCLUDE or SKIP, we should always pass the next cell to the 252 // corresponding sub-filter(need not test calculateReturnCodeByPrevCellAndRC() method), So we 253 // need not save current cell to prevCellList for saving heap memory. 254 prevCellList.set(index, null); 255 } else { 256 prevCellList.set(index, currentCell); 257 } 258 } 259 260 @Override 261 public ReturnCode filterCell(Cell c) throws IOException { 262 if (isEmpty()) { 263 return ReturnCode.INCLUDE; 264 } 265 ReturnCode rc = null; 266 for (int i = 0, n = filters.size(); i < n; i++) { 267 Filter filter = filters.get(i); 268 subFiltersIncludedCell.set(i, false); 269 270 Cell prevCell = this.prevCellList.get(i); 271 ReturnCode prevCode = this.prevFilterRCList.get(i); 272 if (filter.filterAllRemaining()) { 273 continue; 274 } 275 ReturnCode localRC = calculateReturnCodeByPrevCellAndRC(filter, c, prevCell, prevCode); 276 if (localRC == null) { 277 // Can not get return code based on previous cell and previous return code. In other words, 278 // we should pass the current cell to this sub-filter to get the return code, and it won't 279 // impact the sub-filter's internal state. 280 localRC = filter.filterCell(c); 281 } 282 283 // Update previous return code and previous cell for filter[i]. 284 updatePrevFilterRCList(i, localRC); 285 updatePrevCellList(i, c, localRC); 286 287 rc = mergeReturnCode(rc, localRC); 288 289 // For INCLUDE* case, we need to update the transformed cell. 290 if (isInReturnCodes(localRC, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL, 291 ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) { 292 subFiltersIncludedCell.set(i, true); 293 } 294 } 295 // Each sub-filter in filter list got true for filterAllRemaining(), if rc is null, so we should 296 // return SKIP. 297 return rc == null ? ReturnCode.SKIP : rc; 298 } 299 300 @Override 301 public void reset() throws IOException { 302 for (int i = 0, n = filters.size(); i < n; i++) { 303 filters.get(i).reset(); 304 subFiltersIncludedCell.set(i, false); 305 prevFilterRCList.set(i, null); 306 prevCellList.set(i, null); 307 } 308 } 309 310 @Override 311 public boolean filterRowKey(byte[] rowKey, int offset, int length) throws IOException { 312 if (isEmpty()) { 313 return super.filterRowKey(rowKey, offset, length); 314 } 315 boolean retVal = true; 316 for (int i = 0, n = filters.size(); i < n; i++) { 317 Filter filter = filters.get(i); 318 if (!filter.filterAllRemaining() && !filter.filterRowKey(rowKey, offset, length)) { 319 retVal = false; 320 } 321 } 322 return retVal; 323 } 324 325 @Override 326 public boolean filterRowKey(Cell firstRowCell) throws IOException { 327 if (isEmpty()) { 328 return super.filterRowKey(firstRowCell); 329 } 330 boolean retVal = true; 331 for (int i = 0, n = filters.size(); i < n; i++) { 332 Filter filter = filters.get(i); 333 if (!filter.filterAllRemaining() && !filter.filterRowKey(firstRowCell)) { 334 // Can't just return false here, because there are some filters (such as PrefixFilter) which 335 // will catch the row changed event by filterRowKey(). If we return early here, those 336 // filters will have no chance to update their row state. 337 retVal = false; 338 } 339 } 340 return retVal; 341 } 342 343 @Override 344 public boolean filterAllRemaining() throws IOException { 345 if (isEmpty()) { 346 return super.filterAllRemaining(); 347 } 348 for (int i = 0, n = filters.size(); i < n; i++) { 349 if (!filters.get(i).filterAllRemaining()) { 350 return false; 351 } 352 } 353 return true; 354 } 355 356 @Override 357 public boolean filterRow() throws IOException { 358 if (isEmpty()) { 359 return super.filterRow(); 360 } 361 for (int i = 0, n = filters.size(); i < n; i++) { 362 Filter filter = filters.get(i); 363 if (!filter.filterRow()) { 364 return false; 365 } 366 } 367 return true; 368 } 369 370 @Override 371 public Cell getNextCellHint(Cell currentCell) throws IOException { 372 if (isEmpty()) { 373 return super.getNextCellHint(currentCell); 374 } 375 Cell minKeyHint = null; 376 // If any condition can pass, we need to keep the min hint 377 for (int i = 0, n = filters.size(); i < n; i++) { 378 if (filters.get(i).filterAllRemaining()) { 379 continue; 380 } 381 Cell curKeyHint = filters.get(i).getNextCellHint(currentCell); 382 if (curKeyHint == null) { 383 // If we ever don't have a hint and this is must-pass-one, then no hint 384 return null; 385 } 386 // If this is the first hint we find, set it 387 if (minKeyHint == null) { 388 minKeyHint = curKeyHint; 389 continue; 390 } 391 if (this.compareCell(minKeyHint, curKeyHint) > 0) { 392 minKeyHint = curKeyHint; 393 } 394 } 395 return minKeyHint; 396 } 397 398 399 @Override 400 public boolean equals(Object obj) { 401 if (obj == null || (!(obj instanceof FilterListWithOR))) { 402 return false; 403 } 404 if (this == obj) { 405 return true; 406 } 407 FilterListWithOR f = (FilterListWithOR) obj; 408 return this.filters.equals(f.getFilters()) && 409 this.prevFilterRCList.equals(f.prevFilterRCList) && 410 this.prevCellList.equals(f.prevCellList); 411 } 412 413 @Override 414 public int hashCode() { 415 return Objects.hash(this.prevFilterRCList, this.prevCellList, this.filters); 416 } 417}