001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Collections;
023import java.util.List;
024import java.util.Objects;
025import org.apache.hadoop.hbase.Cell;
026import org.apache.hadoop.hbase.CellUtil;
027import org.apache.yetus.audience.InterfaceAudience;
028
029/**
030 * FilterListWithOR represents an ordered list of filters which will be evaluated with an OR
031 * operator.
032 */
033@InterfaceAudience.Private
034public class FilterListWithOR extends FilterListBase {
035
036  /**
037   * Save previous return code and previous cell for every filter in filter list. For MUST_PASS_ONE,
038   * we use the previous return code to decide whether we should pass current cell encountered to
039   * the filter. For MUST_PASS_ALL, the two list are meaningless.
040   */
041  private List<ReturnCode> prevFilterRCList = null;
042  private List<Cell> prevCellList = null;
043
044  public FilterListWithOR(List<Filter> filters) {
045    super(filters);
046    prevFilterRCList = new ArrayList<>(Collections.nCopies(filters.size(), null));
047    prevCellList = new ArrayList<>(Collections.nCopies(filters.size(), null));
048    subFiltersIncludedCell = new ArrayList<>(Collections.nCopies(filters.size(), false));
049  }
050
051  @Override
052  public void addFilterLists(List<Filter> filters) {
053    if (checkAndGetReversed(filters, isReversed()) != isReversed()) {
054      throw new IllegalArgumentException("Filters in the list must have the same reversed flag");
055    }
056    this.filters.addAll(filters);
057    this.subFiltersIncludedCell.addAll(Collections.nCopies(filters.size(), false));
058    this.prevFilterRCList.addAll(Collections.nCopies(filters.size(), null));
059    this.prevCellList.addAll(Collections.nCopies(filters.size(), null));
060  }
061
062  @Override
063  protected String formatLogFilters(List<Filter> logFilters) {
064    return String.format("FilterList OR (%d/%d): %s", logFilters.size(), this.size(),
065      logFilters.toString());
066  }
067
068  /**
069   * For MUST_PASS_ONE, we cannot make sure that when filter-A in filter list return NEXT_COL then
070   * the next cell passing to filterList will be the first cell in next column, because if filter-B
071   * in filter list return SKIP, then the filter list will return SKIP. In this case, we should pass
072   * the cell following the previous cell, and it's possible that the next cell has the same column
073   * as the previous cell even if filter-A has NEXT_COL returned for the previous cell. So we should
074   * save the previous cell and the return code list when checking previous cell for every filter in
075   * filter list, and verify if currentCell fit the previous return code, if fit then pass the
076   * currentCell to the corresponding filter. (HBASE-17678) <br>
077   * Note that: In StoreScanner level, NEXT_ROW will skip to the next row in current family, and in
078   * RegionScanner level, NEXT_ROW will skip to the next row in current family and switch to the
079   * next family for RegionScanner, INCLUDE_AND_NEXT_ROW is the same. so we should pass current cell
080   * to the filter, if row mismatch or row match but column family mismatch. (HBASE-18368)
081   * @see org.apache.hadoop.hbase.filter.Filter.ReturnCode
082   * @param subFilter   which sub-filter to calculate the return code by using previous cell and
083   *                    previous return code.
084   * @param prevCell    the previous cell passed to given sub-filter.
085   * @param currentCell the current cell which will pass to given sub-filter.
086   * @param prevCode    the previous return code for given sub-filter.
087   * @return return code calculated by using previous cell and previous return code. null means can
088   *         not decide which return code should return, so we will pass the currentCell to
089   *         subFilter for getting currentCell's return code, and it won't impact the sub-filter's
090   *         internal states.
091   */
092  private ReturnCode calculateReturnCodeByPrevCellAndRC(Filter subFilter, Cell currentCell,
093    Cell prevCell, ReturnCode prevCode) throws IOException {
094    if (prevCell == null || prevCode == null) {
095      return null;
096    }
097    switch (prevCode) {
098      case INCLUDE:
099      case SKIP:
100        return null;
101      case SEEK_NEXT_USING_HINT:
102        Cell nextHintCell = subFilter.getNextCellHint(prevCell);
103        return nextHintCell != null && compareCell(currentCell, nextHintCell) < 0
104          ? ReturnCode.SEEK_NEXT_USING_HINT
105          : null;
106      case NEXT_COL:
107      case INCLUDE_AND_NEXT_COL:
108        // Once row changed, reset() will clear prevCells, so we need not to compare their rows
109        // because rows are the same here.
110        return CellUtil.matchingColumn(prevCell, currentCell) ? ReturnCode.NEXT_COL : null;
111      case NEXT_ROW:
112      case INCLUDE_AND_SEEK_NEXT_ROW:
113        // As described above, rows are definitely the same, so we only compare the family.
114        return CellUtil.matchingFamily(prevCell, currentCell) ? ReturnCode.NEXT_ROW : null;
115      default:
116        throw new IllegalStateException("Received code is not valid.");
117    }
118  }
119
120  /**
121   * FilterList with MUST_PASS_ONE choose the minimal forward step among sub-filter in filter list.
122   * Let's call it: The Minimal Step Rule. So if filter-A in filter list return INCLUDE and filter-B
123   * in filter list return INCLUDE_AND_NEXT_COL, then the filter list should return INCLUDE. For
124   * SEEK_NEXT_USING_HINT, it's more special, because we do not know how far it will forward, so we
125   * use SKIP by default.<br/>
126   * <br/>
127   * The jump step will be:
128   *
129   * <pre>
130   * INCLUDE &lt; SKIP &lt; INCLUDE_AND_NEXT_COL &lt; NEXT_COL &lt; INCLUDE_AND_SEEK_NEXT_ROW &lt; NEXT_ROW
131   *     &lt; SEEK_NEXT_USING_HINT
132   * </pre>
133   *
134   * Here, we have the following map to describe The Minimal Step Rule. if current return code (for
135   * previous sub-filters in filter list) is <strong>ReturnCode</strong>, and current filter returns
136   * <strong>localRC</strong>, then we should return map[ReturnCode][localRC] for the merged result,
137   * according to The Minimal Step Rule.<br/>
138   *
139   * <pre>
140   * LocalCode\ReturnCode       INCLUDE INCLUDE_AND_NEXT_COL     INCLUDE_AND_SEEK_NEXT_ROW  SKIP      NEXT_COL              NEXT_ROW                  SEEK_NEXT_USING_HINT
141   * INCLUDE                    INCLUDE INCLUDE                  INCLUDE                    INCLUDE   INCLUDE               INCLUDE                   INCLUDE
142   * INCLUDE_AND_NEXT_COL       INCLUDE INCLUDE_AND_NEXT_COL     INCLUDE_AND_NEXT_COL       INCLUDE   INCLUDE_AND_NEXT_COL  INCLUDE_AND_NEXT_COL      INCLUDE
143   * INCLUDE_AND_SEEK_NEXT_ROW  INCLUDE INCLUDE_AND_NEXT_COL     INCLUDE_AND_SEEK_NEXT_ROW  INCLUDE   INCLUDE_AND_NEXT_COL  INCLUDE_AND_SEEK_NEXT_ROW INCLUDE
144   * SKIP                       INCLUDE INCLUDE                  INCLUDE                    SKIP      SKIP                  SKIP                      SKIP
145   * NEXT_COL                   INCLUDE INCLUDE_AND_NEXT_COL     INCLUDE_AND_NEXT_COL       SKIP      NEXT_COL              NEXT_COL                  SKIP
146   * NEXT_ROW                   INCLUDE INCLUDE_AND_NEXT_COL     INCLUDE_AND_SEEK_NEXT_ROW  SKIP      NEXT_COL              NEXT_ROW                  SKIP
147   * SEEK_NEXT_USING_HINT       INCLUDE INCLUDE                  INCLUDE                    SKIP      SKIP                  SKIP                      SEEK_NEXT_USING_HINT
148   * </pre>
149   *
150   * @param rc      Return code which is calculated by previous sub-filter(s) in filter list.
151   * @param localRC Return code of the current sub-filter in filter list.
152   * @return Return code which is merged by the return code of previous sub-filter(s) and the return
153   *         code of current sub-filter.
154   */
155  private ReturnCode mergeReturnCode(ReturnCode rc, ReturnCode localRC) {
156    if (rc == null) return localRC;
157    switch (localRC) {
158      case INCLUDE:
159        return ReturnCode.INCLUDE;
160      case INCLUDE_AND_NEXT_COL:
161        if (
162          isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT)
163        ) {
164          return ReturnCode.INCLUDE;
165        }
166        if (
167          isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW,
168            ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)
169        ) {
170          return ReturnCode.INCLUDE_AND_NEXT_COL;
171        }
172        break;
173      case INCLUDE_AND_SEEK_NEXT_ROW:
174        if (
175          isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT)
176        ) {
177          return ReturnCode.INCLUDE;
178        }
179        if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.NEXT_COL)) {
180          return ReturnCode.INCLUDE_AND_NEXT_COL;
181        }
182        if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW, ReturnCode.NEXT_ROW)) {
183          return ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW;
184        }
185        break;
186      case SKIP:
187        if (
188          isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL,
189            ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)
190        ) {
191          return ReturnCode.INCLUDE;
192        }
193        if (
194          isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW,
195            ReturnCode.SEEK_NEXT_USING_HINT)
196        ) {
197          return ReturnCode.SKIP;
198        }
199        break;
200      case NEXT_COL:
201        if (isInReturnCodes(rc, ReturnCode.INCLUDE)) {
202          return ReturnCode.INCLUDE;
203        }
204        if (isInReturnCodes(rc, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) {
205          return ReturnCode.NEXT_COL;
206        }
207        if (
208          isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)
209        ) {
210          return ReturnCode.INCLUDE_AND_NEXT_COL;
211        }
212        if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT)) {
213          return ReturnCode.SKIP;
214        }
215        break;
216      case NEXT_ROW:
217        if (isInReturnCodes(rc, ReturnCode.INCLUDE)) {
218          return ReturnCode.INCLUDE;
219        }
220        if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL)) {
221          return ReturnCode.INCLUDE_AND_NEXT_COL;
222        }
223        if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) {
224          return ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW;
225        }
226        if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT)) {
227          return ReturnCode.SKIP;
228        }
229        if (isInReturnCodes(rc, ReturnCode.NEXT_COL)) {
230          return ReturnCode.NEXT_COL;
231        }
232        if (isInReturnCodes(rc, ReturnCode.NEXT_ROW)) {
233          return ReturnCode.NEXT_ROW;
234        }
235        break;
236      case SEEK_NEXT_USING_HINT:
237        if (
238          isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL,
239            ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)
240        ) {
241          return ReturnCode.INCLUDE;
242        }
243        if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) {
244          return ReturnCode.SKIP;
245        }
246        if (isInReturnCodes(rc, ReturnCode.SEEK_NEXT_USING_HINT)) {
247          return ReturnCode.SEEK_NEXT_USING_HINT;
248        }
249        break;
250    }
251    throw new IllegalStateException(
252      "Received code is not valid. rc: " + rc + ", localRC: " + localRC);
253  }
254
255  private void updatePrevFilterRCList(int index, ReturnCode currentRC) {
256    prevFilterRCList.set(index, currentRC);
257  }
258
259  private void updatePrevCellList(int index, Cell currentCell, ReturnCode currentRC) {
260    if (currentCell == null || currentRC == ReturnCode.INCLUDE || currentRC == ReturnCode.SKIP) {
261      // If previous return code is INCLUDE or SKIP, we should always pass the next cell to the
262      // corresponding sub-filter(need not test calculateReturnCodeByPrevCellAndRC() method), So we
263      // need not save current cell to prevCellList for saving heap memory.
264      prevCellList.set(index, null);
265    } else {
266      prevCellList.set(index, currentCell);
267    }
268  }
269
270  @Override
271  public ReturnCode filterCell(Cell c) throws IOException {
272    if (isEmpty()) {
273      return ReturnCode.INCLUDE;
274    }
275    ReturnCode rc = null;
276    for (int i = 0, n = filters.size(); i < n; i++) {
277      Filter filter = filters.get(i);
278      subFiltersIncludedCell.set(i, false);
279
280      Cell prevCell = this.prevCellList.get(i);
281      ReturnCode prevCode = this.prevFilterRCList.get(i);
282      if (filter.filterAllRemaining()) {
283        continue;
284      }
285      ReturnCode localRC = calculateReturnCodeByPrevCellAndRC(filter, c, prevCell, prevCode);
286      if (localRC == null) {
287        // Can not get return code based on previous cell and previous return code. In other words,
288        // we should pass the current cell to this sub-filter to get the return code, and it won't
289        // impact the sub-filter's internal state.
290        localRC = filter.filterCell(c);
291      }
292
293      // Update previous return code and previous cell for filter[i].
294      updatePrevFilterRCList(i, localRC);
295      updatePrevCellList(i, c, localRC);
296
297      rc = mergeReturnCode(rc, localRC);
298
299      // For INCLUDE* case, we need to update the transformed cell.
300      if (
301        isInReturnCodes(localRC, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL,
302          ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)
303      ) {
304        subFiltersIncludedCell.set(i, true);
305      }
306    }
307    // Each sub-filter in filter list got true for filterAllRemaining(), if rc is null, so we should
308    // return SKIP.
309    return rc == null ? ReturnCode.SKIP : rc;
310  }
311
312  @Override
313  public void reset() throws IOException {
314    for (int i = 0, n = filters.size(); i < n; i++) {
315      filters.get(i).reset();
316      subFiltersIncludedCell.set(i, false);
317      prevFilterRCList.set(i, null);
318      prevCellList.set(i, null);
319    }
320  }
321
322  @Override
323  public boolean filterRowKey(Cell firstRowCell) throws IOException {
324    if (isEmpty()) {
325      return super.filterRowKey(firstRowCell);
326    }
327    boolean retVal = true;
328    for (int i = 0, n = filters.size(); i < n; i++) {
329      Filter filter = filters.get(i);
330      if (!filter.filterAllRemaining() && !filter.filterRowKey(firstRowCell)) {
331        // Can't just return false here, because there are some filters (such as PrefixFilter) which
332        // will catch the row changed event by filterRowKey(). If we return early here, those
333        // filters will have no chance to update their row state.
334        retVal = false;
335      }
336    }
337    return retVal;
338  }
339
340  @Override
341  public boolean filterAllRemaining() throws IOException {
342    if (isEmpty()) {
343      return super.filterAllRemaining();
344    }
345    for (int i = 0, n = filters.size(); i < n; i++) {
346      if (!filters.get(i).filterAllRemaining()) {
347        return false;
348      }
349    }
350    return true;
351  }
352
353  @Override
354  public boolean filterRow() throws IOException {
355    if (isEmpty()) {
356      return super.filterRow();
357    }
358    for (int i = 0, n = filters.size(); i < n; i++) {
359      Filter filter = filters.get(i);
360      if (!filter.filterRow()) {
361        return false;
362      }
363    }
364    return true;
365  }
366
367  @Override
368  public Cell getNextCellHint(Cell currentCell) throws IOException {
369    if (isEmpty()) {
370      return super.getNextCellHint(currentCell);
371    }
372    Cell minKeyHint = null;
373    // If any condition can pass, we need to keep the min hint
374    for (int i = 0, n = filters.size(); i < n; i++) {
375      if (filters.get(i).filterAllRemaining()) {
376        continue;
377      }
378      Cell curKeyHint = filters.get(i).getNextCellHint(currentCell);
379      if (curKeyHint == null) {
380        // If we ever don't have a hint and this is must-pass-one, then no hint
381        return null;
382      }
383      // If this is the first hint we find, set it
384      if (minKeyHint == null) {
385        minKeyHint = curKeyHint;
386        continue;
387      }
388      if (this.compareCell(minKeyHint, curKeyHint) > 0) {
389        minKeyHint = curKeyHint;
390      }
391    }
392    return minKeyHint;
393  }
394
395  @Override
396  public boolean equals(Object obj) {
397    if (this == obj) {
398      return true;
399    }
400    if (!(obj instanceof FilterListWithOR)) {
401      return false;
402    }
403    FilterListWithOR f = (FilterListWithOR) obj;
404    return this.filters.equals(f.getFilters()) && this.prevFilterRCList.equals(f.prevFilterRCList)
405      && this.prevCellList.equals(f.prevCellList);
406  }
407
408  @Override
409  public int hashCode() {
410    return Objects.hash(this.prevFilterRCList, this.prevCellList, this.filters);
411  }
412}