001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.filter;
021
022import org.apache.hadoop.hbase.Cell;
023import org.apache.hadoop.hbase.CellUtil;
024import org.apache.yetus.audience.InterfaceAudience;
025
026import java.io.IOException;
027import java.util.ArrayList;
028import java.util.Collections;
029import java.util.List;
030import java.util.Objects;
031
032/**
033 * FilterListWithOR represents an ordered list of filters which will be evaluated with an OR
034 * operator.
035 */
036@InterfaceAudience.Private
037public class FilterListWithOR extends FilterListBase {
038
039  /**
040   * Save previous return code and previous cell for every filter in filter list. For MUST_PASS_ONE,
041   * we use the previous return code to decide whether we should pass current cell encountered to
042   * the filter. For MUST_PASS_ALL, the two list are meaningless.
043   */
044  private List<ReturnCode> prevFilterRCList = null;
045  private List<Cell> prevCellList = null;
046
047  public FilterListWithOR(List<Filter> filters) {
048    super(filters);
049    prevFilterRCList = new ArrayList<>(Collections.nCopies(filters.size(), null));
050    prevCellList = new ArrayList<>(Collections.nCopies(filters.size(), null));
051    subFiltersIncludedCell = new ArrayList<>(Collections.nCopies(filters.size(), false));
052  }
053
054  @Override
055  public void addFilterLists(List<Filter> filters) {
056    if (checkAndGetReversed(filters, isReversed()) != isReversed()) {
057      throw new IllegalArgumentException("Filters in the list must have the same reversed flag");
058    }
059    this.filters.addAll(filters);
060    this.subFiltersIncludedCell.addAll(Collections.nCopies(filters.size(), false));
061    this.prevFilterRCList.addAll(Collections.nCopies(filters.size(), null));
062    this.prevCellList.addAll(Collections.nCopies(filters.size(), null));
063  }
064
065  @Override
066  protected String formatLogFilters(List<Filter> logFilters) {
067    return String.format("FilterList OR (%d/%d): %s", logFilters.size(), this.size(),
068      logFilters.toString());
069  }
070
071  /**
072   * For MUST_PASS_ONE, we cannot make sure that when filter-A in filter list return NEXT_COL then
073   * the next cell passing to filterList will be the first cell in next column, because if filter-B
074   * in filter list return SKIP, then the filter list will return SKIP. In this case, we should pass
075   * the cell following the previous cell, and it's possible that the next cell has the same column
076   * as the previous cell even if filter-A has NEXT_COL returned for the previous cell. So we should
077   * save the previous cell and the return code list when checking previous cell for every filter in
078   * filter list, and verify if currentCell fit the previous return code, if fit then pass the
079   * currentCell to the corresponding filter. (HBASE-17678) <br>
080   * Note that: In StoreScanner level, NEXT_ROW will skip to the next row in current family, and in
081   * RegionScanner level, NEXT_ROW will skip to the next row in current family and switch to the
082   * next family for RegionScanner, INCLUDE_AND_NEXT_ROW is the same. so we should pass current cell
083   * to the filter, if row mismatch or row match but column family mismatch. (HBASE-18368)
084   * @see org.apache.hadoop.hbase.filter.Filter.ReturnCode
085   * @param subFilter which sub-filter to calculate the return code by using previous cell and
086   *          previous return code.
087   * @param prevCell the previous cell passed to given sub-filter.
088   * @param currentCell the current cell which will pass to given sub-filter.
089   * @param prevCode the previous return code for given sub-filter.
090   * @return return code calculated by using previous cell and previous return code. null means can
091   *         not decide which return code should return, so we will pass the currentCell to
092   *         subFilter for getting currentCell's return code, and it won't impact the sub-filter's
093   *         internal states.
094   */
095  private ReturnCode calculateReturnCodeByPrevCellAndRC(Filter subFilter, Cell currentCell,
096      Cell prevCell, ReturnCode prevCode) throws IOException {
097    if (prevCell == null || prevCode == null) {
098      return null;
099    }
100    switch (prevCode) {
101    case INCLUDE:
102    case SKIP:
103        return null;
104    case SEEK_NEXT_USING_HINT:
105        Cell nextHintCell = subFilter.getNextCellHint(prevCell);
106        return nextHintCell != null && compareCell(currentCell, nextHintCell) < 0
107          ? ReturnCode.SEEK_NEXT_USING_HINT : null;
108    case NEXT_COL:
109    case INCLUDE_AND_NEXT_COL:
110        // Once row changed, reset() will clear prevCells, so we need not to compare their rows
111        // because rows are the same here.
112        return CellUtil.matchingColumn(prevCell, currentCell) ? ReturnCode.NEXT_COL : null;
113    case NEXT_ROW:
114    case INCLUDE_AND_SEEK_NEXT_ROW:
115        // As described above, rows are definitely the same, so we only compare the family.
116        return CellUtil.matchingFamily(prevCell, currentCell) ? ReturnCode.NEXT_ROW : null;
117    default:
118        throw new IllegalStateException("Received code is not valid.");
119    }
120  }
121
122  /**
123   * FilterList with MUST_PASS_ONE choose the minimal forward step among sub-filter in filter list.
124   * Let's call it: The Minimal Step Rule. So if filter-A in filter list return INCLUDE and filter-B
125   * in filter list return INCLUDE_AND_NEXT_COL, then the filter list should return INCLUDE. For
126   * SEEK_NEXT_USING_HINT, it's more special, because we do not know how far it will forward, so we
127   * use SKIP by default.<br/>
128   * <br/>
129   * The jump step will be:
130   *
131   * <pre>
132   * INCLUDE &lt; SKIP &lt; INCLUDE_AND_NEXT_COL &lt; NEXT_COL &lt; INCLUDE_AND_SEEK_NEXT_ROW &lt; NEXT_ROW &lt; SEEK_NEXT_USING_HINT
133   * </pre>
134   *
135   * Here, we have the following map to describe The Minimal Step Rule. if current return code (for
136   * previous sub-filters in filter list) is <strong>ReturnCode</strong>, and current filter returns
137   * <strong>localRC</strong>, then we should return map[ReturnCode][localRC] for the merged result,
138   * according to The Minimal Step Rule.<br/>
139   *
140   * <pre>
141   * LocalCode\ReturnCode       INCLUDE INCLUDE_AND_NEXT_COL     INCLUDE_AND_SEEK_NEXT_ROW  SKIP      NEXT_COL              NEXT_ROW                  SEEK_NEXT_USING_HINT
142   * INCLUDE                    INCLUDE INCLUDE                  INCLUDE                    INCLUDE   INCLUDE               INCLUDE                   INCLUDE
143   * INCLUDE_AND_NEXT_COL       INCLUDE INCLUDE_AND_NEXT_COL     INCLUDE_AND_NEXT_COL       INCLUDE   INCLUDE_AND_NEXT_COL  INCLUDE_AND_NEXT_COL      INCLUDE
144   * INCLUDE_AND_SEEK_NEXT_ROW  INCLUDE INCLUDE_AND_NEXT_COL     INCLUDE_AND_SEEK_NEXT_ROW  INCLUDE   INCLUDE_AND_NEXT_COL  INCLUDE_AND_SEEK_NEXT_ROW INCLUDE
145   * SKIP                       INCLUDE INCLUDE                  INCLUDE                    SKIP      SKIP                  SKIP                      SKIP
146   * NEXT_COL                   INCLUDE INCLUDE_AND_NEXT_COL     INCLUDE_AND_NEXT_COL       SKIP      NEXT_COL              NEXT_COL                  SKIP
147   * NEXT_ROW                   INCLUDE INCLUDE_AND_NEXT_COL     INCLUDE_AND_SEEK_NEXT_ROW  SKIP      NEXT_COL              NEXT_ROW                  SKIP
148   * SEEK_NEXT_USING_HINT       INCLUDE INCLUDE                  INCLUDE                    SKIP      SKIP                  SKIP                      SEEK_NEXT_USING_HINT
149   * </pre>
150   *
151   * @param rc Return code which is calculated by previous sub-filter(s) in filter list.
152   * @param localRC Return code of the current sub-filter in filter list.
153   * @return Return code which is merged by the return code of previous sub-filter(s) and the return
154   *         code of current sub-filter.
155   */
156  private ReturnCode mergeReturnCode(ReturnCode rc, ReturnCode localRC) {
157    if (rc == null) return localRC;
158    switch (localRC) {
159    case INCLUDE:
160      return ReturnCode.INCLUDE;
161    case INCLUDE_AND_NEXT_COL:
162      if (isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.SKIP,
163        ReturnCode.SEEK_NEXT_USING_HINT)) {
164        return ReturnCode.INCLUDE;
165      }
166      if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW,
167        ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) {
168        return ReturnCode.INCLUDE_AND_NEXT_COL;
169      }
170      break;
171    case INCLUDE_AND_SEEK_NEXT_ROW:
172      if (isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.SKIP,
173        ReturnCode.SEEK_NEXT_USING_HINT)) {
174        return ReturnCode.INCLUDE;
175      }
176      if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.NEXT_COL)) {
177        return ReturnCode.INCLUDE_AND_NEXT_COL;
178      }
179      if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW, ReturnCode.NEXT_ROW)) {
180        return ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW;
181      }
182      break;
183    case SKIP:
184      if (isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL,
185        ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) {
186        return ReturnCode.INCLUDE;
187      }
188      if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW,
189        ReturnCode.SEEK_NEXT_USING_HINT)) {
190        return ReturnCode.SKIP;
191      }
192      break;
193    case NEXT_COL:
194      if (isInReturnCodes(rc, ReturnCode.INCLUDE)) {
195        return ReturnCode.INCLUDE;
196      }
197      if (isInReturnCodes(rc, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) {
198        return ReturnCode.NEXT_COL;
199      }
200      if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL,
201        ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) {
202        return ReturnCode.INCLUDE_AND_NEXT_COL;
203      }
204      if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT)) {
205        return ReturnCode.SKIP;
206      }
207      break;
208    case NEXT_ROW:
209      if (isInReturnCodes(rc, ReturnCode.INCLUDE)) {
210        return ReturnCode.INCLUDE;
211      }
212      if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL)) {
213        return ReturnCode.INCLUDE_AND_NEXT_COL;
214      }
215      if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) {
216        return ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW;
217      }
218      if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT)) {
219        return ReturnCode.SKIP;
220      }
221      if (isInReturnCodes(rc, ReturnCode.NEXT_COL)) {
222        return ReturnCode.NEXT_COL;
223      }
224      if (isInReturnCodes(rc, ReturnCode.NEXT_ROW)) {
225        return ReturnCode.NEXT_ROW;
226      }
227      break;
228    case SEEK_NEXT_USING_HINT:
229      if (isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL,
230        ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) {
231        return ReturnCode.INCLUDE;
232      }
233      if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) {
234        return ReturnCode.SKIP;
235      }
236      if (isInReturnCodes(rc, ReturnCode.SEEK_NEXT_USING_HINT)) {
237        return ReturnCode.SEEK_NEXT_USING_HINT;
238      }
239      break;
240    }
241    throw new IllegalStateException(
242        "Received code is not valid. rc: " + rc + ", localRC: " + localRC);
243  }
244
245  private void updatePrevFilterRCList(int index, ReturnCode currentRC) {
246    prevFilterRCList.set(index, currentRC);
247  }
248
249  private void updatePrevCellList(int index, Cell currentCell, ReturnCode currentRC) {
250    if (currentCell == null || currentRC == ReturnCode.INCLUDE || currentRC == ReturnCode.SKIP) {
251      // If previous return code is INCLUDE or SKIP, we should always pass the next cell to the
252      // corresponding sub-filter(need not test calculateReturnCodeByPrevCellAndRC() method), So we
253      // need not save current cell to prevCellList for saving heap memory.
254      prevCellList.set(index, null);
255    } else {
256      prevCellList.set(index, currentCell);
257    }
258  }
259
260  @Override
261  public ReturnCode filterCell(Cell c) throws IOException {
262    if (isEmpty()) {
263      return ReturnCode.INCLUDE;
264    }
265    ReturnCode rc = null;
266    for (int i = 0, n = filters.size(); i < n; i++) {
267      Filter filter = filters.get(i);
268      subFiltersIncludedCell.set(i, false);
269
270      Cell prevCell = this.prevCellList.get(i);
271      ReturnCode prevCode = this.prevFilterRCList.get(i);
272      if (filter.filterAllRemaining()) {
273        continue;
274      }
275      ReturnCode localRC = calculateReturnCodeByPrevCellAndRC(filter, c, prevCell, prevCode);
276      if (localRC == null) {
277        // Can not get return code based on previous cell and previous return code. In other words,
278        // we should pass the current cell to this sub-filter to get the return code, and it won't
279        // impact the sub-filter's internal state.
280        localRC = filter.filterCell(c);
281      }
282
283      // Update previous return code and previous cell for filter[i].
284      updatePrevFilterRCList(i, localRC);
285      updatePrevCellList(i, c, localRC);
286
287      rc = mergeReturnCode(rc, localRC);
288
289      // For INCLUDE* case, we need to update the transformed cell.
290      if (isInReturnCodes(localRC, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL,
291        ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) {
292        subFiltersIncludedCell.set(i, true);
293      }
294    }
295    // Each sub-filter in filter list got true for filterAllRemaining(), if rc is null, so we should
296    // return SKIP.
297    return rc == null ? ReturnCode.SKIP : rc;
298  }
299
300  @Override
301  public void reset() throws IOException {
302    for (int i = 0, n = filters.size(); i < n; i++) {
303      filters.get(i).reset();
304      subFiltersIncludedCell.set(i, false);
305      prevFilterRCList.set(i, null);
306      prevCellList.set(i, null);
307    }
308  }
309
310  @Override
311  public boolean filterRowKey(byte[] rowKey, int offset, int length) throws IOException {
312    if (isEmpty()) {
313      return super.filterRowKey(rowKey, offset, length);
314    }
315    boolean retVal = true;
316    for (int i = 0, n = filters.size(); i < n; i++) {
317      Filter filter = filters.get(i);
318      if (!filter.filterAllRemaining() && !filter.filterRowKey(rowKey, offset, length)) {
319        retVal = false;
320      }
321    }
322    return retVal;
323  }
324
325  @Override
326  public boolean filterRowKey(Cell firstRowCell) throws IOException {
327    if (isEmpty()) {
328      return super.filterRowKey(firstRowCell);
329    }
330    boolean retVal = true;
331    for (int i = 0, n = filters.size(); i < n; i++) {
332      Filter filter = filters.get(i);
333      if (!filter.filterAllRemaining() && !filter.filterRowKey(firstRowCell)) {
334        // Can't just return false here, because there are some filters (such as PrefixFilter) which
335        // will catch the row changed event by filterRowKey(). If we return early here, those
336        // filters will have no chance to update their row state.
337        retVal = false;
338      }
339    }
340    return retVal;
341  }
342
343  @Override
344  public boolean filterAllRemaining() throws IOException {
345    if (isEmpty()) {
346      return super.filterAllRemaining();
347    }
348    for (int i = 0, n = filters.size(); i < n; i++) {
349      if (!filters.get(i).filterAllRemaining()) {
350        return false;
351      }
352    }
353    return true;
354  }
355
356  @Override
357  public boolean filterRow() throws IOException {
358    if (isEmpty()) {
359      return super.filterRow();
360    }
361    for (int i = 0, n = filters.size(); i < n; i++) {
362      Filter filter = filters.get(i);
363      if (!filter.filterRow()) {
364        return false;
365      }
366    }
367    return true;
368  }
369
370  @Override
371  public Cell getNextCellHint(Cell currentCell) throws IOException {
372    if (isEmpty()) {
373      return super.getNextCellHint(currentCell);
374    }
375    Cell minKeyHint = null;
376    // If any condition can pass, we need to keep the min hint
377    for (int i = 0, n = filters.size(); i < n; i++) {
378      if (filters.get(i).filterAllRemaining()) {
379        continue;
380      }
381      Cell curKeyHint = filters.get(i).getNextCellHint(currentCell);
382      if (curKeyHint == null) {
383        // If we ever don't have a hint and this is must-pass-one, then no hint
384        return null;
385      }
386      // If this is the first hint we find, set it
387      if (minKeyHint == null) {
388        minKeyHint = curKeyHint;
389        continue;
390      }
391      if (this.compareCell(minKeyHint, curKeyHint) > 0) {
392        minKeyHint = curKeyHint;
393      }
394    }
395    return minKeyHint;
396  }
397
398
399  @Override
400  public boolean equals(Object obj) {
401    if (obj == null || (!(obj instanceof FilterListWithOR))) {
402      return false;
403    }
404    if (this == obj) {
405      return true;
406    }
407    FilterListWithOR f = (FilterListWithOR) obj;
408    return this.filters.equals(f.getFilters()) &&
409      this.prevFilterRCList.equals(f.prevFilterRCList) &&
410      this.prevCellList.equals(f.prevCellList);
411  }
412
413  @Override
414  public int hashCode() {
415    return Objects.hash(this.prevFilterRCList, this.prevCellList, this.filters);
416  }
417}