001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Collections;
023import java.util.List;
024import java.util.Objects;
025import org.apache.hadoop.hbase.Cell;
026import org.apache.yetus.audience.InterfaceAudience;
027
028/**
029 * FilterListWithAND represents an ordered list of filters which will be evaluated with an AND
030 * operator.
031 */
032@InterfaceAudience.Private
033public class FilterListWithAND extends FilterListBase {
034
035  private List<Filter> seekHintFilters = new ArrayList<>();
036  private boolean[] hintingFilters;
037
038  public FilterListWithAND(List<Filter> filters) {
039    super(filters);
040    // For FilterList with AND, when call FL's transformCell(), we should transform cell for all
041    // sub-filters (because all sub-filters return INCLUDE*). So here, fill this array with true. we
042    // keep this in FilterListWithAND for abstracting the transformCell() in FilterListBase.
043    subFiltersIncludedCell = new ArrayList<>(Collections.nCopies(filters.size(), true));
044    cacheHintingFilters();
045  }
046
047  @Override
048  public void addFilterLists(List<Filter> filters) {
049    if (checkAndGetReversed(filters, isReversed()) != isReversed()) {
050      throw new IllegalArgumentException("Filters in the list must have the same reversed flag");
051    }
052    this.filters.addAll(filters);
053    this.subFiltersIncludedCell.addAll(Collections.nCopies(filters.size(), true));
054    this.cacheHintingFilters();
055  }
056
057  @Override
058  protected String formatLogFilters(List<Filter> logFilters) {
059    return String.format("FilterList AND (%d/%d): %s", logFilters.size(), this.size(),
060      logFilters.toString());
061  }
062
063  /**
064   * As checks for this are in the hot path, we want them as fast as possible, so we are caching the
065   * status in an array.
066   */
067  private void cacheHintingFilters() {
068    int filtersSize = filters.size();
069    hintingFilters = new boolean[filtersSize];
070    for (int i = 0; i < filtersSize; i++) {
071      if (filters.get(i) instanceof HintingFilter) {
072        hintingFilters[i] = true;
073      }
074    }
075  }
076
077  /**
078   * FilterList with MUST_PASS_ALL choose the maximal forward step among sub-filters in filter list.
079   * Let's call it: The Maximal Step Rule. So if filter-A in filter list return INCLUDE and filter-B
080   * in filter list return INCLUDE_AND_NEXT_COL, then the filter list should return
081   * INCLUDE_AND_NEXT_COL. For SEEK_NEXT_USING_HINT, it's more special, and in method
082   * filterCellWithMustPassAll(), if any sub-filter return SEEK_NEXT_USING_HINT, then our filter
083   * list will return SEEK_NEXT_USING_HINT. so we don't care about the SEEK_NEXT_USING_HINT here.
084   * <br/>
085   * <br/>
086   * The jump step will be:
087   *
088   * <pre>
089   * INCLUDE &lt; SKIP &lt; INCLUDE_AND_NEXT_COL &lt; NEXT_COL &lt; INCLUDE_AND_SEEK_NEXT_ROW &lt; NEXT_ROW
090   *     &lt; SEEK_NEXT_USING_HINT
091   * </pre>
092   *
093   * Here, we have the following map to describe The Maximal Step Rule. if current return code (for
094   * previous sub-filters in filter list) is <strong>ReturnCode</strong>, and current filter returns
095   * <strong>localRC</strong>, then we should return map[ReturnCode][localRC] for the merged result,
096   * according to The Maximal Step Rule. <br/>
097   *
098   * <pre>
099   * LocalCode\ReturnCode       INCLUDE                    INCLUDE_AND_NEXT_COL      INCLUDE_AND_SEEK_NEXT_ROW  SKIP                  NEXT_COL              NEXT_ROW              SEEK_NEXT_USING_HINT
100   * INCLUDE                    INCLUDE                    INCLUDE_AND_NEXT_COL      INCLUDE_AND_SEEK_NEXT_ROW  SKIP                  NEXT_COL              NEXT_ROW              SEEK_NEXT_USING_HINT
101   * INCLUDE_AND_NEXT_COL       INCLUDE_AND_NEXT_COL       INCLUDE_AND_NEXT_COL      INCLUDE_AND_SEEK_NEXT_ROW  NEXT_COL              NEXT_COL              NEXT_ROW              SEEK_NEXT_USING_HINT
102   * INCLUDE_AND_SEEK_NEXT_ROW  INCLUDE_AND_SEEK_NEXT_ROW  INCLUDE_AND_SEEK_NEXT_ROW INCLUDE_AND_SEEK_NEXT_ROW  NEXT_ROW              NEXT_ROW              NEXT_ROW              SEEK_NEXT_USING_HINT
103   * SKIP                       SKIP                       NEXT_COL                  NEXT_ROW                   SKIP                  NEXT_COL              NEXT_ROW              SEEK_NEXT_USING_HINT
104   * NEXT_COL                   NEXT_COL                   NEXT_COL                  NEXT_ROW                   NEXT_COL              NEXT_COL              NEXT_ROW              SEEK_NEXT_USING_HINT
105   * NEXT_ROW                   NEXT_ROW                   NEXT_ROW                  NEXT_ROW                   NEXT_ROW              NEXT_ROW              NEXT_ROW              SEEK_NEXT_USING_HINT
106   * SEEK_NEXT_USING_HINT       SEEK_NEXT_USING_HINT       SEEK_NEXT_USING_HINT      SEEK_NEXT_USING_HINT       SEEK_NEXT_USING_HINT  SEEK_NEXT_USING_HINT  SEEK_NEXT_USING_HINT  SEEK_NEXT_USING_HINT
107   * </pre>
108   *
109   * @param rc      Return code which is calculated by previous sub-filter(s) in filter list.
110   * @param localRC Return code of the current sub-filter in filter list.
111   * @return Return code which is merged by the return code of previous sub-filter(s) and the return
112   *         code of current sub-filter.
113   */
114  private ReturnCode mergeReturnCode(ReturnCode rc, ReturnCode localRC) {
115    if (rc == ReturnCode.SEEK_NEXT_USING_HINT) {
116      return ReturnCode.SEEK_NEXT_USING_HINT;
117    }
118    switch (localRC) {
119      case SEEK_NEXT_USING_HINT:
120        return ReturnCode.SEEK_NEXT_USING_HINT;
121      case INCLUDE:
122        return rc;
123      case INCLUDE_AND_NEXT_COL:
124        if (isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL)) {
125          return ReturnCode.INCLUDE_AND_NEXT_COL;
126        }
127        if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) {
128          return ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW;
129        }
130        if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.NEXT_COL)) {
131          return ReturnCode.NEXT_COL;
132        }
133        if (isInReturnCodes(rc, ReturnCode.NEXT_ROW)) {
134          return ReturnCode.NEXT_ROW;
135        }
136        break;
137      case INCLUDE_AND_SEEK_NEXT_ROW:
138        if (
139          isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL,
140            ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)
141        ) {
142          return ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW;
143        }
144        if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) {
145          return ReturnCode.NEXT_ROW;
146        }
147        break;
148      case SKIP:
149        if (isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.SKIP)) {
150          return ReturnCode.SKIP;
151        }
152        if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.NEXT_COL)) {
153          return ReturnCode.NEXT_COL;
154        }
155        if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW, ReturnCode.NEXT_ROW)) {
156          return ReturnCode.NEXT_ROW;
157        }
158        break;
159      case NEXT_COL:
160        if (
161          isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.SKIP,
162            ReturnCode.NEXT_COL)
163        ) {
164          return ReturnCode.NEXT_COL;
165        }
166        if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW, ReturnCode.NEXT_ROW)) {
167          return ReturnCode.NEXT_ROW;
168        }
169        break;
170      case NEXT_ROW:
171        return ReturnCode.NEXT_ROW;
172    }
173    throw new IllegalStateException(
174      "Received code is not valid. rc: " + rc + ", localRC: " + localRC);
175  }
176
177  private boolean isIncludeRelatedReturnCode(ReturnCode rc) {
178    return isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL,
179      ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW);
180  }
181
182  @Override
183  public ReturnCode filterCell(Cell c) throws IOException {
184    if (isEmpty()) {
185      return ReturnCode.INCLUDE;
186    }
187    ReturnCode rc = ReturnCode.INCLUDE;
188    this.seekHintFilters.clear();
189    int i = 0;
190    int n = filters.size();
191    for (; i < n; i++) {
192      Filter filter = filters.get(i);
193      if (filter.filterAllRemaining()) {
194        rc = ReturnCode.NEXT_ROW;
195        // See comment right after this loop
196        break;
197      }
198      ReturnCode localRC;
199      localRC = filter.filterCell(c);
200      if (localRC == ReturnCode.SEEK_NEXT_USING_HINT) {
201        seekHintFilters.add(filter);
202      }
203      rc = mergeReturnCode(rc, localRC);
204      // Only when rc is INCLUDE* case, we should pass the cell to the following sub-filters.
205      // otherwise we may mess up the global state (such as offset, count..) in the following
206      // sub-filters. (HBASE-20565)
207      if (!isIncludeRelatedReturnCode(rc)) {
208        // See comment right after this loop
209        break;
210      }
211    }
212    // We have the preliminary return code. However, if there are remaining uncalled hintingFilters,
213    // they may return hints that allow us to seek ahead and skip reading and processing a lot of
214    // cells.
215    // Process the remaining hinting filters so that we can get all seek hints.
216    // The farthest key is computed in getNextCellHint()
217    if (++i < n) {
218      for (; i < n; i++) {
219        if (hintingFilters[i]) {
220          Filter filter = filters.get(i);
221          if (filter.filterCell(c) == ReturnCode.SEEK_NEXT_USING_HINT) {
222            seekHintFilters.add(filter);
223          }
224        }
225      }
226    }
227
228    if (!seekHintFilters.isEmpty()) {
229      return ReturnCode.SEEK_NEXT_USING_HINT;
230    }
231    return rc;
232  }
233
234  @Override
235  public void reset() throws IOException {
236    for (int i = 0, n = filters.size(); i < n; i++) {
237      filters.get(i).reset();
238    }
239    seekHintFilters.clear();
240  }
241
242  @Override
243  public boolean filterRowKey(Cell firstRowCell) throws IOException {
244    if (isEmpty()) {
245      return super.filterRowKey(firstRowCell);
246    }
247    boolean anyRowKeyFiltered = false;
248    boolean anyHintingPassed = false;
249    for (int i = 0, n = filters.size(); i < n; i++) {
250      Filter filter = filters.get(i);
251      if (filter.filterAllRemaining()) {
252        // We don't need to care about any later filters, as we end the scan immediately.
253        // TODO HBASE-28633 in the normal code path, filterAllRemaining() always gets checked
254        // before filterRowKey(). We should be able to remove this check.
255        return true;
256      } else if (filter.filterRowKey(firstRowCell)) {
257        // Can't just return true here, because there are some filters (such as PrefixFilter) which
258        // will catch the row changed event by filterRowKey(). If we return early here, those
259        // filters will have no chance to update their row state.
260        anyRowKeyFiltered = true;
261      } else if (hintingFilters[i]) {
262        // If filterRowKey returns false and this is a hinting filter, then we must not filter this
263        // rowkey.
264        // Otherwise this sub-filter doesn't get a chance to provide a seek hint, and the scan may
265        // regress into a full scan.
266        anyHintingPassed = true;
267      }
268    }
269    return anyRowKeyFiltered && !anyHintingPassed;
270  }
271
272  @Override
273  public boolean filterAllRemaining() throws IOException {
274    if (isEmpty()) {
275      return super.filterAllRemaining();
276    }
277    for (int i = 0, n = filters.size(); i < n; i++) {
278      if (filters.get(i).filterAllRemaining()) {
279        return true;
280      }
281    }
282    return false;
283  }
284
285  @Override
286  public boolean filterRow() throws IOException {
287    if (isEmpty()) {
288      return super.filterRow();
289    }
290    for (int i = 0, n = filters.size(); i < n; i++) {
291      Filter filter = filters.get(i);
292      if (filter.filterRow()) {
293        return true;
294      }
295    }
296    return false;
297  }
298
299  @Override
300  public Cell getNextCellHint(Cell currentCell) throws IOException {
301    if (isEmpty()) {
302      return super.getNextCellHint(currentCell);
303    }
304    Cell maxHint = null;
305    for (Filter filter : seekHintFilters) {
306      if (filter.filterAllRemaining()) {
307        continue;
308      }
309      Cell curKeyHint = filter.getNextCellHint(currentCell);
310      if (maxHint == null) {
311        maxHint = curKeyHint;
312        continue;
313      }
314      if (this.compareCell(maxHint, curKeyHint) < 0) {
315        maxHint = curKeyHint;
316      }
317    }
318    return maxHint;
319  }
320
321  @Override
322  public boolean equals(Object obj) {
323    if (this == obj) {
324      return true;
325    }
326    if (!(obj instanceof FilterListWithAND)) {
327      return false;
328    }
329    FilterListWithAND f = (FilterListWithAND) obj;
330    return this.filters.equals(f.getFilters()) && this.seekHintFilters.equals(f.seekHintFilters);
331  }
332
333  @Override
334  public int hashCode() {
335    return Objects.hash(this.seekHintFilters, this.filters);
336  }
337}