001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Comparator;
023import java.util.Objects;
024import java.util.TreeSet;
025import org.apache.hadoop.hbase.Cell;
026import org.apache.hadoop.hbase.CellUtil;
027import org.apache.hadoop.hbase.PrivateCellUtil;
028import org.apache.hadoop.hbase.exceptions.DeserializationException;
029import org.apache.hadoop.hbase.util.Bytes;
030import org.apache.yetus.audience.InterfaceAudience;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
035import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
036
037import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
038
039/**
040 * This filter is used for selecting only those keys with columns that match any of the given
041 * prefixes. For example, if prefixes are 'an' and 'ba', it will pass keys with columns like 'and',
042 * 'anti', 'ball' but not keys with columns like 'cat', 'act'. The prefixes are stored in a sorted
043 * set and the filter uses seek hints to efficiently skip columns that do not match any prefix.
044 */
045@InterfaceAudience.Public
046public class MultipleColumnPrefixFilter extends FilterBase implements HintingFilter {
047  private static final Logger LOG = LoggerFactory.getLogger(MultipleColumnPrefixFilter.class);
048  protected byte[] hint = null;
049  protected TreeSet<byte[]> sortedPrefixes = createTreeSet();
050  private final static int MAX_LOG_PREFIXES = 5;
051
052  public MultipleColumnPrefixFilter(final byte[][] prefixes) {
053    if (prefixes != null) {
054      for (byte[] prefix : prefixes) {
055        if (!sortedPrefixes.add(prefix)) {
056          LOG.error("prefix {} is repeated", Bytes.toString(prefix));
057          throw new IllegalArgumentException("prefixes must be distinct");
058        }
059      }
060    }
061  }
062
063  public byte[][] getPrefix() {
064    int count = 0;
065    byte[][] temp = new byte[sortedPrefixes.size()][];
066    for (byte[] prefixes : sortedPrefixes) {
067      temp[count++] = prefixes;
068    }
069    return temp;
070  }
071
072  @Override
073  public boolean filterRowKey(Cell cell) throws IOException {
074    // Impl in FilterBase might do unnecessary copy for Off heap backed Cells.
075    return false;
076  }
077
078  @Override
079  public ReturnCode filterCell(final Cell c) {
080    if (sortedPrefixes.isEmpty()) {
081      return ReturnCode.INCLUDE;
082    } else {
083      return filterColumn(c);
084    }
085  }
086
087  public ReturnCode filterColumn(Cell cell) {
088    byte[] qualifier = CellUtil.cloneQualifier(cell);
089    TreeSet<byte[]> lesserOrEqualPrefixes =
090      (TreeSet<byte[]>) sortedPrefixes.headSet(qualifier, true);
091
092    if (lesserOrEqualPrefixes.size() != 0) {
093      byte[] largestPrefixSmallerThanQualifier = lesserOrEqualPrefixes.last();
094
095      if (Bytes.startsWith(qualifier, largestPrefixSmallerThanQualifier)) {
096        return ReturnCode.INCLUDE;
097      }
098
099      if (lesserOrEqualPrefixes.size() == sortedPrefixes.size()) {
100        return ReturnCode.NEXT_ROW;
101      } else {
102        hint = sortedPrefixes.higher(largestPrefixSmallerThanQualifier);
103        return ReturnCode.SEEK_NEXT_USING_HINT;
104      }
105    } else {
106      hint = sortedPrefixes.first();
107      return ReturnCode.SEEK_NEXT_USING_HINT;
108    }
109  }
110
111  public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) {
112    byte[][] prefixes = new byte[filterArguments.size()][];
113    for (int i = 0; i < filterArguments.size(); i++) {
114      byte[] columnPrefix = ParseFilter.removeQuotesFromByteArray(filterArguments.get(i));
115      prefixes[i] = columnPrefix;
116    }
117    return new MultipleColumnPrefixFilter(prefixes);
118  }
119
120  /** Returns The filter serialized using pb */
121  @Override
122  public byte[] toByteArray() {
123    FilterProtos.MultipleColumnPrefixFilter.Builder builder =
124      FilterProtos.MultipleColumnPrefixFilter.newBuilder();
125    for (byte[] element : sortedPrefixes) {
126      if (element != null) builder.addSortedPrefixes(UnsafeByteOperations.unsafeWrap(element));
127    }
128    return builder.build().toByteArray();
129  }
130
131  /**
132   * Parse a serialized representation of {@link MultipleColumnPrefixFilter}
133   * @param pbBytes A pb serialized {@link MultipleColumnPrefixFilter} instance
134   * @return An instance of {@link MultipleColumnPrefixFilter} made from <code>bytes</code>
135   * @throws DeserializationException if an error occurred
136   * @see #toByteArray
137   */
138  public static MultipleColumnPrefixFilter parseFrom(final byte[] pbBytes)
139    throws DeserializationException {
140    FilterProtos.MultipleColumnPrefixFilter proto;
141    try {
142      proto = FilterProtos.MultipleColumnPrefixFilter.parseFrom(pbBytes);
143    } catch (InvalidProtocolBufferException e) {
144      throw new DeserializationException(e);
145    }
146    int numPrefixes = proto.getSortedPrefixesCount();
147    byte[][] prefixes = new byte[numPrefixes][];
148    for (int i = 0; i < numPrefixes; ++i) {
149      prefixes[i] = proto.getSortedPrefixes(i).toByteArray();
150    }
151
152    return new MultipleColumnPrefixFilter(prefixes);
153  }
154
155  /**
156   * Returns true if and only if the fields of the filter that are serialized are equal to the
157   * corresponding fields in other. Used for testing.
158   */
159  @Override
160  boolean areSerializedFieldsEqual(Filter o) {
161    if (o == this) {
162      return true;
163    }
164    if (!(o instanceof MultipleColumnPrefixFilter)) {
165      return false;
166    }
167    MultipleColumnPrefixFilter other = (MultipleColumnPrefixFilter) o;
168    return this.sortedPrefixes.equals(other.sortedPrefixes);
169  }
170
171  @Override
172  public Cell getNextCellHint(Cell cell) {
173    return PrivateCellUtil.createFirstOnRowCol(cell, hint, 0, hint.length);
174  }
175
176  public TreeSet<byte[]> createTreeSet() {
177    return new TreeSet<>(new Comparator<Object>() {
178      @Override
179      public int compare(Object o1, Object o2) {
180        if (o1 == null || o2 == null) throw new IllegalArgumentException("prefixes can't be null");
181
182        byte[] b1 = (byte[]) o1;
183        byte[] b2 = (byte[]) o2;
184        return Bytes.compareTo(b1, 0, b1.length, b2, 0, b2.length);
185      }
186    });
187  }
188
189  @Override
190  public String toString() {
191    return toString(MAX_LOG_PREFIXES);
192  }
193
194  protected String toString(int maxPrefixes) {
195    StringBuilder prefixes = new StringBuilder();
196
197    int count = 0;
198    for (byte[] ba : this.sortedPrefixes) {
199      if (count >= maxPrefixes) {
200        break;
201      }
202      ++count;
203      prefixes.append(Bytes.toStringBinary(ba));
204      if (count < this.sortedPrefixes.size() && count < maxPrefixes) {
205        prefixes.append(", ");
206      }
207    }
208
209    return String.format("%s (%d/%d): [%s]", this.getClass().getSimpleName(), count,
210      this.sortedPrefixes.size(), prefixes.toString());
211  }
212
213  @Override
214  public boolean equals(Object obj) {
215    return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
216  }
217
218  @Override
219  public int hashCode() {
220    return Objects.hash(this.sortedPrefixes);
221  }
222}