View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.filter;
19  
20  import com.google.protobuf.InvalidProtocolBufferException;
21  import org.apache.hadoop.hbase.classification.InterfaceAudience;
22  import org.apache.hadoop.hbase.classification.InterfaceStability;
23  import org.apache.hadoop.hbase.Cell;
24  import org.apache.hadoop.hbase.KeyValueUtil;
25  import org.apache.hadoop.hbase.exceptions.DeserializationException;
26  import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
27  import org.apache.hadoop.hbase.util.ByteStringer;
28  import org.apache.hadoop.hbase.util.Bytes;
29  
30  import java.util.ArrayList;
31  import java.util.Arrays;
32  import java.util.Comparator;
33  import java.util.TreeSet;
34  
35  /**
36   * This filter is used for selecting only those keys with columns that matches
37   * a particular prefix. For example, if prefix is 'an', it will pass keys will
38   * columns like 'and', 'anti' but not keys with columns like 'ball', 'act'.
39   */
40  @InterfaceAudience.Public
41  @InterfaceStability.Stable
42  public class MultipleColumnPrefixFilter extends FilterBase {
43    protected byte [] hint = null;
44    protected TreeSet<byte []> sortedPrefixes = createTreeSet();
45    private final static int MAX_LOG_PREFIXES = 5;
46  
47    public MultipleColumnPrefixFilter(final byte [][] prefixes) {
48      if (prefixes != null) {
49        for (int i = 0; i < prefixes.length; i++) {
50          if (!sortedPrefixes.add(prefixes[i]))
51            throw new IllegalArgumentException ("prefixes must be distinct");
52        }
53      }
54    }
55  
56    public byte [][] getPrefix() {
57      int count = 0;
58      byte [][] temp = new byte [sortedPrefixes.size()][];
59      for (byte [] prefixes : sortedPrefixes) {
60        temp [count++] = prefixes;
61      }
62      return temp;
63    }
64  
65    @Override
66    public ReturnCode filterKeyValue(Cell kv) {
67      if (sortedPrefixes.size() == 0 || kv.getQualifierArray() == null) {
68        return ReturnCode.INCLUDE;
69      } else {
70        return filterColumn(kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength());
71      }
72    }
73  
74    // Override here explicitly as the method in super class FilterBase might do a KeyValue recreate.
75    // See HBASE-12068
76    @Override
77    public Cell transformCell(Cell v) {
78      return v;
79    }
80  
81    public ReturnCode filterColumn(byte[] buffer, int qualifierOffset, int qualifierLength) {
82      byte [] qualifier = Arrays.copyOfRange(buffer, qualifierOffset,
83                                             qualifierLength + qualifierOffset);
84      TreeSet<byte []> lesserOrEqualPrefixes =
85        (TreeSet<byte []>) sortedPrefixes.headSet(qualifier, true);
86  
87      if (lesserOrEqualPrefixes.size() != 0) {
88        byte [] largestPrefixSmallerThanQualifier = lesserOrEqualPrefixes.last();
89        
90        if (Bytes.startsWith(qualifier, largestPrefixSmallerThanQualifier)) {
91          return ReturnCode.INCLUDE;
92        }
93        
94        if (lesserOrEqualPrefixes.size() == sortedPrefixes.size()) {
95          return ReturnCode.NEXT_ROW;
96        } else {
97          hint = sortedPrefixes.higher(largestPrefixSmallerThanQualifier);
98          return ReturnCode.SEEK_NEXT_USING_HINT;
99        }
100     } else {
101       hint = sortedPrefixes.first();
102       return ReturnCode.SEEK_NEXT_USING_HINT;
103     }
104   }
105 
106   public static Filter createFilterFromArguments(ArrayList<byte []> filterArguments) {
107     byte [][] prefixes = new byte [filterArguments.size()][];
108     for (int i = 0 ; i < filterArguments.size(); i++) {
109       byte [] columnPrefix = ParseFilter.removeQuotesFromByteArray(filterArguments.get(i));
110       prefixes[i] = columnPrefix;
111     }
112     return new MultipleColumnPrefixFilter(prefixes);
113   }
114 
115   /**
116    * @return The filter serialized using pb
117    */
118   public byte [] toByteArray() {
119     FilterProtos.MultipleColumnPrefixFilter.Builder builder =
120       FilterProtos.MultipleColumnPrefixFilter.newBuilder();
121     for (byte [] element : sortedPrefixes) {
122       if (element != null) builder.addSortedPrefixes(ByteStringer.wrap(element));
123     }
124     return builder.build().toByteArray();
125   }
126 
127   /**
128    * @param pbBytes A pb serialized {@link MultipleColumnPrefixFilter} instance
129    * @return An instance of {@link MultipleColumnPrefixFilter} made from <code>bytes</code>
130    * @throws DeserializationException
131    * @see #toByteArray
132    */
133   public static MultipleColumnPrefixFilter parseFrom(final byte [] pbBytes)
134   throws DeserializationException {
135     FilterProtos.MultipleColumnPrefixFilter proto;
136     try {
137       proto = FilterProtos.MultipleColumnPrefixFilter.parseFrom(pbBytes);
138     } catch (InvalidProtocolBufferException e) {
139       throw new DeserializationException(e);
140     }
141     int numPrefixes = proto.getSortedPrefixesCount();
142     byte [][] prefixes = new byte[numPrefixes][];
143     for (int i = 0; i < numPrefixes; ++i) {
144       prefixes[i] = proto.getSortedPrefixes(i).toByteArray();
145     }
146 
147     return new MultipleColumnPrefixFilter(prefixes);
148   }
149 
150   /**
151    * @param other
152    * @return true if and only if the fields of the filter that are serialized
153    * are equal to the corresponding fields in other.  Used for testing.
154    */
155   boolean areSerializedFieldsEqual(Filter o) {
156     if (o == this) return true;
157     if (!(o instanceof MultipleColumnPrefixFilter)) return false;
158 
159     MultipleColumnPrefixFilter other = (MultipleColumnPrefixFilter)o;
160     return this.sortedPrefixes.equals(other.sortedPrefixes);
161   }
162 
163   @Override
164   public Cell getNextCellHint(Cell kv) {
165     return KeyValueUtil.createFirstOnRow(
166       kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), kv.getFamilyArray(),
167       kv.getFamilyOffset(), kv.getFamilyLength(), hint, 0, hint.length);
168   }
169 
170   public TreeSet<byte []> createTreeSet() {
171     return new TreeSet<byte []>(new Comparator<Object>() {
172         @Override
173           public int compare (Object o1, Object o2) {
174           if (o1 == null || o2 == null)
175             throw new IllegalArgumentException ("prefixes can't be null");
176 
177           byte [] b1 = (byte []) o1;
178           byte [] b2 = (byte []) o2;
179           return Bytes.compareTo (b1, 0, b1.length, b2, 0, b2.length);
180         }
181       });
182   }
183 
184   @Override
185   public String toString() {
186     return toString(MAX_LOG_PREFIXES);
187   }
188 
189   protected String toString(int maxPrefixes) {
190     StringBuilder prefixes = new StringBuilder();
191 
192     int count = 0;
193     for (byte[] ba : this.sortedPrefixes) {
194       if (count >= maxPrefixes) {
195         break;
196       }
197       ++count;
198       prefixes.append(Bytes.toStringBinary(ba));
199       if (count < this.sortedPrefixes.size() && count < maxPrefixes) {
200         prefixes.append(", ");
201       }
202     }
203 
204     return String.format("%s (%d/%d): [%s]", this.getClass().getSimpleName(),
205         count, this.sortedPrefixes.size(), prefixes.toString());
206   }
207 }