View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.filter;
19  
20  
21  import com.google.protobuf.HBaseZeroCopyByteString;
22  import com.google.protobuf.InvalidProtocolBufferException;
23  
24  import org.apache.hadoop.classification.InterfaceAudience;
25  import org.apache.hadoop.classification.InterfaceStability;
26  import org.apache.hadoop.hbase.Cell;
27  import org.apache.hadoop.hbase.KeyValue;
28  import org.apache.hadoop.hbase.exceptions.DeserializationException;
29  import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
30  import org.apache.hadoop.hbase.util.Bytes;
31  
32  import java.util.ArrayList;
33  import java.util.Arrays;
34  import java.util.Comparator;
35  import java.util.TreeSet;
36  
37  /**
38   * This filter is used for selecting only those keys with columns that matches
39   * a particular prefix. For example, if prefix is 'an', it will pass keys will
40   * columns like 'and', 'anti' but not keys with columns like 'ball', 'act'.
41   */
42  @InterfaceAudience.Public
43  @InterfaceStability.Stable
44  public class MultipleColumnPrefixFilter extends FilterBase {
45    protected byte [] hint = null;
46    protected TreeSet<byte []> sortedPrefixes = createTreeSet();
47    private final static int MAX_LOG_PREFIXES = 5;
48  
49    public MultipleColumnPrefixFilter(final byte [][] prefixes) {
50      if (prefixes != null) {
51        for (int i = 0; i < prefixes.length; i++) {
52          if (!sortedPrefixes.add(prefixes[i]))
53            throw new IllegalArgumentException ("prefixes must be distinct");
54        }
55      }
56    }
57  
58    public byte [][] getPrefix() {
59      int count = 0;
60      byte [][] temp = new byte [sortedPrefixes.size()][];
61      for (byte [] prefixes : sortedPrefixes) {
62        temp [count++] = prefixes;
63      }
64      return temp;
65    }
66  
67    @Override
68    public ReturnCode filterKeyValue(Cell kv) {
69      if (sortedPrefixes.size() == 0 || kv.getQualifierArray() == null) {
70        return ReturnCode.INCLUDE;
71      } else {
72        return filterColumn(kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength());
73      }
74    }
75  
76    public ReturnCode filterColumn(byte[] buffer, int qualifierOffset, int qualifierLength) {
77      byte [] qualifier = Arrays.copyOfRange(buffer, qualifierOffset,
78                                             qualifierLength + qualifierOffset);
79      TreeSet<byte []> lesserOrEqualPrefixes =
80        (TreeSet<byte []>) sortedPrefixes.headSet(qualifier, true);
81  
82      if (lesserOrEqualPrefixes.size() != 0) {
83        byte [] largestPrefixSmallerThanQualifier = lesserOrEqualPrefixes.last();
84        
85        if (Bytes.startsWith(qualifier, largestPrefixSmallerThanQualifier)) {
86          return ReturnCode.INCLUDE;
87        }
88        
89        if (lesserOrEqualPrefixes.size() == sortedPrefixes.size()) {
90          return ReturnCode.NEXT_ROW;
91        } else {
92          hint = sortedPrefixes.higher(largestPrefixSmallerThanQualifier);
93          return ReturnCode.SEEK_NEXT_USING_HINT;
94        }
95      } else {
96        hint = sortedPrefixes.first();
97        return ReturnCode.SEEK_NEXT_USING_HINT;
98      }
99    }
100 
101   public static Filter createFilterFromArguments(ArrayList<byte []> filterArguments) {
102     byte [][] prefixes = new byte [filterArguments.size()][];
103     for (int i = 0 ; i < filterArguments.size(); i++) {
104       byte [] columnPrefix = ParseFilter.removeQuotesFromByteArray(filterArguments.get(i));
105       prefixes[i] = columnPrefix;
106     }
107     return new MultipleColumnPrefixFilter(prefixes);
108   }
109 
110   /**
111    * @return The filter serialized using pb
112    */
113   public byte [] toByteArray() {
114     FilterProtos.MultipleColumnPrefixFilter.Builder builder =
115       FilterProtos.MultipleColumnPrefixFilter.newBuilder();
116     for (byte [] element : sortedPrefixes) {
117       if (element != null) builder.addSortedPrefixes(HBaseZeroCopyByteString.wrap(element));
118     }
119     return builder.build().toByteArray();
120   }
121 
122   /**
123    * @param pbBytes A pb serialized {@link MultipleColumnPrefixFilter} instance
124    * @return An instance of {@link MultipleColumnPrefixFilter} made from <code>bytes</code>
125    * @throws DeserializationException
126    * @see #toByteArray
127    */
128   public static MultipleColumnPrefixFilter parseFrom(final byte [] pbBytes)
129   throws DeserializationException {
130     FilterProtos.MultipleColumnPrefixFilter proto;
131     try {
132       proto = FilterProtos.MultipleColumnPrefixFilter.parseFrom(pbBytes);
133     } catch (InvalidProtocolBufferException e) {
134       throw new DeserializationException(e);
135     }
136     int numPrefixes = proto.getSortedPrefixesCount();
137     byte [][] prefixes = new byte[numPrefixes][];
138     for (int i = 0; i < numPrefixes; ++i) {
139       prefixes[i] = proto.getSortedPrefixes(i).toByteArray();
140     }
141 
142     return new MultipleColumnPrefixFilter(prefixes);
143   }
144 
145   /**
146    * @param other
147    * @return true if and only if the fields of the filter that are serialized
148    * are equal to the corresponding fields in other.  Used for testing.
149    */
150   boolean areSerializedFieldsEqual(Filter o) {
151     if (o == this) return true;
152     if (!(o instanceof MultipleColumnPrefixFilter)) return false;
153 
154     MultipleColumnPrefixFilter other = (MultipleColumnPrefixFilter)o;
155     return this.sortedPrefixes.equals(other.sortedPrefixes);
156   }
157 
158   @Override
159   public Cell getNextCellHint(Cell kv) {
160     return KeyValue.createFirstOnRow(
161       kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), kv.getFamilyArray(),
162       kv.getFamilyOffset(), kv.getFamilyLength(), hint, 0, hint.length);
163   }
164 
165   public TreeSet<byte []> createTreeSet() {
166     return new TreeSet<byte []>(new Comparator<Object>() {
167         @Override
168           public int compare (Object o1, Object o2) {
169           if (o1 == null || o2 == null)
170             throw new IllegalArgumentException ("prefixes can't be null");
171 
172           byte [] b1 = (byte []) o1;
173           byte [] b2 = (byte []) o2;
174           return Bytes.compareTo (b1, 0, b1.length, b2, 0, b2.length);
175         }
176       });
177   }
178 
179   @Override
180   public String toString() {
181     return toString(MAX_LOG_PREFIXES);
182   }
183 
184   protected String toString(int maxPrefixes) {
185     StringBuilder prefixes = new StringBuilder();
186 
187     int count = 0;
188     for (byte[] ba : this.sortedPrefixes) {
189       if (count >= maxPrefixes) {
190         break;
191       }
192       ++count;
193       prefixes.append(Bytes.toStringBinary(ba));
194       if (count < this.sortedPrefixes.size() && count < maxPrefixes) {
195         prefixes.append(", ");
196       }
197     }
198 
199     return String.format("%s (%d/%d): [%s]", this.getClass().getSimpleName(),
200         count, this.sortedPrefixes.size(), prefixes.toString());
201   }
202 }