/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Objects;
import java.util.TreeSet;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;

import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;

/**
 * This filter is used for selecting only those keys with columns that match any of the given
 * prefixes. For example, if prefixes are 'an' and 'ba', it will pass keys with columns like 'and',
 * 'anti', 'ball' but not keys with columns like 'cat', 'act'. The prefixes are stored in a sorted
 * set and the filter uses seek hints to efficiently skip columns that do not match any prefix.
 */
@InterfaceAudience.Public
public class MultipleColumnPrefixFilter extends FilterBase implements HintingFilter {
  private static final Logger LOG = LoggerFactory.getLogger(MultipleColumnPrefixFilter.class);

  /** Default number of prefixes rendered by {@link #toString()}. */
  private static final int MAX_LOG_PREFIXES = 5;

  /**
   * Column prefix to seek to next; assigned by {@link #filterColumn(Cell)} whenever it returns
   * {@link ReturnCode#SEEK_NEXT_USING_HINT} and consumed by {@link #getNextCellHint(Cell)}.
   */
  protected byte[] hint = null;

  /** The configured prefixes, ordered by the byte-wise comparator of {@link #createTreeSet()}. */
  protected TreeSet<byte[]> sortedPrefixes = createTreeSet();

  /**
   * Creates a filter that accepts columns starting with any of the given prefixes.
   * @param prefixes the column prefixes; {@code null} is treated as "no prefixes". The arrays are
   *                 stored as given (no copy is taken).
   * @throws IllegalArgumentException if the same prefix is supplied twice, or if the set's
   *                                  comparator rejects a {@code null} element
   */
  public MultipleColumnPrefixFilter(final byte[][] prefixes) {
    if (prefixes != null) {
      for (byte[] prefix : prefixes) {
        if (!sortedPrefixes.add(prefix)) {
          LOG.error("prefix {} is repeated", Bytes.toString(prefix));
          throw new IllegalArgumentException("prefixes must be distinct");
        }
      }
    }
  }

  /** Returns the configured prefixes as a new outer array, in sorted order. */
  public byte[][] getPrefix() {
    return sortedPrefixes.toArray(new byte[sortedPrefixes.size()][]);
  }

  @Override
  public boolean filterRowKey(Cell cell) throws IOException {
    // Impl in FilterBase might do unnecessary copy for Off heap backed Cells.
    return false;
  }

  /**
   * Includes every cell when no prefixes are configured; otherwise delegates to
   * {@link #filterColumn(Cell)}.
   */
  @Override
  public ReturnCode filterCell(final Cell c) {
    if (sortedPrefixes.isEmpty()) {
      return ReturnCode.INCLUDE;
    }
    return filterColumn(c);
  }

  /**
   * Decides what to do with a cell based on its column qualifier.
   * <ul>
   * <li>{@link ReturnCode#INCLUDE} when the qualifier starts with the largest configured prefix
   * that is not greater than the qualifier;</li>
   * <li>{@link ReturnCode#NEXT_ROW} when that prefix does not match and no configured prefix sorts
   * after the qualifier;</li>
   * <li>{@link ReturnCode#SEEK_NEXT_USING_HINT} otherwise, after recording in {@link #hint} the
   * smallest configured prefix that sorts after the qualifier.</li>
   * </ul>
   * NOTE(review): only the single largest prefix not greater than the qualifier is tested. If one
   * configured prefix is itself a prefix of another (e.g. 'a' and 'ab'), a qualifier such as 'ac'
   * is compared against 'ab' only and is not included -- confirm whether callers are expected to
   * pass non-overlapping prefixes.
   * @param cell the cell whose qualifier is examined
   * @return the return code described above
   */
  public ReturnCode filterColumn(Cell cell) {
    final byte[] qualifier = CellUtil.cloneQualifier(cell);

    // floor()/higher() are direct tree lookups; this avoids building a head-set view and
    // counting its elements for every cell.
    final byte[] largestPrefixNotAfterQualifier = sortedPrefixes.floor(qualifier);
    if (largestPrefixNotAfterQualifier == null) {
      // Every prefix sorts after this qualifier: jump straight to the first one.
      hint = sortedPrefixes.first();
      return ReturnCode.SEEK_NEXT_USING_HINT;
    }

    if (Bytes.startsWith(qualifier, largestPrefixNotAfterQualifier)) {
      return ReturnCode.INCLUDE;
    }

    // No match: the next candidate is the prefix immediately after the floor, which is also the
    // smallest prefix greater than the qualifier.
    final byte[] nextPrefix = sortedPrefixes.higher(largestPrefixNotAfterQualifier);
    if (nextPrefix == null) {
      // The qualifier is already past the last prefix, so nothing else in this row can match.
      return ReturnCode.NEXT_ROW;
    }
    hint = nextPrefix;
    return ReturnCode.SEEK_NEXT_USING_HINT;
  }

  /**
   * Builds a filter from filter-language arguments.
   * @param filterArguments the prefixes, each passed through
   *                        {@link ParseFilter#removeQuotesFromByteArray(byte[])} before use
   * @return a new {@link MultipleColumnPrefixFilter} over the resulting prefixes
   */
  public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) {
    byte[][] prefixes = new byte[filterArguments.size()][];
    for (int i = 0; i < filterArguments.size(); i++) {
      prefixes[i] = ParseFilter.removeQuotesFromByteArray(filterArguments.get(i));
    }
    return new MultipleColumnPrefixFilter(prefixes);
  }

  /** Returns The filter serialized using pb */
  @Override
  public byte[] toByteArray() {
    FilterProtos.MultipleColumnPrefixFilter.Builder builder =
      FilterProtos.MultipleColumnPrefixFilter.newBuilder();
    for (byte[] element : sortedPrefixes) {
      if (element != null) {
        builder.addSortedPrefixes(UnsafeByteOperations.unsafeWrap(element));
      }
    }
    return builder.build().toByteArray();
  }

  /**
   * Parse a serialized representation of {@link MultipleColumnPrefixFilter}
   * @param pbBytes A pb serialized {@link MultipleColumnPrefixFilter} instance
   * @return An instance of {@link MultipleColumnPrefixFilter} made from <code>bytes</code>
   * @throws DeserializationException if an error occurred
   * @see #toByteArray
   */
  public static MultipleColumnPrefixFilter parseFrom(final byte[] pbBytes)
    throws DeserializationException {
    FilterProtos.MultipleColumnPrefixFilter proto;
    try {
      proto = FilterProtos.MultipleColumnPrefixFilter.parseFrom(pbBytes);
    } catch (InvalidProtocolBufferException e) {
      throw new DeserializationException(e);
    }
    int numPrefixes = proto.getSortedPrefixesCount();
    byte[][] prefixes = new byte[numPrefixes][];
    for (int i = 0; i < numPrefixes; ++i) {
      prefixes[i] = proto.getSortedPrefixes(i).toByteArray();
    }

    return new MultipleColumnPrefixFilter(prefixes);
  }

  /**
   * Returns true if and only if the fields of the filter that are serialized are equal to the
   * corresponding fields in other. Used for testing.
   */
  @Override
  boolean areSerializedFieldsEqual(Filter o) {
    if (o == this) {
      return true;
    }
    if (!(o instanceof MultipleColumnPrefixFilter)) {
      return false;
    }
    MultipleColumnPrefixFilter other = (MultipleColumnPrefixFilter) o;
    return this.sortedPrefixes.equals(other.sortedPrefixes);
  }

  /**
   * Builds the seek target from the prefix recorded by the last {@link #filterColumn(Cell)} call
   * that returned {@link ReturnCode#SEEK_NEXT_USING_HINT}.
   * @param cell the current cell, passed through to
   *             {@code PrivateCellUtil.createFirstOnRowCol} together with the recorded prefix
   * @return the cell produced by {@code PrivateCellUtil.createFirstOnRowCol}
   * @throws NullPointerException if no hint has been recorded yet
   */
  @Override
  public Cell getNextCellHint(Cell cell) {
    final byte[] seekTo = Objects.requireNonNull(hint,
      "seek hint is only available after filterColumn returned SEEK_NEXT_USING_HINT");
    return PrivateCellUtil.createFirstOnRowCol(cell, seekTo, 0, seekTo.length);
  }

  /**
   * Creates the empty sorted set used to hold the prefixes. Elements are ordered with
   * {@code Bytes.compareTo}; the comparator throws {@link IllegalArgumentException} when asked to
   * compare a {@code null}.
   */
  public TreeSet<byte[]> createTreeSet() {
    return new TreeSet<>(new Comparator<Object>() {
      @Override
      public int compare(Object o1, Object o2) {
        if (o1 == null || o2 == null) {
          throw new IllegalArgumentException("prefixes can't be null");
        }

        byte[] b1 = (byte[]) o1;
        byte[] b2 = (byte[]) o2;
        return Bytes.compareTo(b1, 0, b1.length, b2, 0, b2.length);
      }
    });
  }

  @Override
  public String toString() {
    return toString(MAX_LOG_PREFIXES);
  }

  /**
   * Renders the filter as {@code SimpleClassName (shown/total): [p1, p2, ...]}.
   * @param maxPrefixes the maximum number of prefixes to list
   * @return the formatted description, with prefixes printed via {@code Bytes.toStringBinary}
   */
  protected String toString(int maxPrefixes) {
    StringBuilder prefixes = new StringBuilder();

    int count = 0;
    for (byte[] ba : this.sortedPrefixes) {
      if (count >= maxPrefixes) {
        break;
      }
      ++count;
      prefixes.append(Bytes.toStringBinary(ba));
      // Add a separator only when another prefix will actually be printed after this one.
      if (count < this.sortedPrefixes.size() && count < maxPrefixes) {
        prefixes.append(", ");
      }
    }

    return String.format("%s (%d/%d): [%s]", this.getClass().getSimpleName(), count,
      this.sortedPrefixes.size(), prefixes.toString());
  }

  @Override
  public boolean equals(Object obj) {
    return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
  }

  /**
   * Hashes the prefix contents in sorted order. The set's own {@code hashCode()} is not used
   * because it relies on the identity hash of each {@code byte[]} element, which would give two
   * filters that are {@link #equals(Object) equal} different hash codes.
   */
  @Override
  public int hashCode() {
    int result = 1;
    for (byte[] prefix : this.sortedPrefixes) {
      result = 31 * result + Bytes.hashCode(prefix);
    }
    return result;
  }
}