001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.filter; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Comparator; 023import java.util.Objects; 024import java.util.TreeSet; 025 026import org.apache.hadoop.hbase.Cell; 027import org.apache.hadoop.hbase.CellUtil; 028import org.apache.hadoop.hbase.PrivateCellUtil; 029import org.apache.yetus.audience.InterfaceAudience; 030import org.apache.hadoop.hbase.exceptions.DeserializationException; 031import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; 032import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations; 033import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos; 034import org.apache.hadoop.hbase.util.Bytes; 035import org.slf4j.Logger; 036import org.slf4j.LoggerFactory; 037 038/** 039 * This filter is used for selecting only those keys with columns that matches 040 * a particular prefix. For example, if prefix is 'an', it will pass keys will 041 * columns like 'and', 'anti' but not keys with columns like 'ball', 'act'. 042 */ 043@InterfaceAudience.Public 044public class MultipleColumnPrefixFilter extends FilterBase { 045 private static final Logger LOG = LoggerFactory.getLogger(MultipleColumnPrefixFilter.class); 046 protected byte [] hint = null; 047 protected TreeSet<byte []> sortedPrefixes = createTreeSet(); 048 private final static int MAX_LOG_PREFIXES = 5; 049 050 public MultipleColumnPrefixFilter(final byte [][] prefixes) { 051 if (prefixes != null) { 052 for (byte[] prefix : prefixes) { 053 if (!sortedPrefixes.add(prefix)) { 054 LOG.error("prefix {} is repeated", Bytes.toString(prefix)); 055 throw new IllegalArgumentException("prefixes must be distinct"); 056 } 057 } 058 } 059 } 060 061 public byte [][] getPrefix() { 062 int count = 0; 063 byte [][] temp = new byte [sortedPrefixes.size()][]; 064 for (byte [] prefixes : sortedPrefixes) { 065 temp [count++] = prefixes; 066 } 067 return temp; 068 } 069 070 @Override 071 public boolean filterRowKey(Cell cell) throws IOException { 072 // Impl in FilterBase might do unnecessary copy for Off heap backed Cells. 073 return false; 074 } 075 076 @Deprecated 077 @Override 078 public ReturnCode filterKeyValue(final Cell c) { 079 return filterCell(c); 080 } 081 082 @Override 083 public ReturnCode filterCell(final Cell c) { 084 if (sortedPrefixes.isEmpty()) { 085 return ReturnCode.INCLUDE; 086 } else { 087 return filterColumn(c); 088 } 089 } 090 091 public ReturnCode filterColumn(Cell cell) { 092 byte [] qualifier = CellUtil.cloneQualifier(cell); 093 TreeSet<byte []> lesserOrEqualPrefixes = 094 (TreeSet<byte []>) sortedPrefixes.headSet(qualifier, true); 095 096 if (lesserOrEqualPrefixes.size() != 0) { 097 byte [] largestPrefixSmallerThanQualifier = lesserOrEqualPrefixes.last(); 098 099 if (Bytes.startsWith(qualifier, largestPrefixSmallerThanQualifier)) { 100 return ReturnCode.INCLUDE; 101 } 102 103 if (lesserOrEqualPrefixes.size() == sortedPrefixes.size()) { 104 return ReturnCode.NEXT_ROW; 105 } else { 106 hint = sortedPrefixes.higher(largestPrefixSmallerThanQualifier); 107 return ReturnCode.SEEK_NEXT_USING_HINT; 108 } 109 } else { 110 hint = sortedPrefixes.first(); 111 return ReturnCode.SEEK_NEXT_USING_HINT; 112 } 113 } 114 115 public static Filter createFilterFromArguments(ArrayList<byte []> filterArguments) { 116 byte [][] prefixes = new byte [filterArguments.size()][]; 117 for (int i = 0 ; i < filterArguments.size(); i++) { 118 byte [] columnPrefix = ParseFilter.removeQuotesFromByteArray(filterArguments.get(i)); 119 prefixes[i] = columnPrefix; 120 } 121 return new MultipleColumnPrefixFilter(prefixes); 122 } 123 124 /** 125 * @return The filter serialized using pb 126 */ 127 @Override 128 public byte [] toByteArray() { 129 FilterProtos.MultipleColumnPrefixFilter.Builder builder = 130 FilterProtos.MultipleColumnPrefixFilter.newBuilder(); 131 for (byte [] element : sortedPrefixes) { 132 if (element != null) builder.addSortedPrefixes(UnsafeByteOperations.unsafeWrap(element)); 133 } 134 return builder.build().toByteArray(); 135 } 136 137 /** 138 * @param pbBytes A pb serialized {@link MultipleColumnPrefixFilter} instance 139 * @return An instance of {@link MultipleColumnPrefixFilter} made from <code>bytes</code> 140 * @throws DeserializationException 141 * @see #toByteArray 142 */ 143 public static MultipleColumnPrefixFilter parseFrom(final byte [] pbBytes) 144 throws DeserializationException { 145 FilterProtos.MultipleColumnPrefixFilter proto; 146 try { 147 proto = FilterProtos.MultipleColumnPrefixFilter.parseFrom(pbBytes); 148 } catch (InvalidProtocolBufferException e) { 149 throw new DeserializationException(e); 150 } 151 int numPrefixes = proto.getSortedPrefixesCount(); 152 byte [][] prefixes = new byte[numPrefixes][]; 153 for (int i = 0; i < numPrefixes; ++i) { 154 prefixes[i] = proto.getSortedPrefixes(i).toByteArray(); 155 } 156 157 return new MultipleColumnPrefixFilter(prefixes); 158 } 159 160 /** 161 * @param o the other filter to compare with 162 * @return true if and only if the fields of the filter that are serialized 163 * are equal to the corresponding fields in other. Used for testing. 164 */ 165 @Override 166 boolean areSerializedFieldsEqual(Filter o) { 167 if (o == this) return true; 168 if (!(o instanceof MultipleColumnPrefixFilter)) return false; 169 170 MultipleColumnPrefixFilter other = (MultipleColumnPrefixFilter)o; 171 return this.sortedPrefixes.equals(other.sortedPrefixes); 172 } 173 174 @Override 175 public Cell getNextCellHint(Cell cell) { 176 return PrivateCellUtil.createFirstOnRowCol(cell, hint, 0, hint.length); 177 } 178 179 public TreeSet<byte []> createTreeSet() { 180 return new TreeSet<>(new Comparator<Object>() { 181 @Override 182 public int compare (Object o1, Object o2) { 183 if (o1 == null || o2 == null) 184 throw new IllegalArgumentException ("prefixes can't be null"); 185 186 byte [] b1 = (byte []) o1; 187 byte [] b2 = (byte []) o2; 188 return Bytes.compareTo (b1, 0, b1.length, b2, 0, b2.length); 189 } 190 }); 191 } 192 193 @Override 194 public String toString() { 195 return toString(MAX_LOG_PREFIXES); 196 } 197 198 protected String toString(int maxPrefixes) { 199 StringBuilder prefixes = new StringBuilder(); 200 201 int count = 0; 202 for (byte[] ba : this.sortedPrefixes) { 203 if (count >= maxPrefixes) { 204 break; 205 } 206 ++count; 207 prefixes.append(Bytes.toStringBinary(ba)); 208 if (count < this.sortedPrefixes.size() && count < maxPrefixes) { 209 prefixes.append(", "); 210 } 211 } 212 213 return String.format("%s (%d/%d): [%s]", this.getClass().getSimpleName(), 214 count, this.sortedPrefixes.size(), prefixes.toString()); 215 } 216 217 @Override 218 public boolean equals(Object obj) { 219 return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj); 220 } 221 222 @Override 223 public int hashCode() { 224 return Objects.hash(this.sortedPrefixes); 225 } 226}