001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.filter; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.List; 023import java.util.Objects; 024import java.util.TreeSet; 025import org.apache.hadoop.hbase.Cell; 026import org.apache.hadoop.hbase.PrivateCellUtil; 027import org.apache.hadoop.hbase.exceptions.DeserializationException; 028import org.apache.yetus.audience.InterfaceAudience; 029 030import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; 031import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; 032 033import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos; 034 035/** 036 * Filter that returns only cells whose timestamp (version) is in the specified list of timestamps 037 * (versions). 038 * <p> 039 * Note: Use of this filter overrides any time range/time stamp options specified using 040 * {@link org.apache.hadoop.hbase.client.Get#setTimeRange(long, long)}, 041 * {@link org.apache.hadoop.hbase.client.Scan#setTimeRange(long, long)}, 042 * {@link org.apache.hadoop.hbase.client.Get#setTimestamp(long)}, or 043 * {@link org.apache.hadoop.hbase.client.Scan#setTimestamp(long)}. 044 */ 045@InterfaceAudience.Public 046public class TimestampsFilter extends FilterBase { 047 048 private final boolean canHint; 049 TreeSet<Long> timestamps; 050 private static final int MAX_LOG_TIMESTAMPS = 5; 051 052 // Used during scans to hint the scan to stop early 053 // once the timestamps fall below the minTimestamp. 054 long minTimestamp = Long.MAX_VALUE; 055 056 /** 057 * Constructor for filter that retains only the specified timestamps in the list. n 058 */ 059 public TimestampsFilter(List<Long> timestamps) { 060 this(timestamps, false); 061 } 062 063 /** 064 * Constructor for filter that retains only those cells whose timestamp (version) is in the 065 * specified list of timestamps. 066 * @param timestamps list of timestamps that are wanted. 067 * @param canHint should the filter provide a seek hint? This can skip past delete tombstones, 068 * so it should only be used when that is not an issue ( no deletes, or don't 069 * care if data becomes visible) 070 */ 071 public TimestampsFilter(List<Long> timestamps, boolean canHint) { 072 for (Long timestamp : timestamps) { 073 Preconditions.checkArgument(timestamp >= 0, "must be positive %s", timestamp); 074 } 075 this.canHint = canHint; 076 this.timestamps = new TreeSet<>(timestamps); 077 init(); 078 } 079 080 /** Returns the list of timestamps */ 081 public List<Long> getTimestamps() { 082 List<Long> list = new ArrayList<>(timestamps.size()); 083 list.addAll(timestamps); 084 return list; 085 } 086 087 private void init() { 088 if (this.timestamps.size() > 0) { 089 minTimestamp = this.timestamps.first(); 090 } 091 } 092 093 /** 094 * Gets the minimum timestamp requested by filter. 095 * @return minimum timestamp requested by filter. 096 */ 097 public long getMin() { 098 return minTimestamp; 099 } 100 101 @Override 102 public boolean filterRowKey(Cell cell) throws IOException { 103 // Impl in FilterBase might do unnecessary copy for Off heap backed Cells. 104 return false; 105 } 106 107 @Deprecated 108 @Override 109 public ReturnCode filterKeyValue(final Cell c) { 110 return filterCell(c); 111 } 112 113 @Override 114 public ReturnCode filterCell(final Cell c) { 115 if (this.timestamps.contains(c.getTimestamp())) { 116 return ReturnCode.INCLUDE; 117 } else if (c.getTimestamp() < minTimestamp) { 118 // The remaining versions of this column are guaranteed 119 // to be lesser than all of the other values. 120 return ReturnCode.NEXT_COL; 121 } 122 return canHint ? ReturnCode.SEEK_NEXT_USING_HINT : ReturnCode.SKIP; 123 } 124 125 /** 126 * Pick the next cell that the scanner should seek to. Since this can skip any number of cells any 127 * of which can be a delete this can resurect old data. The method will only be used if canHint 128 * was set to true while creating the filter. 129 * @throws IOException This will never happen. 130 */ 131 @Override 132 public Cell getNextCellHint(Cell currentCell) throws IOException { 133 if (!canHint) { 134 return null; 135 } 136 137 Long nextTimestampObject = timestamps.lower(currentCell.getTimestamp()); 138 139 if (nextTimestampObject == null) { 140 // This should only happen if the current column's 141 // timestamp is below the last one in the list. 142 // 143 // It should never happen as the filterCell should return NEXT_COL 144 // but it's always better to be extra safe and protect against future 145 // behavioral changes. 146 147 return PrivateCellUtil.createLastOnRowCol(currentCell); 148 } 149 150 // Since we know the nextTimestampObject isn't null here there must still be 151 // timestamps that can be included. Cast the Long to a long and return the 152 // a cell with the current row/cf/col and the next found timestamp. 153 long nextTimestamp = nextTimestampObject; 154 return PrivateCellUtil.createFirstOnRowColTS(currentCell, nextTimestamp); 155 } 156 157 public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) { 158 ArrayList<Long> timestamps = new ArrayList<>(filterArguments.size()); 159 for (int i = 0; i < filterArguments.size(); i++) { 160 long timestamp = ParseFilter.convertByteArrayToLong(filterArguments.get(i)); 161 timestamps.add(timestamp); 162 } 163 return new TimestampsFilter(timestamps); 164 } 165 166 /** Returns The filter serialized using pb */ 167 @Override 168 public byte[] toByteArray() { 169 FilterProtos.TimestampsFilter.Builder builder = FilterProtos.TimestampsFilter.newBuilder(); 170 builder.addAllTimestamps(this.timestamps); 171 builder.setCanHint(canHint); 172 return builder.build().toByteArray(); 173 } 174 175 /** 176 * Parse a serialized representation of {@link TimestampsFilter} 177 * @param pbBytes A pb serialized {@link TimestampsFilter} instance 178 * @return An instance of {@link TimestampsFilter} made from <code>bytes</code> 179 * @throws DeserializationException if an error occurred 180 * @see #toByteArray 181 */ 182 public static TimestampsFilter parseFrom(final byte[] pbBytes) throws DeserializationException { 183 FilterProtos.TimestampsFilter proto; 184 try { 185 proto = FilterProtos.TimestampsFilter.parseFrom(pbBytes); 186 } catch (InvalidProtocolBufferException e) { 187 throw new DeserializationException(e); 188 } 189 return new TimestampsFilter(proto.getTimestampsList(), 190 proto.hasCanHint() && proto.getCanHint()); 191 } 192 193 /** 194 * Returns true if and only if the fields of the filter that are serialized are equal to the 195 * corresponding fields in other. Used for testing. 196 */ 197 @Override 198 boolean areSerializedFieldsEqual(Filter o) { 199 if (o == this) { 200 return true; 201 } 202 if (!(o instanceof TimestampsFilter)) { 203 return false; 204 } 205 TimestampsFilter other = (TimestampsFilter) o; 206 return this.getTimestamps().equals(other.getTimestamps()); 207 } 208 209 @Override 210 public String toString() { 211 return toString(MAX_LOG_TIMESTAMPS); 212 } 213 214 protected String toString(int maxTimestamps) { 215 StringBuilder tsList = new StringBuilder(); 216 217 int count = 0; 218 for (Long ts : this.timestamps) { 219 if (count >= maxTimestamps) { 220 break; 221 } 222 ++count; 223 tsList.append(ts.toString()); 224 if (count < this.timestamps.size() && count < maxTimestamps) { 225 tsList.append(", "); 226 } 227 } 228 229 return String.format("%s (%d/%d): [%s] canHint: [%b]", this.getClass().getSimpleName(), count, 230 this.timestamps.size(), tsList.toString(), canHint); 231 } 232 233 @Override 234 public boolean equals(Object obj) { 235 return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj); 236 } 237 238 @Override 239 public int hashCode() { 240 return Objects.hash(getTimestamps()); 241 } 242}