001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.filter; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.List; 023import java.util.Objects; 024import java.util.TreeSet; 025import org.apache.hadoop.hbase.Cell; 026import org.apache.hadoop.hbase.PrivateCellUtil; 027import org.apache.hadoop.hbase.exceptions.DeserializationException; 028import org.apache.yetus.audience.InterfaceAudience; 029 030import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; 031import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; 032 033import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos; 034 035/** 036 * Filter that returns only cells whose timestamp (version) is in the specified list of timestamps 037 * (versions). 038 * <p> 039 * Note: Use of this filter overrides any time range/time stamp options specified using 040 * {@link org.apache.hadoop.hbase.client.Get#setTimeRange(long, long)}, 041 * {@link org.apache.hadoop.hbase.client.Scan#setTimeRange(long, long)}, 042 * {@link org.apache.hadoop.hbase.client.Get#setTimestamp(long)}, or 043 * {@link org.apache.hadoop.hbase.client.Scan#setTimestamp(long)}. 044 */ 045@InterfaceAudience.Public 046public class TimestampsFilter extends FilterBase implements HintingFilter { 047 048 private final boolean canHint; 049 TreeSet<Long> timestamps; 050 private static final int MAX_LOG_TIMESTAMPS = 5; 051 052 // Used during scans to hint the scan to stop early 053 // once the timestamps fall below the minTimestamp. 054 long minTimestamp = Long.MAX_VALUE; 055 056 /** 057 * Constructor for filter that retains only the specified timestamps in the list. 058 */ 059 public TimestampsFilter(List<Long> timestamps) { 060 this(timestamps, false); 061 } 062 063 /** 064 * Constructor for filter that retains only those cells whose timestamp (version) is in the 065 * specified list of timestamps. 066 * @param timestamps list of timestamps that are wanted. 067 * @param canHint should the filter provide a seek hint? This can skip past delete tombstones, 068 * so it should only be used when that is not an issue ( no deletes, or don't 069 * care if data becomes visible) 070 */ 071 public TimestampsFilter(List<Long> timestamps, boolean canHint) { 072 for (Long timestamp : timestamps) { 073 Preconditions.checkArgument(timestamp >= 0, "must be positive %s", timestamp); 074 } 075 this.canHint = canHint; 076 this.timestamps = new TreeSet<>(timestamps); 077 init(); 078 } 079 080 /** Returns the list of timestamps */ 081 public List<Long> getTimestamps() { 082 List<Long> list = new ArrayList<>(timestamps.size()); 083 list.addAll(timestamps); 084 return list; 085 } 086 087 private void init() { 088 if (this.timestamps.size() > 0) { 089 minTimestamp = this.timestamps.first(); 090 } 091 } 092 093 /** 094 * Gets the minimum timestamp requested by filter. 095 * @return minimum timestamp requested by filter. 096 */ 097 public long getMin() { 098 return minTimestamp; 099 } 100 101 @Override 102 public boolean filterRowKey(Cell cell) throws IOException { 103 // Impl in FilterBase might do unnecessary copy for Off heap backed Cells. 104 return false; 105 } 106 107 @Override 108 public ReturnCode filterCell(final Cell c) { 109 if (this.timestamps.contains(c.getTimestamp())) { 110 return ReturnCode.INCLUDE; 111 } else if (c.getTimestamp() < minTimestamp) { 112 // The remaining versions of this column are guaranteed 113 // to be lesser than all of the other values. 114 return ReturnCode.NEXT_COL; 115 } 116 return canHint ? ReturnCode.SEEK_NEXT_USING_HINT : ReturnCode.SKIP; 117 } 118 119 /** 120 * Pick the next cell that the scanner should seek to. Since this can skip any number of cells any 121 * of which can be a delete this can resurect old data. The method will only be used if canHint 122 * was set to true while creating the filter. 123 * @throws IOException This will never happen. 124 */ 125 @Override 126 public Cell getNextCellHint(Cell currentCell) throws IOException { 127 if (!canHint) { 128 return null; 129 } 130 131 Long nextTimestampObject = timestamps.lower(currentCell.getTimestamp()); 132 133 if (nextTimestampObject == null) { 134 // This should only happen if the current column's 135 // timestamp is below the last one in the list. 136 // 137 // It should never happen as the filterCell should return NEXT_COL 138 // but it's always better to be extra safe and protect against future 139 // behavioral changes. 140 141 return PrivateCellUtil.createLastOnRowCol(currentCell); 142 } 143 144 // Since we know the nextTimestampObject isn't null here there must still be 145 // timestamps that can be included. Cast the Long to a long and return the 146 // a cell with the current row/cf/col and the next found timestamp. 147 long nextTimestamp = nextTimestampObject; 148 return PrivateCellUtil.createFirstOnRowColTS(currentCell, nextTimestamp); 149 } 150 151 public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) { 152 ArrayList<Long> timestamps = new ArrayList<>(filterArguments.size()); 153 for (int i = 0; i < filterArguments.size(); i++) { 154 long timestamp = ParseFilter.convertByteArrayToLong(filterArguments.get(i)); 155 timestamps.add(timestamp); 156 } 157 return new TimestampsFilter(timestamps); 158 } 159 160 /** Returns The filter serialized using pb */ 161 @Override 162 public byte[] toByteArray() { 163 FilterProtos.TimestampsFilter.Builder builder = FilterProtos.TimestampsFilter.newBuilder(); 164 builder.addAllTimestamps(this.timestamps); 165 builder.setCanHint(canHint); 166 return builder.build().toByteArray(); 167 } 168 169 /** 170 * Parse a serialized representation of {@link TimestampsFilter} 171 * @param pbBytes A pb serialized {@link TimestampsFilter} instance 172 * @return An instance of {@link TimestampsFilter} made from <code>bytes</code> 173 * @throws DeserializationException if an error occurred 174 * @see #toByteArray 175 */ 176 public static TimestampsFilter parseFrom(final byte[] pbBytes) throws DeserializationException { 177 FilterProtos.TimestampsFilter proto; 178 try { 179 proto = FilterProtos.TimestampsFilter.parseFrom(pbBytes); 180 } catch (InvalidProtocolBufferException e) { 181 throw new DeserializationException(e); 182 } 183 return new TimestampsFilter(proto.getTimestampsList(), 184 proto.hasCanHint() && proto.getCanHint()); 185 } 186 187 /** 188 * Returns true if and only if the fields of the filter that are serialized are equal to the 189 * corresponding fields in other. Used for testing. 190 */ 191 @Override 192 boolean areSerializedFieldsEqual(Filter o) { 193 if (o == this) { 194 return true; 195 } 196 if (!(o instanceof TimestampsFilter)) { 197 return false; 198 } 199 TimestampsFilter other = (TimestampsFilter) o; 200 return this.getTimestamps().equals(other.getTimestamps()); 201 } 202 203 @Override 204 public String toString() { 205 return toString(MAX_LOG_TIMESTAMPS); 206 } 207 208 protected String toString(int maxTimestamps) { 209 StringBuilder tsList = new StringBuilder(); 210 211 int count = 0; 212 for (Long ts : this.timestamps) { 213 if (count >= maxTimestamps) { 214 break; 215 } 216 ++count; 217 tsList.append(ts.toString()); 218 if (count < this.timestamps.size() && count < maxTimestamps) { 219 tsList.append(", "); 220 } 221 } 222 223 return String.format("%s (%d/%d): [%s] canHint: [%b]", this.getClass().getSimpleName(), count, 224 this.timestamps.size(), tsList.toString(), canHint); 225 } 226 227 @Override 228 public boolean equals(Object obj) { 229 return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj); 230 } 231 232 @Override 233 public int hashCode() { 234 return Objects.hash(getTimestamps()); 235 } 236}