001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.Objects;
024import java.util.TreeSet;
025
026import org.apache.hadoop.hbase.Cell;
027import org.apache.hadoop.hbase.PrivateCellUtil;
028import org.apache.yetus.audience.InterfaceAudience;
029import org.apache.hadoop.hbase.exceptions.DeserializationException;
030import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
031
032import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
033import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
034
035/**
036 * Filter that returns only cells whose timestamp (version) is
037 * in the specified list of timestamps (versions).
038 * <p>
039 * Note: Use of this filter overrides any time range/time stamp
040 * options specified using {@link org.apache.hadoop.hbase.client.Get#setTimeRange(long, long)},
041 * {@link org.apache.hadoop.hbase.client.Scan#setTimeRange(long, long)},
042 * {@link org.apache.hadoop.hbase.client.Get#setTimestamp(long)},
043 * or {@link org.apache.hadoop.hbase.client.Scan#setTimestamp(long)}.
044 */
045@InterfaceAudience.Public
046public class TimestampsFilter extends FilterBase {
047
048  private final boolean canHint;
049  TreeSet<Long> timestamps;
050  private static final int MAX_LOG_TIMESTAMPS = 5;
051
052  // Used during scans to hint the scan to stop early
053  // once the timestamps fall below the minTimestamp.
054  long minTimestamp = Long.MAX_VALUE;
055
056  /**
057   * Constructor for filter that retains only the specified timestamps in the list.
058   * @param timestamps
059   */
060  public TimestampsFilter(List<Long> timestamps) {
061    this(timestamps, false);
062  }
063
064  /**
065   * Constructor for filter that retains only those
066   * cells whose timestamp (version) is in the specified
067   * list of timestamps.
068   *
069   * @param timestamps list of timestamps that are wanted.
070   * @param canHint should the filter provide a seek hint? This can skip
071   *                past delete tombstones, so it should only be used when that
072   *                is not an issue ( no deletes, or don't care if data
073   *                becomes visible)
074   */
075  public TimestampsFilter(List<Long> timestamps, boolean canHint) {
076    for (Long timestamp : timestamps) {
077      Preconditions.checkArgument(timestamp >= 0, "must be positive %s", timestamp);
078    }
079    this.canHint = canHint;
080    this.timestamps = new TreeSet<>(timestamps);
081    init();
082  }
083
084  /**
085   * @return the list of timestamps
086   */
087  public List<Long> getTimestamps() {
088    List<Long> list = new ArrayList<>(timestamps.size());
089    list.addAll(timestamps);
090    return list;
091  }
092
093  private void init() {
094    if (this.timestamps.size() > 0) {
095      minTimestamp = this.timestamps.first();
096    }
097  }
098
099  /**
100   * Gets the minimum timestamp requested by filter.
101   * @return  minimum timestamp requested by filter.
102   */
103  public long getMin() {
104    return minTimestamp;
105  }
106
107  @Override
108  public boolean filterRowKey(Cell cell) throws IOException {
109    // Impl in FilterBase might do unnecessary copy for Off heap backed Cells.
110    return false;
111  }
112
113  @Override
114  public ReturnCode filterCell(final Cell c) {
115    if (this.timestamps.contains(c.getTimestamp())) {
116      return ReturnCode.INCLUDE;
117    } else if (c.getTimestamp() < minTimestamp) {
118      // The remaining versions of this column are guaranteed
119      // to be lesser than all of the other values.
120      return ReturnCode.NEXT_COL;
121    }
122    return canHint ? ReturnCode.SEEK_NEXT_USING_HINT : ReturnCode.SKIP;
123  }
124
125
126  /**
127   * Pick the next cell that the scanner should seek to. Since this can skip any number of cells
128   * any of which can be a delete this can resurect old data.
129   *
130   * The method will only be used if canHint was set to true while creating the filter.
131   *
132   * @throws IOException This will never happen.
133   */
134  @Override
135  public Cell getNextCellHint(Cell currentCell) throws IOException {
136    if (!canHint) {
137      return null;
138    }
139
140    Long nextTimestampObject = timestamps.lower(currentCell.getTimestamp());
141
142    if (nextTimestampObject == null) {
143      // This should only happen if the current column's
144      // timestamp is below the last one in the list.
145      //
146      // It should never happen as the filterCell should return NEXT_COL
147      // but it's always better to be extra safe and protect against future
148      // behavioral changes.
149
150      return PrivateCellUtil.createLastOnRowCol(currentCell);
151    }
152
153    // Since we know the nextTimestampObject isn't null here there must still be
154    // timestamps that can be included. Cast the Long to a long and return the
155    // a cell with the current row/cf/col and the next found timestamp.
156    long nextTimestamp = nextTimestampObject;
157    return PrivateCellUtil.createFirstOnRowColTS(currentCell, nextTimestamp);
158  }
159
160  public static Filter createFilterFromArguments(ArrayList<byte []> filterArguments) {
161    ArrayList<Long> timestamps = new ArrayList<>(filterArguments.size());
162    for (int i = 0; i<filterArguments.size(); i++) {
163      long timestamp = ParseFilter.convertByteArrayToLong(filterArguments.get(i));
164      timestamps.add(timestamp);
165    }
166    return new TimestampsFilter(timestamps);
167  }
168
169  /**
170   * @return The filter serialized using pb
171   */
172  @Override
173  public byte[] toByteArray() {
174    FilterProtos.TimestampsFilter.Builder builder =
175        FilterProtos.TimestampsFilter.newBuilder();
176    builder.addAllTimestamps(this.timestamps);
177    builder.setCanHint(canHint);
178    return builder.build().toByteArray();
179  }
180
181  /**
182   * @param pbBytes A pb serialized {@link TimestampsFilter} instance
183   *
184   * @return An instance of {@link TimestampsFilter} made from <code>bytes</code>
185   * @see #toByteArray
186   */
187  public static TimestampsFilter parseFrom(final byte[] pbBytes)
188      throws DeserializationException {
189    FilterProtos.TimestampsFilter proto;
190    try {
191      proto = FilterProtos.TimestampsFilter.parseFrom(pbBytes);
192    } catch (InvalidProtocolBufferException e) {
193      throw new DeserializationException(e);
194    }
195    return new TimestampsFilter(proto.getTimestampsList(),
196        proto.hasCanHint() && proto.getCanHint());
197  }
198
199  /**
200   * @param o the other filter to compare with
201   * @return true if and only if the fields of the filter that are serialized
202   * are equal to the corresponding fields in other.  Used for testing.
203   */
204  @Override
205  boolean areSerializedFieldsEqual(Filter o) {
206    if (o == this) return true;
207    if (!(o instanceof TimestampsFilter)) return false;
208
209    TimestampsFilter other = (TimestampsFilter)o;
210    return this.getTimestamps().equals(other.getTimestamps());
211  }
212
213  @Override
214  public String toString() {
215    return toString(MAX_LOG_TIMESTAMPS);
216  }
217
218  protected String toString(int maxTimestamps) {
219    StringBuilder tsList = new StringBuilder();
220
221    int count = 0;
222    for (Long ts : this.timestamps) {
223      if (count >= maxTimestamps) {
224        break;
225      }
226      ++count;
227      tsList.append(ts.toString());
228      if (count < this.timestamps.size() && count < maxTimestamps) {
229        tsList.append(", ");
230      }
231    }
232
233    return String.format("%s (%d/%d): [%s] canHint: [%b]", this.getClass().getSimpleName(),
234        count, this.timestamps.size(), tsList.toString(), canHint);
235  }
236
237  @Override
238  public boolean equals(Object obj) {
239    return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
240  }
241
242  @Override
243  public int hashCode() {
244    return Objects.hash(getTimestamps());
245  }
246}