001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.Objects;
024import java.util.TreeSet;
025import org.apache.hadoop.hbase.Cell;
026import org.apache.hadoop.hbase.PrivateCellUtil;
027import org.apache.hadoop.hbase.exceptions.DeserializationException;
028import org.apache.yetus.audience.InterfaceAudience;
029
030import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
031import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
032
033import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
034
035/**
036 * Filter that returns only cells whose timestamp (version) is in the specified list of timestamps
037 * (versions).
038 * <p>
039 * Note: Use of this filter overrides any time range/time stamp options specified using
040 * {@link org.apache.hadoop.hbase.client.Get#setTimeRange(long, long)},
041 * {@link org.apache.hadoop.hbase.client.Scan#setTimeRange(long, long)},
042 * {@link org.apache.hadoop.hbase.client.Get#setTimestamp(long)}, or
043 * {@link org.apache.hadoop.hbase.client.Scan#setTimestamp(long)}.
044 */
045@InterfaceAudience.Public
046public class TimestampsFilter extends FilterBase {
047
048  private final boolean canHint;
049  TreeSet<Long> timestamps;
050  private static final int MAX_LOG_TIMESTAMPS = 5;
051
052  // Used during scans to hint the scan to stop early
053  // once the timestamps fall below the minTimestamp.
054  long minTimestamp = Long.MAX_VALUE;
055
056  /**
057   * Constructor for filter that retains only the specified timestamps in the list. n
058   */
059  public TimestampsFilter(List<Long> timestamps) {
060    this(timestamps, false);
061  }
062
063  /**
064   * Constructor for filter that retains only those cells whose timestamp (version) is in the
065   * specified list of timestamps.
066   * @param timestamps list of timestamps that are wanted.
067   * @param canHint    should the filter provide a seek hint? This can skip past delete tombstones,
068   *                   so it should only be used when that is not an issue ( no deletes, or don't
069   *                   care if data becomes visible)
070   */
071  public TimestampsFilter(List<Long> timestamps, boolean canHint) {
072    for (Long timestamp : timestamps) {
073      Preconditions.checkArgument(timestamp >= 0, "must be positive %s", timestamp);
074    }
075    this.canHint = canHint;
076    this.timestamps = new TreeSet<>(timestamps);
077    init();
078  }
079
080  /** Returns the list of timestamps */
081  public List<Long> getTimestamps() {
082    List<Long> list = new ArrayList<>(timestamps.size());
083    list.addAll(timestamps);
084    return list;
085  }
086
087  private void init() {
088    if (this.timestamps.size() > 0) {
089      minTimestamp = this.timestamps.first();
090    }
091  }
092
093  /**
094   * Gets the minimum timestamp requested by filter.
095   * @return minimum timestamp requested by filter.
096   */
097  public long getMin() {
098    return minTimestamp;
099  }
100
101  @Override
102  public boolean filterRowKey(Cell cell) throws IOException {
103    // Impl in FilterBase might do unnecessary copy for Off heap backed Cells.
104    return false;
105  }
106
107  @Deprecated
108  @Override
109  public ReturnCode filterKeyValue(final Cell c) {
110    return filterCell(c);
111  }
112
113  @Override
114  public ReturnCode filterCell(final Cell c) {
115    if (this.timestamps.contains(c.getTimestamp())) {
116      return ReturnCode.INCLUDE;
117    } else if (c.getTimestamp() < minTimestamp) {
118      // The remaining versions of this column are guaranteed
119      // to be lesser than all of the other values.
120      return ReturnCode.NEXT_COL;
121    }
122    return canHint ? ReturnCode.SEEK_NEXT_USING_HINT : ReturnCode.SKIP;
123  }
124
125  /**
126   * Pick the next cell that the scanner should seek to. Since this can skip any number of cells any
127   * of which can be a delete this can resurect old data. The method will only be used if canHint
128   * was set to true while creating the filter.
129   * @throws IOException This will never happen.
130   */
131  @Override
132  public Cell getNextCellHint(Cell currentCell) throws IOException {
133    if (!canHint) {
134      return null;
135    }
136
137    Long nextTimestampObject = timestamps.lower(currentCell.getTimestamp());
138
139    if (nextTimestampObject == null) {
140      // This should only happen if the current column's
141      // timestamp is below the last one in the list.
142      //
143      // It should never happen as the filterCell should return NEXT_COL
144      // but it's always better to be extra safe and protect against future
145      // behavioral changes.
146
147      return PrivateCellUtil.createLastOnRowCol(currentCell);
148    }
149
150    // Since we know the nextTimestampObject isn't null here there must still be
151    // timestamps that can be included. Cast the Long to a long and return the
152    // a cell with the current row/cf/col and the next found timestamp.
153    long nextTimestamp = nextTimestampObject;
154    return PrivateCellUtil.createFirstOnRowColTS(currentCell, nextTimestamp);
155  }
156
157  public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) {
158    ArrayList<Long> timestamps = new ArrayList<>(filterArguments.size());
159    for (int i = 0; i < filterArguments.size(); i++) {
160      long timestamp = ParseFilter.convertByteArrayToLong(filterArguments.get(i));
161      timestamps.add(timestamp);
162    }
163    return new TimestampsFilter(timestamps);
164  }
165
166  /** Returns The filter serialized using pb */
167  @Override
168  public byte[] toByteArray() {
169    FilterProtos.TimestampsFilter.Builder builder = FilterProtos.TimestampsFilter.newBuilder();
170    builder.addAllTimestamps(this.timestamps);
171    builder.setCanHint(canHint);
172    return builder.build().toByteArray();
173  }
174
175  /**
176   * Parse a serialized representation of {@link TimestampsFilter}
177   * @param pbBytes A pb serialized {@link TimestampsFilter} instance
178   * @return An instance of {@link TimestampsFilter} made from <code>bytes</code>
179   * @throws DeserializationException if an error occurred
180   * @see #toByteArray
181   */
182  public static TimestampsFilter parseFrom(final byte[] pbBytes) throws DeserializationException {
183    FilterProtos.TimestampsFilter proto;
184    try {
185      proto = FilterProtos.TimestampsFilter.parseFrom(pbBytes);
186    } catch (InvalidProtocolBufferException e) {
187      throw new DeserializationException(e);
188    }
189    return new TimestampsFilter(proto.getTimestampsList(),
190      proto.hasCanHint() && proto.getCanHint());
191  }
192
193  /**
194   * Returns true if and only if the fields of the filter that are serialized are equal to the
195   * corresponding fields in other. Used for testing.
196   */
197  @Override
198  boolean areSerializedFieldsEqual(Filter o) {
199    if (o == this) {
200      return true;
201    }
202    if (!(o instanceof TimestampsFilter)) {
203      return false;
204    }
205    TimestampsFilter other = (TimestampsFilter) o;
206    return this.getTimestamps().equals(other.getTimestamps());
207  }
208
209  @Override
210  public String toString() {
211    return toString(MAX_LOG_TIMESTAMPS);
212  }
213
214  protected String toString(int maxTimestamps) {
215    StringBuilder tsList = new StringBuilder();
216
217    int count = 0;
218    for (Long ts : this.timestamps) {
219      if (count >= maxTimestamps) {
220        break;
221      }
222      ++count;
223      tsList.append(ts.toString());
224      if (count < this.timestamps.size() && count < maxTimestamps) {
225        tsList.append(", ");
226      }
227    }
228
229    return String.format("%s (%d/%d): [%s] canHint: [%b]", this.getClass().getSimpleName(), count,
230      this.timestamps.size(), tsList.toString(), canHint);
231  }
232
233  @Override
234  public boolean equals(Object obj) {
235    return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
236  }
237
238  @Override
239  public int hashCode() {
240    return Objects.hash(getTimestamps());
241  }
242}