001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.Objects;
024import java.util.TreeSet;
025
026import org.apache.hadoop.hbase.Cell;
027import org.apache.hadoop.hbase.PrivateCellUtil;
028import org.apache.yetus.audience.InterfaceAudience;
029import org.apache.hadoop.hbase.exceptions.DeserializationException;
030import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
031
032import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
033import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
034
035/**
036 * Filter that returns only cells whose timestamp (version) is
037 * in the specified list of timestamps (versions).
038 * <p>
039 * Note: Use of this filter overrides any time range/time stamp
040 * options specified using {@link org.apache.hadoop.hbase.client.Get#setTimeRange(long, long)},
041 * {@link org.apache.hadoop.hbase.client.Scan#setTimeRange(long, long)},
042 * {@link org.apache.hadoop.hbase.client.Get#setTimestamp(long)},
043 * or {@link org.apache.hadoop.hbase.client.Scan#setTimestamp(long)}.
044 */
045@InterfaceAudience.Public
046public class TimestampsFilter extends FilterBase {
047
048  private final boolean canHint;
049  TreeSet<Long> timestamps;
050  private static final int MAX_LOG_TIMESTAMPS = 5;
051
052  // Used during scans to hint the scan to stop early
053  // once the timestamps fall below the minTimestamp.
054  long minTimestamp = Long.MAX_VALUE;
055
056  /**
057   * Constructor for filter that retains only the specified timestamps in the list.
058   * @param timestamps
059   */
060  public TimestampsFilter(List<Long> timestamps) {
061    this(timestamps, false);
062  }
063
064  /**
065   * Constructor for filter that retains only those
066   * cells whose timestamp (version) is in the specified
067   * list of timestamps.
068   *
069   * @param timestamps list of timestamps that are wanted.
070   * @param canHint should the filter provide a seek hint? This can skip
071   *                past delete tombstones, so it should only be used when that
072   *                is not an issue ( no deletes, or don't care if data
073   *                becomes visible)
074   */
075  public TimestampsFilter(List<Long> timestamps, boolean canHint) {
076    for (Long timestamp : timestamps) {
077      Preconditions.checkArgument(timestamp >= 0, "must be positive %s", timestamp);
078    }
079    this.canHint = canHint;
080    this.timestamps = new TreeSet<>(timestamps);
081    init();
082  }
083
084  /**
085   * @return the list of timestamps
086   */
087  public List<Long> getTimestamps() {
088    List<Long> list = new ArrayList<>(timestamps.size());
089    list.addAll(timestamps);
090    return list;
091  }
092
093  private void init() {
094    if (this.timestamps.size() > 0) {
095      minTimestamp = this.timestamps.first();
096    }
097  }
098
099  /**
100   * Gets the minimum timestamp requested by filter.
101   * @return  minimum timestamp requested by filter.
102   */
103  public long getMin() {
104    return minTimestamp;
105  }
106
107  @Override
108  public boolean filterRowKey(Cell cell) throws IOException {
109    // Impl in FilterBase might do unnecessary copy for Off heap backed Cells.
110    return false;
111  }
112
113  @Deprecated
114  @Override
115  public ReturnCode filterKeyValue(final Cell c) {
116    return filterCell(c);
117  }
118
119  @Override
120  public ReturnCode filterCell(final Cell c) {
121    if (this.timestamps.contains(c.getTimestamp())) {
122      return ReturnCode.INCLUDE;
123    } else if (c.getTimestamp() < minTimestamp) {
124      // The remaining versions of this column are guaranteed
125      // to be lesser than all of the other values.
126      return ReturnCode.NEXT_COL;
127    }
128    return canHint ? ReturnCode.SEEK_NEXT_USING_HINT : ReturnCode.SKIP;
129  }
130
131
132  /**
133   * Pick the next cell that the scanner should seek to. Since this can skip any number of cells
134   * any of which can be a delete this can resurect old data.
135   *
136   * The method will only be used if canHint was set to true while creating the filter.
137   *
138   * @throws IOException This will never happen.
139   */
140  @Override
141  public Cell getNextCellHint(Cell currentCell) throws IOException {
142    if (!canHint) {
143      return null;
144    }
145
146    Long nextTimestampObject = timestamps.lower(currentCell.getTimestamp());
147
148    if (nextTimestampObject == null) {
149      // This should only happen if the current column's
150      // timestamp is below the last one in the list.
151      //
152      // It should never happen as the filterCell should return NEXT_COL
153      // but it's always better to be extra safe and protect against future
154      // behavioral changes.
155
156      return PrivateCellUtil.createLastOnRowCol(currentCell);
157    }
158
159    // Since we know the nextTimestampObject isn't null here there must still be
160    // timestamps that can be included. Cast the Long to a long and return the
161    // a cell with the current row/cf/col and the next found timestamp.
162    long nextTimestamp = nextTimestampObject;
163    return PrivateCellUtil.createFirstOnRowColTS(currentCell, nextTimestamp);
164  }
165
166  public static Filter createFilterFromArguments(ArrayList<byte []> filterArguments) {
167    ArrayList<Long> timestamps = new ArrayList<>(filterArguments.size());
168    for (int i = 0; i<filterArguments.size(); i++) {
169      long timestamp = ParseFilter.convertByteArrayToLong(filterArguments.get(i));
170      timestamps.add(timestamp);
171    }
172    return new TimestampsFilter(timestamps);
173  }
174
175  /**
176   * @return The filter serialized using pb
177   */
178  @Override
179  public byte[] toByteArray() {
180    FilterProtos.TimestampsFilter.Builder builder =
181        FilterProtos.TimestampsFilter.newBuilder();
182    builder.addAllTimestamps(this.timestamps);
183    builder.setCanHint(canHint);
184    return builder.build().toByteArray();
185  }
186
187  /**
188   * @param pbBytes A pb serialized {@link TimestampsFilter} instance
189   *
190   * @return An instance of {@link TimestampsFilter} made from <code>bytes</code>
191   * @see #toByteArray
192   */
193  public static TimestampsFilter parseFrom(final byte[] pbBytes)
194      throws DeserializationException {
195    FilterProtos.TimestampsFilter proto;
196    try {
197      proto = FilterProtos.TimestampsFilter.parseFrom(pbBytes);
198    } catch (InvalidProtocolBufferException e) {
199      throw new DeserializationException(e);
200    }
201    return new TimestampsFilter(proto.getTimestampsList(),
202        proto.hasCanHint() && proto.getCanHint());
203  }
204
205  /**
206   * @param o the other filter to compare with
207   * @return true if and only if the fields of the filter that are serialized
208   * are equal to the corresponding fields in other.  Used for testing.
209   */
210  @Override
211  boolean areSerializedFieldsEqual(Filter o) {
212    if (o == this) return true;
213    if (!(o instanceof TimestampsFilter)) return false;
214
215    TimestampsFilter other = (TimestampsFilter)o;
216    return this.getTimestamps().equals(other.getTimestamps());
217  }
218
219  @Override
220  public String toString() {
221    return toString(MAX_LOG_TIMESTAMPS);
222  }
223
224  protected String toString(int maxTimestamps) {
225    StringBuilder tsList = new StringBuilder();
226
227    int count = 0;
228    for (Long ts : this.timestamps) {
229      if (count >= maxTimestamps) {
230        break;
231      }
232      ++count;
233      tsList.append(ts.toString());
234      if (count < this.timestamps.size() && count < maxTimestamps) {
235        tsList.append(", ");
236      }
237    }
238
239    return String.format("%s (%d/%d): [%s] canHint: [%b]", this.getClass().getSimpleName(),
240        count, this.timestamps.size(), tsList.toString(), canHint);
241  }
242
243  @Override
244  public boolean equals(Object obj) {
245    return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
246  }
247
248  @Override
249  public int hashCode() {
250    return Objects.hash(getTimestamps());
251  }
252}