View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.filter;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.List;
23  import java.util.TreeSet;
24  
25  import org.apache.hadoop.hbase.Cell;
26  import org.apache.hadoop.hbase.CellUtil;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.classification.InterfaceStability;
29  import org.apache.hadoop.hbase.exceptions.DeserializationException;
30  import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
31  
32  import com.google.common.base.Preconditions;
33  import com.google.protobuf.InvalidProtocolBufferException;
34  
35  /**
36   * Filter that returns only cells whose timestamp (version) is
37   * in the specified list of timestamps (versions).
38   * <p>
39   * Note: Use of this filter overrides any time range/time stamp
40   * options specified using {@link org.apache.hadoop.hbase.client.Get#setTimeRange(long, long)},
41   * {@link org.apache.hadoop.hbase.client.Scan#setTimeRange(long, long)}, {@link org.apache.hadoop.hbase.client.Get#setTimeStamp(long)},
42   * or {@link org.apache.hadoop.hbase.client.Scan#setTimeStamp(long)}.
43   */
44  @InterfaceAudience.Public
45  @InterfaceStability.Stable
46  public class TimestampsFilter extends FilterBase {
47  
48    private final boolean canHint;
49    TreeSet<Long> timestamps;
50    private static final int MAX_LOG_TIMESTAMPS = 5;
51  
52    // Used during scans to hint the scan to stop early
53    // once the timestamps fall below the minTimeStamp.
54    long minTimeStamp = Long.MAX_VALUE;
55  
56    /**
57     * Constructor for filter that retains only the specified timestamps in the list.
58     * @param timestamps
59     */
60    public TimestampsFilter(List<Long> timestamps) {
61      this(timestamps, false);
62    }
63  
64    /**
65     * Constructor for filter that retains only those
66     * cells whose timestamp (version) is in the specified
67     * list of timestamps.
68     *
69     * @param timestamps list of timestamps that are wanted.
70     * @param canHint should the filter provide a seek hint? This can skip
71     *                past delete tombstones, so it should only be used when that
72     *                is not an issue ( no deletes, or don't care if data
73     *                becomes visible)
74     */
75    public TimestampsFilter(List<Long> timestamps, boolean canHint) {
76      for (Long timestamp : timestamps) {
77        Preconditions.checkArgument(timestamp >= 0, "must be positive %s", timestamp);
78      }
79      this.canHint = canHint;
80      this.timestamps = new TreeSet<Long>(timestamps);
81      init();
82    }
83  
84    /**
85     * @return the list of timestamps
86     */
87    public List<Long> getTimestamps() {
88      List<Long> list = new ArrayList<Long>(timestamps.size());
89      list.addAll(timestamps);
90      return list;
91    }
92  
93    private void init() {
94      if (this.timestamps.size() > 0) {
95        minTimeStamp = this.timestamps.first();
96      }
97    }
98  
99    /**
100    * Gets the minimum timestamp requested by filter.
101    * @return  minimum timestamp requested by filter.
102    */
103   public long getMin() {
104     return minTimeStamp;
105   }
106 
107   @Override
108   public boolean filterRowKey(Cell cell) throws IOException {
109     // Impl in FilterBase might do unnecessary copy for Off heap backed Cells.
110     return false;
111   }
112 
113   @Override
114   public ReturnCode filterKeyValue(Cell v) {
115     if (this.timestamps.contains(v.getTimestamp())) {
116       return ReturnCode.INCLUDE;
117     } else if (v.getTimestamp() < minTimeStamp) {
118       // The remaining versions of this column are guaranteed
119       // to be lesser than all of the other values.
120       return ReturnCode.NEXT_COL;
121     }
122     return canHint ? ReturnCode.SEEK_NEXT_USING_HINT : ReturnCode.SKIP;
123   }
124 
125 
126   /**
127    * Pick the next cell that the scanner should seek to. Since this can skip any number of cells
128    * any of which can be a delete this can resurect old data.
129    *
130    * The method will only be used if canHint was set to true while creating the filter.
131    *
132    * @throws IOException This will never happen.
133    */
134   public Cell getNextCellHint(Cell currentCell) throws IOException {
135     if (!canHint) {
136       return null;
137     }
138 
139     Long nextTimestampObject = timestamps.lower(currentCell.getTimestamp());
140 
141     if (nextTimestampObject == null) {
142       // This should only happen if the current column's
143       // timestamp is below the last one in the list.
144       //
145       // It should never happen as the filterKeyValue should return NEXT_COL
146       // but it's always better to be extra safe and protect against future
147       // behavioral changes.
148 
149       return CellUtil.createLastOnRowCol(currentCell);
150     }
151 
152     // Since we know the nextTimestampObject isn't null here there must still be
153     // timestamps that can be included. Cast the Long to a long and return the
154     // a cell with the current row/cf/col and the next found timestamp.
155     long nextTimestamp = nextTimestampObject;
156     return CellUtil.createFirstOnRowColTS(currentCell, nextTimestamp);
157   }
158 
159   public static Filter createFilterFromArguments(ArrayList<byte []> filterArguments) {
160     ArrayList<Long> timestamps = new ArrayList<Long>();
161     for (int i = 0; i<filterArguments.size(); i++) {
162       long timestamp = ParseFilter.convertByteArrayToLong(filterArguments.get(i));
163       timestamps.add(timestamp);
164     }
165     return new TimestampsFilter(timestamps);
166   }
167 
168   /**
169    * @return The filter serialized using pb
170    */
171   public byte[] toByteArray() {
172     FilterProtos.TimestampsFilter.Builder builder =
173         FilterProtos.TimestampsFilter.newBuilder();
174     builder.addAllTimestamps(this.timestamps);
175     builder.setCanHint(canHint);
176     return builder.build().toByteArray();
177   }
178 
179   /**
180    * @param pbBytes A pb serialized {@link TimestampsFilter} instance
181    *
182    * @return An instance of {@link TimestampsFilter} made from <code>bytes</code>
183    * @see #toByteArray
184    */
185   public static TimestampsFilter parseFrom(final byte[] pbBytes)
186       throws DeserializationException {
187     FilterProtos.TimestampsFilter proto;
188     try {
189       proto = FilterProtos.TimestampsFilter.parseFrom(pbBytes);
190     } catch (InvalidProtocolBufferException e) {
191       throw new DeserializationException(e);
192     }
193     return new TimestampsFilter(proto.getTimestampsList(),
194         proto.hasCanHint() && proto.getCanHint());
195   }
196 
197   /**
198    * @param other
199    * @return true if and only if the fields of the filter that are serialized
200    * are equal to the corresponding fields in other.  Used for testing.
201    */
202   boolean areSerializedFieldsEqual(Filter o) {
203     if (o == this) return true;
204     if (!(o instanceof TimestampsFilter)) return false;
205 
206     TimestampsFilter other = (TimestampsFilter)o;
207     return this.getTimestamps().equals(other.getTimestamps());
208   }
209 
210   @Override
211   public String toString() {
212     return toString(MAX_LOG_TIMESTAMPS);
213   }
214 
215   protected String toString(int maxTimestamps) {
216     StringBuilder tsList = new StringBuilder();
217 
218     int count = 0;
219     for (Long ts : this.timestamps) {
220       if (count >= maxTimestamps) {
221         break;
222       }
223       ++count;
224       tsList.append(ts.toString());
225       if (count < this.timestamps.size() && count < maxTimestamps) {
226         tsList.append(", ");
227       }
228     }
229 
230     return String.format("%s (%d/%d): [%s] canHint: [%b]", this.getClass().getSimpleName(),
231         count, this.timestamps.size(), tsList.toString(), canHint);
232   }
233 }