View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  
24  import org.apache.hadoop.hbase.classification.InterfaceAudience;
25  import org.apache.hadoop.hbase.Cell;
26  import org.apache.hadoop.hbase.CellComparator;
27  import org.apache.hadoop.hbase.CellUtil;
28  import org.apache.hadoop.hbase.HConstants;
29  import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
30  import org.apache.hadoop.hbase.util.Bytes;
31  
32  /**
33   * Keeps track of the columns for a scan if they are not explicitly specified
34   */
35  @InterfaceAudience.Private
36  public class ScanWildcardColumnTracker implements ColumnTracker {
37    private Cell columnCell = null;
38    private int currentCount = 0;
39    private int maxVersions;
40    private int minVersions;
41    /* Keeps track of the latest timestamp and type included for current column.
42     * Used to eliminate duplicates. */
43    private long latestTSOfCurrentColumn;
44    private byte latestTypeOfCurrentColumn;
45  
46    private long oldestStamp;
47  
48    /**
49     * Return maxVersions of every row.
50     * @param minVersion Minimum number of versions to keep
51     * @param maxVersion Maximum number of versions to return
52     * @param oldestUnexpiredTS oldest timestamp that has not expired according
53     *          to the TTL.
54     */
55    public ScanWildcardColumnTracker(int minVersion, int maxVersion,
56        long oldestUnexpiredTS) {
57      this.maxVersions = maxVersion;
58      this.minVersions = minVersion;
59      this.oldestStamp = oldestUnexpiredTS;
60    }
61  
62    /**
63     * {@inheritDoc}
64     * This receives puts *and* deletes.
65     */
66    @Override
67    public MatchCode checkColumn(Cell cell, byte type) throws IOException {
68      return MatchCode.INCLUDE;
69    }
70  
71    /**
72     * {@inheritDoc}
73     * This receives puts *and* deletes. Deletes do not count as a version, but rather
74     * take the version of the previous put (so eventually all but the last can be reclaimed).
75     */
76    @Override
77    public ScanQueryMatcher.MatchCode checkVersions(Cell cell,
78        long timestamp, byte type, boolean ignoreCount) throws IOException {
79  
80      if (columnCell == null) {
81        // first iteration.
82        resetCell(cell);
83        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
84        // do not count a delete marker as another version
85        return checkVersion(type, timestamp);
86      }
87      int cmp = CellComparator.compareQualifiers(cell, this.columnCell);
88      if (cmp == 0) {
89        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
90  
91        //If column matches, check if it is a duplicate timestamp
92        if (sameAsPreviousTSAndType(timestamp, type)) {
93          return ScanQueryMatcher.MatchCode.SKIP;
94        }
95        return checkVersion(type, timestamp);
96      }
97  
98      resetTSAndType();
99  
100     // new col > old col
101     if (cmp > 0) {
102       // switched columns, lets do something.x
103       resetCell(cell);
104       if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
105       return checkVersion(type, timestamp);
106     }
107 
108     // new col < oldcol
109     // WARNING: This means that very likely an edit for some other family
110     // was incorrectly stored into the store for this one. Throw an exception,
111     // because this might lead to data corruption.
112     throw new IOException(
113         "ScanWildcardColumnTracker.checkColumn ran into a column actually " +
114         "smaller than the previous column: " +
115         Bytes.toStringBinary(CellUtil.cloneQualifier(cell)));
116   }
117 
118   private void resetCell(Cell columnCell) {
119     this.columnCell = columnCell;
120     currentCount = 0;
121   }
122 
123   /**
124    * Check whether this version should be retained.
125    * There are 4 variables considered:
126    * If this version is past max versions -> skip it
127    * If this kv has expired or was deleted, check min versions
128    * to decide whther to skip it or not.
129    *
130    * Increase the version counter unless this is a delete
131    */
132   private MatchCode checkVersion(byte type, long timestamp) {
133     if (!CellUtil.isDelete(type)) {
134       currentCount++;
135     }
136     if (currentCount > maxVersions) {
137       return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL; // skip to next col
138     }
139     // keep the KV if required by minversions or it is not expired, yet
140     if (currentCount <= minVersions || !isExpired(timestamp)) {
141       setTSAndType(timestamp, type);
142       return ScanQueryMatcher.MatchCode.INCLUDE;
143     } else {
144       return MatchCode.SEEK_NEXT_COL;
145     }
146 
147   }
148 
149   @Override
150   public void reset() {
151     columnCell = null;
152     resetTSAndType();
153   }
154 
155   private void resetTSAndType() {
156     latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
157     latestTypeOfCurrentColumn = 0;
158   }
159 
160   private void setTSAndType(long timestamp, byte type) {
161     latestTSOfCurrentColumn = timestamp;
162     latestTypeOfCurrentColumn = type;
163   }
164 
165   private boolean sameAsPreviousTSAndType(long timestamp, byte type) {
166     return timestamp == latestTSOfCurrentColumn && type == latestTypeOfCurrentColumn;
167   }
168 
169   private boolean isExpired(long timestamp) {
170     return timestamp < oldestStamp;
171   }
172 
173   /**
174    * Used by matcher and scan/get to get a hint of the next column
175    * to seek to after checkColumn() returns SKIP.  Returns the next interesting
176    * column we want, or NULL there is none (wildcard scanner).
177    *
178    * @return The column count.
179    */
180   public ColumnCount getColumnHint() {
181     return null;
182   }
183 
184   /**
185    * We can never know a-priori if we are done, so always return false.
186    * @return false
187    */
188   @Override
189   public boolean done() {
190     return false;
191   }
192 
193   @Override
194   public MatchCode getNextRowOrNextColumn(Cell cell) {
195     return MatchCode.SEEK_NEXT_COL;
196   }
197 
198   public boolean isDone(long timestamp) {
199     return minVersions <= 0 && isExpired(timestamp);
200   }
201 }