View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  
24  import org.apache.hadoop.hbase.classification.InterfaceAudience;
25  import org.apache.hadoop.hbase.CellUtil;
26  import org.apache.hadoop.hbase.HConstants;
27  import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
28  import org.apache.hadoop.hbase.util.Bytes;
29  
30  /**
31   * Keeps track of the columns for a scan if they are not explicitly specified
32   */
33  @InterfaceAudience.Private
34  public class ScanWildcardColumnTracker implements ColumnTracker {
35    private byte [] columnBuffer = null;
36    private int columnOffset = 0;
37    private int columnLength = 0;
38    private int currentCount = 0;
39    private int maxVersions;
40    private int minVersions;
41    /* Keeps track of the latest timestamp and type included for current column.
42     * Used to eliminate duplicates. */
43    private long latestTSOfCurrentColumn;
44    private byte latestTypeOfCurrentColumn;
45  
46    private long oldestStamp;
47  
48    /**
49     * Return maxVersions of every row.
50     * @param minVersion Minimum number of versions to keep
51     * @param maxVersion Maximum number of versions to return
52     * @param oldestUnexpiredTS oldest timestamp that has not expired according
53     *          to the TTL.
54     */
55    public ScanWildcardColumnTracker(int minVersion, int maxVersion,
56        long oldestUnexpiredTS) {
57      this.maxVersions = maxVersion;
58      this.minVersions = minVersion;
59      this.oldestStamp = oldestUnexpiredTS;
60    }
61  
62    /**
63     * {@inheritDoc}
64     * This receives puts *and* deletes.
65     */
66    @Override
67    public MatchCode checkColumn(byte[] bytes, int offset, int length, byte type)
68        throws IOException {
69      return MatchCode.INCLUDE;
70    }
71  
72    /**
73     * {@inheritDoc}
74     * This receives puts *and* deletes. Deletes do not count as a version, but rather
75     * take the version of the previous put (so eventually all but the last can be reclaimed).
76     */
77    @Override
78    public ScanQueryMatcher.MatchCode checkVersions(byte[] bytes, int offset, int length,
79        long timestamp, byte type, boolean ignoreCount) throws IOException {
80  
81      if (columnBuffer == null) {
82        // first iteration.
83        resetBuffer(bytes, offset, length);
84        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
85        // do not count a delete marker as another version
86        return checkVersion(type, timestamp);
87      }
88      int cmp = Bytes.compareTo(bytes, offset, length,
89          columnBuffer, columnOffset, columnLength);
90      if (cmp == 0) {
91        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
92  
93        //If column matches, check if it is a duplicate timestamp
94        if (sameAsPreviousTSAndType(timestamp, type)) {
95          return ScanQueryMatcher.MatchCode.SKIP;
96        }
97        return checkVersion(type, timestamp);
98      }
99  
100     resetTSAndType();
101 
102     // new col > old col
103     if (cmp > 0) {
104       // switched columns, lets do something.x
105       resetBuffer(bytes, offset, length);
106       if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
107       return checkVersion(type, timestamp);
108     }
109 
110     // new col < oldcol
111     // WARNING: This means that very likely an edit for some other family
112     // was incorrectly stored into the store for this one. Throw an exception,
113     // because this might lead to data corruption.
114     throw new IOException(
115         "ScanWildcardColumnTracker.checkColumn ran into a column actually " +
116         "smaller than the previous column: " +
117         Bytes.toStringBinary(bytes, offset, length));
118   }
119 
120   private void resetBuffer(byte[] bytes, int offset, int length) {
121     columnBuffer = bytes;
122     columnOffset = offset;
123     columnLength = length;
124     currentCount = 0;
125   }
126 
127   /**
128    * Check whether this version should be retained.
129    * There are 4 variables considered:
130    * If this version is past max versions -> skip it
131    * If this kv has expired or was deleted, check min versions
132    * to decide whther to skip it or not.
133    *
134    * Increase the version counter unless this is a delete
135    */
136   private MatchCode checkVersion(byte type, long timestamp) {
137     if (!CellUtil.isDelete(type)) {
138       currentCount++;
139     }
140     if (currentCount > maxVersions) {
141       return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL; // skip to next col
142     }
143     // keep the KV if required by minversions or it is not expired, yet
144     if (currentCount <= minVersions || !isExpired(timestamp)) {
145       setTSAndType(timestamp, type);
146       return ScanQueryMatcher.MatchCode.INCLUDE;
147     } else {
148       return MatchCode.SEEK_NEXT_COL;
149     }
150 
151   }
152 
153   @Override
154   public void reset() {
155     columnBuffer = null;
156     resetTSAndType();
157   }
158 
159   private void resetTSAndType() {
160     latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
161     latestTypeOfCurrentColumn = 0;
162   }
163 
164   private void setTSAndType(long timestamp, byte type) {
165     latestTSOfCurrentColumn = timestamp;
166     latestTypeOfCurrentColumn = type;
167   }
168 
169   private boolean sameAsPreviousTSAndType(long timestamp, byte type) {
170     return timestamp == latestTSOfCurrentColumn && type == latestTypeOfCurrentColumn;
171   }
172 
173   private boolean isExpired(long timestamp) {
174     return timestamp < oldestStamp;
175   }
176 
177   /**
178    * Used by matcher and scan/get to get a hint of the next column
179    * to seek to after checkColumn() returns SKIP.  Returns the next interesting
180    * column we want, or NULL there is none (wildcard scanner).
181    *
182    * @return The column count.
183    */
184   public ColumnCount getColumnHint() {
185     return null;
186   }
187 
188   /**
189    * We can never know a-priori if we are done, so always return false.
190    * @return false
191    */
192   @Override
193   public boolean done() {
194     return false;
195   }
196 
197   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
198       int qualLength) {
199     return MatchCode.SEEK_NEXT_COL;
200   }
201 
202   public boolean isDone(long timestamp) {
203     return minVersions <= 0 && isExpired(timestamp);
204   }
205 }