1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  
24  import org.apache.hadoop.classification.InterfaceAudience;
25  import org.apache.hadoop.hbase.HConstants;
26  import org.apache.hadoop.hbase.KeyValue;
27  import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
28  import org.apache.hadoop.hbase.util.Bytes;
29  
30  /**
31   * Keeps track of the columns for a scan if they are not explicitly specified
32   */
33  @InterfaceAudience.Private
34  public class ScanWildcardColumnTracker implements ColumnTracker {
35    private byte [] columnBuffer = null;
36    private int columnOffset = 0;
37    private int columnLength = 0;
38    private int currentCount = 0;
39    private int maxVersions;
40    private int minVersions;
41    /* Keeps track of the latest timestamp and type included for current column.
42     * Used to eliminate duplicates. */
43    private long latestTSOfCurrentColumn;
44    private byte latestTypeOfCurrentColumn;
45  
46    private long oldestStamp;
47  
48    /**
49     * Return maxVersions of every row.
50     * @param minVersion Minimum number of versions to keep
51     * @param maxVersion Maximum number of versions to return
52     * @param oldestUnexpiredTS oldest timestamp that has not expired according
53     *          to the TTL.
54     */
55    public ScanWildcardColumnTracker(int minVersion, int maxVersion,
56        long oldestUnexpiredTS) {
57      this.maxVersions = maxVersion;
58      this.minVersions = minVersion;
59      this.oldestStamp = oldestUnexpiredTS;
60    }
61  
62    /**
63     * {@inheritDoc}
64     * This receives puts *and* deletes.
65     * Deletes do not count as a version, but rather take the version
66     * of the previous put (so eventually all but the last can be reclaimed).
67     */
68    @Override
69    public MatchCode checkColumn(byte[] bytes, int offset, int length,
70        long timestamp, byte type, boolean ignoreCount) throws IOException {
71      
72      if (columnBuffer == null) {
73        // first iteration.
74        resetBuffer(bytes, offset, length);
75        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
76        // do not count a delete marker as another version
77        return checkVersion(type, timestamp);
78      }
79      int cmp = Bytes.compareTo(bytes, offset, length,
80          columnBuffer, columnOffset, columnLength);
81      if (cmp == 0) {
82        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
83  
84        //If column matches, check if it is a duplicate timestamp
85        if (sameAsPreviousTSAndType(timestamp, type)) {
86          return ScanQueryMatcher.MatchCode.SKIP;
87        }
88        return checkVersion(type, timestamp);
89      }
90  
91      resetTSAndType();
92  
93      // new col > old col
94      if (cmp > 0) {
95        // switched columns, lets do something.x
96        resetBuffer(bytes, offset, length);
97        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
98        return checkVersion(type, timestamp);
99      }
100 
101     // new col < oldcol
102     // WARNING: This means that very likely an edit for some other family
103     // was incorrectly stored into the store for this one. Throw an exception,
104     // because this might lead to data corruption.
105     throw new IOException(
106         "ScanWildcardColumnTracker.checkColumn ran into a column actually " +
107         "smaller than the previous column: " +
108         Bytes.toStringBinary(bytes, offset, length));
109   }
110 
111   private void resetBuffer(byte[] bytes, int offset, int length) {
112     columnBuffer = bytes;
113     columnOffset = offset;
114     columnLength = length;
115     currentCount = 0;
116   }
117 
118   /**
119    * Check whether this version should be retained.
120    * There are 4 variables considered:
121    * If this version is past max versions -> skip it
122    * If this kv has expired or was deleted, check min versions
123    * to decide whther to skip it or not.
124    *
125    * Increase the version counter unless this is a delete
126    */
127   private MatchCode checkVersion(byte type, long timestamp) {
128     if (!KeyValue.isDelete(type)) {
129       currentCount++;
130     }
131     if (currentCount > maxVersions) {
132       return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL; // skip to next col
133     }
134     // keep the KV if required by minversions or it is not expired, yet
135     if (currentCount <= minVersions || !isExpired(timestamp)) {
136       setTSAndType(timestamp, type);
137       return ScanQueryMatcher.MatchCode.INCLUDE;
138     } else {
139       return MatchCode.SEEK_NEXT_COL;
140     }
141 
142   }
143 
144   @Override
145   public void update() {
146     // no-op, shouldn't even be called
147     throw new UnsupportedOperationException(
148         "ScanWildcardColumnTracker.update should never be called!");
149   }
150 
151   @Override
152   public void reset() {
153     columnBuffer = null;
154     resetTSAndType();
155   }
156 
157   private void resetTSAndType() {
158     latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
159     latestTypeOfCurrentColumn = 0;
160   }
161 
162   private void setTSAndType(long timestamp, byte type) {
163     latestTSOfCurrentColumn = timestamp;
164     latestTypeOfCurrentColumn = type;
165   }
166 
167   private boolean sameAsPreviousTSAndType(long timestamp, byte type) {
168     return timestamp == latestTSOfCurrentColumn && type == latestTypeOfCurrentColumn;
169   }
170 
171   private boolean isExpired(long timestamp) {
172     return timestamp < oldestStamp;
173   }
174 
175   /**
176    * Used by matcher and scan/get to get a hint of the next column
177    * to seek to after checkColumn() returns SKIP.  Returns the next interesting
178    * column we want, or NULL there is none (wildcard scanner).
179    *
180    * @return The column count.
181    */
182   public ColumnCount getColumnHint() {
183     return null;
184   }
185 
186 
187   /**
188    * We can never know a-priori if we are done, so always return false.
189    * @return false
190    */
191   @Override
192   public boolean done() {
193     return false;
194   }
195 
196   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
197       int qualLength) {
198     return MatchCode.SEEK_NEXT_COL;
199   }
200 
201   public boolean isDone(long timestamp) {
202     return minVersions <= 0 && isExpired(timestamp);
203   }
204 }