View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.regionserver;
22  
23  import java.io.IOException;
24  
25  import org.apache.hadoop.hbase.HConstants;
26  import org.apache.hadoop.hbase.KeyValue;
27  import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
28  import org.apache.hadoop.hbase.util.Bytes;
29  
30  /**
31   * Keeps track of the columns for a scan if they are not explicitly specified
32   */
33  public class ScanWildcardColumnTracker implements ColumnTracker {
34    private byte [] columnBuffer = null;
35    private int columnOffset = 0;
36    private int columnLength = 0;
37    private int currentCount = 0;
38    private int maxVersions;
39    private int minVersions;
40    /* Keeps track of the latest timestamp and type included for current column.
41     * Used to eliminate duplicates. */
42    private long latestTSOfCurrentColumn;
43    private byte latestTypeOfCurrentColumn;
44  
45    private long oldestStamp;
46  
47    /**
48     * Return maxVersions of every row.
49     * @param minVersion Minimum number of versions to keep
50     * @param maxVersion Maximum number of versions to return
51     * @param oldestUnexpiredTS oldest timestamp that has not expired according
52     *          to the TTL.
53     */
54    public ScanWildcardColumnTracker(int minVersion, int maxVersion,
55        long oldestUnexpiredTS) {
56      this.maxVersions = maxVersion;
57      this.minVersions = minVersion;
58      this.oldestStamp = oldestUnexpiredTS;
59    }
60  
61    /**
62     * {@inheritDoc}
63     * This receives puts *and* deletes.
64     */
65    @Override
66    public MatchCode checkColumn(byte[] bytes, int offset, int length, byte type)
67        throws IOException {
68      return MatchCode.INCLUDE;
69    }
70  
71    /**
72     * {@inheritDoc}
73     * This receives puts *and* deletes. Deletes do not count as a version, but rather
74     * take the version of the previous put (so eventually all but the last can be reclaimed).
75     */
76    @Override
77    public MatchCode checkVersions(byte[] bytes, int offset, int length, long timestamp, byte type,
78        boolean ignoreCount) throws IOException {
79  
80      if (columnBuffer == null) {
81        // first iteration.
82        resetBuffer(bytes, offset, length);
83        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
84        // do not count a delete marker as another version
85        return checkVersion(type, timestamp);
86      }
87      int cmp = Bytes.compareTo(bytes, offset, length,
88          columnBuffer, columnOffset, columnLength);
89      if (cmp == 0) {
90        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
91  
92        //If column matches, check if it is a duplicate timestamp
93        if (sameAsPreviousTSAndType(timestamp, type)) {
94          return ScanQueryMatcher.MatchCode.SKIP;
95        }
96        return checkVersion(type, timestamp);
97      }
98  
99      resetTSAndType();
100 
101     // new col > old col
102     if (cmp > 0) {
103       // switched columns, lets do something.x
104       resetBuffer(bytes, offset, length);
105       if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
106       return checkVersion(type, timestamp);
107     }
108 
109     // new col < oldcol
110     // WARNING: This means that very likely an edit for some other family
111     // was incorrectly stored into the store for this one. Throw an exception,
112     // because this might lead to data corruption.
113     throw new IOException(
114         "ScanWildcardColumnTracker.checkColumn ran into a column actually " +
115         "smaller than the previous column: " +
116         Bytes.toStringBinary(bytes, offset, length));
117   }
118 
119   private void resetBuffer(byte[] bytes, int offset, int length) {
120     columnBuffer = bytes;
121     columnOffset = offset;
122     columnLength = length;
123     currentCount = 0;
124   }
125 
126   /**
127    * Check whether this version should be retained.
128    * There are 4 variables considered:
129    * If this version is past max versions -> skip it
130    * If this kv has expired or was deleted, check min versions
131    * to decide whther to skip it or not.
132    *
133    * Increase the version counter unless this is a delete
134    */
135   private MatchCode checkVersion(byte type, long timestamp) {
136     if (!KeyValue.isDelete(type)) {
137       currentCount++;
138     }
139     if (currentCount > maxVersions) {
140       return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL; // skip to next col
141     }
142     // keep the KV if required by minversions or it is not expired, yet
143     if (currentCount <= minVersions || !isExpired(timestamp)) {
144       setTSAndType(timestamp, type);
145       return ScanQueryMatcher.MatchCode.INCLUDE;
146     } else {
147       return MatchCode.SEEK_NEXT_COL;
148     }
149 
150   }
151 
152   @Override
153   public void reset() {
154     columnBuffer = null;
155     resetTSAndType();
156   }
157 
158   private void resetTSAndType() {
159     latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
160     latestTypeOfCurrentColumn = 0;
161   }
162 
163   private void setTSAndType(long timestamp, byte type) {
164     latestTSOfCurrentColumn = timestamp;
165     latestTypeOfCurrentColumn = type;
166   }
167 
168   private boolean sameAsPreviousTSAndType(long timestamp, byte type) {
169     return timestamp == latestTSOfCurrentColumn && type == latestTypeOfCurrentColumn;
170   }
171 
172   private boolean isExpired(long timestamp) {
173     return timestamp < oldestStamp;
174   }
175 
176   /**
177    * Used by matcher and scan/get to get a hint of the next column
178    * to seek to after checkColumn() returns SKIP.  Returns the next interesting
179    * column we want, or NULL there is none (wildcard scanner).
180    *
181    * @return The column count.
182    */
183   public ColumnCount getColumnHint() {
184     return null;
185   }
186 
187 
188   /**
189    * We can never know a-priori if we are done, so always return false.
190    * @return false
191    */
192   @Override
193   public boolean done() {
194     return false;
195   }
196 
197   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
198       int qualLength) {
199     return MatchCode.SEEK_NEXT_COL;
200   }
201 
202   public boolean isDone(long timestamp) {
203     return minVersions <= 0 && isExpired(timestamp);
204   }
205 }