1 /*
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.regionserver;
20
21 import java.util.ArrayList;
22 import java.util.List;
23 import java.util.NavigableSet;
24
25 import org.apache.hadoop.classification.InterfaceAudience;
26 import org.apache.hadoop.hbase.HConstants;
27 import org.apache.hadoop.hbase.KeyValue;
28 import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
29 import org.apache.hadoop.hbase.util.Bytes;
30
31 /**
32 * This class is used for the tracking and enforcement of columns and numbers
33 * of versions during the course of a Get or Scan operation, when explicit
34 * column qualifiers have been asked for in the query.
35 *
36 * With a little magic (see {@link ScanQueryMatcher}), we can use this matcher
37 * for both scans and gets. The main difference is 'next' and 'done' collapse
38 * for the scan case (since we see all columns in order), and we only reset
39 * between rows.
40 *
41 * <p>
42 * This class is utilized by {@link ScanQueryMatcher} through two methods:
43 * <ul><li>{@link #checkColumn} is called when a Put satisfies all other
44 * conditions of the query. This method returns a {@link org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode} to define
45 * what action should be taken.
46 * <li>{@link #update} is called at the end of every StoreFile or memstore.
47 * <p>
48 * This class is NOT thread-safe as queries are never multi-threaded
49 */
50 @InterfaceAudience.Private
51 public class ExplicitColumnTracker implements ColumnTracker {
52
53 private final int maxVersions;
54 private final int minVersions;
55
56 /**
57 * Contains the list of columns that the ExplicitColumnTracker is tracking.
58 * Each ColumnCount instance also tracks how many versions of the requested
59 * column have been returned.
60 */
61 private final List<ColumnCount> columns;
62 private final List<ColumnCount> columnsToReuse;
63 private int index;
64 private ColumnCount column;
65 /** Keeps track of the latest timestamp included for current column.
66 * Used to eliminate duplicates. */
67 private long latestTSOfCurrentColumn;
68 private long oldestStamp;
69
70 /**
71 * Default constructor.
72 * @param columns columns specified user in query
73 * @param minVersions minimum number of versions to keep
74 * @param maxVersions maximum versions to return per column
75 * @param oldestUnexpiredTS the oldest timestamp we are interested in,
76 * based on TTL
77 */
78 public ExplicitColumnTracker(NavigableSet<byte[]> columns, int minVersions,
79 int maxVersions, long oldestUnexpiredTS) {
80 this.maxVersions = maxVersions;
81 this.minVersions = minVersions;
82 this.oldestStamp = oldestUnexpiredTS;
83 this.columns = new ArrayList<ColumnCount>(columns.size());
84 this.columnsToReuse = new ArrayList<ColumnCount>(columns.size());
85 for(byte [] column : columns) {
86 this.columnsToReuse.add(new ColumnCount(column));
87 }
88 reset();
89 }
90
91 /**
92 * Done when there are no more columns to match against.
93 */
94 public boolean done() {
95 return this.columns.size() == 0;
96 }
97
98 public ColumnCount getColumnHint() {
99 return this.column;
100 }
101
102 /**
103 * {@inheritDoc}
104 */
105 @Override
106 public ScanQueryMatcher.MatchCode checkColumn(byte [] bytes, int offset,
107 int length, long timestamp, byte type, boolean ignoreCount) {
108 // delete markers should never be passed to an
109 // *Explicit*ColumnTracker
110 assert !KeyValue.isDelete(type);
111 do {
112 // No more columns left, we are done with this query
113 if(this.columns.size() == 0) {
114 return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
115 }
116
117 // No more columns to match against, done with storefile
118 if(this.column == null) {
119 return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
120 }
121
122 // Compare specific column to current column
123 int ret = Bytes.compareTo(column.getBuffer(), column.getOffset(),
124 column.getLength(), bytes, offset, length);
125
126 // Column Matches. If it is not a duplicate key, increment the version count
127 // and include.
128 if(ret == 0) {
129 if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
130
131 //If column matches, check if it is a duplicate timestamp
132 if (sameAsPreviousTS(timestamp)) {
133 //If duplicate, skip this Key
134 return ScanQueryMatcher.MatchCode.SKIP;
135 }
136 int count = this.column.increment();
137 if(count >= maxVersions || (count >= minVersions && isExpired(timestamp))) {
138 // Done with versions for this column
139 // Note: because we are done with this column, and are removing
140 // it from columns, we don't do a ++this.index. The index stays
141 // the same but the columns have shifted within the array such
142 // that index now points to the next column we are interested in.
143 this.columns.remove(this.index);
144
145 resetTS();
146 if (this.columns.size() == this.index) {
147 // We have served all the requested columns.
148 this.column = null;
149 return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
150 } else {
151 // We are done with current column; advance to next column
152 // of interest.
153 this.column = this.columns.get(this.index);
154 return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
155 }
156 } else {
157 setTS(timestamp);
158 }
159 return ScanQueryMatcher.MatchCode.INCLUDE;
160 }
161
162 resetTS();
163
164 if (ret > 0) {
165 // The current KV is smaller than the column the ExplicitColumnTracker
166 // is interested in, so seek to that column of interest.
167 return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
168 }
169
170 // The current KV is bigger than the column the ExplicitColumnTracker
171 // is interested in. That means there is no more data for the column
172 // of interest. Advance the ExplicitColumnTracker state to next
173 // column of interest, and check again.
174 if (ret <= -1) {
175 if (++this.index >= this.columns.size()) {
176 // No more to match, do not include, done with this row.
177 return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
178 }
179 // This is the recursive case.
180 this.column = this.columns.get(this.index);
181 }
182 } while(true);
183 }
184
185 /**
186 * Called at the end of every StoreFile or memstore.
187 */
188 public void update() {
189 if(this.columns.size() != 0) {
190 this.index = 0;
191 this.column = this.columns.get(this.index);
192 } else {
193 this.index = -1;
194 this.column = null;
195 }
196 }
197
198 // Called between every row.
199 public void reset() {
200 buildColumnList();
201 this.index = 0;
202 this.column = this.columns.get(this.index);
203 resetTS();
204 }
205
206 private void resetTS() {
207 latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
208 }
209
210 private void setTS(long timestamp) {
211 latestTSOfCurrentColumn = timestamp;
212 }
213
214 private boolean sameAsPreviousTS(long timestamp) {
215 return timestamp == latestTSOfCurrentColumn;
216 }
217
218 private boolean isExpired(long timestamp) {
219 return timestamp < oldestStamp;
220 }
221
222 private void buildColumnList() {
223 this.columns.clear();
224 this.columns.addAll(this.columnsToReuse);
225 for(ColumnCount col : this.columns) {
226 col.setCount(0);
227 }
228 }
229
230 /**
231 * This method is used to inform the column tracker that we are done with
232 * this column. We may get this information from external filters or
233 * timestamp range and we then need to indicate this information to
234 * tracker. It is required only in case of ExplicitColumnTracker.
235 * @param bytes
236 * @param offset
237 * @param length
238 */
239 public void doneWithColumn(byte [] bytes, int offset, int length) {
240 while (this.column != null) {
241 int compare = Bytes.compareTo(column.getBuffer(), column.getOffset(),
242 column.getLength(), bytes, offset, length);
243 resetTS();
244 if (compare == 0) {
245 this.columns.remove(this.index);
246 if (this.columns.size() == this.index) {
247 // Will not hit any more columns in this storefile
248 this.column = null;
249 } else {
250 this.column = this.columns.get(this.index);
251 }
252 return;
253 } else if ( compare <= -1) {
254 if(++this.index != this.columns.size()) {
255 this.column = this.columns.get(this.index);
256 } else {
257 this.column = null;
258 }
259 } else {
260 return;
261 }
262 }
263 }
264
265 public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
266 int qualLength) {
267 doneWithColumn(bytes, offset,qualLength);
268
269 if (getColumnHint() == null) {
270 return MatchCode.SEEK_NEXT_ROW;
271 } else {
272 return MatchCode.SEEK_NEXT_COL;
273 }
274 }
275
276 public boolean isDone(long timestamp) {
277 return minVersions <= 0 && isExpired(timestamp);
278 }
279 }