View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.IOException;
22  import java.util.NavigableSet;
23  
24  import org.apache.hadoop.hbase.classification.InterfaceAudience;
25  import org.apache.hadoop.hbase.Cell;
26  import org.apache.hadoop.hbase.CellUtil;
27  import org.apache.hadoop.hbase.HConstants;
28  import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
29  import org.apache.hadoop.hbase.util.Bytes;
30  
31  /**
32   * This class is used for the tracking and enforcement of columns and numbers
33   * of versions during the course of a Get or Scan operation, when explicit
34   * column qualifiers have been asked for in the query.
35   *
36   * With a little magic (see {@link ScanQueryMatcher}), we can use this matcher
37   * for both scans and gets.  The main difference is 'next' and 'done' collapse
38   * for the scan case (since we see all columns in order), and we only reset
39   * between rows.
40   *
41   * <p>
42   * This class is utilized by {@link ScanQueryMatcher} mainly through two methods:
43   * <ul><li>{@link #checkColumn} is called when a Put satisfies all other
44   * conditions of the query.</li>
45   * <li>{@link #getNextRowOrNextColumn} is called whenever ScanQueryMatcher
46   * believes that the current column should be skipped (by timestamp, filter etc.)</li>
47   * </ul>
48   * <p>
49   * These two methods returns a 
50   * {@link org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode}
51   * to define what action should be taken.
52   * <p>
53   * This class is NOT thread-safe as queries are never multi-threaded
54   */
55  @InterfaceAudience.Private
56  public class ExplicitColumnTracker implements ColumnTracker {
57  
58    private final int maxVersions;
59    private final int minVersions;
60  
61   /**
62    * Contains the list of columns that the ExplicitColumnTracker is tracking.
63    * Each ColumnCount instance also tracks how many versions of the requested
64    * column have been returned.
65    */
66    private final ColumnCount[] columns;
67    private int index;
68    private ColumnCount column;
69    /** Keeps track of the latest timestamp included for current column.
70     * Used to eliminate duplicates. */
71    private long latestTSOfCurrentColumn;
72    private long oldestStamp;
73  
74    /**
75     * Default constructor.
76     * @param columns columns specified user in query
77     * @param minVersions minimum number of versions to keep
78     * @param maxVersions maximum versions to return per column
79     * @param oldestUnexpiredTS the oldest timestamp we are interested in,
80     *  based on TTL 
81     */
82    public ExplicitColumnTracker(NavigableSet<byte[]> columns, int minVersions,
83        int maxVersions, long oldestUnexpiredTS) {
84      this.maxVersions = maxVersions;
85      this.minVersions = minVersions;
86      this.oldestStamp = oldestUnexpiredTS;
87      this.columns = new ColumnCount[columns.size()];
88      int i=0;
89      for(byte [] column : columns) {
90        this.columns[i++] = new ColumnCount(column);
91      }
92      reset();
93    }
94  
95      /**
96     * Done when there are no more columns to match against.
97     */
98    public boolean done() {
99      return this.index >= columns.length;
100   }
101 
102   public ColumnCount getColumnHint() {
103     return this.column;
104   }
105 
106   /**
107    * {@inheritDoc}
108    */
109   @Override
110   public ScanQueryMatcher.MatchCode checkColumn(Cell cell, byte type) {
111     // delete markers should never be passed to an
112     // *Explicit*ColumnTracker
113     assert !CellUtil.isDelete(type);
114     do {
115       // No more columns left, we are done with this query
116       if(done()) {
117         return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
118       }
119 
120       // No more columns to match against, done with storefile
121       if(this.column == null) {
122         return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
123       }
124 
125       // Compare specific column to current column
126       // TODO when cell is offheap backed, we won't use getQualifierArray()
127       int ret = Bytes.compareTo(column.getBuffer(), column.getOffset(), column.getLength(),
128           cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
129 
130       // Column Matches. Return include code. The caller would call checkVersions
131       // to limit the number of versions.
132       if(ret == 0) {
133         return ScanQueryMatcher.MatchCode.INCLUDE;
134       }
135 
136       resetTS();
137 
138       if (ret > 0) {
139         // The current KV is smaller than the column the ExplicitColumnTracker
140         // is interested in, so seek to that column of interest.
141         return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
142       }
143 
144       // The current KV is bigger than the column the ExplicitColumnTracker
145       // is interested in. That means there is no more data for the column
146       // of interest. Advance the ExplicitColumnTracker state to next
147       // column of interest, and check again.
148       if (ret <= -1) {
149         ++this.index;
150         if (done()) {
151           // No more to match, do not include, done with this row.
152           return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
153         }
154         // This is the recursive case.
155         this.column = this.columns[this.index];
156       }
157     } while(true);
158   }
159 
160   @Override
161   public ScanQueryMatcher.MatchCode checkVersions(Cell cell,
162       long timestamp, byte type, boolean ignoreCount) throws IOException {
163     assert !CellUtil.isDelete(type);
164     if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
165     // Check if it is a duplicate timestamp
166     if (sameAsPreviousTS(timestamp)) {
167       // If duplicate, skip this Key
168       return ScanQueryMatcher.MatchCode.SKIP;
169     }
170     int count = this.column.increment();
171     if (count >= maxVersions || (count >= minVersions && isExpired(timestamp))) {
172       // Done with versions for this column
173       ++this.index;
174       resetTS();
175       if (done()) {
176         // We have served all the requested columns.
177         this.column = null;
178         return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
179       }
180       // We are done with current column; advance to next column
181       // of interest.
182       this.column = this.columns[this.index];
183       return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
184     }
185     setTS(timestamp);
186     return ScanQueryMatcher.MatchCode.INCLUDE;
187   }
188 
189   // Called between every row.
190   public void reset() {
191     this.index = 0;
192     this.column = this.columns[this.index];
193     for(ColumnCount col : this.columns) {
194       col.setCount(0);
195     }
196     resetTS();
197   }
198 
199   private void resetTS() {
200     latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
201   }
202 
203   private void setTS(long timestamp) {
204     latestTSOfCurrentColumn = timestamp;
205   }
206 
207   private boolean sameAsPreviousTS(long timestamp) {
208     return timestamp == latestTSOfCurrentColumn;
209   }
210 
211   private boolean isExpired(long timestamp) {
212     return timestamp < oldestStamp;
213   }
214 
215   /**
216    * This method is used to inform the column tracker that we are done with
217    * this column. We may get this information from external filters or
218    * timestamp range and we then need to indicate this information to
219    * tracker. It is required only in case of ExplicitColumnTracker.
220    * @param cell
221    */
222   public void doneWithColumn(Cell cell) {
223     while (this.column != null) {
224       int compare = Bytes.compareTo(column.getBuffer(), column.getOffset(), column.getLength(),
225           cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
226       resetTS();
227       if (compare <= 0) {
228         ++this.index;
229         if (done()) {
230           // Will not hit any more columns in this storefile
231           this.column = null;
232         } else {
233           this.column = this.columns[this.index];
234         }
235         if (compare <= -1)
236           continue;
237       }
238       return;
239     }
240   }
241 
242   @Override
243   public MatchCode getNextRowOrNextColumn(Cell cell) {
244     doneWithColumn(cell);
245 
246     if (getColumnHint() == null) {
247       return MatchCode.SEEK_NEXT_ROW;
248     } else {
249       return MatchCode.SEEK_NEXT_COL;
250     }
251   }
252 
253   public boolean isDone(long timestamp) {
254     return minVersions <= 0 && isExpired(timestamp);
255   }
256 }