001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver.querymatcher;
019
020import java.io.IOException;
021import org.apache.hadoop.hbase.Cell;
022import org.apache.hadoop.hbase.CellComparator;
023import org.apache.hadoop.hbase.CellUtil;
024import org.apache.hadoop.hbase.HConstants;
025import org.apache.hadoop.hbase.KeyValueUtil;
026import org.apache.hadoop.hbase.PrivateCellUtil;
027import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode;
028import org.apache.hadoop.hbase.util.Bytes;
029import org.apache.yetus.audience.InterfaceAudience;
030
031/**
032 * Keeps track of the columns for a scan if they are not explicitly specified
033 */
034@InterfaceAudience.Private
035public class ScanWildcardColumnTracker implements ColumnTracker {
036  private Cell columnCell = null;
037  private int currentCount = 0;
038  private final int maxVersions;
039  private final int minVersions;
040
041  /*
042   * Keeps track of the latest timestamp and type included for current column. Used to eliminate
043   * duplicates.
044   */
045  private long latestTSOfCurrentColumn;
046  private byte latestTypeOfCurrentColumn;
047
048  private long oldestStamp;
049
050  private final CellComparator comparator;
051
052  /**
053   * Return maxVersions of every row.
054   * @param minVersion        Minimum number of versions to keep
055   * @param maxVersion        Maximum number of versions to return
056   * @param oldestUnexpiredTS oldest timestamp that has not expired according to the TTL.
057   * @param comparator        used to compare the qualifier of cell
058   */
059  public ScanWildcardColumnTracker(int minVersion, int maxVersion, long oldestUnexpiredTS,
060    CellComparator comparator) {
061    this.maxVersions = maxVersion;
062    this.minVersions = minVersion;
063    this.oldestStamp = oldestUnexpiredTS;
064    this.comparator = comparator;
065  }
066
067  /**
068   * {@inheritDoc} This receives puts *and* deletes.
069   */
070  @Override
071  public MatchCode checkColumn(Cell cell, byte type) throws IOException {
072    return MatchCode.INCLUDE;
073  }
074
075  /**
076   * {@inheritDoc} This receives puts *and* deletes. Deletes do not count as a version, but rather
077   * take the version of the previous put (so eventually all but the last can be reclaimed).
078   */
079  @Override
080  public ScanQueryMatcher.MatchCode checkVersions(Cell cell, long timestamp, byte type,
081    boolean ignoreCount) throws IOException {
082    if (columnCell == null) {
083      // first iteration.
084      resetCell(cell);
085      if (ignoreCount) {
086        return ScanQueryMatcher.MatchCode.INCLUDE;
087      }
088      // do not count a delete marker as another version
089      return checkVersion(type, timestamp);
090    }
091    int cmp = comparator.compareQualifiers(cell, this.columnCell);
092    if (cmp == 0) {
093      if (ignoreCount) {
094        return ScanQueryMatcher.MatchCode.INCLUDE;
095      }
096
097      // If column matches, check if it is a duplicate timestamp
098      if (sameAsPreviousTSAndType(timestamp, type)) {
099        return ScanQueryMatcher.MatchCode.SKIP;
100      }
101      return checkVersion(type, timestamp);
102    }
103
104    resetTSAndType();
105
106    // new col > old col
107    if (cmp > 0) {
108      // switched columns, lets do something.x
109      resetCell(cell);
110      if (ignoreCount) {
111        return ScanQueryMatcher.MatchCode.INCLUDE;
112      }
113      return checkVersion(type, timestamp);
114    }
115
116    // new col < oldcol
117    // WARNING: This means that very likely an edit for some other family
118    // was incorrectly stored into the store for this one. Throw an exception,
119    // because this might lead to data corruption.
120    throw new IOException("ScanWildcardColumnTracker.checkColumn ran into a column actually "
121      + "smaller than the previous column: " + Bytes.toStringBinary(CellUtil.cloneQualifier(cell)));
122  }
123
124  private void resetCell(Cell columnCell) {
125    this.columnCell = columnCell;
126    currentCount = 0;
127  }
128
129  /**
130   * Check whether this version should be retained. There are 4 variables considered: If this
131   * version is past max versions -> skip it If this kv has expired or was deleted, check min
132   * versions to decide whther to skip it or not. Increase the version counter unless this is a
133   * delete
134   */
135  private MatchCode checkVersion(byte type, long timestamp) {
136    if (!PrivateCellUtil.isDelete(type)) {
137      currentCount++;
138    }
139    if (currentCount > maxVersions) {
140      return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL; // skip to next col
141    }
142    // keep the KV if required by minversions or it is not expired, yet
143    if (currentCount <= minVersions || !isExpired(timestamp)) {
144      setTSAndType(timestamp, type);
145      return ScanQueryMatcher.MatchCode.INCLUDE;
146    } else {
147      return MatchCode.SEEK_NEXT_COL;
148    }
149  }
150
151  @Override
152  public void reset() {
153    columnCell = null;
154    resetTSAndType();
155  }
156
157  private void resetTSAndType() {
158    latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
159    latestTypeOfCurrentColumn = 0;
160  }
161
162  private void setTSAndType(long timestamp, byte type) {
163    latestTSOfCurrentColumn = timestamp;
164    latestTypeOfCurrentColumn = type;
165  }
166
167  private boolean sameAsPreviousTSAndType(long timestamp, byte type) {
168    return timestamp == latestTSOfCurrentColumn && type == latestTypeOfCurrentColumn;
169  }
170
171  private boolean isExpired(long timestamp) {
172    return timestamp < oldestStamp;
173  }
174
175  /**
176   * Used by matcher and scan/get to get a hint of the next column to seek to after checkColumn()
177   * returns SKIP. Returns the next interesting column we want, or NULL there is none (wildcard
178   * scanner).
179   * @return The column count.
180   */
181  @Override
182  public ColumnCount getColumnHint() {
183    return null;
184  }
185
186  /**
187   * We can never know a-priori if we are done, so always return false.
188   */
189  @Override
190  public boolean done() {
191    return false;
192  }
193
194  @Override
195  public MatchCode getNextRowOrNextColumn(Cell cell) {
196    return MatchCode.SEEK_NEXT_COL;
197  }
198
199  @Override
200  public void beforeShipped() {
201    if (columnCell != null) {
202      this.columnCell = KeyValueUtil.toNewKeyCell(this.columnCell);
203    }
204  }
205
206  @Override
207  public boolean isDone(long timestamp) {
208    return minVersions <= 0 && isExpired(timestamp);
209  }
210}