001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.hbase.regionserver.querymatcher; 020 021import java.io.IOException; 022 023import org.apache.hadoop.hbase.Cell; 024import org.apache.hadoop.hbase.CellComparator; 025import org.apache.hadoop.hbase.CellUtil; 026import org.apache.hadoop.hbase.HConstants; 027import org.apache.hadoop.hbase.PrivateCellUtil; 028import org.apache.hadoop.hbase.KeyValueUtil; 029import org.apache.yetus.audience.InterfaceAudience; 030import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode; 031import org.apache.hadoop.hbase.util.Bytes; 032 033/** 034 * Keeps track of the columns for a scan if they are not explicitly specified 035 */ 036@InterfaceAudience.Private 037public class ScanWildcardColumnTracker implements ColumnTracker { 038 private Cell columnCell = null; 039 private int currentCount = 0; 040 private final int maxVersions; 041 private final int minVersions; 042 043 /* 044 * Keeps track of the latest timestamp and type included for current column. Used to eliminate 045 * duplicates. 046 */ 047 private long latestTSOfCurrentColumn; 048 private byte latestTypeOfCurrentColumn; 049 050 private long oldestStamp; 051 052 private final CellComparator comparator; 053 /** 054 * Return maxVersions of every row. 055 * @param minVersion Minimum number of versions to keep 056 * @param maxVersion Maximum number of versions to return 057 * @param oldestUnexpiredTS oldest timestamp that has not expired according to the TTL. 058 * @param comparator used to compare the qualifier of cell 059 */ 060 public ScanWildcardColumnTracker(int minVersion, int maxVersion, 061 long oldestUnexpiredTS, CellComparator comparator) { 062 this.maxVersions = maxVersion; 063 this.minVersions = minVersion; 064 this.oldestStamp = oldestUnexpiredTS; 065 this.comparator = comparator; 066 } 067 068 /** 069 * {@inheritDoc} This receives puts *and* deletes. 070 */ 071 @Override 072 public MatchCode checkColumn(Cell cell, byte type) throws IOException { 073 return MatchCode.INCLUDE; 074 } 075 076 /** 077 * {@inheritDoc} This receives puts *and* deletes. Deletes do not count as a version, but rather 078 * take the version of the previous put (so eventually all but the last can be reclaimed). 079 */ 080 @Override 081 public ScanQueryMatcher.MatchCode checkVersions(Cell cell, long timestamp, byte type, 082 boolean ignoreCount) throws IOException { 083 if (columnCell == null) { 084 // first iteration. 085 resetCell(cell); 086 if (ignoreCount) { 087 return ScanQueryMatcher.MatchCode.INCLUDE; 088 } 089 // do not count a delete marker as another version 090 return checkVersion(type, timestamp); 091 } 092 int cmp = comparator.compareQualifiers(cell, this.columnCell); 093 if (cmp == 0) { 094 if (ignoreCount) { 095 return ScanQueryMatcher.MatchCode.INCLUDE; 096 } 097 098 // If column matches, check if it is a duplicate timestamp 099 if (sameAsPreviousTSAndType(timestamp, type)) { 100 return ScanQueryMatcher.MatchCode.SKIP; 101 } 102 return checkVersion(type, timestamp); 103 } 104 105 resetTSAndType(); 106 107 // new col > old col 108 if (cmp > 0) { 109 // switched columns, lets do something.x 110 resetCell(cell); 111 if (ignoreCount) { 112 return ScanQueryMatcher.MatchCode.INCLUDE; 113 } 114 return checkVersion(type, timestamp); 115 } 116 117 // new col < oldcol 118 // WARNING: This means that very likely an edit for some other family 119 // was incorrectly stored into the store for this one. Throw an exception, 120 // because this might lead to data corruption. 121 throw new IOException("ScanWildcardColumnTracker.checkColumn ran into a column actually " 122 + "smaller than the previous column: " 123 + Bytes.toStringBinary(CellUtil.cloneQualifier(cell))); 124 } 125 126 private void resetCell(Cell columnCell) { 127 this.columnCell = columnCell; 128 currentCount = 0; 129 } 130 131 /** 132 * Check whether this version should be retained. There are 4 variables considered: If this 133 * version is past max versions -> skip it If this kv has expired or was deleted, check min 134 * versions to decide whther to skip it or not. Increase the version counter unless this is a 135 * delete 136 */ 137 private MatchCode checkVersion(byte type, long timestamp) { 138 if (!PrivateCellUtil.isDelete(type)) { 139 currentCount++; 140 } 141 if (currentCount > maxVersions) { 142 return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL; // skip to next col 143 } 144 // keep the KV if required by minversions or it is not expired, yet 145 if (currentCount <= minVersions || !isExpired(timestamp)) { 146 setTSAndType(timestamp, type); 147 return ScanQueryMatcher.MatchCode.INCLUDE; 148 } else { 149 return MatchCode.SEEK_NEXT_COL; 150 } 151 } 152 153 @Override 154 public void reset() { 155 columnCell = null; 156 resetTSAndType(); 157 } 158 159 private void resetTSAndType() { 160 latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP; 161 latestTypeOfCurrentColumn = 0; 162 } 163 164 private void setTSAndType(long timestamp, byte type) { 165 latestTSOfCurrentColumn = timestamp; 166 latestTypeOfCurrentColumn = type; 167 } 168 169 private boolean sameAsPreviousTSAndType(long timestamp, byte type) { 170 return timestamp == latestTSOfCurrentColumn && type == latestTypeOfCurrentColumn; 171 } 172 173 private boolean isExpired(long timestamp) { 174 return timestamp < oldestStamp; 175 } 176 177 /** 178 * Used by matcher and scan/get to get a hint of the next column to seek to after checkColumn() 179 * returns SKIP. Returns the next interesting column we want, or NULL there is none (wildcard 180 * scanner). 181 * @return The column count. 182 */ 183 @Override 184 public ColumnCount getColumnHint() { 185 return null; 186 } 187 188 /** 189 * We can never know a-priori if we are done, so always return false. 190 * @return false 191 */ 192 @Override 193 public boolean done() { 194 return false; 195 } 196 197 @Override 198 public MatchCode getNextRowOrNextColumn(Cell cell) { 199 return MatchCode.SEEK_NEXT_COL; 200 } 201 202 @Override 203 public void beforeShipped() { 204 if (columnCell != null) { 205 this.columnCell = KeyValueUtil.toNewKeyCell(this.columnCell); 206 } 207 } 208 209 @Override 210 public boolean isDone(long timestamp) { 211 return minVersions <= 0 && isExpired(timestamp); 212 } 213}