/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.querymatcher;

import java.io.IOException;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * Keeps track of the columns for a scan if they are not explicitly specified.
 * <p>
 * The tracker remembers the cell of the column it is currently in, counts the non-delete versions
 * seen for that column, and uses the configured min/max versions together with the oldest
 * unexpired timestamp to decide whether a version is included.
 */
@InterfaceAudience.Private
public class ScanWildcardColumnTracker implements ColumnTracker {
  /** Cell whose qualifier identifies the column currently being tracked; null before the first. */
  private ExtendedCell columnCell = null;
  /** Number of non-delete versions counted so far for the current column. */
  private int currentCount = 0;
  private final int maxVersions;
  private final int minVersions;

  /*
   * Keeps track of the latest timestamp and type included for current column. Used to eliminate
   * duplicates.
   */
  private long latestTSOfCurrentColumn;
  private byte latestTypeOfCurrentColumn;

  /** Timestamps strictly below this value are treated as expired. */
  private final long oldestStamp;

  private final CellComparator comparator;

  /**
   * Creates a tracker that includes at most {@code maxVersion} versions of every column.
   * @param minVersion        Minimum number of versions to keep
   * @param maxVersion        Maximum number of versions to return
   * @param oldestUnexpiredTS oldest timestamp that has not expired according to the TTL.
   * @param comparator        used to compare the qualifier of cell
   */
  public ScanWildcardColumnTracker(int minVersion, int maxVersion, long oldestUnexpiredTS,
    CellComparator comparator) {
    this.maxVersions = maxVersion;
    this.minVersions = minVersion;
    this.oldestStamp = oldestUnexpiredTS;
    this.comparator = comparator;
  }

  /**
   * {@inheritDoc} This receives puts *and* deletes. This implementation accepts every column and
   * always returns {@link MatchCode#INCLUDE}.
   */
  @Override
  public MatchCode checkColumn(ExtendedCell cell, byte type) throws IOException {
    return MatchCode.INCLUDE;
  }

  /**
   * {@inheritDoc} This receives puts *and* deletes. Deletes do not count as a version, but rather
   * take the version of the previous put (so eventually all but the last can be reclaimed).
   * <p>
   * When {@code ignoreCount} is set the cell is included without touching the version counter. A
   * cell in the current column whose timestamp and type equal the last included ones is skipped
   * as a duplicate.
   * @throws IOException if the qualifier of {@code cell} compares smaller than the qualifier of
   *                     the column currently being tracked
   */
  @Override
  public MatchCode checkVersions(ExtendedCell cell, long timestamp, byte type,
    boolean ignoreCount) throws IOException {
    if (columnCell == null) {
      // first iteration.
      resetCell(cell);
      if (ignoreCount) {
        return MatchCode.INCLUDE;
      }
      // do not count a delete marker as another version
      return checkVersion(type, timestamp);
    }
    int cmp = comparator.compareQualifiers(cell, this.columnCell);
    if (cmp == 0) {
      if (ignoreCount) {
        return MatchCode.INCLUDE;
      }

      // If column matches, check if it is a duplicate timestamp
      if (sameAsPreviousTSAndType(timestamp, type)) {
        return MatchCode.SKIP;
      }
      return checkVersion(type, timestamp);
    }

    // Different column: the remembered timestamp/type no longer apply.
    resetTSAndType();

    // new col > old col
    if (cmp > 0) {
      // Switched to a later column: track it and start counting versions afresh.
      resetCell(cell);
      if (ignoreCount) {
        return MatchCode.INCLUDE;
      }
      return checkVersion(type, timestamp);
    }

    // new col < oldcol
    // WARNING: This means that very likely an edit for some other family
    // was incorrectly stored into the store for this one. Throw an exception,
    // because this might lead to data corruption.
    // NOTE: the message names checkColumn although it is raised from checkVersions; the text is
    // intentionally left unchanged.
    throw new IOException("ScanWildcardColumnTracker.checkColumn ran into a column actually "
      + "smaller than the previous column: " + Bytes.toStringBinary(CellUtil.cloneQualifier(cell)));
  }

  /**
   * Starts tracking a new column and clears its version counter.
   * @param columnCell cell identifying the column that is now current
   */
  private void resetCell(ExtendedCell columnCell) {
    this.columnCell = columnCell;
    currentCount = 0;
  }

  /**
   * Check whether this version should be retained. The version counter is increased unless this is
   * a delete, then:
   * <ul>
   * <li>if the counter is past max versions, seek to the next column;</li>
   * <li>otherwise, if the counter is still within min versions or the timestamp has not expired,
   * remember the timestamp and type and include the cell;</li>
   * <li>otherwise (expired and not needed for min versions), seek to the next column.</li>
   * </ul>
   * @param type      type byte of the cell being checked
   * @param timestamp timestamp of the cell being checked
   * @return {@link MatchCode#INCLUDE} or {@link MatchCode#SEEK_NEXT_COL}
   */
  private MatchCode checkVersion(byte type, long timestamp) {
    if (!PrivateCellUtil.isDelete(type)) {
      currentCount++;
    }
    if (currentCount > maxVersions) {
      return MatchCode.SEEK_NEXT_COL; // skip to next col
    }
    // keep the KV if required by minversions or it is not expired, yet
    if (currentCount <= minVersions || !isExpired(timestamp)) {
      setTSAndType(timestamp, type);
      return MatchCode.INCLUDE;
    } else {
      return MatchCode.SEEK_NEXT_COL;
    }
  }

  /**
   * Forgets the current column and the remembered timestamp/type. The version counter is cleared
   * when the next column is picked up by {@code checkVersions}.
   */
  @Override
  public void reset() {
    columnCell = null;
    resetTSAndType();
  }

  /** Clears the remembered timestamp and type used for duplicate detection. */
  private void resetTSAndType() {
    latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
    latestTypeOfCurrentColumn = 0;
  }

  /** Remembers the timestamp and type of the version that was just included. */
  private void setTSAndType(long timestamp, byte type) {
    latestTSOfCurrentColumn = timestamp;
    latestTypeOfCurrentColumn = type;
  }

  /** Returns true if both timestamp and type equal those of the last included version. */
  private boolean sameAsPreviousTSAndType(long timestamp, byte type) {
    return timestamp == latestTSOfCurrentColumn && type == latestTypeOfCurrentColumn;
  }

  /** Returns true if the timestamp is strictly older than the oldest unexpired timestamp. */
  private boolean isExpired(long timestamp) {
    return timestamp < oldestStamp;
  }

  /**
   * Used by matcher and scan/get to get a hint of the next column to seek to after checkColumn()
   * returns SKIP. Returns the next interesting column we want, or NULL if there is none (wildcard
   * scanner).
   * @return always {@code null}; this tracker has no column hint to offer.
   */
  @Override
  public ColumnCount getColumnHint() {
    return null;
  }

  /**
   * We can never know a-priori if we are done, so always return false.
   */
  @Override
  public boolean done() {
    return false;
  }

  /**
   * Always answers {@link MatchCode#SEEK_NEXT_COL}; the supplied cell is not inspected.
   */
  @Override
  public MatchCode getNextRowOrNextColumn(ExtendedCell cell) {
    return MatchCode.SEEK_NEXT_COL;
  }

  /**
   * Replaces the tracked column cell, if any, with the result of
   * {@link KeyValueUtil#toNewKeyCell}. NOTE(review): presumably this yields a key-only copy that
   * no longer references the original cell's backing memory; confirm against KeyValueUtil.
   */
  @Override
  public void beforeShipped() {
    if (columnCell != null) {
      this.columnCell = KeyValueUtil.toNewKeyCell(this.columnCell);
    }
  }

  /**
   * Returns true when no minimum number of versions is configured ({@code minVersions <= 0}) and
   * the given timestamp is older than the oldest unexpired timestamp.
   */
  @Override
  public boolean isDone(long timestamp) {
    return minVersions <= 0 && isExpired(timestamp);
  }
}