001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver.querymatcher; 019 020import java.io.IOException; 021import java.util.NavigableSet; 022import org.apache.hadoop.hbase.Cell; 023import org.apache.hadoop.hbase.CellUtil; 024import org.apache.hadoop.hbase.DoNotRetryIOException; 025import org.apache.hadoop.hbase.ExtendedCell; 026import org.apache.hadoop.hbase.KeyValueUtil; 027import org.apache.hadoop.hbase.PrivateCellUtil; 028import org.apache.hadoop.hbase.client.Scan; 029import org.apache.hadoop.hbase.filter.Filter; 030import org.apache.hadoop.hbase.filter.Filter.ReturnCode; 031import org.apache.hadoop.hbase.io.TimeRange; 032import org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost; 033import org.apache.hadoop.hbase.regionserver.ScanInfo; 034import org.apache.hadoop.hbase.util.Pair; 035import org.apache.yetus.audience.InterfaceAudience; 036 037/** 038 * Query matcher for user scan. 039 * <p> 040 * We do not consider mvcc here because 041 * {@link org.apache.hadoop.hbase.regionserver.StoreFileScanner} and 042 * {@link org.apache.hadoop.hbase.regionserver.SegmentScanner} will only return a cell whose mvcc is 043 * less than or equal to given read point. For 044 * {@link org.apache.hadoop.hbase.client.IsolationLevel#READ_UNCOMMITTED}, we just set the read 045 * point to {@link Long#MAX_VALUE}, i.e. still do not need to consider it. 046 */ 047@InterfaceAudience.Private 048public abstract class UserScanQueryMatcher extends ScanQueryMatcher { 049 050 protected final boolean hasNullColumn; 051 052 protected final Filter filter; 053 054 protected final byte[] stopRow; 055 056 protected final TimeRange tr; 057 058 private final int versionsAfterFilter; 059 060 private int count = 0; 061 062 private ExtendedCell curColCell = null; 063 064 private static ExtendedCell createStartKey(Scan scan, ScanInfo scanInfo) { 065 if (scan.includeStartRow()) { 066 return createStartKeyFromRow(scan.getStartRow(), scanInfo); 067 } else { 068 return PrivateCellUtil.createLastOnRow(scan.getStartRow()); 069 } 070 } 071 072 protected UserScanQueryMatcher(Scan scan, ScanInfo scanInfo, ColumnTracker columns, 073 boolean hasNullColumn, long oldestUnexpiredTS, long now) { 074 super(createStartKey(scan, scanInfo), scanInfo, columns, oldestUnexpiredTS, now); 075 this.hasNullColumn = hasNullColumn; 076 this.filter = scan.getFilter(); 077 if (this.filter != null) { 078 this.versionsAfterFilter = scan.isRaw() 079 ? scan.getMaxVersions() 080 : Math.min(scan.getMaxVersions(), scanInfo.getMaxVersions()); 081 } else { 082 this.versionsAfterFilter = 0; 083 } 084 this.stopRow = scan.getStopRow(); 085 TimeRange timeRange = scan.getColumnFamilyTimeRange().get(scanInfo.getFamily()); 086 if (timeRange == null) { 087 this.tr = scan.getTimeRange(); 088 } else { 089 this.tr = timeRange; 090 } 091 } 092 093 @Override 094 public boolean hasNullColumnInQuery() { 095 return hasNullColumn; 096 } 097 098 @Override 099 public boolean isUserScan() { 100 return true; 101 } 102 103 @Override 104 public Filter getFilter() { 105 return filter; 106 } 107 108 @Override 109 public ExtendedCell getNextKeyHint(ExtendedCell cell) throws IOException { 110 if (filter == null) { 111 return null; 112 } else { 113 Cell hint = filter.getNextCellHint(cell); 114 if (hint == null || hint instanceof ExtendedCell) { 115 return (ExtendedCell) hint; 116 } else { 117 throw new DoNotRetryIOException("Incorrect filter implementation, " 118 + "the Cell returned by getNextKeyHint is not an ExtendedCell. Filter class: " 119 + filter.getClass().getName()); 120 } 121 122 } 123 } 124 125 @Override 126 public void beforeShipped() throws IOException { 127 super.beforeShipped(); 128 if (curColCell != null) { 129 this.curColCell = KeyValueUtil.toNewKeyCell(this.curColCell); 130 } 131 } 132 133 protected final MatchCode matchColumn(ExtendedCell cell, long timestamp, byte typeByte) 134 throws IOException { 135 int tsCmp = tr.compare(timestamp); 136 if (tsCmp > 0) { 137 return MatchCode.SKIP; 138 } 139 if (tsCmp < 0) { 140 return columns.getNextRowOrNextColumn(cell); 141 } 142 // STEP 1: Check if the column is part of the requested columns 143 MatchCode matchCode = columns.checkColumn(cell, typeByte); 144 if (matchCode != MatchCode.INCLUDE) { 145 return matchCode; 146 } 147 /* 148 * STEP 2: check the number of versions needed. This method call returns SKIP, SEEK_NEXT_COL, 149 * INCLUDE, INCLUDE_AND_SEEK_NEXT_COL, or INCLUDE_AND_SEEK_NEXT_ROW. 150 */ 151 matchCode = columns.checkVersions(cell, timestamp, typeByte, false); 152 switch (matchCode) { 153 case SKIP: 154 return MatchCode.SKIP; 155 case SEEK_NEXT_COL: 156 return MatchCode.SEEK_NEXT_COL; 157 default: 158 // It means it is INCLUDE, INCLUDE_AND_SEEK_NEXT_COL or INCLUDE_AND_SEEK_NEXT_ROW. 159 assert matchCode == MatchCode.INCLUDE || matchCode == MatchCode.INCLUDE_AND_SEEK_NEXT_COL 160 || matchCode == MatchCode.INCLUDE_AND_SEEK_NEXT_ROW; 161 break; 162 } 163 164 return filter == null 165 ? matchCode 166 : mergeFilterResponse(cell, matchCode, filter.filterCell(cell)); 167 } 168 169 /** 170 * Call this when scan has filter. Decide the desired behavior by checkVersions's MatchCode and 171 * filterCell's ReturnCode. Cell may be skipped by filter, so the column versions in result may be 172 * less than user need. It need to check versions again when filter and columnTracker both include 173 * the cell. <br/> 174 * 175 * <pre> 176 * ColumnChecker FilterResponse Desired behavior 177 * INCLUDE SKIP SKIP 178 * INCLUDE NEXT_COL SEEK_NEXT_COL or SEEK_NEXT_ROW 179 * INCLUDE NEXT_ROW SEEK_NEXT_ROW 180 * INCLUDE SEEK_NEXT_USING_HINT SEEK_NEXT_USING_HINT 181 * INCLUDE INCLUDE INCLUDE 182 * INCLUDE INCLUDE_AND_NEXT_COL INCLUDE_AND_SEEK_NEXT_COL 183 * INCLUDE INCLUDE_AND_SEEK_NEXT_ROW INCLUDE_AND_SEEK_NEXT_ROW 184 * INCLUDE_AND_SEEK_NEXT_COL SKIP SEEK_NEXT_COL 185 * INCLUDE_AND_SEEK_NEXT_COL NEXT_COL SEEK_NEXT_COL or SEEK_NEXT_ROW 186 * INCLUDE_AND_SEEK_NEXT_COL NEXT_ROW SEEK_NEXT_ROW 187 * INCLUDE_AND_SEEK_NEXT_COL SEEK_NEXT_USING_HINT SEEK_NEXT_USING_HINT 188 * INCLUDE_AND_SEEK_NEXT_COL INCLUDE INCLUDE_AND_SEEK_NEXT_COL 189 * INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_NEXT_COL INCLUDE_AND_SEEK_NEXT_COL 190 * INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW INCLUDE_AND_SEEK_NEXT_ROW 191 * INCLUDE_AND_SEEK_NEXT_ROW SKIP SEEK_NEXT_ROW 192 * INCLUDE_AND_SEEK_NEXT_ROW NEXT_COL SEEK_NEXT_ROW 193 * INCLUDE_AND_SEEK_NEXT_ROW NEXT_ROW SEEK_NEXT_ROW 194 * INCLUDE_AND_SEEK_NEXT_ROW SEEK_NEXT_USING_HINT SEEK_NEXT_USING_HINT 195 * INCLUDE_AND_SEEK_NEXT_ROW INCLUDE INCLUDE_AND_SEEK_NEXT_ROW 196 * INCLUDE_AND_SEEK_NEXT_ROW INCLUDE_AND_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW 197 * INCLUDE_AND_SEEK_NEXT_ROW INCLUDE_AND_SEEK_NEXT_ROW INCLUDE_AND_SEEK_NEXT_ROW 198 * </pre> 199 */ 200 private final MatchCode mergeFilterResponse(ExtendedCell cell, MatchCode matchCode, 201 ReturnCode filterResponse) { 202 switch (filterResponse) { 203 case SKIP: 204 if (matchCode == MatchCode.INCLUDE) { 205 return MatchCode.SKIP; 206 } else if (matchCode == MatchCode.INCLUDE_AND_SEEK_NEXT_COL) { 207 return MatchCode.SEEK_NEXT_COL; 208 } else if (matchCode == MatchCode.INCLUDE_AND_SEEK_NEXT_ROW) { 209 return MatchCode.SEEK_NEXT_ROW; 210 } 211 break; 212 case NEXT_COL: 213 if (matchCode == MatchCode.INCLUDE || matchCode == MatchCode.INCLUDE_AND_SEEK_NEXT_COL) { 214 return columns.getNextRowOrNextColumn(cell); 215 } else if (matchCode == MatchCode.INCLUDE_AND_SEEK_NEXT_ROW) { 216 return MatchCode.SEEK_NEXT_ROW; 217 } 218 break; 219 case NEXT_ROW: 220 return MatchCode.SEEK_NEXT_ROW; 221 case SEEK_NEXT_USING_HINT: 222 return MatchCode.SEEK_NEXT_USING_HINT; 223 case INCLUDE: 224 break; 225 case INCLUDE_AND_NEXT_COL: 226 if (matchCode == MatchCode.INCLUDE) { 227 matchCode = MatchCode.INCLUDE_AND_SEEK_NEXT_COL; 228 } 229 break; 230 case INCLUDE_AND_SEEK_NEXT_ROW: 231 matchCode = MatchCode.INCLUDE_AND_SEEK_NEXT_ROW; 232 break; 233 default: 234 throw new RuntimeException("UNEXPECTED"); 235 } 236 237 // It means it is INCLUDE, INCLUDE_AND_SEEK_NEXT_COL or INCLUDE_AND_SEEK_NEXT_ROW. 238 assert matchCode == MatchCode.INCLUDE || matchCode == MatchCode.INCLUDE_AND_SEEK_NEXT_COL 239 || matchCode == MatchCode.INCLUDE_AND_SEEK_NEXT_ROW; 240 241 // We need to make sure that the number of cells returned will not exceed max version in scan 242 // when the match code is INCLUDE* case. 243 if (curColCell == null || !CellUtil.matchingRowColumn(cell, curColCell)) { 244 count = 0; 245 curColCell = cell; 246 } 247 count += 1; 248 249 if (count > versionsAfterFilter) { 250 // when the number of cells exceed max version in scan, we should return SEEK_NEXT_COL match 251 // code, but if current code is INCLUDE_AND_SEEK_NEXT_ROW, we can optimize to choose the max 252 // step between SEEK_NEXT_COL and INCLUDE_AND_SEEK_NEXT_ROW, which is SEEK_NEXT_ROW. 253 if (matchCode == MatchCode.INCLUDE_AND_SEEK_NEXT_ROW) { 254 matchCode = MatchCode.SEEK_NEXT_ROW; 255 } else { 256 matchCode = MatchCode.SEEK_NEXT_COL; 257 } 258 } 259 if (matchCode == MatchCode.INCLUDE_AND_SEEK_NEXT_COL || matchCode == MatchCode.SEEK_NEXT_COL) { 260 // Update column tracker to next column, As we use the column hint from the tracker to seek 261 // to next cell (HBASE-19749) 262 columns.doneWithColumn(cell); 263 } 264 return matchCode; 265 } 266 267 protected abstract boolean isGet(); 268 269 protected abstract boolean moreRowsMayExistsAfter(int cmpToStopRow); 270 271 @Override 272 public boolean moreRowsMayExistAfter(ExtendedCell cell) { 273 // If a 'get' Scan -- we are doing a Get (every Get is a single-row Scan in implementation) -- 274 // then we are looking at one row only, the one specified in the Get coordinate..so we know 275 // for sure that there are no more rows on this Scan 276 if (isGet()) { 277 return false; 278 } 279 // If no stopRow, return that there may be more rows. The tests that follow depend on a 280 // non-empty, non-default stopRow so this little test below short-circuits out doing the 281 // following compares. 282 if (this.stopRow == null || this.stopRow.length == 0) { 283 return true; 284 } 285 return moreRowsMayExistsAfter(rowComparator.compareRows(cell, stopRow, 0, stopRow.length)); 286 } 287 288 public static UserScanQueryMatcher create(Scan scan, ScanInfo scanInfo, 289 NavigableSet<byte[]> columns, long oldestUnexpiredTS, long now, 290 RegionCoprocessorHost regionCoprocessorHost) throws IOException { 291 boolean hasNullColumn = 292 !(columns != null && columns.size() != 0 && columns.first().length != 0); 293 Pair<DeleteTracker, ColumnTracker> trackers = 294 getTrackers(regionCoprocessorHost, columns, scanInfo, oldestUnexpiredTS, scan); 295 DeleteTracker deleteTracker = trackers.getFirst(); 296 ColumnTracker columnTracker = trackers.getSecond(); 297 if (scan.isRaw()) { 298 return RawScanQueryMatcher.create(scan, scanInfo, columnTracker, hasNullColumn, 299 oldestUnexpiredTS, now); 300 } else { 301 return NormalUserScanQueryMatcher.create(scan, scanInfo, columnTracker, deleteTracker, 302 hasNullColumn, oldestUnexpiredTS, now); 303 } 304 } 305}