001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.filter; 019 020import java.util.ArrayList; 021import java.util.Collections; 022import java.util.List; 023import java.util.Objects; 024 025import org.apache.hadoop.hbase.Cell; 026import org.apache.hadoop.hbase.CellUtil; 027import org.apache.hadoop.hbase.HConstants; 028import org.apache.hadoop.hbase.PrivateCellUtil; 029import org.apache.hadoop.hbase.client.ClientUtil; 030import org.apache.hadoop.hbase.exceptions.DeserializationException; 031import org.apache.hadoop.hbase.util.Bytes; 032import org.apache.yetus.audience.InterfaceAudience; 033import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException; 034import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations; 035import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos; 036 037/** 038 * Filter to support scan multiple row key ranges. It can construct the row key ranges from the 039 * passed list which can be accessed by each region server. 040 * 041 * HBase is quite efficient when scanning only one small row key range. If user needs to specify 042 * multiple row key ranges in one scan, the typical solutions are: 1. through FilterList which is a 043 * list of row key Filters, 2. using the SQL layer over HBase to join with two table, such as hive, 044 * phoenix etc. However, both solutions are inefficient. Both of them can't utilize the range info 045 * to perform fast forwarding during scan which is quite time consuming. If the number of ranges 046 * are quite big (e.g. millions), join is a proper solution though it is slow. However, there are 047 * cases that user wants to specify a small number of ranges to scan (e.g. <1000 ranges). Both 048 * solutions can't provide satisfactory performance in such case. MultiRowRangeFilter is to support 049 * such usec ase (scan multiple row key ranges), which can construct the row key ranges from user 050 * specified list and perform fast-forwarding during scan. Thus, the scan will be quite efficient. 051 */ 052@InterfaceAudience.Public 053public class MultiRowRangeFilter extends FilterBase { 054 055 private static final int ROW_BEFORE_FIRST_RANGE = -1; 056 057 private final List<RowRange> rangeList; 058 private final RangeIteration ranges; 059 060 private boolean done = false; 061 private int index; 062 private BasicRowRange range; 063 private ReturnCode currentReturnCode; 064 065 /** 066 * @param list A list of <code>RowRange</code> 067 */ 068 public MultiRowRangeFilter(List<RowRange> list) { 069 // We don't use rangeList anywhere else, but keeping it lets us pay a little 070 // memory to avoid touching the serialization logic. 071 this.rangeList = Collections.unmodifiableList(sortAndMerge(list)); 072 this.ranges = new RangeIteration(rangeList); 073 } 074 075 /** 076 * Constructor for creating a <code>MultiRowRangeFilter</code> from multiple rowkey prefixes. 077 * 078 * As <code>MultiRowRangeFilter</code> javadoc says (See the solution 1 of the first statement), 079 * if you try to create a filter list that scans row keys corresponding to given prefixes (e.g., 080 * <code>FilterList</code> composed of multiple <code>PrefixFilter</code>s), this constructor 081 * provides a way to avoid creating an inefficient one. 082 * 083 * @param rowKeyPrefixes the array of byte array 084 */ 085 public MultiRowRangeFilter(byte[][] rowKeyPrefixes) { 086 this(createRangeListFromRowKeyPrefixes(rowKeyPrefixes)); 087 } 088 089 private static List<RowRange> createRangeListFromRowKeyPrefixes(byte[][] rowKeyPrefixes) { 090 if (rowKeyPrefixes == null) { 091 throw new IllegalArgumentException("Invalid rowkey prefixes"); 092 } 093 094 List<RowRange> list = new ArrayList<>(); 095 for (byte[] rowKeyPrefix: rowKeyPrefixes) { 096 byte[] stopRow = ClientUtil.calculateTheClosestNextRowKeyForPrefix(rowKeyPrefix); 097 list.add(new RowRange(rowKeyPrefix, true, stopRow, false)); 098 } 099 return list; 100 } 101 102 public List<RowRange> getRowRanges() { 103 // Used by hbase-rest 104 return this.rangeList; 105 } 106 107 @Override 108 public boolean filterAllRemaining() { 109 return done; 110 } 111 112 @Override 113 public boolean filterRowKey(Cell firstRowCell) { 114 if (filterAllRemaining()) return true; 115 116 // N.b. We can only do this after we're iterating over records. If we try to do 117 // it before, the Scan (and this filter) may not yet be fully initialized. This is a 118 // wart on Filter and something that'd be nice to clean up (like CP's in HBase2.0) 119 if (!ranges.isInitialized()) { 120 ranges.initialize(isReversed()); 121 } 122 123 // If it is the first time of running, calculate the current range index for 124 // the row key. If index is out of bound which happens when the start row 125 // user sets is after the largest stop row of the ranges, stop the scan. 126 // If row key is after the current range, find the next range and update index. 127 byte[] rowArr = firstRowCell.getRowArray(); 128 int length = firstRowCell.getRowLength(); 129 int offset = firstRowCell.getRowOffset(); 130 if (!ranges.hasFoundFirstRange() || !range.contains(rowArr, offset, length)) { 131 byte[] rowkey = CellUtil.cloneRow(firstRowCell); 132 index = ranges.getNextRangeIndex(rowkey); 133 if (ranges.isIterationComplete(index)) { 134 done = true; 135 currentReturnCode = ReturnCode.NEXT_ROW; 136 return false; 137 } 138 if(index != ROW_BEFORE_FIRST_RANGE) { 139 range = ranges.get(index); 140 } else { 141 range = ranges.get(0); 142 } 143 if (ranges.isExclusive()) { 144 ranges.resetExclusive(); 145 currentReturnCode = ReturnCode.NEXT_ROW; 146 return false; 147 } 148 if (!ranges.hasFoundFirstRange()) { 149 if(index != ROW_BEFORE_FIRST_RANGE) { 150 currentReturnCode = ReturnCode.INCLUDE; 151 } else { 152 currentReturnCode = ReturnCode.SEEK_NEXT_USING_HINT; 153 } 154 ranges.setFoundFirstRange(); 155 } else { 156 if (range.contains(rowArr, offset, length)) { 157 currentReturnCode = ReturnCode.INCLUDE; 158 } else { 159 currentReturnCode = ReturnCode.SEEK_NEXT_USING_HINT; 160 } 161 } 162 } else { 163 currentReturnCode = ReturnCode.INCLUDE; 164 } 165 return false; 166 } 167 168 @Override 169 public ReturnCode filterCell(final Cell ignored) { 170 return currentReturnCode; 171 } 172 173 @Override 174 public Cell getNextCellHint(Cell currentKV) { 175 // skip to the next range's start row 176 // #getComparisonData lets us avoid the `if (reversed)` branch 177 byte[] comparisonData = range.getComparisonData(); 178 return PrivateCellUtil.createFirstOnRow(comparisonData, 0, (short) comparisonData.length); 179 } 180 181 /** 182 * @return The filter serialized using pb 183 */ 184 @Override 185 public byte[] toByteArray() { 186 FilterProtos.MultiRowRangeFilter.Builder builder = FilterProtos.MultiRowRangeFilter 187 .newBuilder(); 188 for (RowRange range : rangeList) { 189 if (range != null) { 190 FilterProtos.RowRange.Builder rangebuilder = FilterProtos.RowRange.newBuilder(); 191 if (range.startRow != null) 192 rangebuilder.setStartRow(UnsafeByteOperations.unsafeWrap(range.startRow)); 193 rangebuilder.setStartRowInclusive(range.startRowInclusive); 194 if (range.stopRow != null) 195 rangebuilder.setStopRow(UnsafeByteOperations.unsafeWrap(range.stopRow)); 196 rangebuilder.setStopRowInclusive(range.stopRowInclusive); 197 builder.addRowRangeList(rangebuilder.build()); 198 } 199 } 200 return builder.build().toByteArray(); 201 } 202 203 /** 204 * @param pbBytes A pb serialized instance 205 * @return An instance of MultiRowRangeFilter 206 * @throws org.apache.hadoop.hbase.exceptions.DeserializationException 207 */ 208 public static MultiRowRangeFilter parseFrom(final byte[] pbBytes) 209 throws DeserializationException { 210 FilterProtos.MultiRowRangeFilter proto; 211 try { 212 proto = FilterProtos.MultiRowRangeFilter.parseFrom(pbBytes); 213 } catch (InvalidProtocolBufferException e) { 214 throw new DeserializationException(e); 215 } 216 int length = proto.getRowRangeListCount(); 217 List<FilterProtos.RowRange> rangeProtos = proto.getRowRangeListList(); 218 List<RowRange> rangeList = new ArrayList<>(length); 219 for (FilterProtos.RowRange rangeProto : rangeProtos) { 220 RowRange range = new RowRange(rangeProto.hasStartRow() ? rangeProto.getStartRow() 221 .toByteArray() : null, rangeProto.getStartRowInclusive(), rangeProto.hasStopRow() ? 222 rangeProto.getStopRow().toByteArray() : null, rangeProto.getStopRowInclusive()); 223 rangeList.add(range); 224 } 225 return new MultiRowRangeFilter(rangeList); 226 } 227 228 /** 229 * @param o the filter to compare 230 * @return true if and only if the fields of the filter that are serialized are equal to the 231 * corresponding fields in other. Used for testing. 232 */ 233 @Override 234 boolean areSerializedFieldsEqual(Filter o) { 235 if (o == this) 236 return true; 237 if (!(o instanceof MultiRowRangeFilter)) 238 return false; 239 240 MultiRowRangeFilter other = (MultiRowRangeFilter) o; 241 if (this.rangeList.size() != other.rangeList.size()) 242 return false; 243 for (int i = 0; i < rangeList.size(); ++i) { 244 RowRange thisRange = this.rangeList.get(i); 245 RowRange otherRange = other.rangeList.get(i); 246 if (!(Bytes.equals(thisRange.startRow, otherRange.startRow) && Bytes.equals( 247 thisRange.stopRow, otherRange.stopRow) && (thisRange.startRowInclusive == 248 otherRange.startRowInclusive) && (thisRange.stopRowInclusive == 249 otherRange.stopRowInclusive))) { 250 return false; 251 } 252 } 253 return true; 254 } 255 256 /** 257 * sort the ranges and if the ranges with overlap, then merge them. 258 * 259 * @param ranges the list of ranges to sort and merge. 260 * @return the ranges after sort and merge. 261 */ 262 public static List<RowRange> sortAndMerge(List<RowRange> ranges) { 263 if (ranges.isEmpty()) { 264 throw new IllegalArgumentException("No ranges found."); 265 } 266 List<RowRange> invalidRanges = new ArrayList<>(); 267 List<RowRange> newRanges = new ArrayList<>(ranges.size()); 268 Collections.sort(ranges); 269 if(ranges.get(0).isValid()) { 270 if (ranges.size() == 1) { 271 newRanges.add(ranges.get(0)); 272 } 273 } else { 274 invalidRanges.add(ranges.get(0)); 275 } 276 277 byte[] lastStartRow = ranges.get(0).startRow; 278 boolean lastStartRowInclusive = ranges.get(0).startRowInclusive; 279 byte[] lastStopRow = ranges.get(0).stopRow; 280 boolean lastStopRowInclusive = ranges.get(0).stopRowInclusive; 281 int i = 1; 282 for (; i < ranges.size(); i++) { 283 RowRange range = ranges.get(i); 284 if (!range.isValid()) { 285 invalidRanges.add(range); 286 } 287 if(Bytes.equals(lastStopRow, HConstants.EMPTY_BYTE_ARRAY)) { 288 newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, lastStopRow, 289 lastStopRowInclusive)); 290 break; 291 } 292 // with overlap in the ranges 293 if ((Bytes.compareTo(lastStopRow, range.startRow) > 0) || 294 (Bytes.compareTo(lastStopRow, range.startRow) == 0 && !(lastStopRowInclusive == false && 295 range.isStartRowInclusive() == false))) { 296 if(Bytes.equals(range.stopRow, HConstants.EMPTY_BYTE_ARRAY)) { 297 newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, range.stopRow, 298 range.stopRowInclusive)); 299 break; 300 } 301 // if first range contains second range, ignore the second range 302 if (Bytes.compareTo(lastStopRow, range.stopRow) >= 0) { 303 if((Bytes.compareTo(lastStopRow, range.stopRow) == 0)) { 304 if(lastStopRowInclusive == true || range.stopRowInclusive == true) { 305 lastStopRowInclusive = true; 306 } 307 } 308 if ((i + 1) == ranges.size()) { 309 newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, lastStopRow, 310 lastStopRowInclusive)); 311 } 312 } else { 313 lastStopRow = range.stopRow; 314 lastStopRowInclusive = range.stopRowInclusive; 315 if ((i + 1) < ranges.size()) { 316 i++; 317 range = ranges.get(i); 318 if (!range.isValid()) { 319 invalidRanges.add(range); 320 } 321 } else { 322 newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, lastStopRow, 323 lastStopRowInclusive)); 324 break; 325 } 326 while ((Bytes.compareTo(lastStopRow, range.startRow) > 0) || 327 (Bytes.compareTo(lastStopRow, range.startRow) == 0 && 328 (lastStopRowInclusive == true || range.startRowInclusive==true))) { 329 if(Bytes.equals(range.stopRow, HConstants.EMPTY_BYTE_ARRAY)) { 330 break; 331 } 332 // if this first range contain second range, ignore the second range 333 if (Bytes.compareTo(lastStopRow, range.stopRow) >= 0) { 334 if(lastStopRowInclusive == true || range.stopRowInclusive == true) { 335 lastStopRowInclusive = true; 336 } 337 i++; 338 if (i < ranges.size()) { 339 range = ranges.get(i); 340 if (!range.isValid()) { 341 invalidRanges.add(range); 342 } 343 } else { 344 break; 345 } 346 } else { 347 lastStopRow = range.stopRow; 348 lastStopRowInclusive = range.stopRowInclusive; 349 i++; 350 if (i < ranges.size()) { 351 range = ranges.get(i); 352 if (!range.isValid()) { 353 invalidRanges.add(range); 354 } 355 } else { 356 break; 357 } 358 } 359 } 360 if(Bytes.equals(range.stopRow, HConstants.EMPTY_BYTE_ARRAY)) { 361 if((Bytes.compareTo(lastStopRow, range.startRow) < 0) || 362 (Bytes.compareTo(lastStopRow, range.startRow) == 0 && 363 lastStopRowInclusive == false && range.startRowInclusive == false)) { 364 newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, lastStopRow, 365 lastStopRowInclusive)); 366 newRanges.add(range); 367 } else { 368 newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, range.stopRow, 369 range.stopRowInclusive)); 370 break; 371 } 372 } 373 newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, lastStopRow, 374 lastStopRowInclusive)); 375 if ((i + 1) == ranges.size()) { 376 newRanges.add(range); 377 } 378 lastStartRow = range.startRow; 379 lastStartRowInclusive = range.startRowInclusive; 380 lastStopRow = range.stopRow; 381 lastStopRowInclusive = range.stopRowInclusive; 382 } 383 } else { 384 newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, lastStopRow, 385 lastStopRowInclusive)); 386 if ((i + 1) == ranges.size()) { 387 newRanges.add(range); 388 } 389 lastStartRow = range.startRow; 390 lastStartRowInclusive = range.startRowInclusive; 391 lastStopRow = range.stopRow; 392 lastStopRowInclusive = range.stopRowInclusive; 393 } 394 } 395 // check the remaining ranges 396 for(int j=i; j < ranges.size(); j++) { 397 if(!ranges.get(j).isValid()) { 398 invalidRanges.add(ranges.get(j)); 399 } 400 } 401 // if invalid range exists, throw the exception 402 if (invalidRanges.size() != 0) { 403 throwExceptionForInvalidRanges(invalidRanges, true); 404 } 405 // If no valid ranges found, throw the exception 406 if(newRanges.isEmpty()) { 407 throw new IllegalArgumentException("No valid ranges found."); 408 } 409 return newRanges; 410 } 411 412 private static void throwExceptionForInvalidRanges(List<RowRange> invalidRanges, 413 boolean details) { 414 StringBuilder sb = new StringBuilder(); 415 sb.append(invalidRanges.size()).append(" invaild ranges.\n"); 416 if (details) { 417 for (RowRange range : invalidRanges) { 418 sb.append( 419 "Invalid range: start row => " + Bytes.toString(range.startRow) + ", stop row => " 420 + Bytes.toString(range.stopRow)).append('\n'); 421 } 422 } 423 throw new IllegalArgumentException(sb.toString()); 424 } 425 426 private static abstract class BasicRowRange implements Comparable<BasicRowRange> { 427 protected byte[] startRow; 428 protected boolean startRowInclusive = true; 429 protected byte[] stopRow; 430 protected boolean stopRowInclusive = false; 431 432 public BasicRowRange() { 433 } 434 /** 435 * If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin at the 436 * start row of the table. If the stopRow is empty or null, set it to 437 * HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table. 438 */ 439 public BasicRowRange(String startRow, boolean startRowInclusive, String stopRow, 440 boolean stopRowInclusive) { 441 this((startRow == null || startRow.isEmpty()) ? HConstants.EMPTY_BYTE_ARRAY : 442 Bytes.toBytes(startRow), startRowInclusive, 443 (stopRow == null || stopRow.isEmpty()) ? HConstants.EMPTY_BYTE_ARRAY : 444 Bytes.toBytes(stopRow), stopRowInclusive); 445 } 446 447 public BasicRowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, 448 boolean stopRowInclusive) { 449 this.startRow = (startRow == null) ? HConstants.EMPTY_BYTE_ARRAY : startRow; 450 this.startRowInclusive = startRowInclusive; 451 this.stopRow = (stopRow == null) ? HConstants.EMPTY_BYTE_ARRAY :stopRow; 452 this.stopRowInclusive = stopRowInclusive; 453 } 454 455 public byte[] getStartRow() { 456 return startRow; 457 } 458 459 public byte[] getStopRow() { 460 return stopRow; 461 } 462 463 /** 464 * @return if start row is inclusive. 465 */ 466 public boolean isStartRowInclusive() { 467 return startRowInclusive; 468 } 469 470 /** 471 * @return if stop row is inclusive. 472 */ 473 public boolean isStopRowInclusive() { 474 return stopRowInclusive; 475 } 476 477 public boolean contains(byte[] row) { 478 return contains(row, 0, row.length); 479 } 480 481 public boolean contains(byte[] buffer, int offset, int length) { 482 if(startRowInclusive) { 483 if(stopRowInclusive) { 484 return Bytes.compareTo(buffer, offset, length, startRow, 0, startRow.length) >= 0 485 && (Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY) || 486 Bytes.compareTo(buffer, offset, length, stopRow, 0, stopRow.length) <= 0); 487 } else { 488 return Bytes.compareTo(buffer, offset, length, startRow, 0, startRow.length) >= 0 489 && (Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY) || 490 Bytes.compareTo(buffer, offset, length, stopRow, 0, stopRow.length) < 0); 491 } 492 } else { 493 if(stopRowInclusive) { 494 return Bytes.compareTo(buffer, offset, length, startRow, 0, startRow.length) > 0 495 && (Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY) || 496 Bytes.compareTo(buffer, offset, length, stopRow, 0, stopRow.length) <= 0); 497 } else { 498 return Bytes.compareTo(buffer, offset, length, startRow, 0, startRow.length) > 0 499 && (Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY) || 500 Bytes.compareTo(buffer, offset, length, stopRow, 0, stopRow.length) < 0); 501 } 502 } 503 } 504 505 public boolean isValid() { 506 return Bytes.equals(startRow, HConstants.EMPTY_BYTE_ARRAY) 507 || Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY) 508 || Bytes.compareTo(startRow, stopRow) < 0 509 || (Bytes.compareTo(startRow, stopRow) == 0 && stopRowInclusive == true); 510 } 511 512 @Override 513 public boolean equals(Object obj){ 514 if (!(obj instanceof BasicRowRange)) { 515 return false; 516 } 517 if (this == obj) { 518 return true; 519 } 520 BasicRowRange rr = (BasicRowRange) obj; 521 return Bytes.equals(this.stopRow, rr.getStopRow()) && 522 Bytes.equals(this.startRow, this.getStartRow()) && 523 this.startRowInclusive == rr.isStartRowInclusive() && 524 this.stopRowInclusive == rr.isStopRowInclusive(); 525 } 526 527 @Override 528 public int hashCode() { 529 return Objects.hash(Bytes.hashCode(this.stopRow), 530 Bytes.hashCode(this.startRow), 531 this.startRowInclusive, 532 this.stopRowInclusive); 533 } 534 535 /** 536 * Returns the data to be used to compare {@code this} to another object. 537 */ 538 public abstract byte[] getComparisonData(); 539 540 /** 541 * Returns whether the bounding row used for binary-search is inclusive or not. 542 * 543 * For forward scans, we would check the starRow, but we would check the stopRow for 544 * the reverse scan case. 545 */ 546 public abstract boolean isSearchRowInclusive(); 547 548 @Override 549 public int compareTo(BasicRowRange other) { 550 byte[] left; 551 byte[] right; 552 if (isAscendingOrder()) { 553 left = this.getComparisonData(); 554 right = other.getComparisonData(); 555 } else { 556 left = other.getComparisonData(); 557 right = this.getComparisonData(); 558 } 559 return Bytes.compareTo(left, right); 560 } 561 562 public abstract boolean isAscendingOrder(); 563 } 564 565 /** 566 * Internal RowRange that reverses the sort-order to handle reverse scans. 567 */ 568 @InterfaceAudience.Private 569 private static class ReversedRowRange extends BasicRowRange { 570 public ReversedRowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, 571 boolean stopRowInclusive) { 572 super(startRow, startRowInclusive, stopRow, stopRowInclusive); 573 } 574 575 @Override 576 public byte[] getComparisonData() { 577 return this.stopRow; 578 } 579 580 @Override 581 public boolean isSearchRowInclusive() { 582 return this.stopRowInclusive; 583 } 584 585 @Override 586 public boolean isAscendingOrder() { 587 return false; 588 } 589 } 590 591 @InterfaceAudience.Public 592 public static class RowRange extends BasicRowRange { 593 public RowRange() { 594 } 595 /** 596 * If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin at the 597 * start row of the table. If the stopRow is empty or null, set it to 598 * HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table. 599 */ 600 public RowRange(String startRow, boolean startRowInclusive, String stopRow, 601 boolean stopRowInclusive) { 602 super(startRow, startRowInclusive, stopRow, stopRowInclusive); 603 } 604 605 public RowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, 606 boolean stopRowInclusive) { 607 super(startRow, startRowInclusive, stopRow, stopRowInclusive); 608 } 609 610 @Override 611 public byte[] getComparisonData() { 612 return startRow; 613 } 614 615 @Override 616 public boolean isSearchRowInclusive() { 617 return startRowInclusive; 618 } 619 620 @Override 621 public boolean isAscendingOrder() { 622 return true; 623 } 624 } 625 626 /** 627 * Abstraction over the ranges of rows to return from this filter, regardless of forward or 628 * reverse scans being used. This Filter can use this class, agnostic of iteration direction, 629 * as the same algorithm can be applied in both cases. 630 */ 631 @InterfaceAudience.Private 632 private static class RangeIteration { 633 private boolean exclusive = false; 634 private boolean initialized = false; 635 private boolean foundFirstRange = false; 636 private boolean reversed = false; 637 private final List<RowRange> sortedAndMergedRanges; 638 private List<? extends BasicRowRange> ranges; 639 640 public RangeIteration(List<RowRange> sortedAndMergedRanges) { 641 this.sortedAndMergedRanges = sortedAndMergedRanges; 642 } 643 644 void initialize(boolean reversed) { 645 // Avoid double initialization 646 assert !this.initialized; 647 this.reversed = reversed; 648 if (reversed) { 649 // If we are doing a reverse scan, we can reverse the ranges (both the elements in 650 // the list as well as their start/stop key), and use the same algorithm. 651 this.ranges = flipAndReverseRanges(sortedAndMergedRanges); 652 } else { 653 this.ranges = sortedAndMergedRanges; 654 } 655 this.initialized = true; 656 } 657 658 /** 659 * Rebuilds the sorted ranges (by startKey) into an equivalent sorted list of ranges, only by 660 * stopKey instead. Descending order and the ReversedRowRange compareTo implementation make 661 * sure that we can use Collections.binarySearch(). 662 */ 663 static List<ReversedRowRange> flipAndReverseRanges(List<RowRange> ranges) { 664 List<ReversedRowRange> flippedRanges = new ArrayList<>(ranges.size()); 665 for (int i = ranges.size() - 1; i >= 0; i--) { 666 RowRange origRange = ranges.get(i); 667 ReversedRowRange newRowRange = new ReversedRowRange( 668 origRange.startRow, origRange.startRowInclusive, origRange.stopRow, 669 origRange.isStopRowInclusive()); 670 flippedRanges.add(newRowRange); 671 } 672 return flippedRanges; 673 } 674 675 /** 676 * Calculates the position where the given rowkey fits in the ranges list. 677 * 678 * @param rowKey the row key to calculate 679 * @return index the position of the row key 680 */ 681 public int getNextRangeIndex(byte[] rowKey) { 682 BasicRowRange temp; 683 if (reversed) { 684 temp = new ReversedRowRange(null, true, rowKey, true); 685 } else { 686 temp = new RowRange(rowKey, true, null, true); 687 } 688 // Because we make sure that `ranges` has the correct natural ordering (given it containing 689 // RowRange or ReverseRowRange objects). This keeps us from having to have two different 690 // implementations below. 691 final int index = Collections.binarySearch(ranges, temp); 692 if (index < 0) { 693 int insertionPosition = -index - 1; 694 // check if the row key in the range before the insertion position 695 if (insertionPosition != 0 && ranges.get(insertionPosition - 1).contains(rowKey)) { 696 return insertionPosition - 1; 697 } 698 // check if the row key is before the first range 699 if (insertionPosition == 0 && !ranges.get(insertionPosition).contains(rowKey)) { 700 return ROW_BEFORE_FIRST_RANGE; 701 } 702 if (!foundFirstRange) { 703 foundFirstRange = true; 704 } 705 return insertionPosition; 706 } 707 // the row key equals one of the start keys, and the the range exclude the start key 708 if(ranges.get(index).isSearchRowInclusive() == false) { 709 exclusive = true; 710 } 711 return index; 712 } 713 714 /** 715 * Sets {@link #foundFirstRange} to {@code true}, indicating that we found a matching row range. 716 */ 717 public void setFoundFirstRange() { 718 this.foundFirstRange = true; 719 } 720 721 /** 722 * Gets the RowRange at the given offset. 723 */ 724 @SuppressWarnings("unchecked") 725 public <T extends BasicRowRange> T get(int i) { 726 return (T) ranges.get(i); 727 } 728 729 /** 730 * Returns true if the first matching row range was found. 731 */ 732 public boolean hasFoundFirstRange() { 733 return foundFirstRange; 734 } 735 736 /** 737 * Returns true if the current range's key is exclusive 738 */ 739 public boolean isExclusive() { 740 return exclusive; 741 } 742 743 /** 744 * Resets the exclusive flag. 745 */ 746 public void resetExclusive() { 747 exclusive = false; 748 } 749 750 /** 751 * Returns true if this class has been initialized by calling {@link #initialize(boolean)}. 752 */ 753 public boolean isInitialized() { 754 return initialized; 755 } 756 757 /** 758 * Returns true if we exhausted searching all row ranges. 759 */ 760 public boolean isIterationComplete(int index) { 761 return index >= ranges.size(); 762 } 763 } 764 765 @Override 766 public boolean equals(Object obj) { 767 return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj); 768 } 769 770 @Override 771 public int hashCode() { 772 return Objects.hash(this.ranges); 773 } 774}