001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase; 019 020import java.util.Comparator; 021 022import org.apache.hadoop.hbase.util.ByteBufferUtils; 023import org.apache.hbase.thirdparty.com.google.common.primitives.Longs; 024import org.apache.yetus.audience.InterfaceAudience; 025import org.slf4j.Logger; 026import org.slf4j.LoggerFactory; 027 028 029/** 030 * A comparator for case where {@link ByteBufferKeyValue} is prevalent type (BBKV 031 * is base-type in hbase2). Takes a general comparator as fallback in case types are NOT the 032 * expected ByteBufferKeyValue. 033 * 034 * <p>This is a tricked-out Comparator at heart of hbase read and write. It is in 035 * the HOT path so we try all sorts of ugly stuff so we can go faster. See below 036 * in this javadoc comment for the list. 037 * 038 * <p>Apply this comparator narrowly so it is fed exclusively ByteBufferKeyValues 039 * as much as is possible so JIT can settle (e.g. make one per ConcurrentSkipListMap 040 * in HStore). 041 * 042 * <p>Exploits specially added methods in BBKV to save on deserializations of shorts, 043 * longs, etc: i.e. calculating the family length requires row length; pass it in 044 * rather than recalculate it, and so on. 045 * 046 * <p>This comparator does static dispatch to private final methods so hotspot is comfortable 047 * deciding inline. 048 * 049 * <p>Measurement has it that we almost have it so all inlines from memstore 050 * ConcurrentSkipListMap on down to the (unsafe) intrinisics that do byte compare 051 * and deserialize shorts and ints; needs a bit more work. 052 * 053 * <p>Does not take a Type to compare: i.e. it is not a Comparator<Cell> or 054 * CellComparator<Cell> or Comparator<ByteBufferKeyValue> because that adds 055 * another method to the hierarchy -- from compare(Object, Object) 056 * to dynamic compare(Cell, Cell) to static private compare -- and inlining doesn't happen if 057 * hierarchy is too deep (it is the case here). 058 * 059 * <p>Be careful making changes. Compare perf before and after and look at what 060 * hotspot ends up generating before committing change (jitwatch is helpful here). 061 * Changing this one class doubled write throughput (HBASE-20483). 062 */ 063@InterfaceAudience.Private 064public class BBKVComparator implements Comparator { 065 protected static final Logger LOG = LoggerFactory.getLogger(BBKVComparator.class); 066 private final Comparator fallback; 067 068 public BBKVComparator(Comparator fallback) { 069 this.fallback = fallback; 070 } 071 072 @Override 073 public int compare(Object l, Object r) { 074 // LOG.info("ltype={} rtype={}", l, r); 075 if ((l instanceof ByteBufferKeyValue) && (r instanceof ByteBufferKeyValue)) { 076 return compare((ByteBufferKeyValue)l, (ByteBufferKeyValue)r, false); 077 } 078 // Skip calling compare(Object, Object) and go direct to compare(Cell, Cell) 079 return this.fallback.compare((Cell)l, (Cell)r); 080 } 081 082 // TODO: Come back here. We get a few percentage points extra of throughput if this is a 083 // private method. 084 static final int compare(ByteBufferKeyValue left, ByteBufferKeyValue right, 085 boolean ignoreSequenceid) { 086 // NOTE: Same method is in CellComparatorImpl, also private, not shared, intentionally. Not 087 // sharing gets us a few percent more throughput in compares. If changes here or there, make 088 // sure done in both places. 089 090 // Compare Rows. Cache row length. 091 int leftRowLength = left.getRowLength(); 092 int rightRowLength = right.getRowLength(); 093 int diff = ByteBufferUtils.compareTo(left.getRowByteBuffer(), left.getRowPosition(), 094 leftRowLength, 095 right.getRowByteBuffer(), right.getRowPosition(), rightRowLength); 096 if (diff != 0) { 097 return diff; 098 } 099 100 // If the column is not specified, the "minimum" key type appears as latest in the sorted 101 // order, regardless of the timestamp. This is used for specifying the last key/value in a 102 // given row, because there is no "lexicographically last column" (it would be infinitely long). 103 // The "maximum" key type does not need this behavior. Copied from KeyValue. This is bad in that 104 // we can't do memcmp w/ special rules like this. 105 // TODO: Is there a test for this behavior? 106 int leftFamilyLengthPosition = left.getFamilyLengthPosition(leftRowLength); 107 int leftFamilyLength = left.getFamilyLength(leftFamilyLengthPosition); 108 int leftKeyLength = left.getKeyLength(); 109 int leftQualifierLength = left.getQualifierLength(leftKeyLength, leftRowLength, 110 leftFamilyLength); 111 112 // No need of left row length below here. 113 114 byte leftType = left.getTypeByte(leftKeyLength); 115 if (leftFamilyLength + leftQualifierLength == 0 && 116 leftType == KeyValue.Type.Minimum.getCode()) { 117 // left is "bigger", i.e. it appears later in the sorted order 118 return 1; 119 } 120 121 int rightFamilyLengthPosition = right.getFamilyLengthPosition(rightRowLength); 122 int rightFamilyLength = right.getFamilyLength(rightFamilyLengthPosition); 123 int rightKeyLength = right.getKeyLength(); 124 int rightQualifierLength = right.getQualifierLength(rightKeyLength, rightRowLength, 125 rightFamilyLength); 126 127 // No need of right row length below here. 128 129 byte rightType = right.getTypeByte(rightKeyLength); 130 if (rightFamilyLength + rightQualifierLength == 0 && 131 rightType == KeyValue.Type.Minimum.getCode()) { 132 return -1; 133 } 134 135 // Compare families. 136 int leftFamilyPosition = left.getFamilyPosition(leftFamilyLengthPosition); 137 int rightFamilyPosition = right.getFamilyPosition(rightFamilyLengthPosition); 138 diff = ByteBufferUtils.compareTo(left.getFamilyByteBuffer(), leftFamilyPosition, 139 leftFamilyLength, 140 right.getFamilyByteBuffer(), rightFamilyPosition, rightFamilyLength); 141 if (diff != 0) { 142 return diff; 143 } 144 145 // Compare qualifiers 146 diff = ByteBufferUtils.compareTo(left.getQualifierByteBuffer(), 147 left.getQualifierPosition(leftFamilyPosition, leftFamilyLength), leftQualifierLength, 148 right.getQualifierByteBuffer(), 149 right.getQualifierPosition(rightFamilyPosition, rightFamilyLength), 150 rightQualifierLength); 151 if (diff != 0) { 152 return diff; 153 } 154 155 // Timestamps. 156 // Swap order we pass into compare so we get DESCENDING order. 157 diff = Long.compare(right.getTimestamp(rightKeyLength), left.getTimestamp(leftKeyLength)); 158 if (diff != 0) { 159 return diff; 160 } 161 162 // Compare types. Let the delete types sort ahead of puts; i.e. types 163 // of higher numbers sort before those of lesser numbers. Maximum (255) 164 // appears ahead of everything, and minimum (0) appears after 165 // everything. 166 diff = (0xff & rightType) - (0xff & leftType); 167 if (diff != 0) { 168 return diff; 169 } 170 171 // Negate following comparisons so later edits show up first mvccVersion: later sorts first 172 return ignoreSequenceid ? diff : Longs.compare(right.getSequenceId(), left.getSequenceId()); 173 } 174}