001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase; 019 020import java.util.Comparator; 021 022import org.apache.hadoop.hbase.util.ByteBufferUtils; 023import org.apache.yetus.audience.InterfaceAudience; 024import org.slf4j.Logger; 025import org.slf4j.LoggerFactory; 026 027import org.apache.hbase.thirdparty.com.google.common.primitives.Longs; 028 029/** 030 * A comparator for case where {@link ByteBufferKeyValue} is prevalent type (BBKV 031 * is base-type in hbase2). Takes a general comparator as fallback in case types are NOT the 032 * expected ByteBufferKeyValue. 033 * 034 * <p>This is a tricked-out Comparator at heart of hbase read and write. It is in 035 * the HOT path so we try all sorts of ugly stuff so we can go faster. See below 036 * in this javadoc comment for the list. 037 * 038 * <p>Apply this comparator narrowly so it is fed exclusively ByteBufferKeyValues 039 * as much as is possible so JIT can settle (e.g. make one per ConcurrentSkipListMap 040 * in HStore). 041 * 042 * <p>Exploits specially added methods in BBKV to save on deserializations of shorts, 043 * longs, etc: i.e. calculating the family length requires row length; pass it in 044 * rather than recalculate it, and so on. 045 * 046 * <p>This comparator does static dispatch to private final methods so hotspot is comfortable 047 * deciding inline. 048 * 049 * <p>Measurement has it that we almost have it so all inlines from memstore 050 * ConcurrentSkipListMap on down to the (unsafe) intrinisics that do byte compare 051 * and deserialize shorts and ints; needs a bit more work. 052 * 053 * <p>Does not take a Type to compare: i.e. it is not a Comparator<Cell> or 054 * CellComparator<Cell> or Comparator<ByteBufferKeyValue> because that adds 055 * another method to the hierarchy -- from compare(Object, Object) 056 * to dynamic compare(Cell, Cell) to static private compare -- and inlining doesn't happen if 057 * hierarchy is too deep (it is the case here). 058 * 059 * <p>Be careful making changes. Compare perf before and after and look at what 060 * hotspot ends up generating before committing change (jitwatch is helpful here). 061 * Changing this one class doubled write throughput (HBASE-20483). 062 */ 063@InterfaceAudience.Private 064public class BBKVComparator implements Comparator { 065 protected static final Logger LOG = LoggerFactory.getLogger(BBKVComparator.class); 066 private final Comparator fallback; 067 068 public BBKVComparator(Comparator fallback) { 069 this.fallback = fallback; 070 } 071 072 @Override 073 public int compare(Object l, Object r) { 074 if ((l instanceof ByteBufferKeyValue) && (r instanceof ByteBufferKeyValue)) { 075 return compare((ByteBufferKeyValue)l, (ByteBufferKeyValue)r, false); 076 } 077 // Skip calling compare(Object, Object) and go direct to compare(Cell, Cell) 078 return this.fallback.compare((Cell)l, (Cell)r); 079 } 080 081 // TODO: Come back here. We get a few percentage points extra of throughput if this is a 082 // private method. 083 static int compare(ByteBufferKeyValue left, ByteBufferKeyValue right, 084 boolean ignoreSequenceid) { 085 // NOTE: Same method is in CellComparatorImpl, also private, not shared, intentionally. Not 086 // sharing gets us a few percent more throughput in compares. If changes here or there, make 087 // sure done in both places. 088 089 // Compare Rows. Cache row length. 090 int leftRowLength = left.getRowLength(); 091 int rightRowLength = right.getRowLength(); 092 int diff = ByteBufferUtils.compareTo(left.getRowByteBuffer(), left.getRowPosition(), 093 leftRowLength, 094 right.getRowByteBuffer(), right.getRowPosition(), rightRowLength); 095 if (diff != 0) { 096 return diff; 097 } 098 099 // If the column is not specified, the "minimum" key type appears as latest in the sorted 100 // order, regardless of the timestamp. This is used for specifying the last key/value in a 101 // given row, because there is no "lexicographically last column" (it would be infinitely long). 102 // The "maximum" key type does not need this behavior. Copied from KeyValue. This is bad in that 103 // we can't do memcmp w/ special rules like this. 104 // TODO: Is there a test for this behavior? 105 int leftFamilyLengthPosition = left.getFamilyLengthPosition(leftRowLength); 106 int leftFamilyLength = left.getFamilyLength(leftFamilyLengthPosition); 107 int leftKeyLength = left.getKeyLength(); 108 int leftQualifierLength = left.getQualifierLength(leftKeyLength, leftRowLength, 109 leftFamilyLength); 110 111 // No need of left row length below here. 112 113 byte leftType = left.getTypeByte(leftKeyLength); 114 if (leftFamilyLength + leftQualifierLength == 0 && 115 leftType == KeyValue.Type.Minimum.getCode()) { 116 // left is "bigger", i.e. it appears later in the sorted order 117 return 1; 118 } 119 120 int rightFamilyLengthPosition = right.getFamilyLengthPosition(rightRowLength); 121 int rightFamilyLength = right.getFamilyLength(rightFamilyLengthPosition); 122 int rightKeyLength = right.getKeyLength(); 123 int rightQualifierLength = right.getQualifierLength(rightKeyLength, rightRowLength, 124 rightFamilyLength); 125 126 // No need of right row length below here. 127 128 byte rightType = right.getTypeByte(rightKeyLength); 129 if (rightFamilyLength + rightQualifierLength == 0 && 130 rightType == KeyValue.Type.Minimum.getCode()) { 131 return -1; 132 } 133 134 // Compare families. 135 int leftFamilyPosition = left.getFamilyPosition(leftFamilyLengthPosition); 136 int rightFamilyPosition = right.getFamilyPosition(rightFamilyLengthPosition); 137 diff = ByteBufferUtils.compareTo(left.getFamilyByteBuffer(), leftFamilyPosition, 138 leftFamilyLength, 139 right.getFamilyByteBuffer(), rightFamilyPosition, rightFamilyLength); 140 if (diff != 0) { 141 return diff; 142 } 143 144 // Compare qualifiers 145 diff = ByteBufferUtils.compareTo(left.getQualifierByteBuffer(), 146 left.getQualifierPosition(leftFamilyPosition, leftFamilyLength), leftQualifierLength, 147 right.getQualifierByteBuffer(), 148 right.getQualifierPosition(rightFamilyPosition, rightFamilyLength), 149 rightQualifierLength); 150 if (diff != 0) { 151 return diff; 152 } 153 154 // Timestamps. 155 // Swap order we pass into compare so we get DESCENDING order. 156 diff = Long.compare(right.getTimestamp(rightKeyLength), left.getTimestamp(leftKeyLength)); 157 if (diff != 0) { 158 return diff; 159 } 160 161 // Compare types. Let the delete types sort ahead of puts; i.e. types 162 // of higher numbers sort before those of lesser numbers. Maximum (255) 163 // appears ahead of everything, and minimum (0) appears after 164 // everything. 165 diff = (0xff & rightType) - (0xff & leftType); 166 if (diff != 0) { 167 return diff; 168 } 169 170 // Negate following comparisons so later edits show up first mvccVersion: later sorts first 171 return ignoreSequenceid ? diff : Longs.compare(right.getSequenceId(), left.getSequenceId()); 172 } 173}