001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import java.util.Comparator;
021
022import org.apache.hadoop.hbase.util.ByteBufferUtils;
023import org.apache.hbase.thirdparty.com.google.common.primitives.Longs;
024import org.apache.yetus.audience.InterfaceAudience;
025import org.slf4j.Logger;
026import org.slf4j.LoggerFactory;
027
028
029/**
030 * A comparator for case where {@link ByteBufferKeyValue} is prevalent type (BBKV
031 * is base-type in hbase2). Takes a general comparator as fallback in case types are NOT the
032 * expected ByteBufferKeyValue.
033 *
034 * <p>This is a tricked-out Comparator at heart of hbase read and write. It is in
035 * the HOT path so we try all sorts of ugly stuff so we can go faster. See below
036 * in this javadoc comment for the list.
037 *
038 * <p>Apply this comparator narrowly so it is fed exclusively ByteBufferKeyValues
039 * as much as is possible so JIT can settle (e.g. make one per ConcurrentSkipListMap
040 * in HStore).
041 *
042 * <p>Exploits specially added methods in BBKV to save on deserializations of shorts,
043 * longs, etc: i.e. calculating the family length requires row length; pass it in
044 * rather than recalculate it, and so on.
045 *
046 * <p>This comparator does static dispatch to private final methods so hotspot is comfortable
047 * deciding inline.
048 *
049 * <p>Measurement has it that we almost have it so all inlines from memstore
050 * ConcurrentSkipListMap on down to the (unsafe) intrinisics that do byte compare
051 * and deserialize shorts and ints; needs a bit more work.
052 *
053 * <p>Does not take a Type to compare: i.e. it is not a Comparator&lt;Cell> or
054 * CellComparator&lt;Cell> or Comparator&lt;ByteBufferKeyValue> because that adds
055 * another method to the hierarchy -- from compare(Object, Object)
056 * to dynamic compare(Cell, Cell) to static private compare -- and inlining doesn't happen if
057 * hierarchy is too deep (it is the case here).
058 *
059 * <p>Be careful making changes. Compare perf before and after and look at what
060 * hotspot ends up generating before committing change (jitwatch is helpful here).
061 * Changing this one class doubled write throughput (HBASE-20483).
062 */
063@InterfaceAudience.Private
064public class BBKVComparator implements Comparator {
065  protected static final Logger LOG = LoggerFactory.getLogger(BBKVComparator.class);
066  private final Comparator fallback;
067
068  public BBKVComparator(Comparator fallback) {
069    this.fallback = fallback;
070  }
071
072  @Override
073  public int compare(Object l, Object r) {
074    // LOG.info("ltype={} rtype={}", l, r);
075    if ((l instanceof ByteBufferKeyValue) && (r instanceof ByteBufferKeyValue)) {
076      return compare((ByteBufferKeyValue)l, (ByteBufferKeyValue)r, false);
077    }
078    // Skip calling compare(Object, Object) and go direct to compare(Cell, Cell)
079    return this.fallback.compare((Cell)l, (Cell)r);
080  }
081
082  // TODO: Come back here. We get a few percentage points extra of throughput if this is a
083  // private method.
084  static final int compare(ByteBufferKeyValue left, ByteBufferKeyValue right,
085      boolean ignoreSequenceid) {
086    // NOTE: Same method is in CellComparatorImpl, also private, not shared, intentionally. Not
087    // sharing gets us a few percent more throughput in compares. If changes here or there, make
088    // sure done in both places.
089
090    // Compare Rows. Cache row length.
091    int leftRowLength = left.getRowLength();
092    int rightRowLength = right.getRowLength();
093    int diff = ByteBufferUtils.compareTo(left.getRowByteBuffer(), left.getRowPosition(),
094        leftRowLength,
095        right.getRowByteBuffer(), right.getRowPosition(), rightRowLength);
096    if (diff != 0) {
097      return diff;
098    }
099
100    // If the column is not specified, the "minimum" key type appears as latest in the sorted
101    // order, regardless of the timestamp. This is used for specifying the last key/value in a
102    // given row, because there is no "lexicographically last column" (it would be infinitely long).
103    // The "maximum" key type does not need this behavior. Copied from KeyValue. This is bad in that
104    // we can't do memcmp w/ special rules like this.
105    // TODO: Is there a test for this behavior?
106    int leftFamilyLengthPosition = left.getFamilyLengthPosition(leftRowLength);
107    int leftFamilyLength = left.getFamilyLength(leftFamilyLengthPosition);
108    int leftKeyLength = left.getKeyLength();
109    int leftQualifierLength = left.getQualifierLength(leftKeyLength, leftRowLength,
110        leftFamilyLength);
111
112    // No need of left row length below here.
113
114    byte leftType = left.getTypeByte(leftKeyLength);
115    if (leftFamilyLength + leftQualifierLength == 0 &&
116        leftType == KeyValue.Type.Minimum.getCode()) {
117      // left is "bigger", i.e. it appears later in the sorted order
118      return 1;
119    }
120
121    int rightFamilyLengthPosition = right.getFamilyLengthPosition(rightRowLength);
122    int rightFamilyLength = right.getFamilyLength(rightFamilyLengthPosition);
123    int rightKeyLength = right.getKeyLength();
124    int rightQualifierLength = right.getQualifierLength(rightKeyLength, rightRowLength,
125        rightFamilyLength);
126
127   // No need of right row length below here.
128
129    byte rightType = right.getTypeByte(rightKeyLength);
130    if (rightFamilyLength + rightQualifierLength == 0 &&
131        rightType == KeyValue.Type.Minimum.getCode()) {
132      return -1;
133    }
134
135    // Compare families.
136    int leftFamilyPosition = left.getFamilyPosition(leftFamilyLengthPosition);
137    int rightFamilyPosition = right.getFamilyPosition(rightFamilyLengthPosition);
138    diff = ByteBufferUtils.compareTo(left.getFamilyByteBuffer(), leftFamilyPosition,
139        leftFamilyLength,
140        right.getFamilyByteBuffer(), rightFamilyPosition, rightFamilyLength);
141    if (diff != 0) {
142      return diff;
143    }
144
145    // Compare qualifiers
146    diff = ByteBufferUtils.compareTo(left.getQualifierByteBuffer(),
147        left.getQualifierPosition(leftFamilyPosition, leftFamilyLength), leftQualifierLength,
148        right.getQualifierByteBuffer(),
149        right.getQualifierPosition(rightFamilyPosition, rightFamilyLength),
150        rightQualifierLength);
151    if (diff != 0) {
152      return diff;
153    }
154
155    // Timestamps.
156    // Swap order we pass into compare so we get DESCENDING order.
157    diff = Long.compare(right.getTimestamp(rightKeyLength), left.getTimestamp(leftKeyLength));
158    if (diff != 0) {
159      return diff;
160    }
161
162    // Compare types. Let the delete types sort ahead of puts; i.e. types
163    // of higher numbers sort before those of lesser numbers. Maximum (255)
164    // appears ahead of everything, and minimum (0) appears after
165    // everything.
166    diff = (0xff & rightType) - (0xff & leftType);
167    if (diff != 0) {
168      return diff;
169    }
170
171    // Negate following comparisons so later edits show up first mvccVersion: later sorts first
172    return ignoreSequenceid ? diff : Longs.compare(right.getSequenceId(), left.getSequenceId());
173  }
174}