001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import java.util.Comparator;
021
022import org.apache.hadoop.hbase.util.ByteBufferUtils;
023import org.apache.yetus.audience.InterfaceAudience;
024import org.slf4j.Logger;
025import org.slf4j.LoggerFactory;
026
027import org.apache.hbase.thirdparty.com.google.common.primitives.Longs;
028
029/**
030 * A comparator for case where {@link ByteBufferKeyValue} is prevalent type (BBKV
031 * is base-type in hbase2). Takes a general comparator as fallback in case types are NOT the
032 * expected ByteBufferKeyValue.
033 *
034 * <p>This is a tricked-out Comparator at heart of hbase read and write. It is in
035 * the HOT path so we try all sorts of ugly stuff so we can go faster. See below
036 * in this javadoc comment for the list.
037 *
038 * <p>Apply this comparator narrowly so it is fed exclusively ByteBufferKeyValues
039 * as much as is possible so JIT can settle (e.g. make one per ConcurrentSkipListMap
040 * in HStore).
041 *
042 * <p>Exploits specially added methods in BBKV to save on deserializations of shorts,
043 * longs, etc: i.e. calculating the family length requires row length; pass it in
044 * rather than recalculate it, and so on.
045 *
046 * <p>This comparator does static dispatch to private final methods so hotspot is comfortable
047 * deciding inline.
048 *
049 * <p>Measurement has it that we almost have it so all inlines from memstore
050 * ConcurrentSkipListMap on down to the (unsafe) intrinisics that do byte compare
051 * and deserialize shorts and ints; needs a bit more work.
052 *
053 * <p>Does not take a Type to compare: i.e. it is not a Comparator&lt;Cell> or
054 * CellComparator&lt;Cell> or Comparator&lt;ByteBufferKeyValue> because that adds
055 * another method to the hierarchy -- from compare(Object, Object)
056 * to dynamic compare(Cell, Cell) to static private compare -- and inlining doesn't happen if
057 * hierarchy is too deep (it is the case here).
058 *
059 * <p>Be careful making changes. Compare perf before and after and look at what
060 * hotspot ends up generating before committing change (jitwatch is helpful here).
061 * Changing this one class doubled write throughput (HBASE-20483).
062 */
063@InterfaceAudience.Private
064public class BBKVComparator implements Comparator {
065  protected static final Logger LOG = LoggerFactory.getLogger(BBKVComparator.class);
066  private final Comparator fallback;
067
068  public BBKVComparator(Comparator fallback) {
069    this.fallback = fallback;
070  }
071
072  @Override
073  public int compare(Object l, Object r) {
074    if ((l instanceof ByteBufferKeyValue) && (r instanceof ByteBufferKeyValue)) {
075      return compare((ByteBufferKeyValue)l, (ByteBufferKeyValue)r, false);
076    }
077    // Skip calling compare(Object, Object) and go direct to compare(Cell, Cell)
078    return this.fallback.compare((Cell)l, (Cell)r);
079  }
080
081  // TODO: Come back here. We get a few percentage points extra of throughput if this is a
082  // private method.
083  static int compare(ByteBufferKeyValue left, ByteBufferKeyValue right,
084      boolean ignoreSequenceid) {
085    // NOTE: Same method is in CellComparatorImpl, also private, not shared, intentionally. Not
086    // sharing gets us a few percent more throughput in compares. If changes here or there, make
087    // sure done in both places.
088
089    // Compare Rows. Cache row length.
090    int leftRowLength = left.getRowLength();
091    int rightRowLength = right.getRowLength();
092    int diff = ByteBufferUtils.compareTo(left.getRowByteBuffer(), left.getRowPosition(),
093        leftRowLength,
094        right.getRowByteBuffer(), right.getRowPosition(), rightRowLength);
095    if (diff != 0) {
096      return diff;
097    }
098
099    // If the column is not specified, the "minimum" key type appears as latest in the sorted
100    // order, regardless of the timestamp. This is used for specifying the last key/value in a
101    // given row, because there is no "lexicographically last column" (it would be infinitely long).
102    // The "maximum" key type does not need this behavior. Copied from KeyValue. This is bad in that
103    // we can't do memcmp w/ special rules like this.
104    // TODO: Is there a test for this behavior?
105    int leftFamilyLengthPosition = left.getFamilyLengthPosition(leftRowLength);
106    int leftFamilyLength = left.getFamilyLength(leftFamilyLengthPosition);
107    int leftKeyLength = left.getKeyLength();
108    int leftQualifierLength = left.getQualifierLength(leftKeyLength, leftRowLength,
109        leftFamilyLength);
110
111    // No need of left row length below here.
112
113    byte leftType = left.getTypeByte(leftKeyLength);
114    if (leftFamilyLength + leftQualifierLength == 0 &&
115        leftType == KeyValue.Type.Minimum.getCode()) {
116      // left is "bigger", i.e. it appears later in the sorted order
117      return 1;
118    }
119
120    int rightFamilyLengthPosition = right.getFamilyLengthPosition(rightRowLength);
121    int rightFamilyLength = right.getFamilyLength(rightFamilyLengthPosition);
122    int rightKeyLength = right.getKeyLength();
123    int rightQualifierLength = right.getQualifierLength(rightKeyLength, rightRowLength,
124        rightFamilyLength);
125
126   // No need of right row length below here.
127
128    byte rightType = right.getTypeByte(rightKeyLength);
129    if (rightFamilyLength + rightQualifierLength == 0 &&
130        rightType == KeyValue.Type.Minimum.getCode()) {
131      return -1;
132    }
133
134    // Compare families.
135    int leftFamilyPosition = left.getFamilyPosition(leftFamilyLengthPosition);
136    int rightFamilyPosition = right.getFamilyPosition(rightFamilyLengthPosition);
137    diff = ByteBufferUtils.compareTo(left.getFamilyByteBuffer(), leftFamilyPosition,
138        leftFamilyLength,
139        right.getFamilyByteBuffer(), rightFamilyPosition, rightFamilyLength);
140    if (diff != 0) {
141      return diff;
142    }
143
144    // Compare qualifiers
145    diff = ByteBufferUtils.compareTo(left.getQualifierByteBuffer(),
146        left.getQualifierPosition(leftFamilyPosition, leftFamilyLength), leftQualifierLength,
147        right.getQualifierByteBuffer(),
148        right.getQualifierPosition(rightFamilyPosition, rightFamilyLength),
149        rightQualifierLength);
150    if (diff != 0) {
151      return diff;
152    }
153
154    // Timestamps.
155    // Swap order we pass into compare so we get DESCENDING order.
156    diff = Long.compare(right.getTimestamp(rightKeyLength), left.getTimestamp(leftKeyLength));
157    if (diff != 0) {
158      return diff;
159    }
160
161    // Compare types. Let the delete types sort ahead of puts; i.e. types
162    // of higher numbers sort before those of lesser numbers. Maximum (255)
163    // appears ahead of everything, and minimum (0) appears after
164    // everything.
165    diff = (0xff & rightType) - (0xff & leftType);
166    if (diff != 0) {
167      return diff;
168    }
169
170    // Negate following comparisons so later edits show up first mvccVersion: later sorts first
171    return ignoreSequenceid ? diff : Longs.compare(right.getSequenceId(), left.getSequenceId());
172  }
173}