View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase;
20  
21  import java.io.Serializable;
22  import java.util.Comparator;
23  
24  import org.apache.hadoop.hbase.KeyValue.Type;
25  import org.apache.hadoop.hbase.classification.InterfaceAudience;
26  import org.apache.hadoop.hbase.classification.InterfaceStability;
27  import org.apache.hadoop.hbase.util.Bytes;
28  
29  import com.google.common.primitives.Longs;
30  
31  /**
32   * Compare two HBase cells.  Do not use this method comparing <code>-ROOT-</code> or
33   * <code>hbase:meta</code> cells.  Cells from these tables need a specialized comparator, one that
34   * takes account of the special formatting of the row where we have commas to delimit table from
35   * regionname, from row.  See KeyValue for how it has a special comparator to do hbase:meta cells
36   * and yet another for -ROOT-.
37   */
38  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
39      value="UNKNOWN",
40      justification="Findbugs doesn't like the way we are negating the result of a compare in below")
41  @InterfaceAudience.Private
42  @InterfaceStability.Evolving
43  public class CellComparator implements Comparator<Cell>, Serializable {
44    private static final long serialVersionUID = -8760041766259623329L;
45  
46    @Override
47    public int compare(Cell a, Cell b) {
48      return compare(a, b, false);
49    }
50  
51    /**
52     * Compare cells.
53     * TODO: Replace with dynamic rather than static comparator so can change comparator
54     * implementation.
55     * @param a
56     * @param b
57     * @param ignoreSequenceid True if we are to compare the key portion only and ignore
58     * the sequenceid. Set to false to compare key and consider sequenceid.
59     * @return 0 if equal, -1 if a &lt; b, and +1 if a &gt; b.
60     */
61    public static int compare(final Cell a, final Cell b, boolean ignoreSequenceid) {
62      // row
63      int c = compareRows(a, b);
64      if (c != 0) return c;
65  
66      c = compareWithoutRow(a, b);
67      if(c != 0) return c;
68  
69      if (!ignoreSequenceid) {
70        // Negate following comparisons so later edits show up first
71        // mvccVersion: later sorts first
72        return Longs.compare(b.getMvccVersion(), a.getMvccVersion());
73      } else {
74        return c;
75      }
76    }
77  
78    public static int findCommonPrefixInRowPart(Cell left, Cell right, int rowCommonPrefix) {
79      return findCommonPrefix(left.getRowArray(), right.getRowArray(), left.getRowLength()
80          - rowCommonPrefix, right.getRowLength() - rowCommonPrefix, left.getRowOffset()
81          + rowCommonPrefix, right.getRowOffset() + rowCommonPrefix);
82    }
83  
84    private static int findCommonPrefix(byte[] left, byte[] right, int leftLength, int rightLength,
85        int leftOffset, int rightOffset) {
86      int length = Math.min(leftLength, rightLength);
87      int result = 0;
88  
89      while (result < length && left[leftOffset + result] == right[rightOffset + result]) {
90        result++;
91      }
92      return result;
93    }
94  
95    public static int findCommonPrefixInFamilyPart(Cell left, Cell right, int familyCommonPrefix) {
96      return findCommonPrefix(left.getFamilyArray(), right.getFamilyArray(), left.getFamilyLength()
97          - familyCommonPrefix, right.getFamilyLength() - familyCommonPrefix, left.getFamilyOffset()
98          + familyCommonPrefix, right.getFamilyOffset() + familyCommonPrefix);
99    }
100 
101   public static int findCommonPrefixInQualifierPart(Cell left, Cell right,
102       int qualifierCommonPrefix) {
103     return findCommonPrefix(left.getQualifierArray(), right.getQualifierArray(),
104         left.getQualifierLength() - qualifierCommonPrefix, right.getQualifierLength()
105             - qualifierCommonPrefix, left.getQualifierOffset() + qualifierCommonPrefix,
106         right.getQualifierOffset() + qualifierCommonPrefix);
107   }
108 
109   /**************** equals ****************************/
110 
111   public static boolean equals(Cell a, Cell b){
112     return equalsRow(a, b)
113         && equalsFamily(a, b)
114         && equalsQualifier(a, b)
115         && equalsTimestamp(a, b)
116         && equalsType(a, b);
117   }
118 
119   public static boolean equalsRow(Cell a, Cell b){
120     return Bytes.equals(
121       a.getRowArray(), a.getRowOffset(), a.getRowLength(),
122       b.getRowArray(), b.getRowOffset(), b.getRowLength());
123   }
124 
125   public static boolean equalsFamily(Cell a, Cell b){
126     return Bytes.equals(
127       a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(),
128       b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength());
129   }
130 
131   public static boolean equalsQualifier(Cell a, Cell b){
132     return Bytes.equals(
133       a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(),
134       b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength());
135   }
136 
137   public static boolean equalsTimestamp(Cell a, Cell b){
138     return a.getTimestamp() == b.getTimestamp();
139   }
140 
141   public static boolean equalsType(Cell a, Cell b){
142     return a.getTypeByte() == b.getTypeByte();
143   }
144 
145   public static int compareColumns(final Cell left, final Cell right) {
146     int lfoffset = left.getFamilyOffset();
147     int rfoffset = right.getFamilyOffset();
148     int lclength = left.getQualifierLength();
149     int rclength = right.getQualifierLength();
150     int lfamilylength = left.getFamilyLength();
151     int rfamilylength = right.getFamilyLength();
152     int diff = compare(left.getFamilyArray(), lfoffset, lfamilylength, right.getFamilyArray(),
153         rfoffset, rfamilylength);
154     if (diff != 0) {
155       return diff;
156     } else {
157       return compare(left.getQualifierArray(), left.getQualifierOffset(), lclength,
158           right.getQualifierArray(), right.getQualifierOffset(), rclength);
159     }
160   }
161 
162   public static int compareFamilies(Cell left, Cell right) {
163     return Bytes.compareTo(left.getFamilyArray(), left.getFamilyOffset(), left.getFamilyLength(),
164         right.getFamilyArray(), right.getFamilyOffset(), right.getFamilyLength());
165   }
166 
167   public static int compareQualifiers(Cell left, Cell right) {
168     return Bytes.compareTo(left.getQualifierArray(), left.getQualifierOffset(),
169         left.getQualifierLength(), right.getQualifierArray(), right.getQualifierOffset(),
170         right.getQualifierLength());
171   }
172 
173   public int compareFlatKey(Cell left, Cell right) {
174     int compare = compareRows(left, right);
175     if (compare != 0) {
176       return compare;
177     }
178     return compareWithoutRow(left, right);
179   }
180 
181   /**
182    * Do not use comparing rows from hbase:meta. Meta table Cells have schema (table,startrow,hash)
183    * so can't be treated as plain byte arrays as this method does.
184    */
185   public static int compareRows(final Cell left, final Cell right) {
186     return Bytes.compareTo(left.getRowArray(), left.getRowOffset(), left.getRowLength(),
187         right.getRowArray(), right.getRowOffset(), right.getRowLength());
188   }
189 
190   /**
191    * Do not use comparing rows from hbase:meta. Meta table Cells have schema (table,startrow,hash)
192    * so can't be treated as plain byte arrays as this method does.
193    */
194   public static int compareRows(byte[] left, int loffset, int llength, byte[] right, int roffset,
195       int rlength) {
196     return Bytes.compareTo(left, loffset, llength, right, roffset, rlength);
197   }
198 
199   public static int compareWithoutRow(final Cell leftCell, final Cell rightCell) {
200     // If the column is not specified, the "minimum" key type appears the
201     // latest in the sorted order, regardless of the timestamp. This is used
202     // for specifying the last key/value in a given row, because there is no
203     // "lexicographically last column" (it would be infinitely long). The
204     // "maximum" key type does not need this behavior.
205     // Copied from KeyValue. This is bad in that we can't do memcmp w/ special rules like this.
206     // TODO
207     if (leftCell.getFamilyLength() + leftCell.getQualifierLength() == 0
208           && leftCell.getTypeByte() == Type.Minimum.getCode()) {
209       // left is "bigger", i.e. it appears later in the sorted order
210       return 1;
211     }
212     if (rightCell.getFamilyLength() + rightCell.getQualifierLength() == 0
213         && rightCell.getTypeByte() == Type.Minimum.getCode()) {
214       return -1;
215     }
216     boolean sameFamilySize = (leftCell.getFamilyLength() == rightCell.getFamilyLength());
217     if (!sameFamilySize) {
218       // comparing column family is enough.
219 
220       return Bytes.compareTo(leftCell.getFamilyArray(), leftCell.getFamilyOffset(),
221           leftCell.getFamilyLength(), rightCell.getFamilyArray(), rightCell.getFamilyOffset(),
222           rightCell.getFamilyLength());
223     }
224     int diff = compareColumns(leftCell, rightCell);
225     if (diff != 0) return diff;
226 
227     diff = compareTimestamps(leftCell, rightCell);
228     if (diff != 0) return diff;
229 
230     // Compare types. Let the delete types sort ahead of puts; i.e. types
231     // of higher numbers sort before those of lesser numbers. Maximum (255)
232     // appears ahead of everything, and minimum (0) appears after
233     // everything.
234     return (0xff & rightCell.getTypeByte()) - (0xff & leftCell.getTypeByte());
235   }
236 
237   /**
238    * Compares cell's timestamps in DESCENDING order.
239    * The below older timestamps sorting ahead of newer timestamps looks
240    * wrong but it is intentional. This way, newer timestamps are first
241    * found when we iterate over a memstore and newer versions are the
242    * first we trip over when reading from a store file.
243    * @return 1 if left's timestamp &lt; right's timestamp
244    *         -1 if left's timestamp &gt; right's timestamp
245    *         0 if both timestamps are equal
246    */
247   public static int compareTimestamps(final Cell left, final Cell right) {
248     return compareTimestamps(left.getTimestamp(), right.getTimestamp());
249   }
250 
251   /********************* hashCode ************************/
252 
253   /**
254    * Returns a hash code that is always the same for two Cells having a matching equals(..) result.
255    */
256   public static int hashCode(Cell cell){
257     if (cell == null) {// return 0 for empty Cell
258       return 0;
259     }
260 
261     int hash = calculateHashForKeyValue(cell);
262     hash = 31 * hash + (int)cell.getMvccVersion();
263     return hash;
264   }
265 
266   /**
267    * Returns a hash code that is always the same for two Cells having a matching
268    * equals(..) result. Note : Ignore mvcc while calculating the hashcode
269    *
270    * @param cell
271    * @return hashCode
272    */
273   public static int hashCodeIgnoreMvcc(Cell cell) {
274     if (cell == null) {// return 0 for empty Cell
275       return 0;
276     }
277 
278     int hash = calculateHashForKeyValue(cell);
279     return hash;
280   }
281 
282   private static int calculateHashForKeyValue(Cell cell) {
283     //pre-calculate the 3 hashes made of byte ranges
284     int rowHash = Bytes.hashCode(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
285     int familyHash =
286       Bytes.hashCode(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
287     int qualifierHash = Bytes.hashCode(cell.getQualifierArray(), cell.getQualifierOffset(),
288       cell.getQualifierLength());
289 
290     //combine the 6 sub-hashes
291     int hash = 31 * rowHash + familyHash;
292     hash = 31 * hash + qualifierHash;
293     hash = 31 * hash + (int)cell.getTimestamp();
294     hash = 31 * hash + cell.getTypeByte();
295     return hash;
296   }
297 
298 
299   /******************** lengths *************************/
300 
301   public static boolean areKeyLengthsEqual(Cell a, Cell b) {
302     return a.getRowLength() == b.getRowLength()
303         && a.getFamilyLength() == b.getFamilyLength()
304         && a.getQualifierLength() == b.getQualifierLength();
305   }
306 
307   public static boolean areRowLengthsEqual(Cell a, Cell b) {
308     return a.getRowLength() == b.getRowLength();
309   }
310 
311 
312   /*********************common prefixes*************************/
313 
314   private static int compare(byte[] left, int leftOffset, int leftLength, byte[] right,
315       int rightOffset, int rightLength) {
316     return Bytes.compareTo(left, leftOffset, leftLength, right, rightOffset, rightLength);
317   }
318 
319   public static int compareCommonRowPrefix(Cell left, Cell right, int rowCommonPrefix) {
320     return compare(left.getRowArray(), left.getRowOffset() + rowCommonPrefix, left.getRowLength()
321         - rowCommonPrefix, right.getRowArray(), right.getRowOffset() + rowCommonPrefix,
322         right.getRowLength() - rowCommonPrefix);
323   }
324 
325   public static int compareCommonFamilyPrefix(Cell left, Cell right,
326       int familyCommonPrefix) {
327     return compare(left.getFamilyArray(), left.getFamilyOffset() + familyCommonPrefix,
328         left.getFamilyLength() - familyCommonPrefix, right.getFamilyArray(),
329         right.getFamilyOffset() + familyCommonPrefix,
330         right.getFamilyLength() - familyCommonPrefix);
331   }
332 
333   public static int compareCommonQualifierPrefix(Cell left, Cell right,
334       int qualCommonPrefix) {
335     return compare(left.getQualifierArray(), left.getQualifierOffset() + qualCommonPrefix,
336         left.getQualifierLength() - qualCommonPrefix, right.getQualifierArray(),
337         right.getQualifierOffset() + qualCommonPrefix, right.getQualifierLength()
338             - qualCommonPrefix);
339   }
340 
341   /***************** special cases ****************************/
342   /**
343    * special case for KeyValue.equals
344    */
345   public static boolean equalsIgnoreMvccVersion(Cell a, Cell b){
346     return 0 == compareStaticIgnoreMvccVersion(a, b);
347   }
348 
349   private static int compareStaticIgnoreMvccVersion(Cell a, Cell b) {
350     // row
351     int c = compareRows(a, b);
352     if (c != 0) return c;
353 
354     // family
355     c = compareColumns(a, b);
356     if (c != 0) return c;
357 
358     // timestamp: later sorts first
359     c = compareTimestamps(a, b);
360     if (c != 0) return c;
361 
362     //type
363     c = (0xff & b.getTypeByte()) - (0xff & a.getTypeByte());
364     return c;
365   }
366 
367   /**
368    * Compares timestamps in DESCENDING order.
369    * The below older timestamps sorting ahead of newer timestamps looks
370    * wrong but it is intentional. This way, newer timestamps are first
371    * found when we iterate over a memstore and newer versions are the
372    * first we trip over when reading from a store file.
373    * @return 1 if left timestamp &lt; right timestamp
374    *         -1 if left timestamp &gt; right timestamp
375    *         0 if both timestamps are equal
376    */
377   private static int compareTimestamps(final long ltimestamp, final long rtimestamp) {
378     if (ltimestamp < rtimestamp) {
379       return 1;
380     } else if (ltimestamp > rtimestamp) {
381       return -1;
382     }
383     return 0;
384   }
385 
386   /**
387    * Counter part for the KeyValue.RowOnlyComparator
388    */
389   public static class RowComparator extends CellComparator {
390     @Override
391     public int compare(Cell a, Cell b) {
392       return compareRows(a, b);
393     }
394   }
395 
396   /**
397    * Try to return a Cell that falls between <code>left</code> and <code>right</code> but that is
398    * shorter; i.e. takes up less space. This trick is used building HFile block index.
399    * Its an optimization. It does not always work.  In this case we'll just return the
400    * <code>right</code> cell.
401    * @param comparator Comparator to use.
402    * @param left
403    * @param right
404    * @return A cell that sorts between <code>left</code> and <code>right</code>.
405    */
406   public static Cell getMidpoint(final KeyValue.KVComparator comparator, final Cell left,
407       final Cell right) {
408     // TODO: Redo so only a single pass over the arrays rather than one to compare and then a
409     // second composing midpoint.
410     if (right == null) {
411       throw new IllegalArgumentException("right cell can not be null");
412     }
413     if (left == null) {
414       return right;
415     }
416     // If Cells from meta table, don't mess around. meta table Cells have schema
417     // (table,startrow,hash) so can't be treated as plain byte arrays. Just skip out without
418     // trying to do this optimization.
419     if (comparator != null && comparator instanceof KeyValue.MetaComparator) {
420       return right;
421     }
422     int diff = compareRows(left, right);
423     if (diff > 0) {
424       throw new IllegalArgumentException("Left row sorts after right row; left=" +
425         CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right));
426     }
427     if (diff < 0) {
428       // Left row is < right row.
429       byte [] midRow = getMinimumMidpointArray(left.getRowArray(), left.getRowOffset(),
430           left.getRowLength(),
431         right.getRowArray(), right.getRowOffset(), right.getRowLength());
432       // If midRow is null, just return 'right'.  Can't do optimization.
433       if (midRow == null) return right;
434       return CellUtil.createCell(midRow);
435     }
436     // Rows are same. Compare on families.
437     diff = compareFamilies(left, right);
438     if (diff > 0) {
439       throw new IllegalArgumentException("Left family sorts after right family; left=" +
440           CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right));
441     }
442     if (diff < 0) {
443       byte [] midRow = getMinimumMidpointArray(left.getFamilyArray(), left.getFamilyOffset(),
444           left.getFamilyLength(),
445         right.getFamilyArray(), right.getFamilyOffset(), right.getFamilyLength());
446       // If midRow is null, just return 'right'.  Can't do optimization.
447       if (midRow == null) return right;
448       // Return new Cell where we use right row and then a mid sort family.
449       return CellUtil.createCell(right.getRowArray(), right.getRowOffset(), right.getRowLength(),
450         midRow, 0, midRow.length, HConstants.EMPTY_BYTE_ARRAY, 0,
451         HConstants.EMPTY_BYTE_ARRAY.length);
452     }
453     // Families are same. Compare on qualifiers.
454     diff = compareQualifiers(left, right);
455     if (diff > 0) {
456       throw new IllegalArgumentException("Left qualifier sorts after right qualifier; left=" +
457           CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right));
458     }
459     if (diff < 0) {
460       byte [] midRow = getMinimumMidpointArray(left.getQualifierArray(), left.getQualifierOffset(),
461           left.getQualifierLength(),
462         right.getQualifierArray(), right.getQualifierOffset(), right.getQualifierLength());
463       // If midRow is null, just return 'right'.  Can't do optimization.
464       if (midRow == null) return right;
465       // Return new Cell where we use right row and family and then a mid sort qualifier.
466       return CellUtil.createCell(right.getRowArray(), right.getRowOffset(), right.getRowLength(),
467         right.getFamilyArray(), right.getFamilyOffset(), right.getFamilyLength(),
468         midRow, 0, midRow.length);
469     }
470     // No opportunity for optimization. Just return right key.
471     return right;
472   }
473 
474   /**
475    * @param leftArray
476    * @param leftOffset
477    * @param leftLength
478    * @param rightArray
479    * @param rightOffset
480    * @param rightLength
481    * @return Return a new array that is between left and right and minimally sized else just return
482    * null as indicator that we could not create a mid point.
483    */
484   private static byte [] getMinimumMidpointArray(final byte [] leftArray, final int leftOffset,
485         final int leftLength,
486       final byte [] rightArray, final int rightOffset, final int rightLength) {
487     // rows are different
488     int minLength = leftLength < rightLength ? leftLength : rightLength;
489     int diffIdx = 0;
490     while (diffIdx < minLength &&
491         leftArray[leftOffset + diffIdx] == rightArray[rightOffset + diffIdx]) {
492       diffIdx++;
493     }
494     byte [] minimumMidpointArray = null;
495     if (diffIdx >= minLength) {
496       // leftKey's row is prefix of rightKey's.
497       minimumMidpointArray = new byte[diffIdx + 1];
498       System.arraycopy(rightArray, rightOffset, minimumMidpointArray, 0, diffIdx + 1);
499     } else {
500       int diffByte = leftArray[leftOffset + diffIdx];
501       if ((0xff & diffByte) < 0xff && (diffByte + 1) < (rightArray[rightOffset + diffIdx] & 0xff)) {
502         minimumMidpointArray = new byte[diffIdx + 1];
503         System.arraycopy(leftArray, leftOffset, minimumMidpointArray, 0, diffIdx);
504         minimumMidpointArray[diffIdx] = (byte) (diffByte + 1);
505       } else {
506         minimumMidpointArray = new byte[diffIdx + 1];
507         System.arraycopy(rightArray, rightOffset, minimumMidpointArray, 0, diffIdx + 1);
508       }
509     }
510     return minimumMidpointArray;
511   }
512 }