View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.util.Arrays;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.hbase.classification.InterfaceAudience;
26  import org.apache.hadoop.hbase.util.Bytes;
27  
28  /**
29   * A custom RegionSplitPolicy implementing a SplitPolicy that groups
30   * rows by a prefix of the row-key with a delimiter. Only the first delimiter
31   * for the row key will define the prefix of the row key that is used for grouping.
32   *
33   * This ensures that a region is not split "inside" a prefix of a row key.
34   * I.e. rows can be co-located in a region by their prefix.
35   *
36   * As an example, if you have row keys delimited with <code>_</code>, like
37   * <code>userid_eventtype_eventid</code>, and use prefix delimiter _, this split policy
38   * ensures that all rows starting with the same userid, belongs to the same region.
39   * @see KeyPrefixRegionSplitPolicy
40   */
41  @InterfaceAudience.Private
42  public class DelimitedKeyPrefixRegionSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {
43  
44    private static final Log LOG = LogFactory
45        .getLog(DelimitedKeyPrefixRegionSplitPolicy.class);
46    public static final String DELIMITER_KEY = "DelimitedKeyPrefixRegionSplitPolicy.delimiter";
47  
48    private byte[] delimiter = null;
49  
50    @Override
51    protected void configureForRegion(HRegion region) {
52      super.configureForRegion(region);
53      if (region != null) {
54  
55        // read the prefix length from the table descriptor
56        String delimiterString = region.getTableDesc().getValue(
57            DELIMITER_KEY);
58        if (delimiterString == null || delimiterString.length() == 0) {
59          LOG.error(DELIMITER_KEY + " not specified for table "
60              + region.getTableDesc().getTableName()
61              + ". Using default RegionSplitPolicy");
62          return;
63        }
64  
65        delimiter = Bytes.toBytes(delimiterString);
66      }
67    }
68  
69    @Override
70    protected byte[] getSplitPoint() {
71      byte[] splitPoint = super.getSplitPoint();
72      if (splitPoint != null && delimiter != null) {
73  
74        //find the first occurrence of delimiter in split point
75        int index = com.google.common.primitives.Bytes.indexOf(splitPoint, delimiter);
76        if (index < 0) {
77          LOG.warn("Delimiter " + Bytes.toString(delimiter) + "  not found for split key "
78              + Bytes.toString(splitPoint));
79          return splitPoint;
80        }
81  
82        // group split keys by a prefix
83        return Arrays.copyOf(splitPoint, Math.min(index, splitPoint.length));
84      } else {
85        return splitPoint;
86      }
87    }
88  }