View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.util.Arrays;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.hbase.classification.InterfaceAudience;
26  import org.apache.hadoop.hbase.util.Bytes;
27  
28  /**
29   * A custom RegionSplitPolicy implementing a SplitPolicy that groups
30   * rows by a prefix of the row-key with a delimiter. Only the first delimiter
31   * for the row key will define the prefix of the row key that is used for grouping.
32   *
33   * This ensures that a region is not split "inside" a prefix of a row key.
34   * I.e. rows can be co-located in a region by their prefix.
35   *
36   * As an example, if you have row keys delimited with <code>_</code>, like
37   * <code>userid_eventtype_eventid</code>, and use prefix delimiter _, this split policy
38   * ensures that all rows starting with the same userid, belongs to the same region.
39   * @see KeyPrefixRegionSplitPolicy
40   */
41  @InterfaceAudience.Private
42  public class DelimitedKeyPrefixRegionSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {
43  
44    private static final Log LOG = LogFactory
45        .getLog(DelimitedKeyPrefixRegionSplitPolicy.class);
46    public static final String DELIMITER_KEY = "DelimitedKeyPrefixRegionSplitPolicy.delimiter";
47  
48    private byte[] delimiter = null;
49  
50    @Override
51    protected void configureForRegion(HRegion region) {
52      super.configureForRegion(region);
53      // read the prefix length from the table descriptor
54      String delimiterString = region.getTableDesc().getValue(DELIMITER_KEY);
55      if (delimiterString == null || delimiterString.length() == 0) {
56        LOG.error(DELIMITER_KEY + " not specified for table " + region.getTableDesc().getTableName() +
57          ". Using default RegionSplitPolicy");
58        return;
59      }
60      delimiter = Bytes.toBytes(delimiterString);
61    }
62  
63    @Override
64    protected byte[] getSplitPoint() {
65      byte[] splitPoint = super.getSplitPoint();
66      if (splitPoint != null && delimiter != null) {
67  
68        //find the first occurrence of delimiter in split point
69        int index = com.google.common.primitives.Bytes.indexOf(splitPoint, delimiter);
70        if (index < 0) {
71          LOG.warn("Delimiter " + Bytes.toString(delimiter) + "  not found for split key "
72              + Bytes.toString(splitPoint));
73          return splitPoint;
74        }
75  
76        // group split keys by a prefix
77        return Arrays.copyOf(splitPoint, Math.min(index, splitPoint.length));
78      } else {
79        return splitPoint;
80      }
81    }
82  }