001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.regionserver;
020
021import java.util.Arrays;
022
023import org.apache.yetus.audience.InterfaceAudience;
024import org.slf4j.Logger;
025import org.slf4j.LoggerFactory;
026import org.apache.hadoop.hbase.util.Bytes;
027
028/**
029 * A custom RegionSplitPolicy implementing a SplitPolicy that groups
030 * rows by a prefix of the row-key with a delimiter. Only the first delimiter
031 * for the row key will define the prefix of the row key that is used for grouping.
032 *
033 * This ensures that a region is not split "inside" a prefix of a row key.
034 * I.e. rows can be co-located in a region by their prefix.
035 *
036 * As an example, if you have row keys delimited with <code>_</code>, like
037 * <code>userid_eventtype_eventid</code>, and use prefix delimiter _, this split policy
038 * ensures that all rows starting with the same userid, belongs to the same region.
039 * @see KeyPrefixRegionSplitPolicy
040 */
041@InterfaceAudience.Private
042public class DelimitedKeyPrefixRegionSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {
043
044  private static final Logger LOG = LoggerFactory
045      .getLogger(DelimitedKeyPrefixRegionSplitPolicy.class);
046  public static final String DELIMITER_KEY = "DelimitedKeyPrefixRegionSplitPolicy.delimiter";
047
048  private byte[] delimiter = null;
049
050  @Override
051  protected void configureForRegion(HRegion region) {
052    super.configureForRegion(region);
053    // read the prefix length from the table descriptor
054    String delimiterString = region.getTableDescriptor().getValue(DELIMITER_KEY);
055    if (delimiterString == null || delimiterString.length() == 0) {
056      LOG.error(DELIMITER_KEY + " not specified for table " + region.getTableDescriptor().getTableName() +
057        ". Using default RegionSplitPolicy");
058      return;
059    }
060    delimiter = Bytes.toBytes(delimiterString);
061  }
062
063  @Override
064  protected byte[] getSplitPoint() {
065    byte[] splitPoint = super.getSplitPoint();
066    if (splitPoint != null && delimiter != null) {
067
068      //find the first occurrence of delimiter in split point
069      int index =
070        org.apache.hbase.thirdparty.com.google.common.primitives.Bytes.indexOf(splitPoint, delimiter);
071      if (index < 0) {
072        LOG.warn("Delimiter " + Bytes.toString(delimiter) + "  not found for split key "
073            + Bytes.toString(splitPoint));
074        return splitPoint;
075      }
076
077      // group split keys by a prefix
078      return Arrays.copyOf(splitPoint, Math.min(index, splitPoint.length));
079    } else {
080      return splitPoint;
081    }
082  }
083}