/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.regionserver;

import java.util.Arrays;

import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * A custom RegionSplitPolicy implementing a SplitPolicy that groups
 * rows by a prefix of the row-key with a delimiter. Only the first occurrence of the
 * delimiter in the row key defines the prefix that is used for grouping.
 *
 * <p>This ensures that a region is not split "inside" a prefix of a row key,
 * i.e. rows can be co-located in a region by their prefix.
 *
 * <p>As an example, if you have row keys delimited with <code>_</code>, like
 * <code>userid_eventtype_eventid</code>, and use the prefix delimiter <code>_</code>, this
 * split policy ensures that all rows starting with the same userid belong to the same region.
 *
 * <p>The delimiter is read from the table descriptor value stored under
 * {@link #DELIMITER_KEY}.
 *
 * @see KeyPrefixRegionSplitPolicy
 */
@InterfaceAudience.Private
public class DelimitedKeyPrefixRegionSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {

  private static final Logger LOG = LoggerFactory
      .getLogger(DelimitedKeyPrefixRegionSplitPolicy.class);

  /** Table descriptor key under which the prefix delimiter is stored. */
  public static final String DELIMITER_KEY = "DelimitedKeyPrefixRegionSplitPolicy.delimiter";

  /** Delimiter bytes, or {@code null} when no delimiter was configured for the table. */
  private byte[] delimiter = null;

  /**
   * Configures the parent policy and then reads the delimiter from the table descriptor of
   * the given region. When the value is missing or empty an error is logged and the delimiter
   * stays {@code null}, in which case {@link #getSplitPoint()} returns the parent's split
   * point unchanged.
   *
   * @param region the region this policy is being configured for
   */
  @Override
  protected void configureForRegion(HRegion region) {
    super.configureForRegion(region);
    // read the delimiter from the table descriptor
    String delimiterString = region.getTableDescriptor().getValue(DELIMITER_KEY);
    if (delimiterString == null || delimiterString.isEmpty()) {
      LOG.error("{} not specified for table {}. Using default RegionSplitPolicy", DELIMITER_KEY,
          region.getTableDescriptor().getTableName());
      return;
    }
    delimiter = Bytes.toBytes(delimiterString);
  }

  /**
   * Returns the parent's split point truncated to the bytes that precede the first occurrence
   * of the delimiter.
   *
   * @return the parent's split point unchanged when it is {@code null}, when no delimiter is
   *         configured, or when the delimiter does not occur in it; otherwise a copy of the
   *         bytes before the first delimiter
   */
  @Override
  protected byte[] getSplitPoint() {
    byte[] splitPoint = super.getSplitPoint();
    if (splitPoint == null || delimiter == null) {
      return splitPoint;
    }

    // Find the first occurrence of the delimiter in the split point. The Guava class is
    // referenced by its full name because the simple name Bytes is already taken by the
    // HBase utility class imported above.
    int index = org.apache.hbase.thirdparty.com.google.common.primitives.Bytes
        .indexOf(splitPoint, delimiter);
    if (index < 0) {
      // Row keys are arbitrary bytes, so render the key in binary-safe form.
      LOG.warn("Delimiter {} not found for split key {}", Bytes.toString(delimiter),
          Bytes.toStringBinary(splitPoint));
      return splitPoint;
    }

    // Group split keys by prefix: keep only the bytes before the delimiter. A non-negative
    // index is always smaller than splitPoint.length, so no additional clamping is needed.
    // NOTE(review): when the split point starts with the delimiter (index == 0) the result
    // is an empty key -- confirm that callers handle an empty split point.
    return Arrays.copyOf(splitPoint, index);
  }
}