/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.mapreduce;

import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
/**
 * Creates a three-level directory tree: the first level uses the table name as
 * the parent directory, the second level uses the column family name as the
 * child directory, and all HFiles for one family are written under that child
 * directory:
 * -tableName1
 *     -columnFamilyName1
 *         -HFiles
 *     -columnFamilyName2
 *         -HFiles
 * -tableName2
 *     -columnFamilyName1
 *         -HFiles
 *     -columnFamilyName2
 *         -HFiles
 */
@InterfaceAudience.Public
@VisibleForTesting
public class MultiTableHFileOutputFormat extends HFileOutputFormat2 {
  private static final Logger LOG = LoggerFactory.getLogger(MultiTableHFileOutputFormat.class);

  /**
   * Creates a composite key to use as a mapper output key when using
   * MultiTableHFileOutputFormat.configureIncrementalLoad to set up a bulk ingest job.
   *
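   * <p>For example, a mapper feeding a bulk-ingest job might emit (the table name,
   * row key, and value below are illustrative only):
   * <pre>{@code
   * byte[] key = MultiTableHFileOutputFormat.createCompositeKey(
   *     Bytes.toBytes("table1"), Bytes.toBytes("rowkey1"));
   * context.write(new ImmutableBytesWritable(key), put); // 'put' is a hypothetical value
   * }</pre>
   *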
   * @param tableName Name of the Table - Eg: TableName.getNameAsString()
   * @param suffix    Usually a row key when creating a mapper output key, or a column family
   * @return          byte[] representation of the composite key
   */
  public static byte[] createCompositeKey(byte[] tableName, byte[] suffix) {
    return combineTableNameSuffix(tableName, suffix);
  }

  /**
   * Alternate API which accepts an ImmutableBytesWritable for the suffix
   * @see MultiTableHFileOutputFormat#createCompositeKey(byte[], byte[])
   */
  public static byte[] createCompositeKey(byte[] tableName, ImmutableBytesWritable suffix) {
    return combineTableNameSuffix(tableName, suffix.get());
  }

  /**
   * Alternate API which accepts a String for the tableName and an ImmutableBytesWritable
   * for the suffix
   * @see MultiTableHFileOutputFormat#createCompositeKey(byte[], byte[])
   */
  public static byte[] createCompositeKey(String tableName, ImmutableBytesWritable suffix) {
    return combineTableNameSuffix(tableName.getBytes(StandardCharsets.UTF_8), suffix.get());
  }

  /**
   * Analogous to
   * {@link HFileOutputFormat2#configureIncrementalLoad(Job, TableDescriptor, RegionLocator)},
   * this function configures the requisite number of reducers to write HFiles for multiple
   * tables simultaneously.
   *
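   * <p>A minimal setup sketch, assuming an open {@code Connection} and a collection
   * of {@code TableName}s to bulk load into (both names here are assumptions):
   * <pre>{@code
   * List<TableInfo> tableInfos = new ArrayList<>();
   * for (TableName tableName : tableNames) {
   *   tableInfos.add(new TableInfo(
   *       connection.getTable(tableName).getDescriptor(),
   *       connection.getRegionLocator(tableName)));
   * }
   * MultiTableHFileOutputFormat.configureIncrementalLoad(job, tableInfos);
   * }</pre>
   *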
   * @param job                   See {@link org.apache.hadoop.mapreduce.Job}
   * @param multiTableDescriptors Table descriptor and region locator pairs
   * @throws IOException if the job configuration fails
   */
  public static void configureIncrementalLoad(Job job, List<TableInfo> multiTableDescriptors)
      throws IOException {
    MultiTableHFileOutputFormat.configureIncrementalLoad(job, multiTableDescriptors,
        MultiTableHFileOutputFormat.class);
  }

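  /**
   * Validates that a composite key contains the table separator and returns the
   * index of the separator within the key; throws {@link IllegalArgumentException}
   * if the separator is missing.
   */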
  private static int validateCompositeKey(byte[] keyBytes) {
    int separatorIdx = Bytes.indexOf(keyBytes, tableSeparator);

    // If the separator is absent, the key cannot be split into a table name and a suffix
    if (separatorIdx == -1) {
      throw new IllegalArgumentException("Invalid format for composite key [" + Bytes
          .toStringBinary(keyBytes) + "]. Cannot extract tablename and suffix from key");
    }
    return separatorIdx;
  }

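  /**
   * Extracts the table name portion of a composite key, i.e. the bytes before
   * the table separator.
   */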
  protected static byte[] getTableName(byte[] keyBytes) {
    int separatorIdx = validateCompositeKey(keyBytes);
    return Bytes.copy(keyBytes, 0, separatorIdx);
  }

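  /**
   * Extracts the suffix portion of a composite key, i.e. the bytes after the
   * table separator.
   */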
  protected static byte[] getSuffix(byte[] keyBytes) {
    int separatorIdx = validateCompositeKey(keyBytes);
    return Bytes.copy(keyBytes, separatorIdx + 1, keyBytes.length - separatorIdx - 1);
  }
}