001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 * <p>
010 * http://www.apache.org/licenses/LICENSE-2.0
011 * <p>
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.mapreduce;
020
021import org.apache.yetus.audience.InterfaceAudience;
022import org.slf4j.Logger;
023import org.slf4j.LoggerFactory;
024import org.apache.hadoop.hbase.client.TableDescriptor;
025import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
026import org.apache.hadoop.hbase.util.Bytes;
027import org.apache.hadoop.mapreduce.Job;
028
029import java.io.IOException;
030import java.nio.charset.Charset;
031import java.util.List;
032
033/**
034 * Create 3 level tree directory, first level is using table name as parent
035 * directory and then use family name as child directory, and all related HFiles
036 * for one family are under child directory
037 * -tableName1
038 *     -columnFamilyName1
039 *     -columnFamilyName2
040 *         -HFiles
041 * -tableName2
042 *     -columnFamilyName1
043 *         -HFiles
044 *     -columnFamilyName2
045 */
046@InterfaceAudience.Public
047public class MultiTableHFileOutputFormat extends HFileOutputFormat2 {
048  private static final Logger LOG = LoggerFactory.getLogger(MultiTableHFileOutputFormat.class);
049
050  /**
051   * Creates a composite key to use as a mapper output key when using
052   * MultiTableHFileOutputFormat.configureIncrementaLoad to set up bulk ingest job
053   *
054   * @param tableName Name of the Table - Eg: TableName.getNameAsString()
055   * @param suffix    Usually represents a rowkey when creating a mapper key or column family
056   * @return          byte[] representation of composite key
057   */
058  public static byte[] createCompositeKey(byte[] tableName,
059                                          byte[] suffix) {
060    return combineTableNameSuffix(tableName, suffix);
061  }
062
063  /**
064   * Alternate api which accepts an ImmutableBytesWritable for the suffix
065   * @see MultiTableHFileOutputFormat#createCompositeKey(byte[], byte[])
066   */
067  public static byte[] createCompositeKey(byte[] tableName,
068                                          ImmutableBytesWritable suffix) {
069    return combineTableNameSuffix(tableName, suffix.get());
070  }
071
072  /**
073   * Alternate api which accepts a String for the tableName and ImmutableBytesWritable for the
074   * suffix
075   * @see MultiTableHFileOutputFormat#createCompositeKey(byte[], byte[])
076   */
077  public static byte[] createCompositeKey(String tableName,
078                                          ImmutableBytesWritable suffix) {
079    return combineTableNameSuffix(tableName.getBytes(Charset.forName("UTF-8")), suffix.get());
080  }
081
082  /**
083   * Analogous to
084   * {@link HFileOutputFormat2#configureIncrementalLoad(Job, TableDescriptor, RegionLocator)},
085   * this function will configure the requisite number of reducers to write HFiles for multple
086   * tables simultaneously
087   *
088   * @param job                   See {@link org.apache.hadoop.mapreduce.Job}
089   * @param multiTableDescriptors Table descriptor and region locator pairs
090   * @throws IOException
091   */
092  public static void configureIncrementalLoad(Job job, List<TableInfo>
093      multiTableDescriptors)
094      throws IOException {
095    MultiTableHFileOutputFormat.configureIncrementalLoad(job, multiTableDescriptors,
096            MultiTableHFileOutputFormat.class);
097  }
098
099  final private static int validateCompositeKey(byte[] keyBytes) {
100
101    int separatorIdx = Bytes.indexOf(keyBytes, tableSeparator);
102
103    // Either the separator was not found or a tablename wasn't present or a key wasn't present
104    if (separatorIdx == -1) {
105      throw new IllegalArgumentException("Invalid format for composite key [" + Bytes
106              .toStringBinary(keyBytes) + "]. Cannot extract tablename and suffix from key");
107    }
108    return separatorIdx;
109  }
110
111  protected static byte[] getTableName(byte[] keyBytes) {
112    int separatorIdx = validateCompositeKey(keyBytes);
113    return Bytes.copy(keyBytes, 0, separatorIdx);
114  }
115
116  protected static byte[] getSuffix(byte[] keyBytes) {
117    int separatorIdx = validateCompositeKey(keyBytes);
118    return Bytes.copy(keyBytes, separatorIdx+1, keyBytes.length - separatorIdx - 1);
119  }
120}