001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.tool;
019
020import java.io.IOException;
021import java.nio.ByteBuffer;
022import java.util.List;
023import java.util.Map;
024import org.apache.hadoop.conf.Configuration;
025import org.apache.hadoop.fs.Path;
026import org.apache.hadoop.hbase.TableName;
027import org.apache.hadoop.hbase.TableNotFoundException;
028import org.apache.hadoop.hbase.util.Bytes;
029import org.apache.yetus.audience.InterfaceAudience;
030
031/**
032 * The tool to let you load the output of {@code HFileOutputFormat} into an existing table
033 * programmatically. Not thread safe.
034 */
035@InterfaceAudience.Public
036public interface BulkLoadHFiles {
037
038  static final String RETRY_ON_IO_EXCEPTION = "hbase.bulkload.retries.retryOnIOException";
039  static final String MAX_FILES_PER_REGION_PER_FAMILY =
040    "hbase.mapreduce.bulkload.max.hfiles.perRegion.perFamily";
041  static final String ASSIGN_SEQ_IDS = "hbase.mapreduce.bulkload.assign.sequenceNumbers";
042  static final String CREATE_TABLE_CONF_KEY = "create.table";
043  static final String IGNORE_UNMATCHED_CF_CONF_KEY = "ignore.unmatched.families";
044  static final String ALWAYS_COPY_FILES = "always.copy.files";
045  /**
046   * HBASE-24221 Support bulkLoadHFile by family to avoid long time waiting of bulkLoadHFile because
047   * of compacting at server side
048   */
049  public static final String BULK_LOAD_HFILES_BY_FAMILY = "hbase.mapreduce.bulkload.by.family";
050
051  /**
052   * Represents an HFile waiting to be loaded. An queue is used in this class in order to support
053   * the case where a region has split during the process of the load. When this happens, the HFile
054   * is split into two physical parts across the new region boundary, and each part is added back
055   * into the queue. The import process finishes when the queue is empty.
056   */
057  @InterfaceAudience.Public
058  public static class LoadQueueItem {
059
060    private final byte[] family;
061
062    private final Path hfilePath;
063
064    public LoadQueueItem(byte[] family, Path hfilePath) {
065      this.family = family;
066      this.hfilePath = hfilePath;
067    }
068
069    @Override
070    public String toString() {
071      return "family:" + Bytes.toString(family) + " path:" + hfilePath.toString();
072    }
073
074    public byte[] getFamily() {
075      return family;
076    }
077
078    public Path getFilePath() {
079      return hfilePath;
080    }
081  }
082
083  /**
084   * Perform a bulk load of the given directory into the given pre-existing table.
085   * @param tableName    the table to load into
086   * @param family2Files map of family to List of hfiles
087   * @throws TableNotFoundException if table does not yet exist
088   */
089  Map<LoadQueueItem, ByteBuffer> bulkLoad(TableName tableName, Map<byte[], List<Path>> family2Files)
090    throws TableNotFoundException, IOException;
091
092  /**
093   * Perform a bulk load of the given directory into the given pre-existing table.
094   * @param tableName the table to load into
095   * @param dir       the directory that was provided as the output path of a job using
096   *                  {@code HFileOutputFormat}
097   * @throws TableNotFoundException if table does not yet exist
098   */
099  Map<LoadQueueItem, ByteBuffer> bulkLoad(TableName tableName, Path dir)
100    throws TableNotFoundException, IOException;
101
102  static BulkLoadHFiles create(Configuration conf) {
103    return new BulkLoadHFilesTool(conf);
104  }
105
106}