001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.tool;
019
020import java.io.IOException;
021import java.nio.ByteBuffer;
022import java.util.List;
023import java.util.Map;
024import org.apache.hadoop.conf.Configuration;
025import org.apache.hadoop.fs.Path;
026import org.apache.hadoop.hbase.TableName;
027import org.apache.hadoop.hbase.TableNotFoundException;
028import org.apache.hadoop.hbase.util.Bytes;
029import org.apache.yetus.audience.InterfaceAudience;
030
031/**
032 * The tool to let you load the output of {@code HFileOutputFormat} into an existing table
033 * programmatically. Not thread safe.
034 */
035@InterfaceAudience.Public
036public interface BulkLoadHFiles {
037
038  static final String RETRY_ON_IO_EXCEPTION = "hbase.bulkload.retries.retryOnIOException";
039  static final String MAX_FILES_PER_REGION_PER_FAMILY =
040    "hbase.mapreduce.bulkload.max.hfiles.perRegion.perFamily";
041  static final String ASSIGN_SEQ_IDS = "hbase.mapreduce.bulkload.assign.sequenceNumbers";
042  static final String CREATE_TABLE_CONF_KEY = "create.table";
043  static final String IGNORE_UNMATCHED_CF_CONF_KEY = "ignore.unmatched.families";
044  static final String ALWAYS_COPY_FILES = "always.copy.files";
045
046  /**
047   * Represents an HFile waiting to be loaded. An queue is used in this class in order to support
048   * the case where a region has split during the process of the load. When this happens, the HFile
049   * is split into two physical parts across the new region boundary, and each part is added back
050   * into the queue. The import process finishes when the queue is empty.
051   */
052  @InterfaceAudience.Public
053  public static class LoadQueueItem {
054
055    private final byte[] family;
056
057    private final Path hfilePath;
058
059    public LoadQueueItem(byte[] family, Path hfilePath) {
060      this.family = family;
061      this.hfilePath = hfilePath;
062    }
063
064    @Override
065    public String toString() {
066      return "family:" + Bytes.toString(family) + " path:" + hfilePath.toString();
067    }
068
069    public byte[] getFamily() {
070      return family;
071    }
072
073    public Path getFilePath() {
074      return hfilePath;
075    }
076  }
077
078  /**
079   * Perform a bulk load of the given directory into the given pre-existing table.
080   * @param tableName    the table to load into
081   * @param family2Files map of family to List of hfiles
082   * @throws TableNotFoundException if table does not yet exist
083   */
084  Map<LoadQueueItem, ByteBuffer> bulkLoad(TableName tableName, Map<byte[], List<Path>> family2Files)
085    throws TableNotFoundException, IOException;
086
087  /**
088   * Disables replication for all bulkloads done via this instance, when bulkload replication is
089   * configured.
090   */
091  void disableReplication();
092
093  /** Returns true if replication has been disabled. */
094  boolean isReplicationDisabled();
095
096  /**
097   * Perform a bulk load of the given directory into the given pre-existing table.
098   * @param tableName the table to load into
099   * @param dir       the directory that was provided as the output path of a job using
100   *                  {@code HFileOutputFormat}
101   * @throws TableNotFoundException if table does not yet exist
102   */
103  Map<LoadQueueItem, ByteBuffer> bulkLoad(TableName tableName, Path dir)
104    throws TableNotFoundException, IOException;
105
106  static BulkLoadHFiles create(Configuration conf) {
107    return new BulkLoadHFilesTool(conf);
108  }
109
110}