001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.tool; 019 020import java.io.IOException; 021import java.nio.ByteBuffer; 022import java.util.List; 023import java.util.Map; 024import org.apache.hadoop.conf.Configuration; 025import org.apache.hadoop.fs.Path; 026import org.apache.hadoop.hbase.TableName; 027import org.apache.hadoop.hbase.TableNotFoundException; 028import org.apache.hadoop.hbase.util.Bytes; 029import org.apache.yetus.audience.InterfaceAudience; 030 031/** 032 * The tool to let you load the output of {@code HFileOutputFormat} into an existing table 033 * programmatically. Not thread safe. 034 */ 035@InterfaceAudience.Public 036public interface BulkLoadHFiles { 037 038 static final String RETRY_ON_IO_EXCEPTION = "hbase.bulkload.retries.retryOnIOException"; 039 static final String MAX_FILES_PER_REGION_PER_FAMILY = 040 "hbase.mapreduce.bulkload.max.hfiles.perRegion.perFamily"; 041 static final String ASSIGN_SEQ_IDS = "hbase.mapreduce.bulkload.assign.sequenceNumbers"; 042 static final String CREATE_TABLE_CONF_KEY = "create.table"; 043 static final String IGNORE_UNMATCHED_CF_CONF_KEY = "ignore.unmatched.families"; 044 static final String ALWAYS_COPY_FILES = "always.copy.files"; 045 046 /** 047 * Represents an HFile waiting to be loaded. An queue is used in this class in order to support 048 * the case where a region has split during the process of the load. When this happens, the HFile 049 * is split into two physical parts across the new region boundary, and each part is added back 050 * into the queue. The import process finishes when the queue is empty. 051 */ 052 @InterfaceAudience.Public 053 public static class LoadQueueItem { 054 055 private final byte[] family; 056 057 private final Path hfilePath; 058 059 public LoadQueueItem(byte[] family, Path hfilePath) { 060 this.family = family; 061 this.hfilePath = hfilePath; 062 } 063 064 @Override 065 public String toString() { 066 return "family:" + Bytes.toString(family) + " path:" + hfilePath.toString(); 067 } 068 069 public byte[] getFamily() { 070 return family; 071 } 072 073 public Path getFilePath() { 074 return hfilePath; 075 } 076 } 077 078 /** 079 * Perform a bulk load of the given directory into the given pre-existing table. 080 * @param tableName the table to load into 081 * @param family2Files map of family to List of hfiles 082 * @throws TableNotFoundException if table does not yet exist 083 */ 084 Map<LoadQueueItem, ByteBuffer> bulkLoad(TableName tableName, Map<byte[], List<Path>> family2Files) 085 throws TableNotFoundException, IOException; 086 087 /** 088 * Perform a bulk load of the given directory into the given pre-existing table. 089 * @param tableName the table to load into 090 * @param dir the directory that was provided as the output path of a job using 091 * {@code HFileOutputFormat} 092 * @throws TableNotFoundException if table does not yet exist 093 */ 094 Map<LoadQueueItem, ByteBuffer> bulkLoad(TableName tableName, Path dir) 095 throws TableNotFoundException, IOException; 096 097 static BulkLoadHFiles create(Configuration conf) { 098 return new BulkLoadHFilesTool(conf); 099 } 100 101}