/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.google.common.annotations.VisibleForTesting;

/**
 * Writes HFiles. Passed KeyValues must arrive in order.
 * Writes current time as the sequence id for the file. Sets the major compacted
 * attribute on created hfiles. Calling write(null,null) will forcibly roll
 * all HFiles being written.
 * <p>
 * Using this class as part of a MapReduce job is best done
 * using {@link #configureIncrementalLoad(Job, HTable)}.
 * <p>
 * Every method in this class is a thin wrapper that forwards to the
 * same-named method on {@link HFileOutputFormat2}.
 * @see KeyValueSortReducer
 * @deprecated use {@link HFileOutputFormat2} instead.
 */
@Deprecated
@InterfaceAudience.Public
@InterfaceStability.Stable
public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable, KeyValue> {
  // Final so the shared logger reference cannot be reassigned after class initialization.
  // Visibility is intentionally left package-private to stay compatible with existing
  // same-package references.
  static final Log LOG = LogFactory.getLog(HFileOutputFormat.class);

  // This constant is public since the client can modify this when setting
  // up their conf object and thus refer to this symbol.
  // It is present for backwards compatibility reasons. Use it only to
  // override the auto-detection of datablock encoding.
  public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
    HFileOutputFormat2.DATABLOCK_ENCODING_OVERRIDE_CONF_KEY;

  /**
   * Creates the record writer for this task by delegating to
   * {@code HFileOutputFormat2.createRecordWriter(context)}.
   *
   * @param context the task attempt context handed to the delegate
   * @return the record writer produced by {@code HFileOutputFormat2}
   * @throws IOException if the delegate fails with an I/O error
   * @throws InterruptedException if the delegate is interrupted
   */
  @Override
  public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(
      final TaskAttemptContext context) throws IOException, InterruptedException {
    return HFileOutputFormat2.createRecordWriter(context);
  }

  /**
   * Configure a MapReduce Job to perform an incremental load into the given
   * table. This
   * <ul>
   *   <li>Inspects the table to configure a total order partitioner</li>
   *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
   *   <li>Sets the number of reduce tasks to match the current number of regions</li>
   *   <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
   *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
   *     PutSortReducer)</li>
   * </ul>
   * The user should be sure to set the map output value class to either KeyValue or Put before
   * running this function.
   *
   * @param job the job to configure
   * @param table the target table; its table descriptor and region locator are passed on to
   *          {@code HFileOutputFormat2.configureIncrementalLoad}
   * @throws IOException if reading the table metadata or configuring the job fails
   */
  public static void configureIncrementalLoad(Job job, HTable table)
      throws IOException {
    HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(),
        table.getRegionLocator());
  }

  /**
   * Runs inside the task to deserialize column family to compression algorithm
   * map from the configuration.
   *
   * @param conf to read the serialized values from
   * @return a map from column family to the configured compression algorithm
   */
  @VisibleForTesting
  static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration
      conf) {
    return HFileOutputFormat2.createFamilyCompressionMap(conf);
  }

  /**
   * Runs inside the task to deserialize column family to bloom filter type
   * map from the configuration.
   *
   * @param conf to read the serialized values from
   * @return a map from column family to the configured bloom filter type
   */
  @VisibleForTesting
  static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
    return HFileOutputFormat2.createFamilyBloomTypeMap(conf);
  }

  /**
   * Runs inside the task to deserialize column family to block size
   * map from the configuration.
   *
   * @param conf to read the serialized values from
   * @return a map from column family to the configured block size
   */
  @VisibleForTesting
  static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
    return HFileOutputFormat2.createFamilyBlockSizeMap(conf);
  }

  /**
   * Runs inside the task to deserialize column family to data block encoding
   * type map from the configuration.
   *
   * @param conf to read the serialized values from
   * @return a map from column family to HFileDataBlockEncoder for the
   *         configured data block type for the family
   */
  @VisibleForTesting
  static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(
      Configuration conf) {
    return HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);
  }

  /**
   * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
   * <code>splitPoints</code>. Cleans up the partitions file after job exits.
   *
   * @param job the job to configure
   * @param splitPoints the split points to partition against
   * @throws IOException if the delegate fails with an I/O error
   */
  static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
      throws IOException {
    HFileOutputFormat2.configurePartitioner(job, splitPoints);
  }

  /**
   * Forwards to {@code HFileOutputFormat2.configureCompression} with the descriptor of the
   * given table. Presumably this serializes the column family to compression algorithm map
   * into the configuration, mirroring the other {@code configure*} methods of this class;
   * confirm against {@code HFileOutputFormat2}.
   *
   * @param table to read the properties from
   * @param conf passed through to the delegate
   * @throws IOException
   *           on failure to read the table descriptor
   */
  static void configureCompression(Table table, Configuration conf) throws IOException {
    // Note the argument order: the delegate takes the configuration first.
    HFileOutputFormat2.configureCompression(conf, table.getTableDescriptor());
  }

  /**
   * Serialize column family to block size map to configuration.
   * Invoked while configuring the MR job for incremental load.
   *
   * @param table to read the properties from
   * @param conf to persist serialized values into
   * @throws IOException
   *           on failure to read column family descriptors
   */
  @VisibleForTesting
  static void configureBlockSize(Table table, Configuration conf) throws IOException {
    HFileOutputFormat2.configureBlockSize(table.getTableDescriptor(), conf);
  }

  /**
   * Serialize column family to bloom type map to configuration.
   * Invoked while configuring the MR job for incremental load.
   *
   * @param table to read the properties from
   * @param conf to persist serialized values into
   * @throws IOException
   *           on failure to read column family descriptors
   */
  @VisibleForTesting
  static void configureBloomType(Table table, Configuration conf) throws IOException {
    HFileOutputFormat2.configureBloomType(table.getTableDescriptor(), conf);
  }

  /**
   * Serialize column family to data block encoding map to configuration.
   * Invoked while configuring the MR job for incremental load.
   *
   * @param table to read the properties from
   * @param conf to persist serialized values into
   * @throws IOException
   *           on failure to read column family descriptors
   */
  @VisibleForTesting
  static void configureDataBlockEncoding(Table table,
      Configuration conf) throws IOException {
    HTableDescriptor tableDescriptor = table.getTableDescriptor();
    HFileOutputFormat2.configureDataBlockEncoding(tableDescriptor, conf);
  }
}