/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.google.common.annotations.VisibleForTesting;

/**
 * Writes HFiles. Passed KeyValues must arrive in order.
 * Writes the current time as the sequence id for the file. Sets the major
 * compacted attribute on created HFiles. Calling write(null, null) will
 * forcibly roll all HFiles being written.
 * <p>
 * Using this class as part of a MapReduce job is best done
 * using {@link #configureIncrementalLoad(Job, HTable)}.
 * @see KeyValueSortReducer
 * @deprecated use {@link HFileOutputFormat2} instead.
 */
@Deprecated
@InterfaceAudience.Public
@InterfaceStability.Stable
public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable, KeyValue> {
  static Log LOG = LogFactory.getLog(HFileOutputFormat.class);

  // This constant is public so that clients can refer to it when setting the
  // key on their Configuration object. It is present for backwards
  // compatibility; use it only to override the auto-detection of data block
  // encoding.
  public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
    HFileOutputFormat2.DATABLOCK_ENCODING_OVERRIDE_CONF_KEY;

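  // A minimal sketch of using the override, assuming (as in HFileOutputFormat2)
  // that the value is a DataBlockEncoding enum name; "FAST_DIFF" is only an
  // illustrative choice, not a recommendation:
  //
  //   Configuration conf = job.getConfiguration();
  //   conf.set(HFileOutputFormat.DATABLOCK_ENCODING_OVERRIDE_CONF_KEY, "FAST_DIFF");
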
  @Override
  public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(
      final TaskAttemptContext context) throws IOException, InterruptedException {
    return HFileOutputFormat2.createRecordWriter(context);
  }

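  // Per the class javadoc, writing a null key/value pair forces a roll of all
  // HFiles currently being written. A hedged sketch from inside a reducer that
  // uses this output format:
  //
  //   context.write(null, null);   // forcibly roll all open HFiles
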
  /**
   * Configure a MapReduce Job to perform an incremental load into the given
   * table. This
   * <ul>
   *   <li>Inspects the table to configure a total order partitioner</li>
   *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
   *   <li>Sets the number of reduce tasks to match the current number of regions</li>
   *   <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
   *   <li>Sets up the reducer to perform the appropriate sorting (either KeyValueSortReducer or
   *     PutSortReducer)</li>
   * </ul>
   * The user should be sure to set the map output value class to either KeyValue or Put before
   * running this function; see the driver sketch following this method.
   */
  public static void configureIncrementalLoad(Job job, HTable table)
      throws IOException {
    HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(),
        table.getRegionLocator());
  }

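  // A hedged driver sketch (not part of this class): configure a job for
  // incremental load, run it, then bulk-load the resulting HFiles. The mapper
  // "MyPutMapper", the table name, and the paths are hypothetical; Path comes
  // from org.apache.hadoop.fs, and exception handling is elided.
  //
  //   Job job = Job.getInstance(conf, "bulk-load");
  //   job.setMapperClass(MyPutMapper.class);   // emits (ImmutableBytesWritable, Put)
  //   job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  //   job.setMapOutputValueClass(Put.class);
  //   HTable table = new HTable(conf, "my_table");
  //   HFileOutputFormat.configureIncrementalLoad(job, table);
  //   FileOutputFormat.setOutputPath(job, new Path("/tmp/bulkout"));
  //   if (job.waitForCompletion(true)) {
  //     // LoadIncrementalHFiles moves the generated HFiles into the regions.
  //     new LoadIncrementalHFiles(conf).doBulkLoad(new Path("/tmp/bulkout"), table);
  //   }
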
  /**
   * Runs inside the task to deserialize column family to compression algorithm
   * map from the configuration.
   *
   * @param conf to read the serialized values from
   * @return a map from column family to the configured compression algorithm
   */
  @VisibleForTesting
  static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration conf) {
    return HFileOutputFormat2.createFamilyCompressionMap(conf);
  }

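  // A minimal sketch of consuming this map, assuming the configuration was
  // populated by configureIncrementalLoad. As in HFileOutputFormat2, the map is
  // assumed to be backed by a byte-array comparator, so lookups by raw family
  // bytes work (Bytes is org.apache.hadoop.hbase.util.Bytes; "cf" is a
  // hypothetical family name):
  //
  //   Map<byte[], Algorithm> compression =
  //       HFileOutputFormat.createFamilyCompressionMap(conf);
  //   Algorithm algo = compression.get(Bytes.toBytes("cf"));
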
  /**
   * Runs inside the task to deserialize column family to bloom filter type
   * map from the configuration.
   *
   * @param conf to read the serialized values from
   * @return a map from column family to the configured bloom filter type
   */
  @VisibleForTesting
  static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
    return HFileOutputFormat2.createFamilyBloomTypeMap(conf);
  }

  /**
   * Runs inside the task to deserialize column family to block size
   * map from the configuration.
   *
   * @param conf to read the serialized values from
   * @return a map from column family to the configured block size
   */
  @VisibleForTesting
  static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
    return HFileOutputFormat2.createFamilyBlockSizeMap(conf);
  }

  /**
   * Runs inside the task to deserialize column family to data block encoding
   * type map from the configuration.
   *
   * @param conf to read the serialized values from
   * @return a map from column family to the configured data block encoding
   *         for the family
   */
  @VisibleForTesting
  static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(
      Configuration conf) {
    return HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);
  }

  /**
   * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
   * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
   */
  static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
      throws IOException {
    HFileOutputFormat2.configurePartitioner(job, splitPoints);
  }

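  // A hedged sketch of building split points from a table's region boundaries,
  // mirroring what configureIncrementalLoad is assumed to do internally: the
  // full region start key list is passed, and (as in HFileOutputFormat2) the
  // leading empty start key is presumably handled by the implementation.
  // ArrayList is java.util.ArrayList.
  //
  //   byte[][] startKeys = table.getStartKeys();
  //   List<ImmutableBytesWritable> splitPoints =
  //       new ArrayList<ImmutableBytesWritable>();
  //   for (byte[] startKey : startKeys) {
  //     splitPoints.add(new ImmutableBytesWritable(startKey));
  //   }
  //   HFileOutputFormat.configurePartitioner(job, splitPoints);
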
  /**
   * Serialize column family to compression algorithm map to configuration.
   * Invoked while configuring the MR job for incremental load.
   *
   * @param table to read the properties from
   * @param conf to persist serialized values into
   * @throws IOException
   *           on failure to read column family descriptors
   */
  static void configureCompression(Table table, Configuration conf) throws IOException {
    HFileOutputFormat2.configureCompression(conf, table.getTableDescriptor());
  }

  /**
   * Serialize column family to block size map to configuration.
   * Invoked while configuring the MR job for incremental load.
   *
   * @param table to read the properties from
   * @param conf to persist serialized values into
   * @throws IOException
   *           on failure to read column family descriptors
   */
  @VisibleForTesting
  static void configureBlockSize(Table table, Configuration conf) throws IOException {
    HFileOutputFormat2.configureBlockSize(table.getTableDescriptor(), conf);
  }

  /**
   * Serialize column family to bloom type map to configuration.
   * Invoked while configuring the MR job for incremental load.
   *
   * @param table to read the properties from
   * @param conf to persist serialized values into
   * @throws IOException
   *           on failure to read column family descriptors
   */
  @VisibleForTesting
  static void configureBloomType(Table table, Configuration conf) throws IOException {
    HFileOutputFormat2.configureBloomType(table.getTableDescriptor(), conf);
  }

  /**
   * Serialize column family to data block encoding map to configuration.
   * Invoked while configuring the MR job for incremental load.
   *
   * @param table to read the properties from
   * @param conf to persist serialized values into
   * @throws IOException
   *           on failure to read column family descriptors
   */
  @VisibleForTesting
  static void configureDataBlockEncoding(Table table,
      Configuration conf) throws IOException {
    HTableDescriptor tableDescriptor = table.getTableDescriptor();
    HFileOutputFormat2.configureDataBlockEncoding(tableDescriptor, conf);
  }
}
202 }