View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import java.io.IOException;
22  import java.util.List;
23  import java.util.Map;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.classification.InterfaceStability;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.hbase.KeyValue;
31  import org.apache.hadoop.hbase.client.HTable;
32  import org.apache.hadoop.hbase.client.Table;
33  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
34  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
35  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
36  import org.apache.hadoop.hbase.regionserver.BloomType;
37  import org.apache.hadoop.mapreduce.Job;
38  import org.apache.hadoop.mapreduce.RecordWriter;
39  import org.apache.hadoop.mapreduce.TaskAttemptContext;
40  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
41  
42  import com.google.common.annotations.VisibleForTesting;
43  
44  /**
45   * Writes HFiles. Passed KeyValues must arrive in order.
46   * Writes current time as the sequence id for the file. Sets the major compacted
47   * attribute on created hfiles. Calling write(null,null) will forcibly roll
48   * all HFiles being written.
49   * <p>
50   * Using this class as part of a MapReduce job is best done
51   * using {@link #configureIncrementalLoad(Job, HTable)}.
52   * @see KeyValueSortReducer
53   * @deprecated use {@link HFileOutputFormat2} instead.
54   */
55  @Deprecated
56  @InterfaceAudience.Public
57  @InterfaceStability.Stable
58  public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable, KeyValue> {
59    static Log LOG = LogFactory.getLog(HFileOutputFormat.class);
60  
61    // This constant is public since the client can modify this when setting
62    // up their conf object and thus refer to this symbol.
63    // It is present for backwards compatibility reasons. Use it only to
64    // override the auto-detection of datablock encoding.
65    public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
66      HFileOutputFormat2.DATABLOCK_ENCODING_OVERRIDE_CONF_KEY;
67  
68    @Override
69    public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(
70        final TaskAttemptContext context) throws IOException, InterruptedException {
71      return HFileOutputFormat2.createRecordWriter(context);
72    }
73  
74    /**
75     * Configure a MapReduce Job to perform an incremental load into the given
76     * table. This
77     * <ul>
78     *   <li>Inspects the table to configure a total order partitioner</li>
79     *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
80     *   <li>Sets the number of reduce tasks to match the current number of regions</li>
81     *   <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
82     *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
83     *     PutSortReducer)</li>
84     * </ul>
85     * The user should be sure to set the map output value class to either KeyValue or Put before
86     * running this function.
87     */
88    public static void configureIncrementalLoad(Job job, HTable table)
89        throws IOException {
90      HFileOutputFormat2.configureIncrementalLoad(job, table, table);
91    }
92  
93    /**
94     * Runs inside the task to deserialize column family to compression algorithm
95     * map from the configuration.
96     *
97     * @param conf to read the serialized values from
98     * @return a map from column family to the configured compression algorithm
99     */
100   @VisibleForTesting
101   static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration
102       conf) {
103     return HFileOutputFormat2.createFamilyCompressionMap(conf);
104   }
105 
106   /**
107    * Runs inside the task to deserialize column family to bloom filter type
108    * map from the configuration.
109    *
110    * @param conf to read the serialized values from
111    * @return a map from column family to the the configured bloom filter type
112    */
113   @VisibleForTesting
114   static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
115     return HFileOutputFormat2.createFamilyBloomTypeMap(conf);
116   }
117 
118   /**
119    * Runs inside the task to deserialize column family to block size
120    * map from the configuration.
121    *
122    * @param conf to read the serialized values from
123    * @return a map from column family to the configured block size
124    */
125   @VisibleForTesting
126   static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
127     return HFileOutputFormat2.createFamilyBlockSizeMap(conf);
128   }
129 
130   /**
131    * Runs inside the task to deserialize column family to data block encoding
132    * type map from the configuration.
133    *
134    * @param conf to read the serialized values from
135    * @return a map from column family to HFileDataBlockEncoder for the
136    *         configured data block type for the family
137    */
138   @VisibleForTesting
139   static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(
140       Configuration conf) {
141     return HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);
142   }
143 
144   /**
145    * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
146    * <code>splitPoints</code>. Cleans up the partitions file after job exists.
147    */
148   static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
149       throws IOException {
150     HFileOutputFormat2.configurePartitioner(job, splitPoints);
151   }
152 
153   /**
154    * Serialize column family to compression algorithm map to configuration.
155    * Invoked while configuring the MR job for incremental load.
156    *
157    * @param table to read the properties from
158    * @param conf to persist serialized values into
159    * @throws IOException
160    *           on failure to read column family descriptors
161    */
162   @edu.umd.cs.findbugs.annotations.SuppressWarnings(
163       value="RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
164   @VisibleForTesting
165   static void configureCompression(Table table, Configuration conf) throws IOException {
166     HFileOutputFormat2.configureCompression(table, conf);
167   }
168 
169   /**
170    * Serialize column family to block size map to configuration.
171    * Invoked while configuring the MR job for incremental load.
172    *
173    * @param table to read the properties from
174    * @param conf to persist serialized values into
175    * @throws IOException
176    *           on failure to read column family descriptors
177    */
178   @VisibleForTesting
179   static void configureBlockSize(Table table, Configuration conf) throws IOException {
180     HFileOutputFormat2.configureBlockSize(table, conf);
181   }
182 
183   /**
184    * Serialize column family to bloom type map to configuration.
185    * Invoked while configuring the MR job for incremental load.
186    *
187    * @param table to read the properties from
188    * @param conf to persist serialized values into
189    * @throws IOException
190    *           on failure to read column family descriptors
191    */
192   @VisibleForTesting
193   static void configureBloomType(Table table, Configuration conf) throws IOException {
194     HFileOutputFormat2.configureBloomType(table, conf);
195   }
196 
197   /**
198    * Serialize column family to data block encoding map to configuration.
199    * Invoked while configuring the MR job for incremental load.
200    *
201    * @param table to read the properties from
202    * @param conf to persist serialized values into
203    * @throws IOException
204    *           on failure to read column family descriptors
205    */
206   @VisibleForTesting
207   static void configureDataBlockEncoding(Table table,
208       Configuration conf) throws IOException {
209     HFileOutputFormat2.configureDataBlockEncoding(table, conf);
210   }
211 }