View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import java.io.IOException;
22  import java.util.List;
23  import java.util.Map;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.classification.InterfaceAudience;
28  import org.apache.hadoop.classification.InterfaceStability;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.hbase.KeyValue;
31  import org.apache.hadoop.hbase.client.HTable;
32  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
33  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
34  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
35  import org.apache.hadoop.hbase.regionserver.BloomType;
36  import org.apache.hadoop.mapreduce.Job;
37  import org.apache.hadoop.mapreduce.RecordWriter;
38  import org.apache.hadoop.mapreduce.TaskAttemptContext;
39  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
40  
41  import com.google.common.annotations.VisibleForTesting;
42  
43  /**
44   * Writes HFiles. Passed KeyValues must arrive in order.
45   * Writes current time as the sequence id for the file. Sets the major compacted
46   * attribute on created hfiles. Calling write(null,null) will forcibly roll
47   * all HFiles being written.
48   * <p>
49   * Using this class as part of a MapReduce job is best done
50   * using {@link #configureIncrementalLoad(Job, HTable)}.
51   * @see KeyValueSortReducer
52   * @deprecated use {@link HFileOutputFormat2} instead.
53   */
54  @Deprecated
55  @InterfaceAudience.Public
56  @InterfaceStability.Stable
57  public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable, KeyValue> {
58    static Log LOG = LogFactory.getLog(HFileOutputFormat.class);
59  
60    // This constant is public since the client can modify this when setting
61    // up their conf object and thus refer to this symbol.
62    // It is present for backwards compatibility reasons. Use it only to
63    // override the auto-detection of datablock encoding.
64    public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
65      HFileOutputFormat2.DATABLOCK_ENCODING_OVERRIDE_CONF_KEY;
66  
67    public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(
68        final TaskAttemptContext context) throws IOException, InterruptedException {
69      return HFileOutputFormat2.createRecordWriter(context);
70    }
71  
72    /**
73     * Configure a MapReduce Job to perform an incremental load into the given
74     * table. This
75     * <ul>
76     *   <li>Inspects the table to configure a total order partitioner</li>
77     *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
78     *   <li>Sets the number of reduce tasks to match the current number of regions</li>
79     *   <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
80     *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
81     *     PutSortReducer)</li>
82     * </ul>
83     * The user should be sure to set the map output value class to either KeyValue or Put before
84     * running this function.
85     */
86    public static void configureIncrementalLoad(Job job, HTable table)
87        throws IOException {
88      HFileOutputFormat2.configureIncrementalLoad(job, table, HFileOutputFormat.class);
89    }
90  
91    /**
92     * Runs inside the task to deserialize column family to compression algorithm
93     * map from the configuration.
94     *
95     * @param conf to read the serialized values from
96     * @return a map from column family to the configured compression algorithm
97     */
98    @VisibleForTesting
99    static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration
100       conf) {
101     return HFileOutputFormat2.createFamilyCompressionMap(conf);
102   }
103 
104   /**
105    * Runs inside the task to deserialize column family to bloom filter type
106    * map from the configuration.
107    *
108    * @param conf to read the serialized values from
109    * @return a map from column family to the the configured bloom filter type
110    */
111   @VisibleForTesting
112   static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
113     return HFileOutputFormat2.createFamilyBloomTypeMap(conf);
114   }
115 
116   /**
117    * Runs inside the task to deserialize column family to block size
118    * map from the configuration.
119    *
120    * @param conf to read the serialized values from
121    * @return a map from column family to the configured block size
122    */
123   @VisibleForTesting
124   static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
125     return HFileOutputFormat2.createFamilyBlockSizeMap(conf);
126   }
127 
128   /**
129    * Runs inside the task to deserialize column family to data block encoding
130    * type map from the configuration.
131    *
132    * @param conf to read the serialized values from
133    * @return a map from column family to HFileDataBlockEncoder for the
134    *         configured data block type for the family
135    */
136   @VisibleForTesting
137   static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(
138       Configuration conf) {
139     return HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);
140   }
141 
142   /**
143    * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
144    * <code>splitPoints</code>. Cleans up the partitions file after job exists.
145    */
146   static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
147       throws IOException {
148     HFileOutputFormat2.configurePartitioner(job, splitPoints);
149   }
150 
151   /**
152    * Serialize column family to compression algorithm map to configuration.
153    * Invoked while configuring the MR job for incremental load.
154    *
155    * @param table to read the properties from
156    * @param conf to persist serialized values into
157    * @throws IOException
158    *           on failure to read column family descriptors
159    */
160   @edu.umd.cs.findbugs.annotations.SuppressWarnings(
161       value="RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
162   @VisibleForTesting
163   static void configureCompression(HTable table, Configuration conf) throws IOException {
164     HFileOutputFormat2.configureCompression(table, conf);
165   }
166 
167   /**
168    * Serialize column family to block size map to configuration.
169    * Invoked while configuring the MR job for incremental load.
170    *
171    * @param table to read the properties from
172    * @param conf to persist serialized values into
173    * @throws IOException
174    *           on failure to read column family descriptors
175    */
176   @VisibleForTesting
177   static void configureBlockSize(HTable table, Configuration conf) throws IOException {
178     HFileOutputFormat2.configureBlockSize(table, conf);
179   }
180 
181   /**
182    * Serialize column family to bloom type map to configuration.
183    * Invoked while configuring the MR job for incremental load.
184    *
185    * @param table to read the properties from
186    * @param conf to persist serialized values into
187    * @throws IOException
188    *           on failure to read column family descriptors
189    */
190   @VisibleForTesting
191   static void configureBloomType(HTable table, Configuration conf) throws IOException {
192     HFileOutputFormat2.configureBloomType(table, conf);
193   }
194 
195   /**
196    * Serialize column family to data block encoding map to configuration.
197    * Invoked while configuring the MR job for incremental load.
198    *
199    * @param table to read the properties from
200    * @param conf to persist serialized values into
201    * @throws IOException
202    *           on failure to read column family descriptors
203    */
204   @VisibleForTesting
205   static void configureDataBlockEncoding(HTable table,
206       Configuration conf) throws IOException {
207     HFileOutputFormat2.configureDataBlockEncoding(table, conf);
208   }
209 }