/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 * Convert Map/Reduce output and write it to an HBase table. The KEY is ignored
 * while the output value <u>must</u> be either a {@link Put} or a
 * {@link Delete} instance.
 *
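 * <p>A minimal sketch of wiring this format into a job; the table name
 * {@code "output_table"} and the column coordinates used below are hypothetical
 * placeholders:
 *
 * <pre>{@code
 * Configuration conf = HBaseConfiguration.create();
 * Job job = Job.getInstance(conf, "write-to-hbase");
 * job.setOutputFormatClass(TableOutputFormat.class);
 * job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "output_table");
 * // In the reducer, emit mutations; the key is ignored:
 * //   context.write(null, new Put(rowKey).addColumn(family, qualifier, value));
 * }</pre>
 *
 * <p>{@link TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job)}
 * performs the same setup (and more) in a single call.
 *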
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableOutputFormat<KEY> extends OutputFormat<KEY, Mutation>
implements Configurable {

  private static final Log LOG = LogFactory.getLog(TableOutputFormat.class);

  /** Job parameter that specifies the output table. */
  public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";

  /**
   * Optional job parameter to specify a peer cluster.
   * Used to specify a remote cluster when copying between HBase clusters (the
   * source cluster is picked up from <code>hbase-site.xml</code>).
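   * <p>The value is a ZooKeeper cluster key, e.g.
   * <code>zk1.example.org,zk2.example.org,zk3.example.org:2181:/hbase</code>
   * (quorum hosts, client port and parent znode; the host names shown are
   * hypothetical).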
   * @see TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job, Class, String, String, String)
   */
  public static final String QUORUM_ADDRESS = "hbase.mapred.output.quorum";

  /** Optional job parameter to specify the peer cluster's ZK client port. */
  public static final String QUORUM_PORT = "hbase.mapred.output.quorum.port";

  /** Optional specification of the region server class name of the peer cluster. */
  public static final String
      REGION_SERVER_CLASS = "hbase.mapred.output.rs.class";
  /**
   * Optional specification of the region server implementation name of the peer
   * cluster; only applied when {@link #REGION_SERVER_CLASS} is also set.
   */
  public static final String
      REGION_SERVER_IMPL = "hbase.mapred.output.rs.impl";

  /** The configuration. */
  private Configuration conf = null;

  private Table table;
  private Connection connection;

  /**
   * Writes the reducer output to an HBase table.
   * The {@code KEY} type parameter comes from the enclosing {@link TableOutputFormat}.
   */
  protected class TableRecordWriter
  extends RecordWriter<KEY, Mutation> {

    /**
     * Closes the writer, in this case flushing any buffered mutations to the table.
     *
     * @param context  The context.
     * @throws IOException When closing the writer fails.
     * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext)
     */
    @Override
    public void close(TaskAttemptContext context)
    throws IOException {
      table.close();
      connection.close();
    }

    /**
     * Writes a key/value pair into the table.
     *
     * @param key  The key.
     * @param value  The value.
     * @throws IOException When writing fails.
     * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object)
     */
    @Override
    public void write(KEY key, Mutation value)
    throws IOException {
      if (value instanceof Put) {
        table.put(new Put((Put) value));
      } else if (value instanceof Delete) {
        table.delete(new Delete((Delete) value));
      } else {
        throw new IOException("Pass a Delete or a Put");
      }
    }
  }

  /**
   * Creates a new record writer.
   *
   * @param context  The current task context.
   * @return The newly created writer instance.
   * @throws IOException When creating the writer fails.
   * @throws InterruptedException When the job is cancelled.
   * @see org.apache.hadoop.mapreduce.OutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext)
   */
  @Override
  public RecordWriter<KEY, Mutation> getRecordWriter(
    TaskAttemptContext context)
  throws IOException, InterruptedException {
    return new TableRecordWriter();
  }

  /**
   * Checks if the output target exists.
   *
   * @param context  The current context.
   * @throws IOException When the check fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.OutputFormat#checkOutputSpecs(org.apache.hadoop.mapreduce.JobContext)
   */
  @Override
  public void checkOutputSpecs(JobContext context) throws IOException,
      InterruptedException {
    // TODO Check if the table exists?

  }

  /**
   * Returns the output committer.
   *
   * @param context  The current context.
   * @return The committer.
   * @throws IOException When creating the committer fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.OutputFormat#getOutputCommitter(org.apache.hadoop.mapreduce.TaskAttemptContext)
   */
  @Override
  public OutputCommitter getOutputCommitter(TaskAttemptContext context)
  throws IOException, InterruptedException {
    return new TableOutputCommitter();
  }
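  /**
   * Returns the configuration.
   *
   * @return The current configuration.
   * @see org.apache.hadoop.conf.Configurable#getConf()
   */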
  @Override
  public Configuration getConf() {
    return conf;
  }

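  /**
   * Sets the configuration: {@link #OUTPUT_TABLE} is required, and the optional
   * peer cluster settings ({@link #QUORUM_ADDRESS}, {@link #QUORUM_PORT},
   * {@link #REGION_SERVER_CLASS} and {@link #REGION_SERVER_IMPL}) are applied
   * before the connection and table instances are created.
   *
   * @param otherConf  The configuration to use.
   */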
  @Override
  public void setConf(Configuration otherConf) {
    this.conf = HBaseConfiguration.create(otherConf);

    String tableName = this.conf.get(OUTPUT_TABLE);
    if (tableName == null || tableName.isEmpty()) {
      throw new IllegalArgumentException("Must specify table name");
    }

    String address = this.conf.get(QUORUM_ADDRESS);
    int zkClientPort = this.conf.getInt(QUORUM_PORT, 0);
    String serverClass = this.conf.get(REGION_SERVER_CLASS);
    String serverImpl = this.conf.get(REGION_SERVER_IMPL);

    try {
      // Point the configuration at the peer cluster if one was specified.
      if (address != null) {
        ZKUtil.applyClusterKeyToConf(this.conf, address);
      }
      // REGION_SERVER_CLASS and REGION_SERVER_IMPL are expected to be set together;
      // the implementation override is only applied when the class name is present.
      if (serverClass != null) {
        this.conf.set(HConstants.REGION_SERVER_IMPL, serverImpl);
      }
      if (zkClientPort != 0) {
        this.conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zkClientPort);
      }
      this.connection = ConnectionFactory.createConnection(this.conf);
      this.table = connection.getTable(TableName.valueOf(tableName));
      // Disable auto-flush so puts are buffered client-side and written in batches.
      ((HTable) this.table).setAutoFlush(false, true);
      LOG.info("Created table instance for " + tableName);
    } catch (IOException e) {
      LOG.error(e);
      throw new RuntimeException(e);
    }
  }
}