
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 * Convert Map/Reduce output and write it to an HBase table. The KEY is ignored
 * while the output value <u>must</u> be either a {@link Put} or a
 * {@link Delete} instance.
 *
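 * <p>A minimal job setup sketch; the table name {@code "mytable"} and the
 * {@code MyTableReducer} class are illustrative only:
 *
 * <pre>{@code
 * Configuration conf = HBaseConfiguration.create();
 * conf.set(TableOutputFormat.OUTPUT_TABLE, "mytable");
 * Job job = Job.getInstance(conf, "write-to-hbase");
 * job.setReducerClass(MyTableReducer.class); // emits Put/Delete values
 * job.setOutputFormatClass(TableOutputFormat.class);
 * job.setOutputKeyClass(ImmutableBytesWritable.class);
 * job.setOutputValueClass(Mutation.class);
 * }</pre>
 *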
 * @param <KEY>  The type of the key. Ignored in this class.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableOutputFormat<KEY> extends OutputFormat<KEY, Mutation>
    implements Configurable {

  private static final Log LOG = LogFactory.getLog(TableOutputFormat.class);

  /** Job parameter that specifies the output table. */
  public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";

  /**
   * Optional job parameter to specify a peer cluster.
   * Used to specify a remote cluster when copying between HBase clusters (the
   * source cluster is picked up from <code>hbase-site.xml</code>).
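   * <p>The value is a ZooKeeper cluster key of the form
   * <code>quorum servers:client port:znode parent</code>; the hostnames below
   * are illustrative:
   * <pre>{@code
   * conf.set(TableOutputFormat.QUORUM_ADDRESS,
   *     "zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase");
   * }</pre>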
   * @see TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job, Class, String, String, String)
   */
  public static final String QUORUM_ADDRESS = "hbase.mapred.output.quorum";

  /** Optional job parameter to specify the peer cluster's ZooKeeper client port. */
  public static final String QUORUM_PORT = "hbase.mapred.output.quorum.port";

  /** Optional specification of the region server class name of the peer cluster. */
  public static final String REGION_SERVER_CLASS = "hbase.mapred.output.rs.class";

  /** Optional specification of the region server implementation name of the peer cluster. */
  public static final String REGION_SERVER_IMPL = "hbase.mapred.output.rs.impl";

  /** The configuration. */
  private Configuration conf = null;

  /** The table to write to, created in {@link #setConf(Configuration)}. */
  private HTable table;

  /**
   * Writes the reducer output to an HBase table.
   *
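   * <p>A reducer feeding this writer typically emits mutations like the
   * following sketch (row key, family, and qualifier are illustrative):
   * <pre>{@code
   * Put put = new Put(Bytes.toBytes("row1"));
   * put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes("v"));
   * context.write(key, put);
   * }</pre>
   *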
   * @param <KEY>  The type of the key.
   */
  protected static class TableRecordWriter<KEY>
      extends RecordWriter<KEY, Mutation> {

    /** The table to write to. */
    private Table table;

    /**
     * Instantiates a TableRecordWriter with the {@link Table} to write to.
     *
     * @param table  The table to write to.
     */
    public TableRecordWriter(Table table) {
      this.table = table;
    }

    /**
     * Closes the writer; closing the table flushes any buffered commits.
     *
     * @param context  The context.
     * @throws IOException When closing the writer fails.
     * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext)
     */
    @Override
    public void close(TaskAttemptContext context) throws IOException {
      table.close();
    }

    /**
     * Writes a key/value pair into the table.
     *
     * @param key  The key.
     * @param value  The value; must be a {@link Put} or a {@link Delete}.
     * @throws IOException When writing fails.
     * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object)
     */
    @Override
    public void write(KEY key, Mutation value) throws IOException {
      // Copy the mutation before handing it to the table so callers may
      // safely reuse their instance.
      if (value instanceof Put) {
        this.table.put(new Put((Put) value));
      } else if (value instanceof Delete) {
        this.table.delete(new Delete((Delete) value));
      } else {
        throw new IOException("Pass a Delete or a Put");
      }
    }
  }

  /**
   * Creates a new record writer.
   *
   * @param context  The current task context.
   * @return The newly created writer instance.
   * @throws IOException When creating the writer fails.
   * @throws InterruptedException When the job is cancelled.
   * @see org.apache.hadoop.mapreduce.OutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext)
   */
  @Override
  public RecordWriter<KEY, Mutation> getRecordWriter(TaskAttemptContext context)
      throws IOException, InterruptedException {
    return new TableRecordWriter<KEY>(this.table);
  }

  /**
   * Checks if the output target exists.
   *
   * @param context  The current context.
   * @throws IOException When the check fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.OutputFormat#checkOutputSpecs(org.apache.hadoop.mapreduce.JobContext)
   */
  @Override
  public void checkOutputSpecs(JobContext context)
      throws IOException, InterruptedException {
    // TODO Check if the table exists?
  }

  /**
   * Returns the output committer.
   *
   * @param context  The current context.
   * @return The committer.
   * @throws IOException When creating the committer fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.OutputFormat#getOutputCommitter(org.apache.hadoop.mapreduce.TaskAttemptContext)
   */
  @Override
  public OutputCommitter getOutputCommitter(TaskAttemptContext context)
      throws IOException, InterruptedException {
    return new TableOutputCommitter();
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  public void setConf(Configuration otherConf) {
    this.conf = HBaseConfiguration.create(otherConf);

    String tableName = this.conf.get(OUTPUT_TABLE);
    if (tableName == null || tableName.isEmpty()) {
      throw new IllegalArgumentException("Must specify table name");
    }

    String address = this.conf.get(QUORUM_ADDRESS);
    int zkClientPort = this.conf.getInt(QUORUM_PORT, 0);
    String serverClass = this.conf.get(REGION_SERVER_CLASS);
    String serverImpl = this.conf.get(REGION_SERVER_IMPL);

    try {
      if (address != null) {
        ZKUtil.applyClusterKeyToConf(this.conf, address);
      }
      // Guard on both keys: Configuration.set() rejects a null value, so only
      // apply the override when an implementation name was actually supplied.
      if (serverClass != null && serverImpl != null) {
        this.conf.set(HConstants.REGION_SERVER_IMPL, serverImpl);
      }
      if (zkClientPort != 0) {
        this.conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zkClientPort);
      }
      this.table = new HTable(this.conf, TableName.valueOf(tableName));
      // Disable autoflush so puts are buffered client-side and flushed in
      // batches; the buffer is flushed when the writer is closed.
      this.table.setAutoFlush(false, true);
      LOG.info("Created table instance for " + tableName);
    } catch (IOException e) {
      LOG.error("Failed to create table instance for " + tableName, e);
      throw new RuntimeException(e);
    }
  }
}