/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 * Convert Map/Reduce output and write it to an HBase table. The KEY is ignored
 * while the output value <u>must</u> be either a {@link Put} or a
 * {@link Delete} instance.
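 *
 * <p>A typical reducer job is wired up through
 * {@link TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job)},
 * which sets this class as the output format and stores the table name in the
 * job configuration. A minimal sketch (the table, mapper, and reducer names
 * are illustrative):
 *
 * <pre>
 * Job job = Job.getInstance(HBaseConfiguration.create(), "write-to-hbase");
 * job.setMapperClass(MyMapper.class);
 * TableMapReduceUtil.initTableReducerJob("outputTable", MyTableReducer.class, job);
 * </pre>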
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableOutputFormat<KEY> extends OutputFormat<KEY, Mutation>
    implements Configurable {

  private static final Log LOG = LogFactory.getLog(TableOutputFormat.class);

  /** Job parameter that specifies the output table. */
  public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";

  /**
   * Optional job parameter to specify a peer cluster.
   * Used to specify a remote cluster when copying between HBase clusters
   * (the source cluster is picked up from <code>hbase-site.xml</code>).
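   * The address takes the form of a ZooKeeper cluster key, i.e.
   * <code>hbase.zookeeper.quorum:hbase.zookeeper.property.clientPort:zookeeper.znode.parent</code>,
   * for example <code>server1,server2,server3:2181:/hbase</code>.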
   * @see TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job, Class, String, String, String)
   */
  public static final String QUORUM_ADDRESS = "hbase.mapred.output.quorum";

  /** Optional job parameter to specify the peer cluster's ZK client port. */
  public static final String QUORUM_PORT = "hbase.mapred.output.quorum.port";

  /** Optional region server class name of the peer cluster. */
  public static final String
      REGION_SERVER_CLASS = "hbase.mapred.output.rs.class";

  /** Optional region server implementation name of the peer cluster. */
  public static final String
      REGION_SERVER_IMPL = "hbase.mapred.output.rs.impl";

  /** The configuration. */
  private Configuration conf = null;

  /**
   * Writes the reducer output to an HBase table. Mutations are buffered by a
   * {@link BufferedMutator} and flushed when the writer is closed.
   */
  protected class TableRecordWriter extends RecordWriter<KEY, Mutation> {

    private Connection connection;
    private BufferedMutator mutator;

    /**
     * Creates a connection to the cluster and a {@link BufferedMutator} for
     * the configured output table.
     *
     * @throws IOException When the connection or the mutator cannot be created.
     */
    public TableRecordWriter() throws IOException {
      String tableName = conf.get(OUTPUT_TABLE);
      this.connection = ConnectionFactory.createConnection(conf);
      this.mutator = connection.getBufferedMutator(TableName.valueOf(tableName));
      LOG.info("Created table instance for " + tableName);
    }

    /**
     * Closes the writer, in this case flushing any buffered mutations to the
     * table.
     *
     * @param context  The context.
     * @throws IOException When closing the writer fails.
     * @see RecordWriter#close(TaskAttemptContext)
     */
    @Override
    public void close(TaskAttemptContext context) throws IOException {
      try {
        // Flush any pending mutations before releasing the connection.
        mutator.close();
      } finally {
        connection.close();
      }
    }

    /**
     * Writes a key/value pair into the table.
     *
     * @param key  The key, which is ignored.
     * @param value  The value, a {@link Put} or a {@link Delete} to apply to
     *   the table.
     * @throws IOException When writing fails.
     * @see RecordWriter#write(Object, Object)
     */
    @Override
    public void write(KEY key, Mutation value) throws IOException {
      if (!(value instanceof Put) && !(value instanceof Delete)) {
        throw new IOException("Pass a Delete or a Put");
      }
      mutator.mutate(value);
    }
  }

  /**
   * Creates a new record writer.
   *
   * @param context  The current task context.
   * @return The newly created writer instance.
   * @throws IOException When creating the writer fails.
   * @throws InterruptedException When the job is cancelled.
   */
  @Override
  public RecordWriter<KEY, Mutation> getRecordWriter(TaskAttemptContext context)
      throws IOException, InterruptedException {
    return new TableRecordWriter();
  }

  /**
   * Checks if the output target exists.
   *
   * @param context  The current context.
   * @throws IOException When the check fails.
   * @throws InterruptedException When the job is aborted.
   * @see OutputFormat#checkOutputSpecs(JobContext)
   */
  @Override
  public void checkOutputSpecs(JobContext context)
      throws IOException, InterruptedException {
    // TODO Check if the table exists?
  }

  /**
   * Returns the output committer.
   *
   * @param context  The current context.
   * @return The committer.
   * @throws IOException When creating the committer fails.
   * @throws InterruptedException When the job is aborted.
   * @see OutputFormat#getOutputCommitter(TaskAttemptContext)
   */
  @Override
  public OutputCommitter getOutputCommitter(TaskAttemptContext context)
      throws IOException, InterruptedException {
    return new TableOutputCommitter();
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  public void setConf(Configuration otherConf) {
    this.conf = HBaseConfiguration.create(otherConf);

    String tableName = this.conf.get(OUTPUT_TABLE);
    if (tableName == null || tableName.isEmpty()) {
      throw new IllegalArgumentException("Must specify table name");
    }

    String address = this.conf.get(QUORUM_ADDRESS);
    int zkClientPort = this.conf.getInt(QUORUM_PORT, 0);
    String serverClass = this.conf.get(REGION_SERVER_CLASS);
    String serverImpl = this.conf.get(REGION_SERVER_IMPL);

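    // The peer-cluster parameters are optional overrides: when present they
    // redirect output to a remote cluster instead of the one described by
    // the local hbase-site.xml.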
    try {
      if (address != null) {
        // Apply the peer cluster's ZooKeeper cluster key to this
        // configuration so output goes to the remote cluster.
        ZKUtil.applyClusterKeyToConf(this.conf, address);
      }
      // Guard both keys: Configuration.set rejects a null value, so only
      // override the region server implementation when both the class and
      // impl parameters are present.
      if (serverClass != null && serverImpl != null) {
        this.conf.set(HConstants.REGION_SERVER_IMPL, serverImpl);
      }
      if (zkClientPort != 0) {
        this.conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zkClientPort);
      }
    } catch (IOException e) {
      LOG.error(e);
      throw new RuntimeException(e);
    }
  }
}