/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 * Convert Map/Reduce output and write it to an HBase table. The KEY is ignored
 * while the output value <u>must</u> be either a {@link Put} or a
 * {@link Delete} instance.
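 *
 * <p>A minimal job setup sketch (the job name and table name are illustrative;
 * in practice {@code TableMapReduceUtil.initTableReducerJob} performs this
 * wiring for you):
 * <pre>{@code
 * Job job = Job.getInstance(HBaseConfiguration.create(), "ExampleWrite");
 * job.setOutputFormatClass(TableOutputFormat.class);
 * job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "example_table");
 * job.setOutputKeyClass(ImmutableBytesWritable.class);
 * job.setOutputValueClass(Mutation.class);
 * }</pre>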
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableOutputFormat<KEY> extends OutputFormat<KEY, Mutation>
    implements Configurable {

  private static final Log LOG = LogFactory.getLog(TableOutputFormat.class);

  /** Job parameter that specifies the output table. */
  public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";

  /**
   * Prefix for configuration property overrides to apply in {@link #setConf(Configuration)}.
   * For keys matching this prefix, the prefix is stripped, and the value is set in the
   * configuration with the resulting key, e.g. the entry "hbase.mapred.output.key1 = value1"
   * would be set in the configuration as "key1 = value1".  Use this to set properties
   * which should only be applied to the {@code TableOutputFormat} configuration and not the
   * input configuration.
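   *
   * <p>A sketch of such an override (the property and value are illustrative):
   * <pre>{@code
   * // Applies only to the output (sink) cluster configuration.
   * conf.set(TableOutputFormat.OUTPUT_CONF_PREFIX + "hbase.client.write.buffer",
   *     "4194304");
   * // After setConf(Configuration), the output configuration contains
   * // hbase.client.write.buffer = 4194304.
   * }</pre>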
   */
  public static final String OUTPUT_CONF_PREFIX = "hbase.mapred.output.";

  /**
   * Optional job parameter to specify a peer cluster.
   * Used to specify a remote cluster when copying between HBase clusters (the
   * source is picked up from <code>hbase-site.xml</code>).
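   *
   * <p>A sketch of the expected value (hostnames are illustrative), in the
   * {@code quorum:clientPort:znodeParent} form:
   * <pre>{@code
   * conf.set(TableOutputFormat.QUORUM_ADDRESS,
   *     "zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase");
   * }</pre>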
   * @see TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job, Class, String, String, String)
   */
  public static final String QUORUM_ADDRESS = OUTPUT_CONF_PREFIX + "quorum";

  /** Optional job parameter to specify the peer cluster's ZK client port. */
  public static final String QUORUM_PORT = OUTPUT_CONF_PREFIX + "quorum.port";

  /** Optional specification of the region server class name of the peer cluster. */
  public static final String REGION_SERVER_CLASS = OUTPUT_CONF_PREFIX + "rs.class";

  /** Optional specification of the region server implementation name of the peer cluster. */
  public static final String REGION_SERVER_IMPL = OUTPUT_CONF_PREFIX + "rs.impl";

  /** The configuration. */
  private Configuration conf = null;

  /**
   * Writes the reducer output to an HBase table.
   */
  protected class TableRecordWriter extends RecordWriter<KEY, Mutation> {

    private Connection connection;
    private BufferedMutator mutator;

    /**
     * Creates a connection to the cluster and a {@link BufferedMutator} for
     * the configured output table.
     *
     * @throws IOException When the connection or the mutator cannot be created.
     */
    public TableRecordWriter() throws IOException {
      String tableName = conf.get(OUTPUT_TABLE);
      this.connection = ConnectionFactory.createConnection(conf);
      this.mutator = connection.getBufferedMutator(TableName.valueOf(tableName));
      LOG.info("Created table instance for " + tableName);
    }

    /**
     * Closes the writer, flushing any buffered mutations to the table.
     *
     * @param context  The context.
     * @throws IOException When closing the writer fails.
     * @see RecordWriter#close(TaskAttemptContext)
     */
    @Override
    public void close(TaskAttemptContext context) throws IOException {
      mutator.close();
      connection.close();
    }

    /**
     * Writes a key/value pair into the table.
     *
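     * <p>An illustrative call as it might appear in a reducer (row, family,
     * qualifier, and value bytes are made up):
     * <pre>{@code
     * Put put = new Put(Bytes.toBytes("row1"));
     * put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes("v"));
     * context.write(key, put);  // the key is ignored by this format
     * }</pre>
     *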
     * @param key  The key.
     * @param value  The value.
     * @throws IOException When writing fails.
     * @see RecordWriter#write(Object, Object)
     */
    @Override
    public void write(KEY key, Mutation value) throws IOException {
      if (!(value instanceof Put) && !(value instanceof Delete)) {
        throw new IOException("Pass a Delete or a Put");
      }
      mutator.mutate(value);
    }
  }

  /**
   * Creates a new record writer.
   *
   * <p>Note that a new {@code Connection} to the cluster is created for each
   * writer instance.
   *
   * @param context  The current task context.
   * @return The newly created writer instance.
   * @throws IOException When creating the writer fails.
   * @throws InterruptedException When the job is cancelled.
   */
  @Override
  public RecordWriter<KEY, Mutation> getRecordWriter(TaskAttemptContext context)
      throws IOException, InterruptedException {
    return new TableRecordWriter();
  }

  /**
   * Checks if the output target exists.
   *
   * @param context  The current context.
   * @throws IOException When the check fails.
   * @throws InterruptedException When the job is aborted.
   * @see OutputFormat#checkOutputSpecs(JobContext)
   */
  @Override
  public void checkOutputSpecs(JobContext context)
      throws IOException, InterruptedException {
    // TODO Check if the table exists?
  }

  /**
   * Returns the output committer.
   *
   * @param context  The current context.
   * @return The committer.
   * @throws IOException When creating the committer fails.
   * @throws InterruptedException When the job is aborted.
   * @see OutputFormat#getOutputCommitter(TaskAttemptContext)
   */
  @Override
  public OutputCommitter getOutputCommitter(TaskAttemptContext context)
      throws IOException, InterruptedException {
    return new TableOutputCommitter();
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

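  /**
   * Sets the configuration. Derives a separate output-cluster configuration
   * via {@link HBaseConfiguration#createClusterConf}, applying any
   * {@link #OUTPUT_CONF_PREFIX} overrides and, when present, the peer
   * cluster's quorum address, ZK client port, and region server implementation.
   *
   * @param otherConf  The configuration to derive the output configuration from.
   */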
  @Override
  public void setConf(Configuration otherConf) {
    String tableName = otherConf.get(OUTPUT_TABLE);
    if (tableName == null || tableName.isEmpty()) {
      throw new IllegalArgumentException("Must specify table name");
    }

    String address = otherConf.get(QUORUM_ADDRESS);
    int zkClientPort = otherConf.getInt(QUORUM_PORT, 0);
    String serverClass = otherConf.get(REGION_SERVER_CLASS);
    String serverImpl = otherConf.get(REGION_SERVER_IMPL);

    try {
      this.conf = HBaseConfiguration.createClusterConf(otherConf, address, OUTPUT_CONF_PREFIX);

      // rs.class and rs.impl are expected to be set together; guard on both so
      // a null serverImpl is never passed to Configuration.set().
      if (serverClass != null && serverImpl != null) {
        this.conf.set(HConstants.REGION_SERVER_IMPL, serverImpl);
      }
      if (zkClientPort != 0) {
        this.conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zkClientPort);
      }
    } catch (IOException e) {
      LOG.error(e);
      throw new RuntimeException(e);
    }
  }
}