/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 * <p>
 * Hadoop output format that writes to one or more HBase tables. The key is
 * taken to be the table name while the output value <em>must</em> be either a
 * {@link Put} or a {@link Delete} instance. All tables must already exist, and
 * all Puts and Deletes must reference only valid column families.
 * </p>
 *
 * <p>
 * Write-ahead logging (HLog) for Puts can be disabled by setting
 * {@link #WAL_PROPERTY} to {@link #WAL_OFF}. The default value is
 * {@link #WAL_ON}. Note that disabling write-ahead logging is only appropriate
 * for jobs where loss of data due to region server failure can be tolerated
 * (for example, because it is easy to rerun a bulk import).
 * </p>
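 *
 * <p>
 * A minimal usage sketch follows. It is illustrative only: the driver and
 * mapper classes ({@code ExampleDriver}, {@code ExampleMapper}) and the table
 * and cell coordinates ({@code "tableA"}, {@code rowKey}, {@code family},
 * {@code qualifier}, {@code value}) are hypothetical names, not part of this
 * API.
 * </p>
 *
 * <pre>
 * // Driver: configure a map-only job that writes to multiple tables.
 * Configuration conf = HBaseConfiguration.create();
 * Job job = Job.getInstance(conf, "multi-table-example");
 * job.setJarByClass(ExampleDriver.class);
 * job.setMapperClass(ExampleMapper.class);
 * job.setOutputFormatClass(MultiTableOutputFormat.class);
 * job.setOutputKeyClass(ImmutableBytesWritable.class);
 * job.setOutputValueClass(Mutation.class);
 * job.setNumReduceTasks(0);
 * // Optional: trade durability for speed on a re-runnable bulk load.
 * job.getConfiguration().setBoolean(MultiTableOutputFormat.WAL_PROPERTY,
 *     MultiTableOutputFormat.WAL_OFF);
 *
 * // Mapper: route each mutation to a table via the output key.
 * ImmutableBytesWritable table =
 *     new ImmutableBytesWritable(Bytes.toBytes("tableA"));
 * Put put = new Put(rowKey);
 * put.add(family, qualifier, value);
 * context.write(table, put);
 * </pre>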
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class MultiTableOutputFormat extends OutputFormat<ImmutableBytesWritable, Mutation> {
  /** Set this to {@link #WAL_OFF} to turn off write-ahead logging (HLog). */
  public static final String WAL_PROPERTY = "hbase.mapreduce.multitableoutputformat.wal";
  /** Property value to use write-ahead logging. */
  public static final boolean WAL_ON = true;
  /** Property value to disable write-ahead logging. */
  public static final boolean WAL_OFF = false;

  /**
   * Record writer for outputting to multiple HTables.
   */
  protected static class MultiTableRecordWriter extends
      RecordWriter<ImmutableBytesWritable, Mutation> {
    private static final Log LOG = LogFactory.getLog(MultiTableRecordWriter.class);
    Map<ImmutableBytesWritable, HTable> tables;
    Configuration conf;
    boolean useWriteAheadLogging;

    /**
     * @param conf
     *          the HBaseConfiguration to use
     * @param useWriteAheadLogging
     *          whether to use write-ahead logging. This can be turned off
     *          (<tt>false</tt>) to improve performance when bulk loading data.
     */
    public MultiTableRecordWriter(Configuration conf,
        boolean useWriteAheadLogging) {
      LOG.debug("Created new MultiTableRecordWriter with WAL "
          + (useWriteAheadLogging ? "on" : "off"));
      this.tables = new HashMap<ImmutableBytesWritable, HTable>();
      this.conf = conf;
      this.useWriteAheadLogging = useWriteAheadLogging;
    }

    /**
     * @param tableName
     *          the name of the table, as bytes
     * @return the named table, opened on first use and cached thereafter
     * @throws IOException
     *           if there is a problem opening a table
     */
    HTable getTable(ImmutableBytesWritable tableName) throws IOException {
      if (!tables.containsKey(tableName)) {
        LOG.debug("Opening HTable \"" + Bytes.toString(tableName.get()) + "\" for writing");
        HTable table = new HTable(conf, tableName.get());
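        // Disable auto-flush so mutations are buffered client-side and sent
        // in batches; the second argument clears the write buffer if a flush
        // fails. The buffer is drained in close() via flushCommits().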
        table.setAutoFlush(false, true);
        tables.put(tableName, table);
      }
      return tables.get(tableName);
    }

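    /**
     * Flushes all buffered mutations and releases each open table.
     */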
    @Override
    public void close(TaskAttemptContext context) throws IOException {
      for (HTable table : tables.values()) {
        table.flushCommits();
        // Also release the table's resources; HTable.close() flushes any
        // remaining buffered writes before closing.
        table.close();
      }
    }

    /**
     * Writes an action (Put or Delete) to the specified table.
     *
     * @param tableName
     *          the table being updated.
     * @param action
     *          the update, either a put or a delete.
     * @throws IllegalArgumentException
     *          if the action is not a put or a delete.
     */
    @Override
    public void write(ImmutableBytesWritable tableName, Mutation action) throws IOException {
      HTable table = getTable(tableName);
      // The actions are not immutable, so we defensively copy them.
      if (action instanceof Put) {
        Put put = new Put((Put) action);
        put.setDurability(useWriteAheadLogging ? Durability.SYNC_WAL
            : Durability.SKIP_WAL);
        table.put(put);
      } else if (action instanceof Delete) {
        Delete delete = new Delete((Delete) action);
        table.delete(delete);
      } else {
        throw new IllegalArgumentException("action must be either Delete or Put");
      }
    }
  }

  @Override
  public void checkOutputSpecs(JobContext context) throws IOException,
      InterruptedException {
    // Table names are only known as records arrive at the writer, so there is
    // nothing useful to check here; a nonexistent table will surface as an
    // IOException from the record writer at write time.
  }

  @Override
  public OutputCommitter getOutputCommitter(TaskAttemptContext context)
      throws IOException, InterruptedException {
    return new TableOutputCommitter();
  }

  @Override
  public RecordWriter<ImmutableBytesWritable, Mutation> getRecordWriter(TaskAttemptContext context)
      throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    return new MultiTableRecordWriter(HBaseConfiguration.create(conf),
        conf.getBoolean(WAL_PROPERTY, WAL_ON));
  }

}