View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import java.io.IOException;
22  import java.util.HashMap;
23  import java.util.Map;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.classification.InterfaceStability;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.hbase.HBaseConfiguration;
31  import org.apache.hadoop.hbase.TableName;
32  import org.apache.hadoop.hbase.client.BufferedMutator;
33  import org.apache.hadoop.hbase.client.Connection;
34  import org.apache.hadoop.hbase.client.ConnectionFactory;
35  import org.apache.hadoop.hbase.client.Delete;
36  import org.apache.hadoop.hbase.client.HTable;
37  import org.apache.hadoop.hbase.client.Mutation;
38  import org.apache.hadoop.hbase.client.Put;
39  import org.apache.hadoop.hbase.client.Durability;
40  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
41  import org.apache.hadoop.hbase.util.Bytes;
42  import org.apache.hadoop.mapreduce.JobContext;
43  import org.apache.hadoop.mapreduce.OutputCommitter;
44  import org.apache.hadoop.mapreduce.OutputFormat;
45  import org.apache.hadoop.mapreduce.RecordWriter;
46  import org.apache.hadoop.mapreduce.TaskAttemptContext;
47  
48  /**
49   * <p>
50   * Hadoop output format that writes to one or more HBase tables. The key is
51   * taken to be the table name while the output value <em>must</em> be either a
52   * {@link Put} or a {@link Delete} instance. All tables must already exist, and
53   * all Puts and Deletes must reference only valid column families.
54   * </p>
55   *
56   * <p>
57   * Write-ahead logging (WAL) for Puts can be disabled by setting
58   * {@link #WAL_PROPERTY} to {@link #WAL_OFF}. Default value is {@link #WAL_ON}.
59   * Note that disabling write-ahead logging is only appropriate for jobs where
60   * loss of data due to region server failure can be tolerated (for example,
61   * because it is easy to rerun a bulk import).
62   * </p>
63   */
64  @InterfaceAudience.Public
65  @InterfaceStability.Stable
66  public class MultiTableOutputFormat extends OutputFormat<ImmutableBytesWritable, Mutation> {
67    /** Set this to {@link #WAL_OFF} to turn off write-ahead logging (WAL) */
68    public static final String WAL_PROPERTY = "hbase.mapreduce.multitableoutputformat.wal";
69    /** Property value to use write-ahead logging */
70    public static final boolean WAL_ON = true;
71    /** Property value to disable write-ahead logging */
72    public static final boolean WAL_OFF = false;
73    /**
74     * Record writer for outputting to multiple HTables.
75     */
76    protected static class MultiTableRecordWriter extends
77        RecordWriter<ImmutableBytesWritable, Mutation> {
78      private static final Log LOG = LogFactory.getLog(MultiTableRecordWriter.class);
79      Connection connection;
80      Map<ImmutableBytesWritable, BufferedMutator> mutatorMap = new HashMap<>();
81      Configuration conf;
82      boolean useWriteAheadLogging;
83  
84      /**
85       * @param conf
86       *          HBaseConfiguration to used
87       * @param useWriteAheadLogging
88       *          whether to use write ahead logging. This can be turned off (
89       *          <tt>false</tt>) to improve performance when bulk loading data.
90       */
91      public MultiTableRecordWriter(Configuration conf,
92          boolean useWriteAheadLogging) {
93        LOG.debug("Created new MultiTableRecordReader with WAL "
94            + (useWriteAheadLogging ? "on" : "off"));
95        this.conf = conf;
96        this.useWriteAheadLogging = useWriteAheadLogging;
97      }
98  
99      /**
100      * @param tableName
101      *          the name of the table, as a string
102      * @return the named mutator
103      * @throws IOException
104      *           if there is a problem opening a table
105      */
106     BufferedMutator getBufferedMutator(ImmutableBytesWritable tableName) throws IOException {
107       if(this.connection == null){
108         this.connection = ConnectionFactory.createConnection(conf);
109       }
110       if (!mutatorMap.containsKey(tableName)) {
111         LOG.debug("Opening HTable \"" + Bytes.toString(tableName.get())+ "\" for writing");
112 
113         BufferedMutator mutator =
114             connection.getBufferedMutator(TableName.valueOf(tableName.get()));
115         mutatorMap.put(tableName, mutator);
116       }
117       return mutatorMap.get(tableName);
118     }
119 
120     @Override
121     public void close(TaskAttemptContext context) throws IOException {
122       for (BufferedMutator mutator : mutatorMap.values()) {
123         mutator.close();
124       }
125       if (connection != null) {
126         connection.close();
127       }
128     }
129 
130     /**
131      * Writes an action (Put or Delete) to the specified table.
132      *
133      * @param tableName
134      *          the table being updated.
135      * @param action
136      *          the update, either a put or a delete.
137      * @throws IllegalArgumentException
138      *          if the action is not a put or a delete.
139      */
140     @Override
141     public void write(ImmutableBytesWritable tableName, Mutation action) throws IOException {
142       BufferedMutator mutator = getBufferedMutator(tableName);
143       // The actions are not immutable, so we defensively copy them
144       if (action instanceof Put) {
145         Put put = new Put((Put) action);
146         put.setDurability(useWriteAheadLogging ? Durability.SYNC_WAL
147             : Durability.SKIP_WAL);
148         mutator.mutate(put);
149       } else if (action instanceof Delete) {
150         Delete delete = new Delete((Delete) action);
151         mutator.mutate(delete);
152       } else
153         throw new IllegalArgumentException(
154             "action must be either Delete or Put");
155     }
156   }
157 
158   @Override
159   public void checkOutputSpecs(JobContext context) throws IOException,
160       InterruptedException {
161     // we can't know ahead of time if it's going to blow up when the user
162     // passes a table name that doesn't exist, so nothing useful here.
163   }
164 
165   @Override
166   public OutputCommitter getOutputCommitter(TaskAttemptContext context)
167       throws IOException, InterruptedException {
168     return new TableOutputCommitter();
169   }
170 
171   @Override
172   public RecordWriter<ImmutableBytesWritable, Mutation> getRecordWriter(TaskAttemptContext context)
173       throws IOException, InterruptedException {
174     Configuration conf = context.getConfiguration();
175     return new MultiTableRecordWriter(HBaseConfiguration.create(conf),
176         conf.getBoolean(WAL_PROPERTY, WAL_ON));
177   }
178 
179 }