View Javadoc

1   /**
2   *
3   * Licensed to the Apache Software Foundation (ASF) under one
4   * or more contributor license agreements.  See the NOTICE file
5   * distributed with this work for additional information
6   * regarding copyright ownership.  The ASF licenses this file
7   * to you under the Apache License, Version 2.0 (the
8   * "License"); you may not use this file except in compliance
9   * with the License.  You may obtain a copy of the License at
10  *
11  *     http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import java.io.IOException;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.classification.InterfaceAudience;
26  import org.apache.hadoop.classification.InterfaceStability;
27  import org.apache.hadoop.conf.Configuration;
28  import org.apache.hadoop.conf.Configured;
29  import org.apache.hadoop.fs.Path;
30  import org.apache.hadoop.hbase.HBaseConfiguration;
31  import org.apache.hadoop.hbase.client.Result;
32  import org.apache.hadoop.hbase.client.Scan;
33  import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
34  import org.apache.hadoop.hbase.filter.Filter;
35  import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
36  import org.apache.hadoop.hbase.filter.PrefixFilter;
37  import org.apache.hadoop.hbase.filter.RegexStringComparator;
38  import org.apache.hadoop.hbase.filter.RowFilter;
39  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
40  import org.apache.hadoop.hbase.util.Bytes;
41  import org.apache.hadoop.mapreduce.Job;
42  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
43  import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
44  import org.apache.hadoop.util.GenericOptionsParser;
45  import org.apache.hadoop.util.Tool;
46  import org.apache.hadoop.util.ToolRunner;
47  
48  /**
49  * Export an HBase table.
50  * Writes content to sequence files up in HDFS.  Use {@link Import} to read it
51  * back in again.
52  */
53  @InterfaceAudience.Public
54  @InterfaceStability.Stable
55  public class Export extends Configured implements Tool {
56    private static final Log LOG = LogFactory.getLog(Export.class);
57    final static String NAME = "export";
58    final static String RAW_SCAN = "hbase.mapreduce.include.deleted.rows";
59    final static String EXPORT_BATCHING = "hbase.export.scanner.batch";
60  
61    private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";
62  
63    /**
64     * Sets up the actual job.
65     *
66     * @param conf  The current configuration.
67     * @param args  The command line parameters.
68     * @return The newly created job.
69     * @throws IOException When setting up the job fails.
70     */
71    public static Job createSubmittableJob(Configuration conf, String[] args)
72    throws IOException {
73      String tableName = args[0];
74      Path outputDir = new Path(args[1]);
75      Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
76      job.setJobName(NAME + "_" + tableName);
77      job.setJarByClass(Export.class);
78      // Set optional scan parameters
79      Scan s = getConfiguredScanForJob(conf, args);
80      IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job);
81      // No reducers.  Just write straight to output files.
82      job.setNumReduceTasks(0);
83      job.setOutputFormatClass(SequenceFileOutputFormat.class);
84      job.setOutputKeyClass(ImmutableBytesWritable.class);
85      job.setOutputValueClass(Result.class);
86      FileOutputFormat.setOutputPath(job, outputDir); // job conf doesn't contain the conf so doesn't have a default fs.
87      return job;
88    }
89  
90    private static Scan getConfiguredScanForJob(Configuration conf, String[] args) throws IOException {
91      Scan s = new Scan();
92      // Optional arguments.
93      // Set Scan Versions
94      int versions = args.length > 2? Integer.parseInt(args[2]): 1;
95      s.setMaxVersions(versions);
96      // Set Scan Range
97      long startTime = args.length > 3? Long.parseLong(args[3]): 0L;
98      long endTime = args.length > 4? Long.parseLong(args[4]): Long.MAX_VALUE;
99      s.setTimeRange(startTime, endTime);
100     // Set cache blocks
101     s.setCacheBlocks(false);
102     // set Start and Stop row
103     if (conf.get(TableInputFormat.SCAN_ROW_START) != null) {
104       s.setStartRow(Bytes.toBytes(conf.get(TableInputFormat.SCAN_ROW_START)));
105     }
106     if (conf.get(TableInputFormat.SCAN_ROW_STOP) != null) {
107       s.setStopRow(Bytes.toBytes(conf.get(TableInputFormat.SCAN_ROW_STOP)));
108     }
109     // Set Scan Column Family
110     boolean raw = Boolean.parseBoolean(conf.get(RAW_SCAN));
111     if (raw) {
112       s.setRaw(raw);
113     }
114     
115     if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
116       s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
117     }
118     // Set RowFilter or Prefix Filter if applicable.
119     Filter exportFilter = getExportFilter(args);
120     if (exportFilter!= null) {
121         LOG.info("Setting Scan Filter for Export.");
122       s.setFilter(exportFilter);
123     }
124 
125     int batching = conf.getInt(EXPORT_BATCHING, -1);
126     if (batching !=  -1){
127       try {
128         s.setBatch(batching);
129       } catch (IncompatibleFilterException e) {
130         LOG.error("Batching could not be set", e);
131       }
132     }
133     LOG.info("versions=" + versions + ", starttime=" + startTime +
134       ", endtime=" + endTime + ", keepDeletedCells=" + raw);
135     return s;
136   }
137 
138   private static Filter getExportFilter(String[] args) {
139     Filter exportFilter = null;
140     String filterCriteria = (args.length > 5) ? args[5]: null;
141     if (filterCriteria == null) return null;
142     if (filterCriteria.startsWith("^")) {
143       String regexPattern = filterCriteria.substring(1, filterCriteria.length());
144       exportFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator(regexPattern));
145     } else {
146       exportFilter = new PrefixFilter(Bytes.toBytes(filterCriteria));
147     }
148     return exportFilter;
149   }
150 
151   /*
152    * @param errorMsg Error message.  Can be null.
153    */
154   private static void usage(final String errorMsg) {
155     if (errorMsg != null && errorMsg.length() > 0) {
156       System.err.println("ERROR: " + errorMsg);
157     }
158     System.err.println("Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +
159       "[<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]\n");
160     System.err.println("  Note: -D properties will be applied to the conf used. ");
161     System.err.println("  For example: ");
162     System.err.println("   -D mapreduce.output.fileoutputformat.compress=true");
163     System.err.println("   -D mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.GzipCodec");
164     System.err.println("   -D mapreduce.output.fileoutputformat.compress.type=BLOCK");
165     System.err.println("  Additionally, the following SCAN properties can be specified");
166     System.err.println("  to control/limit what is exported..");
167     System.err.println("   -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
168     System.err.println("   -D " + RAW_SCAN + "=true");
169     System.err.println("   -D " + TableInputFormat.SCAN_ROW_START + "=<ROWSTART>");
170     System.err.println("   -D " + TableInputFormat.SCAN_ROW_STOP + "=<ROWSTOP>");
171     System.err.println("   -D " + JOB_NAME_CONF_KEY
172         + "=jobName - use the specified mapreduce job name for the export");
173     System.err.println("For performance consider the following properties:\n"
174         + "   -Dhbase.client.scanner.caching=100\n"
175         + "   -Dmapreduce.map.speculative=false\n"
176         + "   -Dmapreduce.reduce.speculative=false");
177     System.err.println("For tables with very wide rows consider setting the batch size as below:\n"
178         + "   -D" + EXPORT_BATCHING + "=10");
179   }
180 
181 
182   @Override
183   public int run(String[] args) throws Exception {
184     String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
185     if (otherArgs.length < 2) {
186       usage("Wrong number of arguments: " + otherArgs.length);
187       return -1;
188     }
189     Job job = createSubmittableJob(getConf(), otherArgs);
190     return (job.waitForCompletion(true) ? 0 : 1);
191   }
192 
193   /**
194    * Main entry point.
195    * @param args The command line parameters.
196    * @throws Exception When running the job fails.
197    */
198   public static void main(String[] args) throws Exception {
199     int errCode = ToolRunner.run(HBaseConfiguration.create(), new Export(), args);
200     System.exit(errCode);
201   }
202 }