View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.mapreduce;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.List;
23  
24  import org.apache.hadoop.hbase.classification.InterfaceAudience;
25  import org.apache.hadoop.hbase.classification.InterfaceStability;
26  import org.apache.hadoop.conf.Configurable;
27  import org.apache.hadoop.conf.Configuration;
28  import org.apache.hadoop.hbase.client.Scan;
29  
30  /**
31   * Convert HBase tabular data from multiple scanners into a format that 
32   * is consumable by Map/Reduce.
33   *
34   * <p>
35   * Usage example
36   * </p>
37   *
38   * <pre>
39   * List<Scan> scans = new ArrayList<Scan>();
40   * 
41   * Scan scan1 = new Scan();
42   * scan1.setStartRow(firstRow1);
43   * scan1.setStopRow(lastRow1);
44   * scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, table1);
45   * scans.add(scan1);
46   *
47   * Scan scan2 = new Scan();
48   * scan2.setStartRow(firstRow2);
49   * scan2.setStopRow(lastRow2);
50   * scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, table2);
51   * scans.add(scan2);
52   *
53   * TableMapReduceUtil.initTableMapperJob(scans, TableMapper.class, Text.class,
54   *     IntWritable.class, job);
55   * </pre>
56   */
57  @InterfaceAudience.Public
58  @InterfaceStability.Evolving
59  public class MultiTableInputFormat extends MultiTableInputFormatBase implements
60      Configurable {
61  
62    /** Job parameter that specifies the scan list. */
63    public static final String SCANS = "hbase.mapreduce.scans";
64  
65    /** The configuration. */
66    private Configuration conf = null;
67  
68    /**
69     * Returns the current configuration.
70     *
71     * @return The current configuration.
72     * @see org.apache.hadoop.conf.Configurable#getConf()
73     */
74    @Override
75    public Configuration getConf() {
76      return conf;
77    }
78  
79    /**
80     * Sets the configuration. This is used to set the details for the tables to
81     *  be scanned.
82     *
83     * @param configuration The configuration to set.
84     * @see org.apache.hadoop.conf.Configurable#setConf(
85     *        org.apache.hadoop.conf.Configuration)
86     */
87    @Override
88    public void setConf(Configuration configuration) {
89      this.conf = configuration;
90      String[] rawScans = conf.getStrings(SCANS);
91      if (rawScans.length <= 0) {
92        throw new IllegalArgumentException("There must be at least 1 scan configuration set to : "
93            + SCANS);
94      }
95      List<Scan> scans = new ArrayList<Scan>();
96  
97      for (int i = 0; i < rawScans.length; i++) {
98        try {
99          scans.add(TableMapReduceUtil.convertStringToScan(rawScans[i]));
100       } catch (IOException e) {
101         throw new RuntimeException("Failed to convert Scan : " + rawScans[i] + " to string", e);
102       }
103     }
104     this.setScans(scans);
105   }
106 }