001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.mapreduce;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023
024import org.apache.yetus.audience.InterfaceAudience;
025import org.apache.hadoop.conf.Configurable;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.hbase.client.Scan;
028
029/**
030 * Convert HBase tabular data from multiple scanners into a format that
031 * is consumable by Map/Reduce.
032 *
033 * <p>
034 * Usage example
035 * </p>
036 *
037 * <pre>
038 * List&lt;Scan&gt; scans = new ArrayList&lt;Scan&gt;();
039 *
040 * Scan scan1 = new Scan();
041 * scan1.setStartRow(firstRow1);
042 * scan1.setStopRow(lastRow1);
043 * scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, table1);
044 * scans.add(scan1);
045 *
046 * Scan scan2 = new Scan();
047 * scan2.setStartRow(firstRow2);
048 * scan2.setStopRow(lastRow2);
049 * scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, table2);
050 * scans.add(scan2);
051 *
052 * TableMapReduceUtil.initTableMapperJob(scans, TableMapper.class, Text.class,
053 *     IntWritable.class, job);
054 * </pre>
055 */
056@InterfaceAudience.Public
057public class MultiTableInputFormat extends MultiTableInputFormatBase implements
058    Configurable {
059
060  /** Job parameter that specifies the scan list. */
061  public static final String SCANS = "hbase.mapreduce.scans";
062
063  /** The configuration. */
064  private Configuration conf = null;
065
066  /**
067   * Returns the current configuration.
068   *
069   * @return The current configuration.
070   * @see org.apache.hadoop.conf.Configurable#getConf()
071   */
072  @Override
073  public Configuration getConf() {
074    return conf;
075  }
076
077  /**
078   * Sets the configuration. This is used to set the details for the tables to
079   *  be scanned.
080   *
081   * @param configuration The configuration to set.
082   * @see org.apache.hadoop.conf.Configurable#setConf(
083   *        org.apache.hadoop.conf.Configuration)
084   */
085  @Override
086  public void setConf(Configuration configuration) {
087    this.conf = configuration;
088    String[] rawScans = conf.getStrings(SCANS);
089    if (rawScans.length <= 0) {
090      throw new IllegalArgumentException("There must be at least 1 scan configuration set to : "
091          + SCANS);
092    }
093    List<Scan> scans = new ArrayList<>();
094
095    for (int i = 0; i < rawScans.length; i++) {
096      try {
097        scans.add(TableMapReduceUtil.convertStringToScan(rawScans[i]));
098      } catch (IOException e) {
099        throw new RuntimeException("Failed to convert Scan : " + rawScans[i] + " to string", e);
100      }
101    }
102    this.setScans(scans);
103  }
104}