/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.mapreduce;

import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.IntegrationTestBase;
import org.apache.hadoop.hbase.IntegrationTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.util.ToolRunner;
import org.junit.After;
import org.junit.Before;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * An integration test of {@link TableSnapshotInputFormat}, which enables reading directly from
 * snapshot files without going through the HBase servers.
 *
 * This test creates a table and loads it with rows ranging from 'aaa' to 'zzz'; for each row, it
 * sets the columns f1:(null) and f2:(null) to the same value as the row key.
 * <pre>
 * aaa, f1: => aaa
 * aaa, f2: => aaa
 * aab, f1: => aab
 * ....
 * zzz, f2: => zzz
 * </pre>
 *
 * The test then creates a snapshot of the table and overwrites the values in the original table
 * with the value 'after_snapshot_value'. It then runs a mapreduce job over the snapshot with a
 * scan start row of 'bbb' and stop row of 'yyy'. The data is saved in a single reduce output file
 * and inspected afterwards to verify that the MR job has seen all the values from the snapshot.
 *
 * <p>These parameters can be used to configure the job:
 * <br>"IntegrationTestTableSnapshotInputFormat.table" => the name of the table
 * <br>"IntegrationTestTableSnapshotInputFormat.snapshot" => the name of the snapshot
 * <br>"IntegrationTestTableSnapshotInputFormat.numRegions" => number of regions in the table
 * to be created (default: 32)
 * <br>"IntegrationTestTableSnapshotInputFormat.tableDir" => temporary directory into which to
 * restore the snapshot files
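 *
 * <p>For illustration, a run might be launched as sketched below; the -D keys are the
 * configuration constants defined in this class, while the table name and region count shown are
 * placeholder values:
 * <pre>
 * hbase org.apache.hadoop.hbase.mapreduce.IntegrationTestTableSnapshotInputFormat \
 *     -DIntegrationTestTableSnapshotInputFormat.table=exampleTable \
 *     -DIntegrationTestTableSnapshotInputFormat.numRegions=32
 * </pre>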
 */
@Category(IntegrationTests.class)
// Not runnable as a unit test. See TestTableSnapshotInputFormat.
public class IntegrationTestTableSnapshotInputFormat extends IntegrationTestBase {
  private static final Logger LOG =
      LoggerFactory.getLogger(IntegrationTestTableSnapshotInputFormat.class);

  private static final String TABLE_NAME_KEY = "IntegrationTestTableSnapshotInputFormat.table";
  private static final String DEFAULT_TABLE_NAME = "IntegrationTestTableSnapshotInputFormat";

  private static final String SNAPSHOT_NAME_KEY =
      "IntegrationTestTableSnapshotInputFormat.snapshot";
  private static final String NUM_REGIONS_KEY =
      "IntegrationTestTableSnapshotInputFormat.numRegions";

  private static final String MR_IMPLEMENTATION_KEY =
      "IntegrationTestTableSnapshotInputFormat.API";
  private static final String MAPRED_IMPLEMENTATION = "mapred";
  private static final String MAPREDUCE_IMPLEMENTATION = "mapreduce";

  private static final int DEFAULT_NUM_REGIONS = 32;
  private static final String TABLE_DIR_KEY = "IntegrationTestTableSnapshotInputFormat.tableDir";

  private static final byte[] START_ROW = Bytes.toBytes("bbb");
  private static final byte[] END_ROW = Bytes.toBytes("yyy");

  // The mapred API is missing feature parity with the mapreduce API. See the comments in
  // mapred.TestTableSnapshotInputFormat.
  private static final byte[] MAPRED_START_ROW = Bytes.toBytes("aaa");
  private static final byte[] MAPRED_END_ROW = Bytes.toBytes("zz{"); // 'z' + 1 => '{'

  private IntegrationTestingUtility util;

  @Override
  public void setConf(Configuration conf) {
    super.setConf(conf);
    util = getTestingUtil(conf);
  }

  @Override
  @Before
  public void setUp() throws Exception {
    super.setUp();
    util = getTestingUtil(getConf());
    util.initializeCluster(1);
    this.setConf(util.getConfiguration());
  }

  @Override
  @After
  public void cleanUp() throws Exception {
    util.restoreCluster();
  }

  @Override
  public void setUpCluster() throws Exception {
    // No-op; the cluster is initialized in setUp() instead.
  }

  @Override
  public int runTestFromCommandLine() throws Exception {
    Configuration conf = getConf();
    TableName tableName = TableName.valueOf(conf.get(TABLE_NAME_KEY, DEFAULT_TABLE_NAME));
    String snapshotName = conf.get(SNAPSHOT_NAME_KEY, tableName.getQualifierAsString()
        + "_snapshot_" + System.currentTimeMillis());
    int numRegions = conf.getInt(NUM_REGIONS_KEY, DEFAULT_NUM_REGIONS);
    String tableDirStr = conf.get(TABLE_DIR_KEY);
    Path tableDir;
    if (tableDirStr == null) {
      tableDir = util.getDataTestDirOnTestFS(tableName.getQualifierAsString());
    } else {
      tableDir = new Path(tableDirStr);
    }

    final String mr = conf.get(MR_IMPLEMENTATION_KEY, MAPREDUCE_IMPLEMENTATION);
    if (mr.equalsIgnoreCase(MAPREDUCE_IMPLEMENTATION)) {
      /*
       * We create the table using HBaseAdmin#createTable(), which creates it with the desired
       * number of regions. We pass 'bbb' as the startKey and 'yyy' as the endKey, so if the
       * desired number of regions is > 2, we get an empty region before 'bbb', an empty region
       * after 'yyy', and numRegions - 2 regions between 'bbb' and 'yyy'. The test uses a Scan
       * with startRow 'bbb' and endRow 'yyy', so we expect the first and last regions to be
       * filtered out by the input format, leaving numRegions - 2 splits between 'bbb' and 'yyy'.
       */
      LOG.debug("Running job with mapreduce API.");
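      // For example, with the default of 32 regions the two boundary regions are excluded,
      // so 32 - 2 = 30 splits are expected; with 2 or fewer regions nothing is excluded and
      // the split count equals the region count.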
      int expectedNumSplits = numRegions > 2 ? numRegions - 2 : numRegions;

      org.apache.hadoop.hbase.mapreduce.TestTableSnapshotInputFormat.doTestWithMapReduce(util,
          tableName, snapshotName, START_ROW, END_ROW, tableDir, numRegions, 1,
          expectedNumSplits, false);
    } else if (mr.equalsIgnoreCase(MAPRED_IMPLEMENTATION)) {
      /*
       * Similar considerations to the above apply. The difference is that the mapred API does
       * not support specifying start/end rows (or a Scan object at all), so the first and last
       * regions are not omitted. See the comments in mapred.TestTableSnapshotInputFormat for
       * details of how that test works around the problem. This feature should be added in
       * follow-on work.
       */
      LOG.debug("Running job with mapred API.");
      int expectedNumSplits = numRegions;

      org.apache.hadoop.hbase.mapred.TestTableSnapshotInputFormat.doTestWithMapReduce(util,
          tableName, snapshotName, MAPRED_START_ROW, MAPRED_END_ROW, tableDir, numRegions, 1,
          expectedNumSplits, false);
    } else {
      throw new IllegalArgumentException("Unrecognized mapreduce implementation: " + mr + ".");
    }

    return 0;
  }

  @Override // Chaos Monkey is not intended to be run with this test
  public TableName getTablename() {
    return null;
  }

  @Override // Chaos Monkey is not intended to be run with this test
  protected Set<String> getColumnFamilies() {
    return null;
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    IntegrationTestingUtility.setUseDistributedCluster(conf);
    int ret = ToolRunner.run(conf, new IntegrationTestTableSnapshotInputFormat(), args);
    System.exit(ret);
  }

}