/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

/**
 * Dummy mapper used for unit tests to verify that the mapper can be injected. This approach would
 * be used if a custom transformation needed to be done after reading the input data before writing
 * it to HFiles.
 */
public class TsvImporterCustomTestMapper extends TsvImporterMapper {
  /** Column family that every emitted cell is written to. */
  private static final byte[] FAMILY = Bytes.toBytes("FAM");

  /**
   * Column qualifiers, in input order: the first value token goes to "A", the second to "B". A
   * line with more than two value tokens is not supported and fails with
   * {@link ArrayIndexOutOfBoundsException}.
   */
  private static final byte[][] QUALIFIERS = { Bytes.toBytes("A"), Bytes.toBytes("B") };

  /** Field separator used by the test input: the ESC control character (U+001B). */
  private static final String SEPARATOR = "\u001b";

  /** Textual prefix each value token is expected to start with, e.g. {@code VALUE1}. */
  private static final String VALUE_PREFIX = "VALUE";

  /** Factor applied to the integer part of every value token. */
  private static final int MULTIPLIER = 3;

  /**
   * Initializes the mapper by delegating to {@code doSetup}.
   * @param context the task context handed in by the MapReduce framework
   */
  @Override
  protected void setup(Context context) {
    doSetup(context);
  }

  /**
   * Convert a line of TSV text into an HBase table row after transforming the values by multiplying
   * them by 3.
   * <p>
   * The line is split on the ESC character. The first token becomes the row key; every following
   * token must look like {@code VALUE<integer>} and is rewritten to {@code VALUE<integer * 3>}
   * before being stored under family "FAM" in qualifier "A", then "B".
   * @param offset  position of the line in the input (unused)
   * @param value   the raw input line
   * @param context the task context the resulting {@link Put} is written to
   * @throws IOException if the write fails, or if the thread is interrupted while writing (the
   *                     interrupt flag is restored before the exception is thrown)
   */
  @Override
  public void map(LongWritable offset, Text value, Context context) throws IOException {
    // Text#getBytes() exposes the backing array, which may be longer than the current line because
    // the framework reuses Text instances. Only the first getLength() bytes are valid, so decode
    // exactly that range (Bytes.toString decodes as UTF-8).
    String line = Bytes.toString(value.getBytes(), 0, value.getLength());
    String[] valueTokens = line.split(SEPARATOR);

    // create the rowKey and Put
    ImmutableBytesWritable rowKey = new ImmutableBytesWritable(Bytes.toBytes(valueTokens[0]));
    Put put = new Put(rowKey.copyBytes());
    put.setDurability(Durability.SKIP_WAL);

    // The value should look like this: VALUE1 or VALUE2. Let's multiply
    // the integer by 3
    for (int i = 1; i < valueTokens.length; i++) {
      String token = valueTokens[i];
      String prefix = token.substring(0, VALUE_PREFIX.length());
      String suffix = token.substring(VALUE_PREFIX.length());
      String newValue = prefix + Integer.parseInt(suffix) * MULTIPLIER;

      KeyValue kv =
        new KeyValue(rowKey.copyBytes(), FAMILY, QUALIFIERS[i - 1], Bytes.toBytes(newValue));
      put.add(kv);
    }

    try {
      context.write(rowKey, put);
    } catch (InterruptedException e) {
      // Do not swallow the interrupt: restore the flag for the caller and fail the record
      // instead of silently dropping it.
      Thread.currentThread().interrupt();
      throw new IOException("Interrupted while writing row " + valueTokens[0], e);
    }
  }
}