/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.util.ArrayList;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser;
import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.ParsedLine;
import org.apache.hadoop.hbase.testclassification.MapReduceTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
import org.apache.hbase.thirdparty.com.google.common.base.Splitter;
import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;

/**
 * Tests for {@link TsvParser}.
 */
@Category({MapReduceTests.class, SmallTests.class})
public class TestImportTsvParser {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestImportTsvParser.class);

  /**
   * Asserts that two byte arrays are equal by comparing their
   * {@link Bytes#toStringBinary(byte[])} renderings, so a failure message shows readable text.
   *
   * @param a the expected bytes
   * @param b the actual bytes
   */
  private void assertBytesEquals(byte[] a, byte[] b) {
    assertEquals(Bytes.toStringBinary(a), Bytes.toStringBinary(b));
  }

  /**
   * Extracts every column of {@code parsed} as a string (using the column offsets and lengths
   * reported by the parsed line) and fails the test if the resulting sequence differs from
   * {@code expected}.
   *
   * @param parsed   the parsed line under test
   * @param expected the column values expected, in order
   */
  private void checkParsing(ParsedLine parsed, Iterable<String> expected) {
    ArrayList<String> parsedCols = new ArrayList<>();
    for (int i = 0; i < parsed.getColumnCount(); i++) {
      parsedCols.add(Bytes.toString(parsed.getLineBytes(), parsed.getColumnOffset(i),
        parsed.getColumnLength(i)));
    }
    if (!Iterables.elementsEqual(parsedCols, expected)) {
      fail("Expected: " + Joiner.on(",").join(expected) + "\n" + "Got:"
        + Joiner.on(",").join(parsedCols));
    }
  }

  /**
   * Checks how column specifications are interpreted: family/qualifier per column index, and the
   * indexes reported for the row key, timestamp and attributes special columns.
   */
  @Test
  public void testTsvParserSpecParsing() {
    TsvParser parser;

    // Row key only.
    parser = new TsvParser("HBASE_ROW_KEY", "\t");
    assertNull(parser.getFamily(0));
    assertNull(parser.getQualifier(0));
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertFalse(parser.hasTimestamp());

    // Row key plus one family:qualifier column.
    parser = new TsvParser("HBASE_ROW_KEY,col1:scol1", "\t");
    assertNull(parser.getFamily(0));
    assertNull(parser.getQualifier(0));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
    assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertFalse(parser.hasTimestamp());

    // Row key plus two columns in the same family.
    parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,col1:scol2", "\t");
    assertNull(parser.getFamily(0));
    assertNull(parser.getQualifier(0));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
    assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(2));
    assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(2));
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertFalse(parser.hasTimestamp());

    // Timestamp column placed between two data columns.
    parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2", "\t");
    assertNull(parser.getFamily(0));
    assertNull(parser.getQualifier(0));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
    assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
    assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertTrue(parser.hasTimestamp());
    assertEquals(2, parser.getTimestampKeyColumnIndex());

    // Timestamp and attributes columns, row key first.
    parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2,HBASE_ATTRIBUTES_KEY",
      "\t");
    assertNull(parser.getFamily(0));
    assertNull(parser.getQualifier(0));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
    assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
    assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertTrue(parser.hasTimestamp());
    assertEquals(2, parser.getTimestampKeyColumnIndex());
    assertEquals(4, parser.getAttributesKeyColumnIndex());

    // Same columns with attributes first and row key last.
    parser = new TsvParser("HBASE_ATTRIBUTES_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2,HBASE_ROW_KEY",
      "\t");
    assertNull(parser.getFamily(0));
    assertNull(parser.getQualifier(0));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
    assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
    assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
    assertEquals(4, parser.getRowKeyColumnIndex());
    assertTrue(parser.hasTimestamp());
    assertEquals(2, parser.getTimestampKeyColumnIndex());
    assertEquals(0, parser.getAttributesKeyColumnIndex());
  }

  /**
   * Parses a well-formed four-column line with the row key in the third position and no
   * timestamp column, then verifies the column boundaries.
   */
  @Test
  public void testTsvParser() throws BadTsvLineException {
    TsvParser parser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t");
    assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(0));
    assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(0));
    assertBytesEquals(Bytes.toBytes("col_b"), parser.getFamily(1));
    assertBytesEquals(Bytes.toBytes("qual"), parser.getQualifier(1));
    assertNull(parser.getFamily(2));
    assertNull(parser.getQualifier(2));
    assertEquals(2, parser.getRowKeyColumnIndex());

    assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX, parser.getTimestampKeyColumnIndex());

    byte[] line = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
    ParsedLine parsed = parser.parse(line, line.length);
    checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
  }

  /**
   * Parses a line whose second column is a numeric timestamp and verifies the value returned by
   * {@code getTimestamp} as well as the column boundaries.
   */
  @Test
  public void testTsvParserWithTimestamp() throws BadTsvLineException {
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
    assertNull(parser.getFamily(0));
    assertNull(parser.getQualifier(0));
    assertNull(parser.getFamily(1));
    assertNull(parser.getQualifier(1));
    assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(2));
    assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(2));
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertEquals(1, parser.getTimestampKeyColumnIndex());

    byte[] line = Bytes.toBytes("rowkey\t1234\tval_a");
    ParsedLine parsed = parser.parse(line, line.length);
    assertEquals(1234L, parsed.getTimestamp(-1));
    checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
  }

  /**
   * Test cases that throw BadTsvLineException
   */
  @Test(expected = BadTsvLineException.class)
  public void testTsvParserBadTsvLineExcessiveColumns() throws BadTsvLineException {
    // Three values supplied for a two-column spec.
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
    byte[] line = Bytes.toBytes("val_a\tval_b\tval_c");
    parser.parse(line, line.length);
  }

  /**
   * An empty line must be rejected with {@link BadTsvLineException}.
   */
  @Test(expected = BadTsvLineException.class)
  public void testTsvParserBadTsvLineZeroColumn() throws BadTsvLineException {
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
    byte[] line = Bytes.toBytes("");
    parser.parse(line, line.length);
  }

  /**
   * A line containing a single value for a two-column spec must be rejected with
   * {@link BadTsvLineException}.
   */
  @Test(expected = BadTsvLineException.class)
  public void testTsvParserBadTsvLineOnlyKey() throws BadTsvLineException {
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
    byte[] line = Bytes.toBytes("key_only");
    parser.parse(line, line.length);
  }

  /**
   * A line that has a value only for the first column, when the row key is the second column,
   * must be rejected with {@link BadTsvLineException}.
   */
  @Test(expected = BadTsvLineException.class)
  public void testTsvParserBadTsvLineNoRowKey() throws BadTsvLineException {
    TsvParser parser = new TsvParser("col_a,HBASE_ROW_KEY", "\t");
    byte[] line = Bytes.toBytes("only_cola_data_and_no_row_key");
    parser.parse(line, line.length);
  }

  /**
   * A non-numeric value in the timestamp column must result in {@link BadTsvLineException}.
   */
  @Test(expected = BadTsvLineException.class)
  public void testTsvParserInvalidTimestamp() throws BadTsvLineException {
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
    assertEquals(1, parser.getTimestampKeyColumnIndex());
    byte[] line = Bytes.toBytes("rowkey\ttimestamp\tval_a");
    ParsedLine parsed = parser.parse(line, line.length);
    // NOTE(review): the expected exception is presumably raised by getTimestamp() when it reads
    // the non-numeric value -- confirm against TsvParser. If so, this assertion never completes
    // and the checkParsing call below is unreachable.
    assertEquals(-1, parsed.getTimestamp(-1));
    checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
  }

  /**
   * A line that ends before the configured timestamp column must be rejected with
   * {@link BadTsvLineException}.
   */
  @Test(expected = BadTsvLineException.class)
  public void testTsvParserNoTimestampValue() throws BadTsvLineException {
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
    assertEquals(2, parser.getTimestampKeyColumnIndex());
    byte[] line = Bytes.toBytes("rowkey\tval_a");
    parser.parse(line, line.length);
  }

  /**
   * Verifies the (offset, length) pair returned by {@code parseRowKey} with the row key in the
   * first, middle and last column, and that an empty or missing row key is rejected.
   */
  @Test
  public void testTsvParserParseRowKey() throws BadTsvLineException {
    // Row key in the first column: "rowkey" starts at byte 0 and is 6 bytes long.
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
    assertEquals(0, parser.getRowKeyColumnIndex());
    byte[] line = Bytes.toBytes("rowkey\tval_a\t1234");
    Pair<Integer, Integer> rowKeyOffsets = parser.parseRowKey(line, line.length);
    assertEquals(0, rowKeyOffsets.getFirst().intValue());
    assertEquals(6, rowKeyOffsets.getSecond().intValue());
    try {
      line = Bytes.toBytes("\t\tval_a\t1234");
      parser.parseRowKey(line, line.length);
      fail("Should get BadTsvLineException on empty rowkey.");
    } catch (BadTsvLineException expected) {
      // Expected: the row key column is empty.
    }

    // Row key in the middle column: it follows "val_a\t", so it starts at byte 6.
    parser = new TsvParser("col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t");
    assertEquals(1, parser.getRowKeyColumnIndex());
    line = Bytes.toBytes("val_a\trowkey\t1234");
    rowKeyOffsets = parser.parseRowKey(line, line.length);
    assertEquals(6, rowKeyOffsets.getFirst().intValue());
    assertEquals(6, rowKeyOffsets.getSecond().intValue());
    try {
      line = Bytes.toBytes("val_a");
      parser.parseRowKey(line, line.length);
      fail("Should get BadTsvLineException when number of columns less than rowkey position.");
    } catch (BadTsvLineException expected) {
      // Expected: the line ends before the row key column.
    }

    // Row key in the last column: it follows "val_a\t1234\t", so it starts at byte 11.
    parser = new TsvParser("col_a,HBASE_TS_KEY,HBASE_ROW_KEY", "\t");
    assertEquals(2, parser.getRowKeyColumnIndex());
    line = Bytes.toBytes("val_a\t1234\trowkey");
    rowKeyOffsets = parser.parseRowKey(line, line.length);
    assertEquals(11, rowKeyOffsets.getFirst().intValue());
    assertEquals(6, rowKeyOffsets.getSecond().intValue());
  }

  /**
   * Verifies the attributes column: its index in the spec, its byte offset in a parsed line, the
   * individual attributes extracted from it, and that lines too short for the spec are rejected.
   */
  @Test
  public void testTsvParseAttributesKey() throws BadTsvLineException {
    // Attributes in the last column: they follow "rowkey\tval_a\t1234\t", i.e. start at byte 18.
    TsvParser parser =
      new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY", "\t");
    assertEquals(0, parser.getRowKeyColumnIndex());
    byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value");
    ParsedLine parse = parser.parse(line, line.length);
    assertEquals(18, parse.getAttributeKeyOffset());
    assertEquals(3, parser.getAttributesKeyColumnIndex());
    String[] attributes = parse.getIndividualAttributes();
    assertEquals("key=>value", attributes[0]);
    try {
      line = Bytes.toBytes("rowkey\tval_a\t1234");
      parser.parse(line, line.length);
      fail("Should get BadTsvLineException when the attributes column is missing.");
    } catch (BadTsvLineException expected) {
      // Expected: the line has no value for the attributes column.
    }

    // Attributes in the first column.
    parser = new TsvParser("HBASE_ATTRIBUTES_KEY,col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t");
    assertEquals(2, parser.getRowKeyColumnIndex());
    line = Bytes.toBytes("key=>value\tval_a\trowkey\t1234");
    parse = parser.parse(line, line.length);
    assertEquals(0, parse.getAttributeKeyOffset());
    assertEquals(0, parser.getAttributesKeyColumnIndex());
    attributes = parse.getIndividualAttributes();
    assertEquals("key=>value", attributes[0]);
    try {
      line = Bytes.toBytes("val_a");
      parser.parse(line, line.length);
      fail("Should get BadTsvLineException when number of columns less than rowkey position.");
    } catch (BadTsvLineException expected) {
      // Expected: the line ends before the row key column.
    }

    // Several comma-separated attributes in the second column (starting at byte 6).
    parser = new TsvParser("col_a,HBASE_ATTRIBUTES_KEY,HBASE_TS_KEY,HBASE_ROW_KEY", "\t");
    assertEquals(3, parser.getRowKeyColumnIndex());
    line = Bytes.toBytes("val_a\tkey0=>value0,key1=>value1,key2=>value2\t1234\trowkey");
    parse = parser.parse(line, line.length);
    assertEquals(1, parser.getAttributesKeyColumnIndex());
    assertEquals(6, parse.getAttributeKeyOffset());
    String[] attr = parse.getIndividualAttributes();
    // Guard against the loop below passing vacuously when fewer attributes are returned.
    assertEquals(3, attr.length);
    for (int i = 0; i < attr.length; i++) {
      assertEquals("key" + i + "=>" + "value" + i, attr[i]);
    }
  }

  /**
   * Verifies that a spec with a cell visibility column reports its index and that a parsed line
   * reports the byte offsets of both the attributes and the cell visibility columns.
   */
  @Test
  public void testTsvParserWithCellVisibilityCol() throws BadTsvLineException {
    TsvParser parser = new TsvParser(
      "HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY,HBASE_CELL_VISIBILITY", "\t");
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertEquals(4, parser.getCellVisibilityColumnIndex());
    byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value\tPRIVATE&SECRET");
    ParsedLine parse = parser.parse(line, line.length);
    // "key=>value" starts at byte 18; the visibility expression follows it and a tab, at byte 29.
    assertEquals(18, parse.getAttributeKeyOffset());
    assertEquals(3, parser.getAttributesKeyColumnIndex());
    String[] attributes = parse.getIndividualAttributes();
    assertEquals("key=>value", attributes[0]);
    assertEquals(29, parse.getCellVisibilityColumnOffset());
  }

}