/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.util.ArrayList;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser;
import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.ParsedLine;
import org.apache.hadoop.hbase.testclassification.MapReduceTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
import org.apache.hbase.thirdparty.com.google.common.base.Splitter;
import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;

/**
 * Tests for {@link TsvParser}.
 * <p>
 * Covers three areas: interpretation of the column specification string passed to the
 * constructor, splitting of tab-separated input lines into columns, and rejection of malformed
 * lines with {@link BadTsvLineException}.
 */
@Category({ MapReduceTests.class, SmallTests.class })
public class TestImportTsvParser {
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestImportTsvParser.class);

  /**
   * Asserts that two byte arrays have the same content. The arrays are compared through their
   * {@link Bytes#toStringBinary(byte[])} rendering so that a failure message is human readable.
   * @param expected the expected bytes
   * @param actual   the bytes produced by the code under test
   */
  private void assertBytesEquals(byte[] expected, byte[] actual) {
    assertEquals(Bytes.toStringBinary(expected), Bytes.toStringBinary(actual));
  }

  /**
   * Rebuilds every column of {@code parsed} as a string, using the offsets and lengths it reports
   * into its own line buffer, and fails the test if the sequence differs from {@code expected}.
   * @param parsed   the parser output to inspect
   * @param expected the column values, in order, that the line is expected to contain
   */
  private void checkParsing(ParsedLine parsed, Iterable<String> expected) {
    ArrayList<String> parsedCols = new ArrayList<>();
    for (int i = 0; i < parsed.getColumnCount(); i++) {
      parsedCols.add(Bytes.toString(parsed.getLineBytes(), parsed.getColumnOffset(i),
        parsed.getColumnLength(i)));
    }
    if (!Iterables.elementsEqual(parsedCols, expected)) {
      fail("Expected: " + Joiner.on(",").join(expected) + "\n" + "Got:"
        + Joiner.on(",").join(parsedCols));
    }
  }

  /**
   * Verifies how the column specification is interpreted: special keys (row key, timestamp,
   * attributes) report no family/qualifier and are exposed through their dedicated index
   * accessors, while {@code family:qualifier} entries are split into family and qualifier.
   */
  @Test
  public void testTsvParserSpecParsing() {
    TsvParser parser;

    // Row key only.
    parser = new TsvParser("HBASE_ROW_KEY", "\t");
    assertNull(parser.getFamily(0));
    assertNull(parser.getQualifier(0));
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertFalse(parser.hasTimestamp());

    // Row key plus one data column.
    parser = new TsvParser("HBASE_ROW_KEY,col1:scol1", "\t");
    assertNull(parser.getFamily(0));
    assertNull(parser.getQualifier(0));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
    assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertFalse(parser.hasTimestamp());

    // Row key plus two data columns in the same family.
    parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,col1:scol2", "\t");
    assertNull(parser.getFamily(0));
    assertNull(parser.getQualifier(0));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
    assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(2));
    assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(2));
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertFalse(parser.hasTimestamp());

    // Timestamp key placed between two data columns.
    parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2", "\t");
    assertNull(parser.getFamily(0));
    assertNull(parser.getQualifier(0));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
    assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
    assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertTrue(parser.hasTimestamp());
    assertEquals(2, parser.getTimestampKeyColumnIndex());

    // Timestamp key and a trailing attributes key.
    parser =
      new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2,HBASE_ATTRIBUTES_KEY", "\t");
    assertNull(parser.getFamily(0));
    assertNull(parser.getQualifier(0));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
    assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
    assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertTrue(parser.hasTimestamp());
    assertEquals(2, parser.getTimestampKeyColumnIndex());
    assertEquals(4, parser.getAttributesKeyColumnIndex());

    // Same columns with the attributes key first and the row key last.
    parser =
      new TsvParser("HBASE_ATTRIBUTES_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2,HBASE_ROW_KEY", "\t");
    assertNull(parser.getFamily(0));
    assertNull(parser.getQualifier(0));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1));
    assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
    assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3));
    assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
    assertEquals(4, parser.getRowKeyColumnIndex());
    assertTrue(parser.hasTimestamp());
    assertEquals(2, parser.getTimestampKeyColumnIndex());
    assertEquals(0, parser.getAttributesKeyColumnIndex());
  }

  /**
   * Parses a well-formed line against a specification without a timestamp key. A column given
   * without a qualifier is expected to report an empty qualifier, and the timestamp index is
   * expected to be {@link TsvParser#DEFAULT_TIMESTAMP_COLUMN_INDEX}.
   */
  @Test
  public void testTsvParser() throws BadTsvLineException {
    TsvParser parser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t");
    assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(0));
    assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(0));
    assertBytesEquals(Bytes.toBytes("col_b"), parser.getFamily(1));
    assertBytesEquals(Bytes.toBytes("qual"), parser.getQualifier(1));
    assertNull(parser.getFamily(2));
    assertNull(parser.getQualifier(2));
    assertEquals(2, parser.getRowKeyColumnIndex());

    assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX, parser.getTimestampKeyColumnIndex());

    byte[] line = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
    ParsedLine parsed = parser.parse(line, line.length);
    checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
  }

  /**
   * Parses a well-formed line whose second column is the timestamp and checks that the numeric
   * value is returned instead of the supplied default.
   */
  @Test
  public void testTsvParserWithTimestamp() throws BadTsvLineException {
    // The specification deliberately ends with a trailing comma.
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
    assertNull(parser.getFamily(0));
    assertNull(parser.getQualifier(0));
    assertNull(parser.getFamily(1));
    assertNull(parser.getQualifier(1));
    assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(2));
    assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(2));
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertEquals(1, parser.getTimestampKeyColumnIndex());

    byte[] line = Bytes.toBytes("rowkey\t1234\tval_a");
    ParsedLine parsed = parser.parse(line, line.length);
    assertEquals(1234L, parsed.getTimestamp(-1));
    checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
  }

  /**
   * A line with more columns than the specification must be rejected with
   * {@link BadTsvLineException}.
   */
  @Test(expected = BadTsvLineException.class)
  public void testTsvParserBadTsvLineExcessiveColumns() throws BadTsvLineException {
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
    byte[] line = Bytes.toBytes("val_a\tval_b\tval_c");
    parser.parse(line, line.length);
  }

  /**
   * An empty line must be rejected with {@link BadTsvLineException}.
   */
  @Test(expected = BadTsvLineException.class)
  public void testTsvParserBadTsvLineZeroColumn() throws BadTsvLineException {
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
    byte[] line = Bytes.toBytes("");
    parser.parse(line, line.length);
  }

  /**
   * A line that contains only the row key and no data column must be rejected with
   * {@link BadTsvLineException}.
   */
  @Test(expected = BadTsvLineException.class)
  public void testTsvParserBadTsvLineOnlyKey() throws BadTsvLineException {
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a", "\t");
    byte[] line = Bytes.toBytes("key_only");
    parser.parse(line, line.length);
  }

  /**
   * A line that ends before the row key column is reached must be rejected with
   * {@link BadTsvLineException}.
   */
  @Test(expected = BadTsvLineException.class)
  public void testTsvParserBadTsvLineNoRowKey() throws BadTsvLineException {
    TsvParser parser = new TsvParser("col_a,HBASE_ROW_KEY", "\t");
    byte[] line = Bytes.toBytes("only_cola_data_and_no_row_key");
    parser.parse(line, line.length);
  }

  /**
   * A non-numeric value in the timestamp column must surface as {@link BadTsvLineException}.
   */
  @Test(expected = BadTsvLineException.class)
  public void testTsvParserInvalidTimestamp() throws BadTsvLineException {
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
    assertEquals(1, parser.getTimestampKeyColumnIndex());
    byte[] line = Bytes.toBytes("rowkey\ttimestamp\tval_a");
    ParsedLine parsed = parser.parse(line, line.length);
    // The test can only pass when BadTsvLineException is thrown, so no assertion on the returned
    // value (or on the parsed columns) could ever contribute to a passing run.
    // NOTE(review): presumably the exception originates from getTimestamp() rather than parse();
    // confirm against TsvParser before tightening this into a try/fail/catch.
    parsed.getTimestamp(-1);
  }

  /**
   * A line that ends before the timestamp column is reached must be rejected with
   * {@link BadTsvLineException}.
   */
  @Test(expected = BadTsvLineException.class)
  public void testTsvParserNoTimestampValue() throws BadTsvLineException {
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
    assertEquals(2, parser.getTimestampKeyColumnIndex());
    byte[] line = Bytes.toBytes("rowkey\tval_a");
    parser.parse(line, line.length);
  }

  /**
   * Exercises {@code parseRowKey} with the row key in the first, middle and last column. The
   * asserted pair values are consistent with (byte offset of the row key, row key length).
   */
  @Test
  public void testTsvParserParseRowKey() throws BadTsvLineException {
    // Row key in the first column.
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t");
    assertEquals(0, parser.getRowKeyColumnIndex());
    byte[] line = Bytes.toBytes("rowkey\tval_a\t1234");
    Pair<Integer, Integer> rowKeyOffsets = parser.parseRowKey(line, line.length);
    assertEquals(0, rowKeyOffsets.getFirst().intValue());
    assertEquals(6, rowKeyOffsets.getSecond().intValue());
    try {
      line = Bytes.toBytes("\t\tval_a\t1234");
      parser.parseRowKey(line, line.length);
      fail("Should get BadTsvLineException on empty rowkey.");
    } catch (BadTsvLineException ignored) {
      // expected: the row key column is empty
    }

    // Row key in the middle column.
    parser = new TsvParser("col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t");
    assertEquals(1, parser.getRowKeyColumnIndex());
    line = Bytes.toBytes("val_a\trowkey\t1234");
    rowKeyOffsets = parser.parseRowKey(line, line.length);
    assertEquals(6, rowKeyOffsets.getFirst().intValue());
    assertEquals(6, rowKeyOffsets.getSecond().intValue());
    try {
      line = Bytes.toBytes("val_a");
      parser.parseRowKey(line, line.length);
      fail("Should get BadTsvLineException when number of columns less than rowkey position.");
    } catch (BadTsvLineException ignored) {
      // expected: the line ends before the row key column
    }

    // Row key in the last column.
    parser = new TsvParser("col_a,HBASE_TS_KEY,HBASE_ROW_KEY", "\t");
    assertEquals(2, parser.getRowKeyColumnIndex());
    line = Bytes.toBytes("val_a\t1234\trowkey");
    rowKeyOffsets = parser.parseRowKey(line, line.length);
    assertEquals(11, rowKeyOffsets.getFirst().intValue());
    assertEquals(6, rowKeyOffsets.getSecond().intValue());
  }

  /**
   * Exercises parsing of the attributes column in the last, first and second position, including
   * a value that carries several comma-separated attributes.
   */
  @Test
  public void testTsvParseAttributesKey() throws BadTsvLineException {
    // Attributes in the last column.
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY", "\t");
    assertEquals(0, parser.getRowKeyColumnIndex());
    byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value");
    ParsedLine parse = parser.parse(line, line.length);
    // "rowkey\tval_a\t1234\t" occupies bytes 0-17, so the attributes start at byte 18.
    assertEquals(18, parse.getAttributeKeyOffset());
    assertEquals(3, parser.getAttributesKeyColumnIndex());
    String[] attributes = parse.getIndividualAttributes();
    assertEquals("key=>value", attributes[0]);
    try {
      line = Bytes.toBytes("rowkey\tval_a\t1234");
      parser.parse(line, line.length);
      fail("Should get BadTsvLineException when the attributes column is missing.");
    } catch (BadTsvLineException ignored) {
      // expected: the line ends before the attributes column
    }

    // Attributes in the first column.
    parser = new TsvParser("HBASE_ATTRIBUTES_KEY,col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t");
    assertEquals(2, parser.getRowKeyColumnIndex());
    line = Bytes.toBytes("key=>value\tval_a\trowkey\t1234");
    parse = parser.parse(line, line.length);
    assertEquals(0, parse.getAttributeKeyOffset());
    assertEquals(0, parser.getAttributesKeyColumnIndex());
    attributes = parse.getIndividualAttributes();
    assertEquals("key=>value", attributes[0]);
    try {
      line = Bytes.toBytes("val_a");
      parser.parse(line, line.length);
      fail("Should get BadTsvLineException when number of columns less than rowkey position.");
    } catch (BadTsvLineException ignored) {
      // expected: the line ends before the row key column
    }

    // Several attributes packed into the second column.
    parser = new TsvParser("col_a,HBASE_ATTRIBUTES_KEY,HBASE_TS_KEY,HBASE_ROW_KEY", "\t");
    assertEquals(3, parser.getRowKeyColumnIndex());
    line = Bytes.toBytes("val_a\tkey0=>value0,key1=>value1,key2=>value2\t1234\trowkey");
    parse = parser.parse(line, line.length);
    assertEquals(1, parser.getAttributesKeyColumnIndex());
    assertEquals(6, parse.getAttributeKeyOffset());
    String[] attr = parse.getIndividualAttributes();
    // Guard against an empty result silently skipping the per-element checks below.
    assertEquals(3, attr.length);
    for (int i = 0; i < attr.length; i++) {
      assertEquals(("key" + i + "=>" + "value" + i), attr[i]);
    }
  }

  /**
   * Parses a line that carries both an attributes column and a cell visibility column and checks
   * the reported column indexes and byte offsets.
   */
  @Test
  public void testTsvParserWithCellVisibilityCol() throws BadTsvLineException {
    TsvParser parser = new TsvParser(
      "HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY,HBASE_CELL_VISIBILITY", "\t");
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertEquals(4, parser.getCellVisibilityColumnIndex());
    byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value\tPRIVATE&SECRET");
    ParsedLine parse = parser.parse(line, line.length);
    assertEquals(18, parse.getAttributeKeyOffset());
    assertEquals(3, parser.getAttributesKeyColumnIndex());
    String[] attributes = parse.getIndividualAttributes();
    assertEquals("key=>value", attributes[0]);
    // "key=>value" occupies bytes 18-27 and the separator byte 28, so visibility starts at 29.
    assertEquals(29, parse.getCellVisibilityColumnOffset());
  }
}