/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;

import java.util.ArrayList;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser;
import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.ParsedLine;
import org.apache.hadoop.hbase.testclassification.MapReduceTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;

import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
import org.apache.hbase.thirdparty.com.google.common.base.Splitter;
import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;

/**
 * Tests for {@link TsvParser}.
 * <p>
 * Covers two things: how a comma-separated column specification is interpreted (families,
 * qualifiers and the positions of the special {@code HBASE_*} columns), and how individual
 * tab-separated lines are parsed or rejected.
 */
@Tag(MapReduceTests.TAG)
@Tag(SmallTests.TAG)
public class TestImportTsvParser {

  /** Field separator handed to every parser, and used to split the expected columns. */
  private static final String SEPARATOR = "\t";

  /**
   * Compares two byte arrays through their {@link Bytes#toStringBinary(byte[])} rendering, so a
   * failure message shows readable content.
   */
  private void assertBytesEquals(byte[] expected, byte[] actual) {
    String expectedText = Bytes.toStringBinary(expected);
    String actualText = Bytes.toStringBinary(actual);
    assertEquals(expectedText, actualText);
  }

  /**
   * Rebuilds each column of {@code parsed} as a string from its offset and length, and fails if
   * the resulting sequence differs from {@code expected}.
   */
  private void checkParsing(ParsedLine parsed, Iterable<String> expected) {
    ArrayList<String> actualCols = new ArrayList<>();
    int col = 0;
    while (col < parsed.getColumnCount()) {
      byte[] lineBytes = parsed.getLineBytes();
      int offset = parsed.getColumnOffset(col);
      int length = parsed.getColumnLength(col);
      actualCols.add(Bytes.toString(lineBytes, offset, length));
      col++;
    }
    if (Iterables.elementsEqual(actualCols, expected)) {
      return;
    }
    String expectedJoined = Joiner.on(",").join(expected);
    String actualJoined = Joiner.on(",").join(actualCols);
    fail("Expected: " + expectedJoined + "\n" + "Got:" + actualJoined);
  }

  /** Asserts that the parser reports neither a family nor a qualifier for column {@code idx}. */
  private void assertNoFamilyOrQualifier(TsvParser parser, int idx) {
    assertNull(parser.getFamily(idx));
    assertNull(parser.getQualifier(idx));
  }

  /** Asserts the family and qualifier the parser reports for column {@code idx}. */
  private void assertFamilyAndQualifier(TsvParser parser, int idx, String family,
    String qualifier) {
    assertBytesEquals(Bytes.toBytes(family), parser.getFamily(idx));
    assertBytesEquals(Bytes.toBytes(qualifier), parser.getQualifier(idx));
  }

  /**
   * Builds a parser for {@code columnSpec} and asserts that parsing {@code text} throws
   * {@link BadTsvLineException}.
   */
  private void assertParseFails(String columnSpec, String text) {
    TsvParser parser = new TsvParser(columnSpec, SEPARATOR);
    byte[] raw = Bytes.toBytes(text);
    assertThrows(BadTsvLineException.class, () -> parser.parse(raw, raw.length));
  }

  /**
   * Asserts both components of the pair returned by {@code parseRowKey}. The expected values
   * used in this class correspond to the row key's start offset and its length.
   */
  private void assertRowKeySpan(int expectedFirst, int expectedSecond,
    Pair<Integer, Integer> span) {
    assertEquals(expectedFirst, span.getFirst().intValue());
    assertEquals(expectedSecond, span.getSecond().intValue());
  }

  /**
   * Checks how column specifications of increasing complexity are interpreted: which columns
   * carry a family/qualifier, and where the row key, timestamp and attributes columns sit.
   */
  @Test
  public void testTsvParserSpecParsing() {
    // Row key only.
    TsvParser rowKeyOnly = new TsvParser("HBASE_ROW_KEY", SEPARATOR);
    assertNoFamilyOrQualifier(rowKeyOnly, 0);
    assertEquals(0, rowKeyOnly.getRowKeyColumnIndex());
    assertFalse(rowKeyOnly.hasTimestamp());

    // Row key plus one family:qualifier column.
    TsvParser oneColumn = new TsvParser("HBASE_ROW_KEY,col1:scol1", SEPARATOR);
    assertNoFamilyOrQualifier(oneColumn, 0);
    assertFamilyAndQualifier(oneColumn, 1, "col1", "scol1");
    assertEquals(0, oneColumn.getRowKeyColumnIndex());
    assertFalse(oneColumn.hasTimestamp());

    // Row key plus two columns in the same family.
    TsvParser twoColumns = new TsvParser("HBASE_ROW_KEY,col1:scol1,col1:scol2", SEPARATOR);
    assertNoFamilyOrQualifier(twoColumns, 0);
    assertFamilyAndQualifier(twoColumns, 1, "col1", "scol1");
    assertFamilyAndQualifier(twoColumns, 2, "col1", "scol2");
    assertEquals(0, twoColumns.getRowKeyColumnIndex());
    assertFalse(twoColumns.hasTimestamp());

    // Timestamp column placed between the two data columns.
    TsvParser withTimestamp =
      new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2", SEPARATOR);
    assertNoFamilyOrQualifier(withTimestamp, 0);
    assertFamilyAndQualifier(withTimestamp, 1, "col1", "scol1");
    assertFamilyAndQualifier(withTimestamp, 3, "col1", "scol2");
    assertEquals(0, withTimestamp.getRowKeyColumnIndex());
    assertTrue(withTimestamp.hasTimestamp());
    assertEquals(2, withTimestamp.getTimestampKeyColumnIndex());

    // Attributes column appended at the end.
    TsvParser withAttributes = new TsvParser(
      "HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2,HBASE_ATTRIBUTES_KEY", SEPARATOR);
    assertNoFamilyOrQualifier(withAttributes, 0);
    assertFamilyAndQualifier(withAttributes, 1, "col1", "scol1");
    assertFamilyAndQualifier(withAttributes, 3, "col1", "scol2");
    assertEquals(0, withAttributes.getRowKeyColumnIndex());
    assertTrue(withAttributes.hasTimestamp());
    assertEquals(2, withAttributes.getTimestampKeyColumnIndex());
    assertEquals(4, withAttributes.getAttributesKeyColumnIndex());

    // Attributes column first, row key last.
    TsvParser rowKeyLast = new TsvParser(
      "HBASE_ATTRIBUTES_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2,HBASE_ROW_KEY", SEPARATOR);
    assertNoFamilyOrQualifier(rowKeyLast, 0);
    assertFamilyAndQualifier(rowKeyLast, 1, "col1", "scol1");
    assertFamilyAndQualifier(rowKeyLast, 3, "col1", "scol2");
    assertEquals(4, rowKeyLast.getRowKeyColumnIndex());
    assertTrue(rowKeyLast.hasTimestamp());
    assertEquals(2, rowKeyLast.getTimestampKeyColumnIndex());
    assertEquals(0, rowKeyLast.getAttributesKeyColumnIndex());
  }

  /**
   * Parses a well-formed four-field line against a spec whose row key is the third column and
   * which has no timestamp column.
   */
  @Test
  public void testTsvParser() throws BadTsvLineException {
    TsvParser parser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", SEPARATOR);

    // A bare family name is expected to come back with an empty qualifier.
    assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(0));
    assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(0));
    assertFamilyAndQualifier(parser, 1, "col_b", "qual");
    assertNoFamilyOrQualifier(parser, 2);
    assertEquals(2, parser.getRowKeyColumnIndex());

    assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX, parser.getTimestampKeyColumnIndex());

    byte[] raw = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
    ParsedLine parsed = parser.parse(raw, raw.length);
    Iterable<String> expectedCols = Splitter.on(SEPARATOR).split(Bytes.toString(raw));
    checkParsing(parsed, expectedCols);
  }

  /**
   * Parses a line whose second field is a numeric timestamp and checks that the value is
   * returned by {@code getTimestamp}.
   */
  @Test
  public void testTsvParserWithTimestamp() throws BadTsvLineException {
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", SEPARATOR);
    assertNoFamilyOrQualifier(parser, 0);
    assertNoFamilyOrQualifier(parser, 1);
    assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(2));
    assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(2));
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertEquals(1, parser.getTimestampKeyColumnIndex());

    byte[] raw = Bytes.toBytes("rowkey\t1234\tval_a");
    ParsedLine parsed = parser.parse(raw, raw.length);
    assertEquals(1234L, parsed.getTimestamp(-1));
    Iterable<String> expectedCols = Splitter.on(SEPARATOR).split(Bytes.toString(raw));
    checkParsing(parsed, expectedCols);
  }

  /**
   * A line with three fields is rejected with {@link BadTsvLineException} when the spec declares
   * only two columns.
   */
  @Test
  public void testTsvParserBadTsvLineExcessiveColumns() throws BadTsvLineException {
    assertParseFails("HBASE_ROW_KEY,col_a", "val_a\tval_b\tval_c");
  }

  /**
   * An empty line is rejected with {@link BadTsvLineException}.
   */
  @Test
  public void testTsvParserBadTsvLineZeroColumn() throws BadTsvLineException {
    assertParseFails("HBASE_ROW_KEY,col_a", "");
  }

  /**
   * A line holding only the row key field is rejected with {@link BadTsvLineException}.
   */
  @Test
  public void testTsvParserBadTsvLineOnlyKey() throws BadTsvLineException {
    assertParseFails("HBASE_ROW_KEY,col_a", "key_only");
  }

  /**
   * A single-field line is rejected with {@link BadTsvLineException} when the spec puts the row
   * key in the second column.
   */
  @Test
  public void testTsvParserBadTsvLineNoRowKey() throws BadTsvLineException {
    assertParseFails("col_a,HBASE_ROW_KEY", "only_cola_data_and_no_row_key");
  }

  /**
   * A non-numeric timestamp field does not fail {@code parse}; the
   * {@link BadTsvLineException} is raised when the timestamp is requested.
   */
  @Test
  public void testTsvParserInvalidTimestamp() throws BadTsvLineException {
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", SEPARATOR);
    assertEquals(1, parser.getTimestampKeyColumnIndex());

    byte[] raw = Bytes.toBytes("rowkey\ttimestamp\tval_a");
    ParsedLine parsed = parser.parse(raw, raw.length);
    assertThrows(BadTsvLineException.class, () -> parsed.getTimestamp(-1));

    // The column boundaries are still checked after the timestamp lookup has failed.
    Iterable<String> expectedCols = Splitter.on(SEPARATOR).split(Bytes.toString(raw));
    checkParsing(parsed, expectedCols);
  }

  /**
   * A line that stops before the declared timestamp column is rejected with
   * {@link BadTsvLineException}.
   */
  @Test
  public void testTsvParserNoTimestampValue() throws BadTsvLineException {
    TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", SEPARATOR);
    assertEquals(2, parser.getTimestampKeyColumnIndex());

    byte[] raw = Bytes.toBytes("rowkey\tval_a");
    assertThrows(BadTsvLineException.class, () -> parser.parse(raw, raw.length));
  }

  /**
   * Exercises {@code parseRowKey} with the row key in the first, middle and last column, plus
   * two inputs for which it must throw {@link BadTsvLineException}.
   */
  @Test
  public void testTsvParserParseRowKey() throws BadTsvLineException {
    // Row key in the first column.
    TsvParser keyFirst = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", SEPARATOR);
    assertEquals(0, keyFirst.getRowKeyColumnIndex());
    byte[] keyFirstLine = Bytes.toBytes("rowkey\tval_a\t1234");
    assertRowKeySpan(0, 6, keyFirst.parseRowKey(keyFirstLine, keyFirstLine.length));

    // Line starting with separators, i.e. an empty first field.
    byte[] emptyKeyLine = Bytes.toBytes("\t\tval_a\t1234");
    assertThrows(BadTsvLineException.class,
      () -> keyFirst.parseRowKey(emptyKeyLine, emptyKeyLine.length));

    // Row key in the middle column.
    TsvParser keyMiddle = new TsvParser("col_a,HBASE_ROW_KEY,HBASE_TS_KEY", SEPARATOR);
    assertEquals(1, keyMiddle.getRowKeyColumnIndex());
    byte[] keyMiddleLine = Bytes.toBytes("val_a\trowkey\t1234");
    assertRowKeySpan(6, 6, keyMiddle.parseRowKey(keyMiddleLine, keyMiddleLine.length));

    // Line that ends before the row key column.
    byte[] truncatedLine = Bytes.toBytes("val_a");
    assertThrows(BadTsvLineException.class,
      () -> keyMiddle.parseRowKey(truncatedLine, truncatedLine.length));

    // Row key in the last column.
    TsvParser keyLast = new TsvParser("col_a,HBASE_TS_KEY,HBASE_ROW_KEY", SEPARATOR);
    assertEquals(2, keyLast.getRowKeyColumnIndex());
    byte[] keyLastLine = Bytes.toBytes("val_a\t1234\trowkey");
    assertRowKeySpan(11, 6, keyLast.parseRowKey(keyLastLine, keyLastLine.length));
  }

  /**
   * Checks the attributes column in the last, first and second position: its reported offset,
   * its column index, and the individual attribute strings returned for the line.
   */
  @Test
  public void testTsvParseAttributesKey() throws BadTsvLineException {
    // Attributes column last.
    TsvParser attrsLast =
      new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY", SEPARATOR);
    assertEquals(0, attrsLast.getRowKeyColumnIndex());
    byte[] attrsLastLine = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value");
    ParsedLine attrsLastParsed = attrsLast.parse(attrsLastLine, attrsLastLine.length);
    assertEquals(18, attrsLastParsed.getAttributeKeyOffset());
    assertEquals(3, attrsLast.getAttributesKeyColumnIndex());
    String[] attrsLastValues = attrsLastParsed.getIndividualAttributes();
    assertEquals("key=>value", attrsLastValues[0]);

    // Same spec, but the line stops before the attributes column.
    byte[] missingAttrsLine = Bytes.toBytes("rowkey\tval_a\t1234");
    assertThrows(BadTsvLineException.class,
      () -> attrsLast.parse(missingAttrsLine, missingAttrsLine.length));

    // Attributes column first.
    TsvParser attrsFirst =
      new TsvParser("HBASE_ATTRIBUTES_KEY,col_a,HBASE_ROW_KEY,HBASE_TS_KEY", SEPARATOR);
    assertEquals(2, attrsFirst.getRowKeyColumnIndex());
    byte[] attrsFirstLine = Bytes.toBytes("key=>value\tval_a\trowkey\t1234");
    ParsedLine attrsFirstParsed = attrsFirst.parse(attrsFirstLine, attrsFirstLine.length);
    assertEquals(0, attrsFirstParsed.getAttributeKeyOffset());
    assertEquals(0, attrsFirst.getAttributesKeyColumnIndex());
    String[] attrsFirstValues = attrsFirstParsed.getIndividualAttributes();
    assertEquals("key=>value", attrsFirstValues[0]);

    // A single-field line is rejected by the same parser.
    byte[] singleFieldLine = Bytes.toBytes("val_a");
    assertThrows(BadTsvLineException.class,
      () -> attrsFirst.parse(singleFieldLine, singleFieldLine.length));

    // Attributes column second, carrying several comma-separated attributes.
    TsvParser attrsSecond =
      new TsvParser("col_a,HBASE_ATTRIBUTES_KEY,HBASE_TS_KEY,HBASE_ROW_KEY", SEPARATOR);
    assertEquals(3, attrsSecond.getRowKeyColumnIndex());
    byte[] multiAttrLine =
      Bytes.toBytes("val_a\tkey0=>value0,key1=>value1,key2=>value2\t1234\trowkey");
    ParsedLine multiAttrParsed = attrsSecond.parse(multiAttrLine, multiAttrLine.length);
    assertEquals(1, attrsSecond.getAttributesKeyColumnIndex());
    assertEquals(6, multiAttrParsed.getAttributeKeyOffset());
    String[] multiAttrValues = multiAttrParsed.getIndividualAttributes();
    for (int idx = 0; idx < multiAttrValues.length; idx++) {
      assertEquals(("key" + idx + "=>" + "value" + idx), multiAttrValues[idx]);
    }
  }

  /**
   * Parses a line that carries a cell-visibility column after the attributes column and checks
   * the offsets reported for both.
   */
  @Test
  public void testTsvParserWithCellVisibilityCol() throws BadTsvLineException {
    TsvParser parser = new TsvParser(
      "HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY,HBASE_CELL_VISIBILITY", SEPARATOR);
    assertEquals(0, parser.getRowKeyColumnIndex());
    assertEquals(4, parser.getCellVisibilityColumnIndex());

    byte[] raw = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value\tPRIVATE&SECRET");
    ParsedLine parsed = parser.parse(raw, raw.length);
    assertEquals(18, parsed.getAttributeKeyOffset());
    assertEquals(3, parser.getAttributesKeyColumnIndex());
    String[] attributeValues = parsed.getIndividualAttributes();
    assertEquals("key=>value", attributeValues[0]);
    assertEquals(29, parsed.getCellVisibilityColumnOffset());
  }
}