001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.List; 023import java.util.Random; 024 025import org.apache.hadoop.hbase.CompareOperator; 026import org.apache.hadoop.hbase.DoNotRetryIOException; 027import org.apache.hadoop.hbase.HBaseClassTestRule; 028import org.apache.hadoop.hbase.HBaseTestingUtility; 029import org.apache.hadoop.hbase.HColumnDescriptor; 030import org.apache.hadoop.hbase.HTableDescriptor; 031import org.apache.hadoop.hbase.StartMiniClusterOption; 032import org.apache.hadoop.hbase.TableName; 033import org.apache.hadoop.hbase.client.Admin; 034import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 035import org.apache.hadoop.hbase.client.Connection; 036import org.apache.hadoop.hbase.client.Put; 037import org.apache.hadoop.hbase.client.Result; 038import org.apache.hadoop.hbase.client.ResultScanner; 039import org.apache.hadoop.hbase.client.Scan; 040import org.apache.hadoop.hbase.client.Table; 041import org.apache.hadoop.hbase.client.TableDescriptor; 042import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 043import org.apache.hadoop.hbase.filter.CompareFilter; 044import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; 045import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 046import org.apache.hadoop.hbase.testclassification.LargeTests; 047import org.apache.hadoop.hbase.testclassification.RegionServerTests; 048import org.apache.hadoop.hbase.util.Bytes; 049import org.junit.AfterClass; 050import org.junit.BeforeClass; 051import org.junit.ClassRule; 052import org.junit.Rule; 053import org.junit.Test; 054import org.junit.experimental.categories.Category; 055import org.junit.rules.TestName; 056import org.slf4j.Logger; 057import org.slf4j.LoggerFactory; 058import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; 059import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser; 060import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser; 061import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter; 062import org.apache.hbase.thirdparty.org.apache.commons.cli.Option; 063import org.apache.hbase.thirdparty.org.apache.commons.cli.Options; 064 065/** 066 * Test performance improvement of joined scanners optimization: 067 * https://issues.apache.org/jira/browse/HBASE-5416 068 */ 069@Category({RegionServerTests.class, LargeTests.class}) 070public class TestJoinedScanners { 071 072 @ClassRule 073 public static final HBaseClassTestRule CLASS_RULE = 074 HBaseClassTestRule.forClass(TestJoinedScanners.class); 075 076 private static final Logger LOG = LoggerFactory.getLogger(TestJoinedScanners.class); 077 078 private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 079 080 private static final byte[] cf_essential = Bytes.toBytes("essential"); 081 private static final byte[] cf_joined = Bytes.toBytes("joined"); 082 private static final byte[] col_name = Bytes.toBytes("a"); 083 private static final byte[] flag_yes = Bytes.toBytes("Y"); 084 private static final byte[] flag_no = Bytes.toBytes("N"); 085 086 private static DataBlockEncoding blockEncoding = DataBlockEncoding.FAST_DIFF; 087 private static int selectionRatio = 30; 088 private static int valueWidth = 128 * 1024; 089 090 @Rule 091 public TestName name = new TestName(); 092 093 @BeforeClass 094 public static void setUpBeforeClass() throws Exception { 095 final int DEFAULT_BLOCK_SIZE = 1024 * 1024; 096 TEST_UTIL.getConfiguration().setLong("dfs.blocksize", DEFAULT_BLOCK_SIZE); 097 TEST_UTIL.getConfiguration().setInt("dfs.replication", 1); 098 TEST_UTIL.getConfiguration().setLong("hbase.hregion.max.filesize", 322122547200L); 099 100 String[] dataNodeHosts = new String[] {"host1", "host2", "host3"}; 101 int regionServersCount = 3; 102 StartMiniClusterOption option = StartMiniClusterOption.builder() 103 .numRegionServers(regionServersCount).dataNodeHosts(dataNodeHosts).build(); 104 TEST_UTIL.startMiniCluster(option); 105 } 106 107 @AfterClass 108 public static void tearDownAfterClass() throws Exception { 109 TEST_UTIL.shutdownMiniCluster(); 110 } 111 112 @Test 113 public void testJoinedScanners() throws Exception { 114 byte[][] families = {cf_essential, cf_joined}; 115 116 final TableName tableName = TableName.valueOf(name.getMethodName()); 117 HTableDescriptor desc = new HTableDescriptor(tableName); 118 for (byte[] family : families) { 119 HColumnDescriptor hcd = new HColumnDescriptor(family); 120 hcd.setDataBlockEncoding(blockEncoding); 121 desc.addFamily(hcd); 122 } 123 TEST_UTIL.getAdmin().createTable(desc); 124 Table ht = TEST_UTIL.getConnection().getTable(tableName); 125 126 long rows_to_insert = 1000; 127 int insert_batch = 20; 128 long time = System.nanoTime(); 129 Random rand = new Random(time); 130 131 LOG.info("Make " + Long.toString(rows_to_insert) + " rows, total size = " + Float 132 .toString(rows_to_insert * valueWidth / 1024 / 1024) + " MB"); 133 134 byte[] val_large = new byte[valueWidth]; 135 136 List<Put> puts = new ArrayList<>(); 137 138 for (long i = 0; i < rows_to_insert; i++) { 139 Put put = new Put(Bytes.toBytes(Long.toString(i))); 140 if (rand.nextInt(100) <= selectionRatio) { 141 put.addColumn(cf_essential, col_name, flag_yes); 142 } else { 143 put.addColumn(cf_essential, col_name, flag_no); 144 } 145 put.addColumn(cf_joined, col_name, val_large); 146 puts.add(put); 147 if (puts.size() >= insert_batch) { 148 ht.put(puts); 149 puts.clear(); 150 } 151 } 152 if (!puts.isEmpty()) { 153 ht.put(puts); 154 puts.clear(); 155 } 156 157 LOG.info("Data generated in " 158 + Double.toString((System.nanoTime() - time) / 1000000000.0) + " seconds"); 159 160 boolean slow = true; 161 for (int i = 0; i < 10; ++i) { 162 runScanner(ht, slow); 163 slow = !slow; 164 } 165 166 ht.close(); 167 } 168 169 private void runScanner(Table table, boolean slow) throws Exception { 170 long time = System.nanoTime(); 171 Scan scan = new Scan(); 172 scan.addColumn(cf_essential, col_name); 173 scan.addColumn(cf_joined, col_name); 174 175 SingleColumnValueFilter filter = new SingleColumnValueFilter( 176 cf_essential, col_name, CompareFilter.CompareOp.EQUAL, flag_yes); 177 filter.setFilterIfMissing(true); 178 scan.setFilter(filter); 179 scan.setLoadColumnFamiliesOnDemand(!slow); 180 181 ResultScanner result_scanner = table.getScanner(scan); 182 Result res; 183 long rows_count = 0; 184 while ((res = result_scanner.next()) != null) { 185 rows_count++; 186 } 187 188 double timeSec = (System.nanoTime() - time) / 1000000000.0; 189 result_scanner.close(); 190 LOG.info((slow ? "Slow" : "Joined") + " scanner finished in " + Double.toString(timeSec) 191 + " seconds, got " + Long.toString(rows_count/2) + " rows"); 192 } 193 194 private static Options options = new Options(); 195 196 /** 197 * Command line interface: 198 * @param args 199 * @throws IOException if there is a bug while reading from disk 200 */ 201 public static void main(final String[] args) throws Exception { 202 Option encodingOption = new Option("e", "blockEncoding", true, 203 "Data block encoding; Default: FAST_DIFF"); 204 encodingOption.setRequired(false); 205 options.addOption(encodingOption); 206 207 Option ratioOption = new Option("r", "selectionRatio", true, 208 "Ratio of selected rows using essential column family"); 209 ratioOption.setRequired(false); 210 options.addOption(ratioOption); 211 212 Option widthOption = new Option("w", "valueWidth", true, 213 "Width of value for non-essential column family"); 214 widthOption.setRequired(false); 215 options.addOption(widthOption); 216 217 CommandLineParser parser = new GnuParser(); 218 CommandLine cmd = parser.parse(options, args); 219 if (args.length < 1) { 220 HelpFormatter formatter = new HelpFormatter(); 221 formatter.printHelp("TestJoinedScanners", options, true); 222 } 223 224 if (cmd.hasOption("e")) { 225 blockEncoding = DataBlockEncoding.valueOf(cmd.getOptionValue("e")); 226 } 227 if (cmd.hasOption("r")) { 228 selectionRatio = Integer.parseInt(cmd.getOptionValue("r")); 229 } 230 if (cmd.hasOption("w")) { 231 valueWidth = Integer.parseInt(cmd.getOptionValue("w")); 232 } 233 // run the test 234 TestJoinedScanners test = new TestJoinedScanners(); 235 test.testJoinedScanners(); 236 } 237 238 @Test(expected = DoNotRetryIOException.class) 239 public void testWithReverseScan() throws Exception { 240 try (Connection con = TEST_UTIL.getConnection(); Admin admin = con.getAdmin()) { 241 TableName tableName = TableName.valueOf(name.getMethodName()); 242 243 TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName) 244 .setColumnFamily(ColumnFamilyDescriptorBuilder.of("cf1")) 245 .setColumnFamily(ColumnFamilyDescriptorBuilder.of("cf2")) 246 .build(); 247 admin.createTable(tableDescriptor); 248 249 try (Table table = con.getTable(tableName)) { 250 SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("cf1"), 251 Bytes.toBytes("col"), CompareOperator.EQUAL, Bytes.toBytes("val")); 252 filter.setFilterIfMissing(true); 253 254 // Reverse scan with loading CFs on demand 255 Scan scan = new Scan(); 256 scan.setFilter(filter); 257 scan.setReversed(true); 258 scan.setLoadColumnFamiliesOnDemand(true); 259 260 try (ResultScanner scanner = table.getScanner(scan)) { 261 // DoNotRetryIOException should occur 262 scanner.next(); 263 } 264 } 265 } 266 } 267}