001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.Random;
024
025import org.apache.hadoop.hbase.CompareOperator;
026import org.apache.hadoop.hbase.DoNotRetryIOException;
027import org.apache.hadoop.hbase.HBaseClassTestRule;
028import org.apache.hadoop.hbase.HBaseTestingUtility;
029import org.apache.hadoop.hbase.HColumnDescriptor;
030import org.apache.hadoop.hbase.HTableDescriptor;
031import org.apache.hadoop.hbase.StartMiniClusterOption;
032import org.apache.hadoop.hbase.TableName;
033import org.apache.hadoop.hbase.client.Admin;
034import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
035import org.apache.hadoop.hbase.client.Connection;
036import org.apache.hadoop.hbase.client.Put;
037import org.apache.hadoop.hbase.client.Result;
038import org.apache.hadoop.hbase.client.ResultScanner;
039import org.apache.hadoop.hbase.client.Scan;
040import org.apache.hadoop.hbase.client.Table;
041import org.apache.hadoop.hbase.client.TableDescriptor;
042import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
043import org.apache.hadoop.hbase.filter.CompareFilter;
044import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
045import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
046import org.apache.hadoop.hbase.testclassification.LargeTests;
047import org.apache.hadoop.hbase.testclassification.RegionServerTests;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.junit.AfterClass;
050import org.junit.BeforeClass;
051import org.junit.ClassRule;
052import org.junit.Rule;
053import org.junit.Test;
054import org.junit.experimental.categories.Category;
055import org.junit.rules.TestName;
056import org.slf4j.Logger;
057import org.slf4j.LoggerFactory;
058import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
059import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser;
060import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser;
061import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
062import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
063import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
064
065/**
066 * Test performance improvement of joined scanners optimization:
067 * https://issues.apache.org/jira/browse/HBASE-5416
068 */
069@Category({RegionServerTests.class, LargeTests.class})
070public class TestJoinedScanners {
071
  // Class-level JUnit rule created for this test class.
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestJoinedScanners.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestJoinedScanners.class);

  // Single testing utility shared by every test in this class; the mini cluster is started on it
  // in setUpBeforeClass() and stopped in tearDownAfterClass().
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  // Family that holds the small Y/N flag evaluated by the scan filter.
  private static final byte[] cf_essential = Bytes.toBytes("essential");
  // Family that holds the large value written for every row.
  private static final byte[] cf_joined = Bytes.toBytes("joined");
  // Qualifier used in both families.
  private static final byte[] col_name = Bytes.toBytes("a");
  // Flag values stored in the essential family; the scan filter matches on flag_yes.
  private static final byte[] flag_yes = Bytes.toBytes("Y");
  private static final byte[] flag_no = Bytes.toBytes("N");

  // Tunables; not final because main() may override them from the command line
  // (-e, -r and -w respectively).
  // Data block encoding applied to both column families of the test table.
  private static DataBlockEncoding blockEncoding = DataBlockEncoding.FAST_DIFF;
  // Threshold compared against a random number in [0, 100) to decide which rows get flag_yes.
  private static int selectionRatio = 30;
  // Size, in bytes, of the value written to the joined family.
  private static int valueWidth = 128 * 1024;

  // Supplies the running test method's name, which the tests use as their table name.
  @Rule
  public TestName name = new TestName();
092
093  @BeforeClass
094  public static void setUpBeforeClass() throws Exception {
095    final int DEFAULT_BLOCK_SIZE = 1024 * 1024;
096    TEST_UTIL.getConfiguration().setLong("dfs.blocksize", DEFAULT_BLOCK_SIZE);
097    TEST_UTIL.getConfiguration().setInt("dfs.replication", 1);
098    TEST_UTIL.getConfiguration().setLong("hbase.hregion.max.filesize", 322122547200L);
099
100    String[] dataNodeHosts = new String[] {"host1", "host2", "host3"};
101    int regionServersCount = 3;
102    StartMiniClusterOption option = StartMiniClusterOption.builder()
103        .numRegionServers(regionServersCount).dataNodeHosts(dataNodeHosts).build();
104    TEST_UTIL.startMiniCluster(option);
105  }
106
  /**
   * Shuts down the mini cluster held by {@code TEST_UTIL} after the tests of this class have run.
   *
   * @throws Exception if the shutdown fails
   */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }
111
112  @Test
113  public void testJoinedScanners() throws Exception {
114    byte[][] families = {cf_essential, cf_joined};
115
116    final TableName tableName = TableName.valueOf(name.getMethodName());
117    HTableDescriptor desc = new HTableDescriptor(tableName);
118    for (byte[] family : families) {
119      HColumnDescriptor hcd = new HColumnDescriptor(family);
120      hcd.setDataBlockEncoding(blockEncoding);
121      desc.addFamily(hcd);
122    }
123    TEST_UTIL.getAdmin().createTable(desc);
124    Table ht = TEST_UTIL.getConnection().getTable(tableName);
125
126    long rows_to_insert = 1000;
127    int insert_batch = 20;
128    long time = System.nanoTime();
129    Random rand = new Random(time);
130
131    LOG.info("Make " + Long.toString(rows_to_insert) + " rows, total size = " + Float
132      .toString(rows_to_insert * valueWidth / 1024 / 1024) + " MB");
133
134    byte[] val_large = new byte[valueWidth];
135
136    List<Put> puts = new ArrayList<>();
137
138    for (long i = 0; i < rows_to_insert; i++) {
139      Put put = new Put(Bytes.toBytes(Long.toString(i)));
140      if (rand.nextInt(100) <= selectionRatio) {
141        put.addColumn(cf_essential, col_name, flag_yes);
142      } else {
143        put.addColumn(cf_essential, col_name, flag_no);
144      }
145      put.addColumn(cf_joined, col_name, val_large);
146      puts.add(put);
147      if (puts.size() >= insert_batch) {
148        ht.put(puts);
149        puts.clear();
150      }
151    }
152    if (!puts.isEmpty()) {
153      ht.put(puts);
154      puts.clear();
155    }
156
157    LOG.info("Data generated in "
158      + Double.toString((System.nanoTime() - time) / 1000000000.0) + " seconds");
159
160    boolean slow = true;
161    for (int i = 0; i < 10; ++i) {
162      runScanner(ht, slow);
163      slow = !slow;
164    }
165
166    ht.close();
167  }
168
169  private void runScanner(Table table, boolean slow) throws Exception {
170    long time = System.nanoTime();
171    Scan scan = new Scan();
172    scan.addColumn(cf_essential, col_name);
173    scan.addColumn(cf_joined, col_name);
174
175    SingleColumnValueFilter filter = new SingleColumnValueFilter(
176        cf_essential, col_name, CompareFilter.CompareOp.EQUAL, flag_yes);
177    filter.setFilterIfMissing(true);
178    scan.setFilter(filter);
179    scan.setLoadColumnFamiliesOnDemand(!slow);
180
181    ResultScanner result_scanner = table.getScanner(scan);
182    Result res;
183    long rows_count = 0;
184    while ((res = result_scanner.next()) != null) {
185      rows_count++;
186    }
187
188    double timeSec = (System.nanoTime() - time) / 1000000000.0;
189    result_scanner.close();
190    LOG.info((slow ? "Slow" : "Joined") + " scanner finished in " + Double.toString(timeSec)
191      + " seconds, got " + Long.toString(rows_count/2) + " rows");
192  }
193
194  private static Options options = new Options();
195
196  /**
197   * Command line interface:
198   * @param args
199   * @throws IOException if there is a bug while reading from disk
200   */
201  public static void main(final String[] args) throws Exception {
202    Option encodingOption = new Option("e", "blockEncoding", true,
203      "Data block encoding; Default: FAST_DIFF");
204    encodingOption.setRequired(false);
205    options.addOption(encodingOption);
206
207    Option ratioOption = new Option("r", "selectionRatio", true,
208      "Ratio of selected rows using essential column family");
209    ratioOption.setRequired(false);
210    options.addOption(ratioOption);
211
212    Option widthOption = new Option("w", "valueWidth", true,
213      "Width of value for non-essential column family");
214    widthOption.setRequired(false);
215    options.addOption(widthOption);
216
217    CommandLineParser parser = new GnuParser();
218    CommandLine cmd = parser.parse(options, args);
219    if (args.length < 1) {
220      HelpFormatter formatter = new HelpFormatter();
221      formatter.printHelp("TestJoinedScanners", options, true);
222    }
223
224    if (cmd.hasOption("e")) {
225      blockEncoding = DataBlockEncoding.valueOf(cmd.getOptionValue("e"));
226    }
227    if (cmd.hasOption("r")) {
228      selectionRatio = Integer.parseInt(cmd.getOptionValue("r"));
229    }
230    if (cmd.hasOption("w")) {
231      valueWidth = Integer.parseInt(cmd.getOptionValue("w"));
232    }
233    // run the test
234    TestJoinedScanners test = new TestJoinedScanners();
235    test.testJoinedScanners();
236  }
237
238  @Test(expected = DoNotRetryIOException.class)
239  public void testWithReverseScan() throws Exception {
240    try (Connection con = TEST_UTIL.getConnection(); Admin admin = con.getAdmin()) {
241      TableName tableName = TableName.valueOf(name.getMethodName());
242
243      TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
244          .setColumnFamily(ColumnFamilyDescriptorBuilder.of("cf1"))
245          .setColumnFamily(ColumnFamilyDescriptorBuilder.of("cf2"))
246          .build();
247      admin.createTable(tableDescriptor);
248
249      try (Table table = con.getTable(tableName)) {
250        SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("cf1"),
251          Bytes.toBytes("col"), CompareOperator.EQUAL, Bytes.toBytes("val"));
252        filter.setFilterIfMissing(true);
253
254        // Reverse scan with loading CFs on demand
255        Scan scan = new Scan();
256        scan.setFilter(filter);
257        scan.setReversed(true);
258        scan.setLoadColumnFamiliesOnDemand(true);
259
260        try (ResultScanner scanner = table.getScanner(scan)) {
261          // DoNotRetryIOException should occur
262          scanner.next();
263        }
264      }
265    }
266  }
267}