001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.util.ArrayList;
021import java.util.List;
022import java.util.Random;
023import java.util.concurrent.ThreadLocalRandom;
024import org.apache.hadoop.hbase.CompareOperator;
025import org.apache.hadoop.hbase.DoNotRetryIOException;
026import org.apache.hadoop.hbase.HBaseClassTestRule;
027import org.apache.hadoop.hbase.HBaseTestingUtil;
028import org.apache.hadoop.hbase.StartTestingClusterOption;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.client.Admin;
031import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
032import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
033import org.apache.hadoop.hbase.client.Connection;
034import org.apache.hadoop.hbase.client.Put;
035import org.apache.hadoop.hbase.client.Result;
036import org.apache.hadoop.hbase.client.ResultScanner;
037import org.apache.hadoop.hbase.client.Scan;
038import org.apache.hadoop.hbase.client.Table;
039import org.apache.hadoop.hbase.client.TableDescriptor;
040import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
041import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
042import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
043import org.apache.hadoop.hbase.testclassification.LargeTests;
044import org.apache.hadoop.hbase.testclassification.RegionServerTests;
045import org.apache.hadoop.hbase.util.Bytes;
046import org.junit.AfterClass;
047import org.junit.BeforeClass;
048import org.junit.ClassRule;
049import org.junit.Rule;
050import org.junit.Test;
051import org.junit.experimental.categories.Category;
052import org.junit.rules.TestName;
053import org.slf4j.Logger;
054import org.slf4j.LoggerFactory;
055
056import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
057import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser;
058import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser;
059import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
060import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
061import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
062
063/**
064 * Test performance improvement of joined scanners optimization:
065 * https://issues.apache.org/jira/browse/HBASE-5416
066 */
067@Category({ RegionServerTests.class, LargeTests.class })
068public class TestJoinedScanners {
069
070  @ClassRule
071  public static final HBaseClassTestRule CLASS_RULE =
072    HBaseClassTestRule.forClass(TestJoinedScanners.class);
073
074  private static final Logger LOG = LoggerFactory.getLogger(TestJoinedScanners.class);
075
076  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
077
078  private static final byte[] cf_essential = Bytes.toBytes("essential");
079  private static final byte[] cf_joined = Bytes.toBytes("joined");
080  private static final byte[] col_name = Bytes.toBytes("a");
081  private static final byte[] flag_yes = Bytes.toBytes("Y");
082  private static final byte[] flag_no = Bytes.toBytes("N");
083
084  private static DataBlockEncoding blockEncoding = DataBlockEncoding.FAST_DIFF;
085  private static int selectionRatio = 30;
086  private static int valueWidth = 128 * 1024;
087
088  @Rule
089  public TestName name = new TestName();
090
091  @BeforeClass
092  public static void setUpBeforeClass() throws Exception {
093    final int DEFAULT_BLOCK_SIZE = 1024 * 1024;
094    TEST_UTIL.getConfiguration().setLong("dfs.blocksize", DEFAULT_BLOCK_SIZE);
095    TEST_UTIL.getConfiguration().setInt("dfs.replication", 1);
096    TEST_UTIL.getConfiguration().setLong("hbase.hregion.max.filesize", 322122547200L);
097
098    String[] dataNodeHosts = new String[] { "host1", "host2", "host3" };
099    int regionServersCount = 3;
100    StartTestingClusterOption option = StartTestingClusterOption.builder()
101      .numRegionServers(regionServersCount).dataNodeHosts(dataNodeHosts).build();
102    TEST_UTIL.startMiniCluster(option);
103  }
104
105  @AfterClass
106  public static void tearDownAfterClass() throws Exception {
107    TEST_UTIL.shutdownMiniCluster();
108  }
109
110  @Test
111  public void testJoinedScanners() throws Exception {
112    byte[][] families = { cf_essential, cf_joined };
113
114    final TableName tableName = TableName.valueOf(name.getMethodName());
115    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
116    for (byte[] family : families) {
117      ColumnFamilyDescriptor familyDescriptor = ColumnFamilyDescriptorBuilder.newBuilder(family)
118        .setDataBlockEncoding(blockEncoding).build();
119      builder.setColumnFamily(familyDescriptor);
120    }
121    TableDescriptor tableDescriptor = builder.build();
122    TEST_UTIL.getAdmin().createTable(tableDescriptor);
123    Table ht = TEST_UTIL.getConnection().getTable(tableName);
124
125    long rows_to_insert = 1000;
126    int insert_batch = 20;
127
128    LOG.info("Make " + Long.toString(rows_to_insert) + " rows, total size = "
129      + Float.toString(rows_to_insert * valueWidth / 1024 / 1024) + " MB");
130
131    long time = System.nanoTime();
132    Random rand = ThreadLocalRandom.current();
133    byte[] val_large = new byte[valueWidth];
134    List<Put> puts = new ArrayList<>();
135    for (long i = 0; i < rows_to_insert; i++) {
136      Put put = new Put(Bytes.toBytes(Long.toString(i)));
137      if (rand.nextInt(100) <= selectionRatio) {
138        put.addColumn(cf_essential, col_name, flag_yes);
139      } else {
140        put.addColumn(cf_essential, col_name, flag_no);
141      }
142      put.addColumn(cf_joined, col_name, val_large);
143      puts.add(put);
144      if (puts.size() >= insert_batch) {
145        ht.put(puts);
146        puts.clear();
147      }
148    }
149    if (!puts.isEmpty()) {
150      ht.put(puts);
151      puts.clear();
152    }
153
154    LOG.info("Data generated in " + Double.toString((System.nanoTime() - time) / 1000000000.0)
155      + " seconds");
156
157    boolean slow = true;
158    for (int i = 0; i < 10; ++i) {
159      runScanner(ht, slow);
160      slow = !slow;
161    }
162
163    ht.close();
164  }
165
166  private void runScanner(Table table, boolean slow) throws Exception {
167    long time = System.nanoTime();
168    Scan scan = new Scan();
169    scan.addColumn(cf_essential, col_name);
170    scan.addColumn(cf_joined, col_name);
171
172    SingleColumnValueFilter filter =
173      new SingleColumnValueFilter(cf_essential, col_name, CompareOperator.EQUAL, flag_yes);
174    filter.setFilterIfMissing(true);
175    scan.setFilter(filter);
176    scan.setLoadColumnFamiliesOnDemand(!slow);
177
178    ResultScanner result_scanner = table.getScanner(scan);
179    Result res;
180    long rows_count = 0;
181    while ((res = result_scanner.next()) != null) {
182      rows_count++;
183    }
184
185    double timeSec = (System.nanoTime() - time) / 1000000000.0;
186    result_scanner.close();
187    LOG.info((slow ? "Slow" : "Joined") + " scanner finished in " + Double.toString(timeSec)
188      + " seconds, got " + Long.toString(rows_count / 2) + " rows");
189  }
190
191  private static Options options = new Options();
192
193  /**
194   * Command line interface:
195   * @throws IOException if there is a bug while reading from disk
196   */
197  public static void main(final String[] args) throws Exception {
198    Option encodingOption =
199      new Option("e", "blockEncoding", true, "Data block encoding; Default: FAST_DIFF");
200    encodingOption.setRequired(false);
201    options.addOption(encodingOption);
202
203    Option ratioOption = new Option("r", "selectionRatio", true,
204      "Ratio of selected rows using essential column family");
205    ratioOption.setRequired(false);
206    options.addOption(ratioOption);
207
208    Option widthOption =
209      new Option("w", "valueWidth", true, "Width of value for non-essential column family");
210    widthOption.setRequired(false);
211    options.addOption(widthOption);
212
213    CommandLineParser parser = new GnuParser();
214    CommandLine cmd = parser.parse(options, args);
215    if (args.length < 1) {
216      HelpFormatter formatter = new HelpFormatter();
217      formatter.printHelp("TestJoinedScanners", options, true);
218    }
219
220    if (cmd.hasOption("e")) {
221      blockEncoding = DataBlockEncoding.valueOf(cmd.getOptionValue("e"));
222    }
223    if (cmd.hasOption("r")) {
224      selectionRatio = Integer.parseInt(cmd.getOptionValue("r"));
225    }
226    if (cmd.hasOption("w")) {
227      valueWidth = Integer.parseInt(cmd.getOptionValue("w"));
228    }
229    // run the test
230    TestJoinedScanners test = new TestJoinedScanners();
231    test.testJoinedScanners();
232  }
233
234  @Test(expected = DoNotRetryIOException.class)
235  public void testWithReverseScan() throws Exception {
236    try (Connection con = TEST_UTIL.getConnection(); Admin admin = con.getAdmin()) {
237      TableName tableName = TableName.valueOf(name.getMethodName());
238
239      TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
240        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("cf1"))
241        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("cf2")).build();
242      admin.createTable(tableDescriptor);
243
244      try (Table table = con.getTable(tableName)) {
245        SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("cf1"),
246          Bytes.toBytes("col"), CompareOperator.EQUAL, Bytes.toBytes("val"));
247        filter.setFilterIfMissing(true);
248
249        // Reverse scan with loading CFs on demand
250        Scan scan = new Scan();
251        scan.setFilter(filter);
252        scan.setReversed(true);
253        scan.setLoadColumnFamiliesOnDemand(true);
254
255        try (ResultScanner scanner = table.getScanner(scan)) {
256          // DoNotRetryIOException should occur
257          scanner.next();
258        }
259      }
260    }
261  }
262}