001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import static org.junit.Assert.assertEquals;
021
022import java.io.IOException;
023import java.nio.ByteBuffer;
024import java.util.ArrayList;
025import java.util.Arrays;
026import java.util.List;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.Cell;
029import org.apache.hadoop.hbase.CellUtil;
030import org.apache.hadoop.hbase.HBaseClassTestRule;
031import org.apache.hadoop.hbase.HBaseTestingUtility;
032import org.apache.hadoop.hbase.HConstants;
033import org.apache.hadoop.hbase.TableName;
034import org.apache.hadoop.hbase.client.Durability;
035import org.apache.hadoop.hbase.client.Put;
036import org.apache.hadoop.hbase.client.Result;
037import org.apache.hadoop.hbase.client.ResultScanner;
038import org.apache.hadoop.hbase.client.Scan;
039import org.apache.hadoop.hbase.client.Table;
040import org.apache.hadoop.hbase.filter.FilterList.Operator;
041import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
042import org.apache.hadoop.hbase.regionserver.HRegion;
043import org.apache.hadoop.hbase.regionserver.RegionScanner;
044import org.apache.hadoop.hbase.testclassification.FilterTests;
045import org.apache.hadoop.hbase.testclassification.LargeTests;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
048import org.apache.hadoop.hbase.util.Pair;
049import org.junit.After;
050import org.junit.AfterClass;
051import org.junit.Before;
052import org.junit.BeforeClass;
053import org.junit.ClassRule;
054import org.junit.Rule;
055import org.junit.Test;
056import org.junit.experimental.categories.Category;
057import org.junit.rules.TestName;
058import org.slf4j.Logger;
059import org.slf4j.LoggerFactory;
060
061import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
062
063@Category({ FilterTests.class, LargeTests.class })
064public class TestFuzzyRowFilterEndToEnd {
065
066  @ClassRule
067  public static final HBaseClassTestRule CLASS_RULE =
068    HBaseClassTestRule.forClass(TestFuzzyRowFilterEndToEnd.class);
069
070  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
071  private final static byte fuzzyValue = (byte) 63;
072  private static final Logger LOG = LoggerFactory.getLogger(TestFuzzyRowFilterEndToEnd.class);
073
074  private static int firstPartCardinality = 50;
075  private static int secondPartCardinality = 50;
076  private static int thirdPartCardinality = 50;
077  private static int colQualifiersTotal = 5;
078  private static int totalFuzzyKeys = thirdPartCardinality / 2;
079
080  private static String table = "TestFuzzyRowFilterEndToEnd";
081
082  @Rule
083  public TestName name = new TestName();
084
085  /**
086   * @throws java.lang.Exception
087   */
088  @BeforeClass
089  public static void setUpBeforeClass() throws Exception {
090    Configuration conf = TEST_UTIL.getConfiguration();
091    conf.setInt("hbase.client.scanner.caching", 1000);
092    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
093      ConstantSizeRegionSplitPolicy.class.getName());
094    // set no splits
095    conf.setLong(HConstants.HREGION_MAX_FILESIZE, (1024L) * 1024 * 1024 * 10);
096
097    TEST_UTIL.startMiniCluster();
098  }
099
  /**
   * Shuts down the mini cluster that {@code setUpBeforeClass} started; runs once after all tests
   * of this class.
   * @throws Exception propagated from {@code TEST_UTIL.shutdownMiniCluster()}
   */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }
107
  /**
   * Per-test setup hook. Currently empty: each test creates the table it needs itself.
   * @throws Exception declared for the hook's signature; the empty body throws nothing
   */
  @Before
  public void setUp() throws Exception {
    // Nothing to do.
  }
115
  /**
   * Per-test teardown hook. Currently empty: tests that clean up do so in their own bodies.
   * @throws Exception declared for the hook's signature; the empty body throws nothing
   */
  @After
  public void tearDown() throws Exception {
    // Nothing to do.
  }
123
124  // HBASE-15676 Test that fuzzy info of all fixed bits (0s) finds matching row.
125  @Test
126  public void testAllFixedBits() throws IOException {
127    String cf = "f";
128    String cq = "q";
129
130    Table ht = TEST_UTIL.createTable(TableName.valueOf(name.getMethodName()), Bytes.toBytes(cf),
131      Integer.MAX_VALUE);
132    // Load data
133    String[] rows = new String[] { "\\x9C\\x00\\x044\\x00\\x00\\x00\\x00",
134      "\\x9C\\x00\\x044\\x01\\x00\\x00\\x00", "\\x9C\\x00\\x044\\x00\\x01\\x00\\x00",
135      "\\x9B\\x00\\x044e\\x9B\\x02\\xBB", "\\x9C\\x00\\x044\\x00\\x00\\x01\\x00",
136      "\\x9C\\x00\\x044\\x00\\x01\\x00\\x01", "\\x9B\\x00\\x044e\\xBB\\xB2\\xBB", };
137
138    for (int i = 0; i < rows.length; i++) {
139      Put p = new Put(Bytes.toBytesBinary(rows[i]));
140      p.addColumn(cf.getBytes(), cq.getBytes(), "value".getBytes());
141      ht.put(p);
142    }
143
144    TEST_UTIL.flush();
145
146    // v1 should match all rows, because v2 has the actual fix for this bug
147    testAllFixedBitsRunScanWithMask(ht, rows.length, FuzzyRowFilter.V1_PROCESSED_WILDCARD_MASK);
148    testAllFixedBitsRunScanWithMask(ht, 2, FuzzyRowFilter.V2_PROCESSED_WILDCARD_MASK);
149
150    TEST_UTIL.deleteTable(TableName.valueOf(table));
151  }
152
153  private void testAllFixedBitsRunScanWithMask(Table ht, int expectedRows, byte processedRowMask)
154    throws IOException {
155    List<Pair<byte[], byte[]>> data = new ArrayList<Pair<byte[], byte[]>>();
156    byte[] fuzzyKey = Bytes.toBytesBinary("\\x9B\\x00\\x044e");
157    byte[] mask = new byte[] { 0, 0, 0, 0, 0 };
158
159    // copy the fuzzy key and mask to test HBASE-18617
160    byte[] copyFuzzyKey = Arrays.copyOf(fuzzyKey, fuzzyKey.length);
161    byte[] copyMask = Arrays.copyOf(mask, mask.length);
162
163    data.add(new Pair<>(fuzzyKey, mask));
164    FuzzyRowFilter filter = new FuzzyRowFilter(data, processedRowMask);
165
166    Scan scan = new Scan();
167    scan.setFilter(filter);
168
169    ResultScanner scanner = ht.getScanner(scan);
170    int total = 0;
171    while (scanner.next() != null) {
172      total++;
173    }
174    assertEquals(expectedRows, total);
175
176    assertEquals(true, Arrays.equals(copyFuzzyKey, fuzzyKey));
177    assertEquals(true, Arrays.equals(copyMask, mask));
178  }
179
180  @Test
181  public void testHBASE14782() throws IOException {
182    String cf = "f";
183    String cq = "q";
184
185    Table ht = TEST_UTIL.createTable(TableName.valueOf(name.getMethodName()), Bytes.toBytes(cf),
186      Integer.MAX_VALUE);
187    // Load data
188    String[] rows =
189      new String[] { "\\x9C\\x00\\x044\\x00\\x00\\x00\\x00", "\\x9C\\x00\\x044\\x01\\x00\\x00\\x00",
190        "\\x9C\\x00\\x044\\x00\\x01\\x00\\x00", "\\x9C\\x00\\x044\\x00\\x00\\x01\\x00",
191        "\\x9C\\x00\\x044\\x00\\x01\\x00\\x01", "\\x9B\\x00\\x044e\\xBB\\xB2\\xBB", };
192
193    String badRow = "\\x9C\\x00\\x03\\xE9e\\xBB{X\\x1Fwts\\x1F\\x15vRX";
194
195    for (int i = 0; i < rows.length; i++) {
196      Put p = new Put(Bytes.toBytesBinary(rows[i]));
197      p.addColumn(cf.getBytes(), cq.getBytes(), "value".getBytes());
198      ht.put(p);
199    }
200
201    Put p = new Put(Bytes.toBytesBinary(badRow));
202    p.addColumn(cf.getBytes(), cq.getBytes(), "value".getBytes());
203    ht.put(p);
204
205    TEST_UTIL.flush();
206
207    testHBASE14782RunScanWithMask(ht, rows.length, FuzzyRowFilter.V1_PROCESSED_WILDCARD_MASK);
208    testHBASE14782RunScanWithMask(ht, rows.length, FuzzyRowFilter.V2_PROCESSED_WILDCARD_MASK);
209
210    TEST_UTIL.deleteTable(TableName.valueOf(name.getMethodName()));
211  }
212
213  private void testHBASE14782RunScanWithMask(Table ht, int expectedRows, byte processedRowMask)
214    throws IOException {
215    List<Pair<byte[], byte[]>> data = new ArrayList<Pair<byte[], byte[]>>();
216
217    byte[] fuzzyKey = Bytes.toBytesBinary("\\x00\\x00\\x044");
218    byte[] mask = new byte[] { 1, 0, 0, 0 };
219    data.add(new Pair<>(fuzzyKey, mask));
220    FuzzyRowFilter filter = new FuzzyRowFilter(data, processedRowMask);
221
222    Scan scan = new Scan();
223    scan.setFilter(filter);
224
225    ResultScanner scanner = ht.getScanner(scan);
226    int total = 0;
227    while (scanner.next() != null) {
228      total++;
229    }
230    assertEquals(expectedRows, total);
231  }
232
233  @Test
234  public void testEndToEnd() throws Exception {
235    String cf = "f";
236
237    Table ht =
238      TEST_UTIL.createTable(TableName.valueOf(table), Bytes.toBytes(cf), Integer.MAX_VALUE);
239
240    // 10 byte row key - (2 bytes 4 bytes 4 bytes)
241    // 4 byte qualifier
242    // 4 byte value
243
244    for (int i0 = 0; i0 < firstPartCardinality; i0++) {
245
246      for (int i1 = 0; i1 < secondPartCardinality; i1++) {
247
248        for (int i2 = 0; i2 < thirdPartCardinality; i2++) {
249          byte[] rk = new byte[10];
250
251          ByteBuffer buf = ByteBuffer.wrap(rk);
252          buf.clear();
253          buf.putShort((short) i0);
254          buf.putInt(i1);
255          buf.putInt(i2);
256          for (int c = 0; c < colQualifiersTotal; c++) {
257            byte[] cq = new byte[4];
258            Bytes.putBytes(cq, 0, Bytes.toBytes(c), 0, 4);
259
260            Put p = new Put(rk);
261            p.setDurability(Durability.SKIP_WAL);
262            p.addColumn(cf.getBytes(), cq, Bytes.toBytes(c));
263            ht.put(p);
264          }
265        }
266      }
267    }
268
269    TEST_UTIL.flush();
270
271    // test passes
272    runTest1(ht, FuzzyRowFilter.V1_PROCESSED_WILDCARD_MASK);
273    runTest1(ht, FuzzyRowFilter.V2_PROCESSED_WILDCARD_MASK);
274    runTest2(ht, FuzzyRowFilter.V1_PROCESSED_WILDCARD_MASK);
275    runTest2(ht, FuzzyRowFilter.V2_PROCESSED_WILDCARD_MASK);
276
277  }
278
279  private void runTest1(Table hTable, byte processedWildcardMask) throws IOException {
280    // [0, 2, ?, ?, ?, ?, 0, 0, 0, 1]
281
282    byte[] mask = new byte[] { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 };
283
284    List<Pair<byte[], byte[]>> list = new ArrayList<>();
285    for (int i = 0; i < totalFuzzyKeys; i++) {
286      byte[] fuzzyKey = new byte[10];
287      ByteBuffer buf = ByteBuffer.wrap(fuzzyKey);
288      buf.clear();
289      buf.putShort((short) 2);
290      for (int j = 0; j < 4; j++) {
291        buf.put(fuzzyValue);
292      }
293      buf.putInt(i);
294
295      Pair<byte[], byte[]> pair = new Pair<>(fuzzyKey, mask);
296      list.add(pair);
297    }
298
299    int expectedSize = secondPartCardinality * totalFuzzyKeys * colQualifiersTotal;
300    FuzzyRowFilter fuzzyRowFilter0 = new FuzzyRowFilter(list, processedWildcardMask);
301    // Filters are not stateless - we can't reuse them
302    FuzzyRowFilter fuzzyRowFilter1 = new FuzzyRowFilter(list, processedWildcardMask);
303
304    // regular test
305    runScanner(hTable, expectedSize, fuzzyRowFilter0);
306    // optimized from block cache
307    runScanner(hTable, expectedSize, fuzzyRowFilter1);
308
309  }
310
311  private void runTest2(Table hTable, byte processedWildcardMask) throws IOException {
312    // [0, 0, ?, ?, ?, ?, 0, 0, 0, 0] , [0, 1, ?, ?, ?, ?, 0, 0, 0, 1]...
313
314    byte[] mask = new byte[] { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 };
315
316    List<Pair<byte[], byte[]>> list = new ArrayList<>();
317
318    for (int i = 0; i < totalFuzzyKeys; i++) {
319      byte[] fuzzyKey = new byte[10];
320      ByteBuffer buf = ByteBuffer.wrap(fuzzyKey);
321      buf.clear();
322      buf.putShort((short) (i * 2));
323      for (int j = 0; j < 4; j++) {
324        buf.put(fuzzyValue);
325      }
326      buf.putInt(i * 2);
327
328      Pair<byte[], byte[]> pair = new Pair<>(fuzzyKey, mask);
329      list.add(pair);
330    }
331
332    int expectedSize = totalFuzzyKeys * secondPartCardinality * colQualifiersTotal;
333
334    FuzzyRowFilter fuzzyRowFilter0 = new FuzzyRowFilter(list, processedWildcardMask);
335    // Filters are not stateless - we can't reuse them
336    FuzzyRowFilter fuzzyRowFilter1 = new FuzzyRowFilter(list, processedWildcardMask);
337
338    // regular test
339    runScanner(hTable, expectedSize, fuzzyRowFilter0);
340    // optimized from block cache
341    runScanner(hTable, expectedSize, fuzzyRowFilter1);
342
343  }
344
345  private void runScanner(Table hTable, int expectedSize, Filter filter) throws IOException {
346
347    String cf = "f";
348    Scan scan = new Scan();
349    scan.addFamily(cf.getBytes());
350    scan.setFilter(filter);
351    List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(table.getBytes());
352    HRegion first = regions.get(0);
353    first.getScanner(scan);
354    RegionScanner scanner = first.getScanner(scan);
355    List<Cell> results = new ArrayList<>();
356    // Result result;
357    long timeBeforeScan = EnvironmentEdgeManager.currentTime();
358    int found = 0;
359    while (scanner.next(results)) {
360      found += results.size();
361      results.clear();
362    }
363    found += results.size();
364    long scanTime = EnvironmentEdgeManager.currentTime() - timeBeforeScan;
365    scanner.close();
366
367    LOG.info("\nscan time = " + scanTime + "ms");
368    LOG.info("found " + found + " results\n");
369
370    assertEquals(expectedSize, found);
371  }
372
373  @SuppressWarnings("deprecation")
374  @Test
375  public void testFilterList() throws Exception {
376    String cf = "f";
377    Table ht = TEST_UTIL.createTable(TableName.valueOf(name.getMethodName()), Bytes.toBytes(cf),
378      Integer.MAX_VALUE);
379
380    // 10 byte row key - (2 bytes 4 bytes 4 bytes)
381    // 4 byte qualifier
382    // 4 byte value
383
384    for (int i1 = 0; i1 < 5; i1++) {
385      for (int i2 = 0; i2 < 5; i2++) {
386        byte[] rk = new byte[10];
387
388        ByteBuffer buf = ByteBuffer.wrap(rk);
389        buf.clear();
390        buf.putShort((short) 2);
391        buf.putInt(i1);
392        buf.putInt(i2);
393
394        // Each row contains 5 columns
395        for (int c = 0; c < 5; c++) {
396          byte[] cq = new byte[4];
397          Bytes.putBytes(cq, 0, Bytes.toBytes(c), 0, 4);
398
399          Put p = new Put(rk);
400          p.setDurability(Durability.SKIP_WAL);
401          p.addColumn(cf.getBytes(), cq, Bytes.toBytes(c));
402          ht.put(p);
403          LOG.info(
404            "Inserting: rk: " + Bytes.toStringBinary(rk) + " cq: " + Bytes.toStringBinary(cq));
405        }
406      }
407    }
408
409    TEST_UTIL.flush();
410
411    // test passes if we get back 5 KV's (1 row)
412    runTest(ht, 5);
413
414  }
415
416  @SuppressWarnings("unchecked")
417  private void runTest(Table hTable, int expectedSize) throws IOException {
418    // [0, 2, ?, ?, ?, ?, 0, 0, 0, 1]
419    byte[] fuzzyKey1 = new byte[10];
420    ByteBuffer buf = ByteBuffer.wrap(fuzzyKey1);
421    buf.clear();
422    buf.putShort((short) 2);
423    for (int i = 0; i < 4; i++)
424      buf.put(fuzzyValue);
425    buf.putInt((short) 1);
426    byte[] mask1 = new byte[] { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 };
427
428    byte[] fuzzyKey2 = new byte[10];
429    buf = ByteBuffer.wrap(fuzzyKey2);
430    buf.clear();
431    buf.putShort((short) 2);
432    buf.putInt((short) 2);
433    for (int i = 0; i < 4; i++)
434      buf.put(fuzzyValue);
435
436    byte[] mask2 = new byte[] { 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 };
437
438    Pair<byte[], byte[]> pair1 = new Pair<>(fuzzyKey1, mask1);
439    Pair<byte[], byte[]> pair2 = new Pair<>(fuzzyKey2, mask2);
440
441    FuzzyRowFilter fuzzyRowFilter1 = new FuzzyRowFilter(Lists.newArrayList(pair1));
442    FuzzyRowFilter fuzzyRowFilter2 = new FuzzyRowFilter(Lists.newArrayList(pair2));
443    // regular test - we expect 1 row back (5 KVs)
444    runScanner(hTable, expectedSize, fuzzyRowFilter1, fuzzyRowFilter2);
445  }
446
447  private void runScanner(Table hTable, int expectedSize, Filter filter1, Filter filter2)
448    throws IOException {
449    String cf = "f";
450    Scan scan = new Scan();
451    scan.addFamily(cf.getBytes());
452    FilterList filterList = new FilterList(Operator.MUST_PASS_ALL, filter1, filter2);
453    scan.setFilter(filterList);
454
455    ResultScanner scanner = hTable.getScanner(scan);
456    List<Cell> results = new ArrayList<>();
457    Result result;
458    long timeBeforeScan = EnvironmentEdgeManager.currentTime();
459    while ((result = scanner.next()) != null) {
460      for (Cell kv : result.listCells()) {
461        LOG.info("Got rk: " + Bytes.toStringBinary(CellUtil.cloneRow(kv)) + " cq: "
462          + Bytes.toStringBinary(CellUtil.cloneQualifier(kv)));
463        results.add(kv);
464      }
465    }
466    long scanTime = EnvironmentEdgeManager.currentTime() - timeBeforeScan;
467    scanner.close();
468
469    LOG.info("scan time = " + scanTime + "ms");
470    LOG.info("found " + results.size() + " results");
471
472    assertEquals(expectedSize, results.size());
473  }
474}