001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import static org.junit.Assert.assertEquals;
021
022import java.io.IOException;
023import java.nio.ByteBuffer;
024import java.util.ArrayList;
025import java.util.Arrays;
026import java.util.List;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.Cell;
029import org.apache.hadoop.hbase.CellUtil;
030import org.apache.hadoop.hbase.HBaseClassTestRule;
031import org.apache.hadoop.hbase.HBaseTestingUtility;
032import org.apache.hadoop.hbase.HConstants;
033import org.apache.hadoop.hbase.TableName;
034import org.apache.hadoop.hbase.client.Durability;
035import org.apache.hadoop.hbase.client.Put;
036import org.apache.hadoop.hbase.client.Result;
037import org.apache.hadoop.hbase.client.ResultScanner;
038import org.apache.hadoop.hbase.client.Scan;
039import org.apache.hadoop.hbase.client.Table;
040import org.apache.hadoop.hbase.filter.FilterList.Operator;
041import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
042import org.apache.hadoop.hbase.regionserver.HRegion;
043import org.apache.hadoop.hbase.regionserver.RegionScanner;
044import org.apache.hadoop.hbase.testclassification.FilterTests;
045import org.apache.hadoop.hbase.testclassification.LargeTests;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.apache.hadoop.hbase.util.Pair;
048import org.junit.After;
049import org.junit.AfterClass;
050import org.junit.Before;
051import org.junit.BeforeClass;
052import org.junit.ClassRule;
053import org.junit.Rule;
054import org.junit.Test;
055import org.junit.experimental.categories.Category;
056import org.junit.rules.TestName;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
061
062@Category({ FilterTests.class, LargeTests.class })
063public class TestFuzzyRowFilterEndToEnd {
064
065  @ClassRule
066  public static final HBaseClassTestRule CLASS_RULE =
067      HBaseClassTestRule.forClass(TestFuzzyRowFilterEndToEnd.class);
068
069  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
070  private final static byte fuzzyValue = (byte) 63;
071  private static final Logger LOG = LoggerFactory.getLogger(TestFuzzyRowFilterEndToEnd.class);
072
073  private static int firstPartCardinality = 50;
074  private static int secondPartCardinality = 50;
075  private static int thirdPartCardinality = 50;
076  private static int colQualifiersTotal = 5;
077  private static int totalFuzzyKeys = thirdPartCardinality / 2;
078
079  private static String table = "TestFuzzyRowFilterEndToEnd";
080
081  @Rule
082  public TestName name = new TestName();
083
084  /**
085   * @throws java.lang.Exception
086   */
087  @BeforeClass
088  public static void setUpBeforeClass() throws Exception {
089    Configuration conf = TEST_UTIL.getConfiguration();
090    conf.setInt("hbase.client.scanner.caching", 1000);
091    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
092      ConstantSizeRegionSplitPolicy.class.getName());
093    // set no splits
094    conf.setLong(HConstants.HREGION_MAX_FILESIZE, (1024L) * 1024 * 1024 * 10);
095
096    TEST_UTIL.startMiniCluster();
097  }
098
099  /**
100   * @throws java.lang.Exception
101   */
102  @AfterClass
103  public static void tearDownAfterClass() throws Exception {
104    TEST_UTIL.shutdownMiniCluster();
105  }
106
107  /**
108   * @throws java.lang.Exception
109   */
110  @Before
111  public void setUp() throws Exception {
112    // Nothing to do.
113  }
114
115  /**
116   * @throws java.lang.Exception
117   */
118  @After
119  public void tearDown() throws Exception {
120    // Nothing to do.
121  }
122
123  // HBASE-15676 Test that fuzzy info of all fixed bits (0s) finds matching row.
124  @Test
125  public void testAllFixedBits() throws IOException {
126    String cf = "f";
127    String cq = "q";
128
129    Table ht =
130        TEST_UTIL.createTable(TableName.valueOf(name.getMethodName()), Bytes.toBytes(cf), Integer.MAX_VALUE);
131    // Load data
132    String[] rows = new String[] { "\\x9C\\x00\\x044\\x00\\x00\\x00\\x00",
133        "\\x9C\\x00\\x044\\x01\\x00\\x00\\x00", "\\x9C\\x00\\x044\\x00\\x01\\x00\\x00",
134        "\\x9B\\x00\\x044e\\x9B\\x02\\xBB", "\\x9C\\x00\\x044\\x00\\x00\\x01\\x00",
135        "\\x9C\\x00\\x044\\x00\\x01\\x00\\x01", "\\x9B\\x00\\x044e\\xBB\\xB2\\xBB", };
136
137    for (int i = 0; i < rows.length; i++) {
138      Put p = new Put(Bytes.toBytesBinary(rows[i]));
139      p.addColumn(cf.getBytes(), cq.getBytes(), "value".getBytes());
140      ht.put(p);
141    }
142
143    TEST_UTIL.flush();
144
145    List<Pair<byte[], byte[]>> data = new ArrayList<>();
146    byte[] fuzzyKey = Bytes.toBytesBinary("\\x9B\\x00\\x044e");
147    byte[] mask = new byte[] { 0, 0, 0, 0, 0 };
148
149    // copy the fuzzy key and mask to test HBASE-18617
150    byte[] copyFuzzyKey = Arrays.copyOf(fuzzyKey, fuzzyKey.length);
151    byte[] copyMask = Arrays.copyOf(mask, mask.length);
152
153    data.add(new Pair<>(fuzzyKey, mask));
154    FuzzyRowFilter filter = new FuzzyRowFilter(data);
155
156    Scan scan = new Scan();
157    scan.setFilter(filter);
158
159    ResultScanner scanner = ht.getScanner(scan);
160    int total = 0;
161    while (scanner.next() != null) {
162      total++;
163    }
164    assertEquals(2, total);
165
166    assertEquals(true, Arrays.equals(copyFuzzyKey, fuzzyKey));
167    assertEquals(true, Arrays.equals(copyMask, mask));
168
169    TEST_UTIL.deleteTable(TableName.valueOf(name.getMethodName()));
170  }
171
172  @Test
173  public void testHBASE14782() throws IOException
174  {
175    String cf = "f";
176    String cq = "q";
177
178    Table ht =
179        TEST_UTIL.createTable(TableName.valueOf(name.getMethodName()), Bytes.toBytes(cf), Integer.MAX_VALUE);
180    // Load data
181    String[] rows = new String[] {
182      "\\x9C\\x00\\x044\\x00\\x00\\x00\\x00",
183      "\\x9C\\x00\\x044\\x01\\x00\\x00\\x00",
184      "\\x9C\\x00\\x044\\x00\\x01\\x00\\x00",
185      "\\x9C\\x00\\x044\\x00\\x00\\x01\\x00",
186      "\\x9C\\x00\\x044\\x00\\x01\\x00\\x01",
187      "\\x9B\\x00\\x044e\\xBB\\xB2\\xBB",
188    };
189
190    String badRow = "\\x9C\\x00\\x03\\xE9e\\xBB{X\\x1Fwts\\x1F\\x15vRX";
191
192    for(int i=0; i < rows.length; i++){
193      Put p = new Put(Bytes.toBytesBinary(rows[i]));
194      p.addColumn(cf.getBytes(), cq.getBytes(), "value".getBytes());
195      ht.put(p);
196    }
197
198    Put p = new Put(Bytes.toBytesBinary(badRow));
199    p.addColumn(cf.getBytes(), cq.getBytes(), "value".getBytes());
200    ht.put(p);
201
202    TEST_UTIL.flush();
203
204    List<Pair<byte[], byte[]>> data =  new ArrayList<>();
205    byte[] fuzzyKey = Bytes.toBytesBinary("\\x00\\x00\\x044");
206    byte[] mask = new byte[] { 1,0,0,0};
207    data.add(new Pair<>(fuzzyKey, mask));
208    FuzzyRowFilter filter = new FuzzyRowFilter(data);
209
210    Scan scan = new Scan();
211    scan.setFilter(filter);
212
213    ResultScanner scanner = ht.getScanner(scan);
214    int total = 0;
215    while(scanner.next() != null){
216      total++;
217    }
218    assertEquals(rows.length, total);
219    TEST_UTIL.deleteTable(TableName.valueOf(name.getMethodName()));
220  }
221
222  @Test
223  public void testEndToEnd() throws Exception {
224    String cf = "f";
225
226    Table ht =
227        TEST_UTIL.createTable(TableName.valueOf(table), Bytes.toBytes(cf), Integer.MAX_VALUE);
228
229    // 10 byte row key - (2 bytes 4 bytes 4 bytes)
230    // 4 byte qualifier
231    // 4 byte value
232
233    for (int i0 = 0; i0 < firstPartCardinality; i0++) {
234
235      for (int i1 = 0; i1 < secondPartCardinality; i1++) {
236
237        for (int i2 = 0; i2 < thirdPartCardinality; i2++) {
238          byte[] rk = new byte[10];
239
240          ByteBuffer buf = ByteBuffer.wrap(rk);
241          buf.clear();
242          buf.putShort((short) i0);
243          buf.putInt(i1);
244          buf.putInt(i2);
245          for (int c = 0; c < colQualifiersTotal; c++) {
246            byte[] cq = new byte[4];
247            Bytes.putBytes(cq, 0, Bytes.toBytes(c), 0, 4);
248
249            Put p = new Put(rk);
250            p.setDurability(Durability.SKIP_WAL);
251            p.addColumn(cf.getBytes(), cq, Bytes.toBytes(c));
252            ht.put(p);
253          }
254        }
255      }
256    }
257
258    TEST_UTIL.flush();
259
260    // test passes
261    runTest1(ht);
262    runTest2(ht);
263
264  }
265
266  private void runTest1(Table hTable) throws IOException {
267    // [0, 2, ?, ?, ?, ?, 0, 0, 0, 1]
268
269    byte[] mask = new byte[] { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 };
270
271    List<Pair<byte[], byte[]>> list = new ArrayList<>();
272    for (int i = 0; i < totalFuzzyKeys; i++) {
273      byte[] fuzzyKey = new byte[10];
274      ByteBuffer buf = ByteBuffer.wrap(fuzzyKey);
275      buf.clear();
276      buf.putShort((short) 2);
277      for (int j = 0; j < 4; j++) {
278        buf.put(fuzzyValue);
279      }
280      buf.putInt(i);
281
282      Pair<byte[], byte[]> pair = new Pair<>(fuzzyKey, mask);
283      list.add(pair);
284    }
285
286    int expectedSize = secondPartCardinality * totalFuzzyKeys * colQualifiersTotal;
287    FuzzyRowFilter fuzzyRowFilter0 = new FuzzyRowFilter(list);
288    // Filters are not stateless - we can't reuse them
289    FuzzyRowFilter fuzzyRowFilter1 = new FuzzyRowFilter(list);
290
291    // regular test
292    runScanner(hTable, expectedSize, fuzzyRowFilter0);
293    // optimized from block cache
294    runScanner(hTable, expectedSize, fuzzyRowFilter1);
295
296  }
297
298  private void runTest2(Table hTable) throws IOException {
299    // [0, 0, ?, ?, ?, ?, 0, 0, 0, 0] , [0, 1, ?, ?, ?, ?, 0, 0, 0, 1]...
300
301    byte[] mask = new byte[] { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 };
302
303    List<Pair<byte[], byte[]>> list = new ArrayList<>();
304
305    for (int i = 0; i < totalFuzzyKeys; i++) {
306      byte[] fuzzyKey = new byte[10];
307      ByteBuffer buf = ByteBuffer.wrap(fuzzyKey);
308      buf.clear();
309      buf.putShort((short) (i * 2));
310      for (int j = 0; j < 4; j++) {
311        buf.put(fuzzyValue);
312      }
313      buf.putInt(i * 2);
314
315      Pair<byte[], byte[]> pair = new Pair<>(fuzzyKey, mask);
316      list.add(pair);
317    }
318
319    int expectedSize = totalFuzzyKeys * secondPartCardinality * colQualifiersTotal;
320
321    FuzzyRowFilter fuzzyRowFilter0 = new FuzzyRowFilter(list);
322    // Filters are not stateless - we can't reuse them
323    FuzzyRowFilter fuzzyRowFilter1 = new FuzzyRowFilter(list);
324
325    // regular test
326    runScanner(hTable, expectedSize, fuzzyRowFilter0);
327    // optimized from block cache
328    runScanner(hTable, expectedSize, fuzzyRowFilter1);
329
330  }
331
332  private void runScanner(Table hTable, int expectedSize, Filter filter) throws IOException {
333
334    String cf = "f";
335    Scan scan = new Scan();
336    scan.addFamily(cf.getBytes());
337    scan.setFilter(filter);
338    List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(table.getBytes());
339    HRegion first = regions.get(0);
340    first.getScanner(scan);
341    RegionScanner scanner = first.getScanner(scan);
342    List<Cell> results = new ArrayList<>();
343    // Result result;
344    long timeBeforeScan = System.currentTimeMillis();
345    int found = 0;
346    while (scanner.next(results)) {
347      found += results.size();
348      results.clear();
349    }
350    found += results.size();
351    long scanTime = System.currentTimeMillis() - timeBeforeScan;
352    scanner.close();
353
354    LOG.info("\nscan time = " + scanTime + "ms");
355    LOG.info("found " + found + " results\n");
356
357    assertEquals(expectedSize, found);
358  }
359
360  @SuppressWarnings("deprecation")
361  @Test
362  public void testFilterList() throws Exception {
363    String cf = "f";
364    Table ht =
365        TEST_UTIL.createTable(TableName.valueOf(name.getMethodName()), Bytes.toBytes(cf), Integer.MAX_VALUE);
366
367    // 10 byte row key - (2 bytes 4 bytes 4 bytes)
368    // 4 byte qualifier
369    // 4 byte value
370
371    for (int i1 = 0; i1 < 5; i1++) {
372      for (int i2 = 0; i2 < 5; i2++) {
373        byte[] rk = new byte[10];
374
375        ByteBuffer buf = ByteBuffer.wrap(rk);
376        buf.clear();
377        buf.putShort((short) 2);
378        buf.putInt(i1);
379        buf.putInt(i2);
380
381        // Each row contains 5 columns
382        for (int c = 0; c < 5; c++) {
383          byte[] cq = new byte[4];
384          Bytes.putBytes(cq, 0, Bytes.toBytes(c), 0, 4);
385
386          Put p = new Put(rk);
387          p.setDurability(Durability.SKIP_WAL);
388          p.addColumn(cf.getBytes(), cq, Bytes.toBytes(c));
389          ht.put(p);
390          LOG.info("Inserting: rk: " + Bytes.toStringBinary(rk) + " cq: "
391              + Bytes.toStringBinary(cq));
392        }
393      }
394    }
395
396    TEST_UTIL.flush();
397
398    // test passes if we get back 5 KV's (1 row)
399    runTest(ht, 5);
400
401  }
402
403  @SuppressWarnings("unchecked")
404  private void runTest(Table hTable, int expectedSize) throws IOException {
405    // [0, 2, ?, ?, ?, ?, 0, 0, 0, 1]
406    byte[] fuzzyKey1 = new byte[10];
407    ByteBuffer buf = ByteBuffer.wrap(fuzzyKey1);
408    buf.clear();
409    buf.putShort((short) 2);
410    for (int i = 0; i < 4; i++)
411      buf.put(fuzzyValue);
412    buf.putInt((short) 1);
413    byte[] mask1 = new byte[] { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 };
414
415    byte[] fuzzyKey2 = new byte[10];
416    buf = ByteBuffer.wrap(fuzzyKey2);
417    buf.clear();
418    buf.putShort((short) 2);
419    buf.putInt((short) 2);
420    for (int i = 0; i < 4; i++)
421      buf.put(fuzzyValue);
422
423    byte[] mask2 = new byte[] { 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 };
424
425    Pair<byte[], byte[]> pair1 = new Pair<>(fuzzyKey1, mask1);
426    Pair<byte[], byte[]> pair2 = new Pair<>(fuzzyKey2, mask2);
427
428    FuzzyRowFilter fuzzyRowFilter1 = new FuzzyRowFilter(Lists.newArrayList(pair1));
429    FuzzyRowFilter fuzzyRowFilter2 = new FuzzyRowFilter(Lists.newArrayList(pair2));
430    // regular test - we expect 1 row back (5 KVs)
431    runScanner(hTable, expectedSize, fuzzyRowFilter1, fuzzyRowFilter2);
432  }
433
434  private void runScanner(Table hTable, int expectedSize, Filter filter1, Filter filter2)
435      throws IOException {
436    String cf = "f";
437    Scan scan = new Scan();
438    scan.addFamily(cf.getBytes());
439    FilterList filterList = new FilterList(Operator.MUST_PASS_ALL, filter1, filter2);
440    scan.setFilter(filterList);
441
442    ResultScanner scanner = hTable.getScanner(scan);
443    List<Cell> results = new ArrayList<>();
444    Result result;
445    long timeBeforeScan = System.currentTimeMillis();
446    while ((result = scanner.next()) != null) {
447      for (Cell kv : result.listCells()) {
448        LOG.info("Got rk: " + Bytes.toStringBinary(CellUtil.cloneRow(kv)) + " cq: "
449            + Bytes.toStringBinary(CellUtil.cloneQualifier(kv)));
450        results.add(kv);
451      }
452    }
453    long scanTime = System.currentTimeMillis() - timeBeforeScan;
454    scanner.close();
455
456    LOG.info("scan time = " + scanTime + "ms");
457    LOG.info("found " + results.size() + " results");
458
459    assertEquals(expectedSize, results.size());
460  }
461}