001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import static org.junit.Assert.assertEquals;
021
022import java.io.IOException;
023import java.util.ArrayList;
024import java.util.HashMap;
025import java.util.HashSet;
026import java.util.List;
027import java.util.Map;
028import java.util.Set;
029import org.apache.hadoop.hbase.Cell;
030import org.apache.hadoop.hbase.HBaseClassTestRule;
031import org.apache.hadoop.hbase.HBaseTestingUtil;
032import org.apache.hadoop.hbase.KeyValue;
033import org.apache.hadoop.hbase.KeyValueTestUtil;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
036import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
037import org.apache.hadoop.hbase.client.Durability;
038import org.apache.hadoop.hbase.client.Put;
039import org.apache.hadoop.hbase.client.RegionInfo;
040import org.apache.hadoop.hbase.client.RegionInfoBuilder;
041import org.apache.hadoop.hbase.client.Scan;
042import org.apache.hadoop.hbase.client.TableDescriptor;
043import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
044import org.apache.hadoop.hbase.regionserver.HRegion;
045import org.apache.hadoop.hbase.regionserver.InternalScanner;
046import org.apache.hadoop.hbase.testclassification.FilterTests;
047import org.apache.hadoop.hbase.testclassification.MediumTests;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.junit.ClassRule;
050import org.junit.Rule;
051import org.junit.Test;
052import org.junit.experimental.categories.Category;
053import org.junit.rules.TestName;
054
055@Category({ FilterTests.class, MediumTests.class })
056public class TestMultipleColumnPrefixFilter {
057
058  @ClassRule
059  public static final HBaseClassTestRule CLASS_RULE =
060    HBaseClassTestRule.forClass(TestMultipleColumnPrefixFilter.class);
061
062  private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
063
064  @Rule
065  public TestName name = new TestName();
066
067  @Test
068  public void testMultipleColumnPrefixFilter() throws IOException {
069    String family = "Family";
070    TableDescriptorBuilder tableDescriptorBuilder =
071      TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName()));
072    ColumnFamilyDescriptor columnFamilyDescriptor =
073      ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(family)).setMaxVersions(3).build();
074    tableDescriptorBuilder.setColumnFamily(columnFamilyDescriptor);
075    TableDescriptor tableDescriptor = tableDescriptorBuilder.build();
076    // HRegionInfo info = new HRegionInfo(htd, null, null, false);
077    RegionInfo info = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build();
078    HRegion region = HBaseTestingUtil.createRegionAndWAL(info, TEST_UTIL.getDataTestDir(),
079      TEST_UTIL.getConfiguration(), tableDescriptor);
080
081    List<String> rows = generateRandomWords(100, "row");
082    List<String> columns = generateRandomWords(10000, "column");
083    long maxTimestamp = 2;
084
085    List<Cell> kvList = new ArrayList<>();
086
087    Map<String, List<Cell>> prefixMap = new HashMap<>();
088
089    prefixMap.put("p", new ArrayList<>());
090    prefixMap.put("q", new ArrayList<>());
091    prefixMap.put("s", new ArrayList<>());
092
093    String valueString = "ValueString";
094
095    for (String row : rows) {
096      Put p = new Put(Bytes.toBytes(row));
097      p.setDurability(Durability.SKIP_WAL);
098      for (String column : columns) {
099        for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
100          KeyValue kv = KeyValueTestUtil.create(row, family, column, timestamp, valueString);
101          p.add(kv);
102          kvList.add(kv);
103          for (String s : prefixMap.keySet()) {
104            if (column.startsWith(s)) {
105              prefixMap.get(s).add(kv);
106            }
107          }
108        }
109      }
110      region.put(p);
111    }
112
113    MultipleColumnPrefixFilter filter;
114    Scan scan = new Scan();
115    scan.readAllVersions();
116    byte[][] filter_prefix = new byte[2][];
117    filter_prefix[0] = new byte[] { 'p' };
118    filter_prefix[1] = new byte[] { 'q' };
119
120    filter = new MultipleColumnPrefixFilter(filter_prefix);
121    scan.setFilter(filter);
122    List<Cell> results = new ArrayList<>();
123    InternalScanner scanner = region.getScanner(scan);
124    while (scanner.next(results))
125      ;
126    assertEquals(prefixMap.get("p").size() + prefixMap.get("q").size(), results.size());
127
128    HBaseTestingUtil.closeRegionAndWAL(region);
129  }
130
131  @Test
132  public void testMultipleColumnPrefixFilterWithManyFamilies() throws IOException {
133    String family1 = "Family1";
134    String family2 = "Family2";
135    TableDescriptorBuilder tableDescriptorBuilder =
136      TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName()));
137    ColumnFamilyDescriptor columnFamilyDescriptor =
138      ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(family1)).setMaxVersions(3).build();
139    tableDescriptorBuilder.setColumnFamily(columnFamilyDescriptor);
140    columnFamilyDescriptor =
141      ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(family2)).setMaxVersions(3).build();
142    tableDescriptorBuilder.setColumnFamily(columnFamilyDescriptor);
143    TableDescriptor tableDescriptor = tableDescriptorBuilder.build();
144    RegionInfo info = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build();
145    HRegion region = HBaseTestingUtil.createRegionAndWAL(info, TEST_UTIL.getDataTestDir(),
146      TEST_UTIL.getConfiguration(), tableDescriptor);
147
148    List<String> rows = generateRandomWords(100, "row");
149    List<String> columns = generateRandomWords(10000, "column");
150    long maxTimestamp = 3;
151
152    List<Cell> kvList = new ArrayList<>();
153
154    Map<String, List<Cell>> prefixMap = new HashMap<>();
155
156    prefixMap.put("p", new ArrayList<>());
157    prefixMap.put("q", new ArrayList<>());
158    prefixMap.put("s", new ArrayList<>());
159
160    String valueString = "ValueString";
161
162    for (String row : rows) {
163      Put p = new Put(Bytes.toBytes(row));
164      p.setDurability(Durability.SKIP_WAL);
165      for (String column : columns) {
166        for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
167          double rand = Math.random();
168          Cell kv;
169          if (rand < 0.5) {
170            kv = KeyValueTestUtil.create(row, family1, column, timestamp, valueString);
171          } else {
172            kv = KeyValueTestUtil.create(row, family2, column, timestamp, valueString);
173          }
174          p.add(kv);
175          kvList.add(kv);
176          for (String s : prefixMap.keySet()) {
177            if (column.startsWith(s)) {
178              prefixMap.get(s).add(kv);
179            }
180          }
181        }
182      }
183      region.put(p);
184    }
185
186    MultipleColumnPrefixFilter filter;
187    Scan scan = new Scan();
188    scan.readAllVersions();
189    byte[][] filter_prefix = new byte[2][];
190    filter_prefix[0] = new byte[] { 'p' };
191    filter_prefix[1] = new byte[] { 'q' };
192
193    filter = new MultipleColumnPrefixFilter(filter_prefix);
194    scan.setFilter(filter);
195    List<Cell> results = new ArrayList<>();
196    InternalScanner scanner = region.getScanner(scan);
197    while (scanner.next(results))
198      ;
199    assertEquals(prefixMap.get("p").size() + prefixMap.get("q").size(), results.size());
200
201    HBaseTestingUtil.closeRegionAndWAL(region);
202  }
203
204  @Test
205  public void testMultipleColumnPrefixFilterWithColumnPrefixFilter() throws IOException {
206    String family = "Family";
207    TableDescriptorBuilder tableDescriptorBuilder =
208      TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName()));
209    ColumnFamilyDescriptor columnFamilyDescriptor =
210      ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(family)).build();
211    tableDescriptorBuilder.setColumnFamily(columnFamilyDescriptor);
212    TableDescriptor tableDescriptor = tableDescriptorBuilder.build();
213    RegionInfo info = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build();
214    HRegion region = HBaseTestingUtil.createRegionAndWAL(info, TEST_UTIL.getDataTestDir(),
215      TEST_UTIL.getConfiguration(), tableDescriptor);
216
217    List<String> rows = generateRandomWords(100, "row");
218    List<String> columns = generateRandomWords(10000, "column");
219    long maxTimestamp = 2;
220
221    String valueString = "ValueString";
222
223    for (String row : rows) {
224      Put p = new Put(Bytes.toBytes(row));
225      p.setDurability(Durability.SKIP_WAL);
226      for (String column : columns) {
227        for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
228          KeyValue kv = KeyValueTestUtil.create(row, family, column, timestamp, valueString);
229          p.add(kv);
230        }
231      }
232      region.put(p);
233    }
234
235    MultipleColumnPrefixFilter multiplePrefixFilter;
236    Scan scan1 = new Scan();
237    scan1.readAllVersions();
238    byte[][] filter_prefix = new byte[1][];
239    filter_prefix[0] = new byte[] { 'p' };
240
241    multiplePrefixFilter = new MultipleColumnPrefixFilter(filter_prefix);
242    scan1.setFilter(multiplePrefixFilter);
243    List<Cell> results1 = new ArrayList<>();
244    InternalScanner scanner1 = region.getScanner(scan1);
245    while (scanner1.next(results1))
246      ;
247
248    ColumnPrefixFilter singlePrefixFilter;
249    Scan scan2 = new Scan();
250    scan2.readAllVersions();
251    singlePrefixFilter = new ColumnPrefixFilter(Bytes.toBytes("p"));
252
253    scan2.setFilter(singlePrefixFilter);
254    List<Cell> results2 = new ArrayList<>();
255    InternalScanner scanner2 = region.getScanner(scan1);
256    while (scanner2.next(results2))
257      ;
258
259    assertEquals(results1.size(), results2.size());
260
261    HBaseTestingUtil.closeRegionAndWAL(region);
262  }
263
264  List<String> generateRandomWords(int numberOfWords, String suffix) {
265    Set<String> wordSet = new HashSet<>();
266    for (int i = 0; i < numberOfWords; i++) {
267      int lengthOfWords = (int) (Math.random() * 2) + 1;
268      char[] wordChar = new char[lengthOfWords];
269      for (int j = 0; j < wordChar.length; j++) {
270        wordChar[j] = (char) (Math.random() * 26 + 97);
271      }
272      String word;
273      if (suffix == null) {
274        word = new String(wordChar);
275      } else {
276        word = new String(wordChar) + suffix;
277      }
278      wordSet.add(word);
279    }
280    List<String> wordList = new ArrayList<>(wordSet);
281    return wordList;
282  }
283
284}