001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import static org.junit.Assert.assertEquals;
021
022import java.io.IOException;
023import java.util.ArrayList;
024import java.util.HashMap;
025import java.util.HashSet;
026import java.util.List;
027import java.util.Map;
028import java.util.Set;
029import org.apache.hadoop.hbase.Cell;
030import org.apache.hadoop.hbase.HBaseClassTestRule;
031import org.apache.hadoop.hbase.HBaseTestingUtil;
032import org.apache.hadoop.hbase.KeyValue;
033import org.apache.hadoop.hbase.KeyValueTestUtil;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
036import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
037import org.apache.hadoop.hbase.client.Durability;
038import org.apache.hadoop.hbase.client.Put;
039import org.apache.hadoop.hbase.client.RegionInfo;
040import org.apache.hadoop.hbase.client.RegionInfoBuilder;
041import org.apache.hadoop.hbase.client.Scan;
042import org.apache.hadoop.hbase.client.TableDescriptor;
043import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
044import org.apache.hadoop.hbase.regionserver.HRegion;
045import org.apache.hadoop.hbase.regionserver.InternalScanner;
046import org.apache.hadoop.hbase.testclassification.FilterTests;
047import org.apache.hadoop.hbase.testclassification.MediumTests;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.junit.ClassRule;
050import org.junit.Rule;
051import org.junit.Test;
052import org.junit.experimental.categories.Category;
053import org.junit.rules.TestName;
054
055@Category({FilterTests.class, MediumTests.class})
056public class TestMultipleColumnPrefixFilter {
057
058  @ClassRule
059  public static final HBaseClassTestRule CLASS_RULE =
060      HBaseClassTestRule.forClass(TestMultipleColumnPrefixFilter.class);
061
062  private final static HBaseTestingUtil TEST_UTIL = new
063      HBaseTestingUtil();
064
065  @Rule
066  public TestName name = new TestName();
067
068  @Test
069  public void testMultipleColumnPrefixFilter() throws IOException {
070    String family = "Family";
071    TableDescriptorBuilder tableDescriptorBuilder =
072      TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName()));
073    ColumnFamilyDescriptor columnFamilyDescriptor =
074      ColumnFamilyDescriptorBuilder
075        .newBuilder(Bytes.toBytes(family))
076        .setMaxVersions(3)
077        .build();
078    tableDescriptorBuilder.setColumnFamily(columnFamilyDescriptor);
079    TableDescriptor tableDescriptor = tableDescriptorBuilder.build();
080    // HRegionInfo info = new HRegionInfo(htd, null, null, false);
081    RegionInfo info = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build();
082    HRegion region = HBaseTestingUtil.createRegionAndWAL(info, TEST_UTIL.
083        getDataTestDir(), TEST_UTIL.getConfiguration(), tableDescriptor);
084
085    List<String> rows = generateRandomWords(100, "row");
086    List<String> columns = generateRandomWords(10000, "column");
087    long maxTimestamp = 2;
088
089    List<Cell> kvList = new ArrayList<>();
090
091    Map<String, List<Cell>> prefixMap = new HashMap<>();
092
093    prefixMap.put("p", new ArrayList<>());
094    prefixMap.put("q", new ArrayList<>());
095    prefixMap.put("s", new ArrayList<>());
096
097    String valueString = "ValueString";
098
099    for (String row: rows) {
100      Put p = new Put(Bytes.toBytes(row));
101      p.setDurability(Durability.SKIP_WAL);
102      for (String column: columns) {
103        for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
104          KeyValue kv = KeyValueTestUtil.create(row, family, column, timestamp,
105              valueString);
106          p.add(kv);
107          kvList.add(kv);
108          for (String s: prefixMap.keySet()) {
109            if (column.startsWith(s)) {
110              prefixMap.get(s).add(kv);
111            }
112          }
113        }
114      }
115      region.put(p);
116    }
117
118    MultipleColumnPrefixFilter filter;
119    Scan scan = new Scan();
120    scan.readAllVersions();
121    byte [][] filter_prefix = new byte [2][];
122    filter_prefix[0] = new byte [] {'p'};
123    filter_prefix[1] = new byte [] {'q'};
124
125    filter = new MultipleColumnPrefixFilter(filter_prefix);
126    scan.setFilter(filter);
127    List<Cell> results = new ArrayList<>();
128    InternalScanner scanner = region.getScanner(scan);
129    while (scanner.next(results))
130      ;
131    assertEquals(prefixMap.get("p").size() + prefixMap.get("q").size(), results.size());
132
133    HBaseTestingUtil.closeRegionAndWAL(region);
134  }
135
136  @Test
137  public void testMultipleColumnPrefixFilterWithManyFamilies() throws IOException {
138    String family1 = "Family1";
139    String family2 = "Family2";
140    TableDescriptorBuilder tableDescriptorBuilder =
141      TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName()));
142    ColumnFamilyDescriptor columnFamilyDescriptor =
143      ColumnFamilyDescriptorBuilder
144        .newBuilder(Bytes.toBytes(family1))
145        .setMaxVersions(3)
146        .build();
147    tableDescriptorBuilder.setColumnFamily(columnFamilyDescriptor);
148    columnFamilyDescriptor = ColumnFamilyDescriptorBuilder
149      .newBuilder(Bytes.toBytes(family2))
150      .setMaxVersions(3)
151      .build();
152    tableDescriptorBuilder.setColumnFamily(columnFamilyDescriptor);
153    TableDescriptor tableDescriptor = tableDescriptorBuilder.build();
154    RegionInfo info = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build();
155    HRegion region = HBaseTestingUtil.createRegionAndWAL(info, TEST_UTIL.
156      getDataTestDir(), TEST_UTIL.getConfiguration(), tableDescriptor);
157
158    List<String> rows = generateRandomWords(100, "row");
159    List<String> columns = generateRandomWords(10000, "column");
160    long maxTimestamp = 3;
161
162    List<Cell> kvList = new ArrayList<>();
163
164    Map<String, List<Cell>> prefixMap = new HashMap<>();
165
166    prefixMap.put("p", new ArrayList<>());
167    prefixMap.put("q", new ArrayList<>());
168    prefixMap.put("s", new ArrayList<>());
169
170    String valueString = "ValueString";
171
172    for (String row: rows) {
173      Put p = new Put(Bytes.toBytes(row));
174      p.setDurability(Durability.SKIP_WAL);
175      for (String column: columns) {
176        for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
177          double rand = Math.random();
178          Cell kv;
179          if (rand < 0.5) {
180            kv = KeyValueTestUtil.create(row, family1, column, timestamp, valueString);
181          } else {
182            kv = KeyValueTestUtil.create(row, family2, column, timestamp, valueString);
183          }
184          p.add(kv);
185          kvList.add(kv);
186          for (String s: prefixMap.keySet()) {
187            if (column.startsWith(s)) {
188              prefixMap.get(s).add(kv);
189            }
190          }
191        }
192      }
193      region.put(p);
194    }
195
196    MultipleColumnPrefixFilter filter;
197    Scan scan = new Scan();
198    scan.readAllVersions();
199    byte [][] filter_prefix = new byte [2][];
200    filter_prefix[0] = new byte [] {'p'};
201    filter_prefix[1] = new byte [] {'q'};
202
203    filter = new MultipleColumnPrefixFilter(filter_prefix);
204    scan.setFilter(filter);
205    List<Cell> results = new ArrayList<>();
206    InternalScanner scanner = region.getScanner(scan);
207    while (scanner.next(results))
208      ;
209    assertEquals(prefixMap.get("p").size() + prefixMap.get("q").size(), results.size());
210
211    HBaseTestingUtil.closeRegionAndWAL(region);
212  }
213
214  @Test
215  public void testMultipleColumnPrefixFilterWithColumnPrefixFilter() throws IOException {
216    String family = "Family";
217    TableDescriptorBuilder tableDescriptorBuilder =
218      TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName()));
219    ColumnFamilyDescriptor columnFamilyDescriptor =
220      ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(family)).build();
221    tableDescriptorBuilder.setColumnFamily(columnFamilyDescriptor);
222    TableDescriptor tableDescriptor = tableDescriptorBuilder.build();
223    RegionInfo info = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build();
224    HRegion region = HBaseTestingUtil.createRegionAndWAL(info, TEST_UTIL.
225      getDataTestDir(), TEST_UTIL.getConfiguration(), tableDescriptor);
226
227    List<String> rows = generateRandomWords(100, "row");
228    List<String> columns = generateRandomWords(10000, "column");
229    long maxTimestamp = 2;
230
231    String valueString = "ValueString";
232
233    for (String row: rows) {
234      Put p = new Put(Bytes.toBytes(row));
235      p.setDurability(Durability.SKIP_WAL);
236      for (String column: columns) {
237        for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
238          KeyValue kv = KeyValueTestUtil.create(row, family, column, timestamp,
239              valueString);
240          p.add(kv);
241        }
242      }
243      region.put(p);
244    }
245
246    MultipleColumnPrefixFilter multiplePrefixFilter;
247    Scan scan1 = new Scan();
248    scan1.readAllVersions();
249    byte [][] filter_prefix = new byte [1][];
250    filter_prefix[0] = new byte [] {'p'};
251
252    multiplePrefixFilter = new MultipleColumnPrefixFilter(filter_prefix);
253    scan1.setFilter(multiplePrefixFilter);
254    List<Cell> results1 = new ArrayList<>();
255    InternalScanner scanner1 = region.getScanner(scan1);
256    while (scanner1.next(results1))
257      ;
258
259    ColumnPrefixFilter singlePrefixFilter;
260    Scan scan2 = new Scan();
261    scan2.readAllVersions();
262    singlePrefixFilter = new ColumnPrefixFilter(Bytes.toBytes("p"));
263
264    scan2.setFilter(singlePrefixFilter);
265    List<Cell> results2 = new ArrayList<>();
266    InternalScanner scanner2 = region.getScanner(scan1);
267    while (scanner2.next(results2))
268      ;
269
270    assertEquals(results1.size(), results2.size());
271
272    HBaseTestingUtil.closeRegionAndWAL(region);
273  }
274
275  List<String> generateRandomWords(int numberOfWords, String suffix) {
276    Set<String> wordSet = new HashSet<>();
277    for (int i = 0; i < numberOfWords; i++) {
278      int lengthOfWords = (int) (Math.random()*2) + 1;
279      char[] wordChar = new char[lengthOfWords];
280      for (int j = 0; j < wordChar.length; j++) {
281        wordChar[j] = (char) (Math.random() * 26 + 97);
282      }
283      String word;
284      if (suffix == null) {
285        word = new String(wordChar);
286      } else {
287        word = new String(wordChar) + suffix;
288      }
289      wordSet.add(word);
290    }
291    List<String> wordList = new ArrayList<>(wordSet);
292    return wordList;
293  }
294
295}
296
297