/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import static org.junit.Assert.*;

import java.util.regex.Pattern;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.filter.RegexStringComparator.EngineType;
import org.apache.hadoop.hbase.testclassification.FilterTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Tests for {@link RegexStringComparator}: a serialization round trip for each engine type, and a
 * shared table of regex / input pairs that is run against both the JAVA and the JONI engine.
 */
@Category({ FilterTests.class, SmallTests.class })
public class TestRegexComparator {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestRegexComparator.class);

  /**
   * Serializes a comparator with {@code toByteArray()}, parses it back with {@code parseFrom()},
   * and checks that the copy has equal serialized fields and the expected engine class.
   */
  @Test
  public void testSerialization() throws Exception {
    // Default engine is the Java engine
    RegexStringComparator a = new RegexStringComparator("a|b");
    RegexStringComparator b = RegexStringComparator.parseFrom(a.toByteArray());
    assertTrue(a.areSerializedFieldsEqual(b));
    assertTrue(b.getEngine() instanceof RegexStringComparator.JavaRegexEngine);

    // joni engine
    a = new RegexStringComparator("a|b", EngineType.JONI);
    b = RegexStringComparator.parseFrom(a.toByteArray());
    assertTrue(a.areSerializedFieldsEqual(b));
    assertTrue(b.getEngine() instanceof RegexStringComparator.JoniRegexEngine);
  }

  /** Runs every entry of {@link #TEST_CASES} through the JAVA engine. */
  @Test
  public void testJavaEngine() throws Exception {
    assertAllCases(EngineType.JAVA);
  }

  /** Runs every entry of {@link #TEST_CASES} through the JONI engine. */
  @Test
  public void testJoniEngine() throws Exception {
    assertAllCases(EngineType.JONI);
  }

  /**
   * Builds a comparator for each test case with the given engine, compares it against the case's
   * haystack (a {@code compareTo} result of 0 is treated as a match) and asserts that the outcome
   * equals the case's expected value.
   * @param engine the regex engine type to construct each comparator with
   */
  private static void assertAllCases(EngineType engine) throws Exception {
    for (TestCase t : TEST_CASES) {
      boolean result = new RegexStringComparator(t.regex, t.flags, engine)
        .compareTo(Bytes.toBytes(t.haystack)) == 0;
      String message = "Regex '" + t.regex + "' failed test '" + t.haystack + "'";
      // JUnit's argument order is (message, expected, actual); the table value is the expectation.
      assertEquals(message, t.expected, result);
    }
  }

  /** One row of the test table: a regex, its flags, the input to search and the expected result. */
  private static final class TestCase {
    final String regex;
    final String haystack;
    final int flags;
    final boolean expected;

    /** Creates a case that uses {@link Pattern#DOTALL} as its flags. */
    public TestCase(String regex, String haystack, boolean expected) {
      this(regex, Pattern.DOTALL, haystack, expected);
    }

    /** Creates a case with explicit {@link Pattern} flags. */
    public TestCase(String regex, int flags, String haystack, boolean expected) {
      this.regex = regex;
      this.flags = flags;
      this.haystack = haystack;
      this.expected = expected;
    }
  }

  // These are a subset of the regex tests from OpenJDK 7
  private static final TestCase[] TEST_CASES = {
    new TestCase("a|b", "a", true),
    new TestCase("a|b", "b", true),
    new TestCase("a|b", Pattern.CASE_INSENSITIVE, "A", true),
    new TestCase("a|b", Pattern.CASE_INSENSITIVE, "B", true),
    new TestCase("a|b", "z", false),
    new TestCase("a|b|cd", "cd", true),
    new TestCase("z(a|ac)b", "zacb", true),
    new TestCase("[abc]+", "ababab", true),
    new TestCase("[abc]+", "defg", false),
    new TestCase("[abc]+[def]+[ghi]+", "zzzaaddggzzz", true),
    new TestCase("[a-\\u4444]+", "za-9z", true),
    new TestCase("[^abc]+", "ababab", false),
    new TestCase("[^abc]+", "aaabbbcccdefg", true),
    new TestCase("[abc^b]", "b", true),
    new TestCase("[abc[def]]", "b", true),
    new TestCase("[abc[def]]", "e", true),
    new TestCase("[a-c[d-f[g-i]]]", "h", true),
    new TestCase("[a-c[d-f[g-i]]m]", "m", true),
    new TestCase("[a-c&&[d-f]]", "a", false),
    new TestCase("[a-c&&[d-f]]", "z", false),
    new TestCase("[a-m&&m-z&&a-c]", "m", false),
    new TestCase("[a-m&&m-z&&a-z]", "m", true),
    new TestCase("[[a-m]&&[^a-c]]", "a", false),
    new TestCase("[[a-m]&&[^a-c]]", "d", true),
    new TestCase("[[a-c][d-f]&&abc[def]]", "e", true),
    new TestCase("[[a-c]&&[b-d]&&[c-e]]", "c", true),
    new TestCase("[[a-c]&&[b-d][c-e]&&[u-z]]", "c", false),
    new TestCase("[[a]&&[b][c][a]&&[^d]]", "a", true),
    new TestCase("[[a]&&[b][c][a]&&[^d]]", "d", false),
    new TestCase("[[[a-d]&&[c-f]]&&[c]&&c&&[cde]]", "c", true),
    new TestCase("[x[[wz]abc&&bcd[z]]&&[u-z]]", "z", true),
    new TestCase("a.c.+", "a#c%&", true),
    new TestCase("ab.", "ab\n", true),
    new TestCase("(?s)ab.", "ab\n", true),
    new TestCase("ab\\wc", "abcc", true),
    new TestCase("\\W\\w\\W", "#r#", true),
    new TestCase("\\W\\w\\W", "rrrr#ggg", false),
    new TestCase("abc[\\sdef]*", "abc def", true),
    new TestCase("abc[\\sy-z]*", "abc y z", true),
    new TestCase("abc[a-d\\sm-p]*", "abcaa mn p", true),
    new TestCase("\\s\\s\\s", "blah err", false),
    new TestCase("\\S\\S\\s", "blah err", true),
    new TestCase("ab\\dc", "ab9c", true),
    new TestCase("\\d\\d\\d", "blah45", false),
    new TestCase("^abc", "abcdef", true),
    new TestCase("^abc", "bcdabc", false),
    new TestCase("^(a)?a", "a", true),
    new TestCase("^(aa(bb)?)+$", "aabbaa", true),
    new TestCase("((a|b)?b)+", "b", true),
    new TestCase("^(a(b)?)+$", "aba", true),
    new TestCase("^(a(b(c)?)?)?abc", "abc", true),
    new TestCase("^(a(b(c))).*", "abc", true),
    new TestCase("a?b", "aaaab", true),
    new TestCase("a?b", "aaacc", false),
    new TestCase("a??b", "aaaab", true),
    new TestCase("a??b", "aaacc", false),
    new TestCase("a?+b", "aaaab", true),
    new TestCase("a?+b", "aaacc", false),
    new TestCase("a+b", "aaaab", true),
    new TestCase("a+b", "aaacc", false),
    new TestCase("a+?b", "aaaab", true),
    new TestCase("a+?b", "aaacc", false),
    new TestCase("a++b", "aaaab", true),
    new TestCase("a++b", "aaacc", false),
    new TestCase("a{2,3}", "a", false),
    new TestCase("a{2,3}", "aa", true),
    new TestCase("a{2,3}", "aaa", true),
    new TestCase("a{3,}", "zzzaaaazzz", true),
    new TestCase("a{3,}", "zzzaazzz", false),
    new TestCase("abc(?=d)", "zzzabcd", true),
    new TestCase("abc(?=d)", "zzzabced", false),
    new TestCase("abc(?!d)", "zzabcd", false),
    new TestCase("abc(?!d)", "zzabced", true),
    new TestCase("\\w(?<=a)", "###abc###", true),
    new TestCase("\\w(?<=a)", "###ert###", false),
    new TestCase("(?<!a)c", "bc", true),
    new TestCase("(?<!a)c", "ac", false),
    new TestCase("(a+b)+", "ababab", true),
    new TestCase("(a+b)+", "accccd", false),
    new TestCase("(ab)+", "ababab", true),
    new TestCase("(ab)+", "accccd", false),
    new TestCase("(ab)(cd*)", "zzzabczzz", true),
    new TestCase("abc(d)*abc", "abcdddddabc", true),
    new TestCase("a*b", "aaaab", true),
    new TestCase("a*b", "b", true),
    new TestCase("a*b", "aaaac", false),
    new TestCase(".*?b", "aaaab", true),
    new TestCase("a*+b", "aaaab", true),
    new TestCase("a*+b", "b", true),
    new TestCase("a*+b", "aaaac", false),
    new TestCase("(?i)foobar", "fOobAr", true),
    new TestCase("f(?i)oobar", "fOobAr", true),
    new TestCase("f(?i)oobar", "FOobAr", false),
    new TestCase("foo(?i)bar", "fOobAr", false),
    new TestCase("(?i)foo[bar]+", "foObAr", true),
    new TestCase("(?i)foo[a-r]+", "foObAr", true),
    new TestCase("abc(?x)blah", "abcblah", true),
    new TestCase("abc(?x) blah", "abcblah", true),
    new TestCase("abc(?x) blah blech", "abcblahblech", true),
    new TestCase("[\\n-#]", "!", true),
    new TestCase("[\\n-#]", "-", false),
    new TestCase("[\\043]+", "blahblah#blech", true),
    new TestCase("[\\042-\\044]+", "blahblah#blech", true),
    new TestCase("[\\u1234-\\u1236]", "blahblah\u1235blech", true),
    // Note: \043 here is a Java octal escape, so the regex engine receives a literal '#'.
    new TestCase("[^\043]*", "blahblah#blech", true),
    new TestCase("(|f)?+", "foo", true),
  };
}