1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.filter;
20
21 import com.google.protobuf.InvalidProtocolBufferException;
22
23 import java.nio.charset.Charset;
24 import java.nio.charset.IllegalCharsetNameException;
25 import java.util.Arrays;
26 import java.util.regex.Pattern;
27
28 import org.apache.commons.logging.Log;
29 import org.apache.commons.logging.LogFactory;
30 import org.apache.hadoop.hbase.classification.InterfaceAudience;
31 import org.apache.hadoop.hbase.classification.InterfaceStability;
32 import org.apache.hadoop.hbase.exceptions.DeserializationException;
33 import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos;
34 import org.apache.hadoop.hbase.util.Bytes;
35
36 import org.jcodings.Encoding;
37 import org.jcodings.EncodingDB;
38 import org.jcodings.specific.UTF8Encoding;
39 import org.joni.Matcher;
40 import org.joni.Option;
41 import org.joni.Regex;
42 import org.joni.Syntax;
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74 @InterfaceAudience.Public
75 @InterfaceStability.Stable
76 public class RegexStringComparator extends ByteArrayComparable {
77
78 private static final Log LOG = LogFactory.getLog(RegexStringComparator.class);
79
80 private Engine engine;
81
82
83 @InterfaceAudience.Public
84 @InterfaceStability.Stable
85 public enum EngineType {
86 JAVA,
87 JONI
88 }
89
90
91
92
93
94
/**
 * Creates a comparator that uses the {@link EngineType#JAVA} engine with
 * {@link Pattern#DOTALL} as the only flag.
 *
 * @param expr the regular expression to match against
 */
public RegexStringComparator(String expr) {
  this(expr, Pattern.DOTALL, EngineType.JAVA);
}

/**
 * Creates a comparator that uses the given engine with {@link Pattern#DOTALL}
 * as the only flag.
 *
 * @param expr the regular expression to match against
 * @param engine which regex engine implementation to use
 */
public RegexStringComparator(String expr, EngineType engine) {
  this(expr, Pattern.DOTALL, engine);
}

/**
 * Creates a comparator that uses the {@link EngineType#JAVA} engine with the
 * given flags.
 *
 * @param expr the regular expression to match against
 * @param flags {@link java.util.regex.Pattern} flag bit mask
 */
public RegexStringComparator(String expr, int flags) {
  this(expr, flags, EngineType.JAVA);
}
117
118
119
120
121
122
123
124 public RegexStringComparator(String expr, int flags, EngineType engine) {
125 super(Bytes.toBytes(expr));
126 switch (engine) {
127 case JAVA:
128 this.engine = new JavaRegexEngine(expr, flags);
129 break;
130 case JONI:
131 this.engine = new JoniRegexEngine(expr, flags);
132 break;
133 }
134 }
135
136
137
138
139
140
141
142
143
144
145
146
147 public void setCharset(final Charset charset) {
148 engine.setCharset(charset.name());
149 }
150
151 @Override
152 public int compareTo(byte[] value, int offset, int length) {
153 return engine.compareTo(value, offset, length);
154 }
155
156
157
158
159 @Override
160 public byte [] toByteArray() {
161 return engine.toByteArray();
162 }
163
164
165
166
167
168
169
170 public static RegexStringComparator parseFrom(final byte [] pbBytes)
171 throws DeserializationException {
172 ComparatorProtos.RegexStringComparator proto;
173 try {
174 proto = ComparatorProtos.RegexStringComparator.parseFrom(pbBytes);
175 } catch (InvalidProtocolBufferException e) {
176 throw new DeserializationException(e);
177 }
178 RegexStringComparator comparator;
179 if (proto.hasEngine()) {
180 EngineType engine = EngineType.valueOf(proto.getEngine());
181 comparator = new RegexStringComparator(proto.getPattern(), proto.getPatternFlags(),
182 engine);
183 } else {
184 comparator = new RegexStringComparator(proto.getPattern(), proto.getPatternFlags());
185 }
186 String charset = proto.getCharset();
187 if (charset.length() > 0) {
188 try {
189 comparator.getEngine().setCharset(charset);
190 } catch (IllegalCharsetNameException e) {
191 LOG.error("invalid charset", e);
192 }
193 }
194 return comparator;
195 }
196
197
198
199
200
201
202 @Override
203 boolean areSerializedFieldsEqual(ByteArrayComparable other) {
204 if (other == this) return true;
205 if (!(other instanceof RegexStringComparator)) return false;
206 RegexStringComparator comparator = (RegexStringComparator)other;
207 return super.areSerializedFieldsEqual(comparator)
208 && engine.getClass().isInstance(comparator.getEngine())
209 && engine.getPattern().equals(comparator.getEngine().getPattern())
210 && engine.getFlags() == comparator.getEngine().getFlags()
211 && engine.getCharset().equals(comparator.getEngine().getCharset());
212 }
213
214 Engine getEngine() {
215 return engine;
216 }
217
218
219
220
221
/**
 * Internal abstraction over a regular expression matching engine, so the
 * comparator can delegate matching and serialization to interchangeable
 * implementations.
 */
interface Engine {

  /**
   * @return the regular expression this engine was configured with
   */
  String getPattern();

  /**
   * @return the engine's match flags as a bit mask; the meaning of the bits
   *         is defined by the implementation
   */
  int getFlags();

  /**
   * @return the name of the charset currently configured on the engine
   */
  String getCharset();

  /**
   * Changes the charset used by the engine.
   *
   * @param charset name of the charset to use
   */
  void setCharset(String charset);

  /**
   * @return the serialized form of the configured matcher
   */
  byte [] toByteArray();

  /**
   * Matches the given byte range against the configured pattern.
   *
   * @param value array holding the bytes to test
   * @param offset index of the first byte to test
   * @param length number of bytes to test
   * @return 0 if the pattern matched, 1 otherwise
   */
  int compareTo(byte[] value, int offset, int length);
}
260
261
262
263
264
265
266 static class JavaRegexEngine implements Engine {
267 private Charset charset = Charset.forName("UTF-8");
268 private Pattern pattern;
269
270 public JavaRegexEngine(String regex, int flags) {
271 this.pattern = Pattern.compile(regex, flags);
272 }
273
274 @Override
275 public String getPattern() {
276 return pattern.toString();
277 }
278
279 @Override
280 public int getFlags() {
281 return pattern.flags();
282 }
283
284 @Override
285 public String getCharset() {
286 return charset.name();
287 }
288
289 @Override
290 public void setCharset(String charset) {
291 this.charset = Charset.forName(charset);
292 }
293
294 @Override
295 public int compareTo(byte[] value, int offset, int length) {
296
297
298 String tmp;
299 if (length < value.length / 2) {
300
301
302 tmp = new String(Arrays.copyOfRange(value, offset, offset + length), charset);
303 } else {
304 tmp = new String(value, offset, length, charset);
305 }
306 return pattern.matcher(tmp).find() ? 0 : 1;
307 }
308
309 @Override
310 public byte[] toByteArray() {
311 ComparatorProtos.RegexStringComparator.Builder builder =
312 ComparatorProtos.RegexStringComparator.newBuilder();
313 builder.setPattern(pattern.pattern());
314 builder.setPatternFlags(pattern.flags());
315 builder.setCharset(charset.name());
316 builder.setEngine(EngineType.JAVA.name());
317 return builder.build().toByteArray();
318 }
319 }
320
321
322
323
324
325
326
327
328
329
330 static class JoniRegexEngine implements Engine {
331 private Encoding encoding = UTF8Encoding.INSTANCE;
332 private String regex;
333 private Regex pattern;
334
335 public JoniRegexEngine(String regex, int flags) {
336 this.regex = regex;
337 byte[] b = Bytes.toBytes(regex);
338 this.pattern = new Regex(b, 0, b.length, patternToJoniFlags(flags), encoding, Syntax.Java);
339 }
340
341 @Override
342 public String getPattern() {
343 return regex;
344 }
345
346 @Override
347 public int getFlags() {
348 return pattern.getOptions();
349 }
350
351 @Override
352 public String getCharset() {
353 return encoding.getCharsetName();
354 }
355
356 @Override
357 public void setCharset(String name) {
358 setEncoding(name);
359 }
360
361 @Override
362 public int compareTo(byte[] value, int offset, int length) {
363
364
365 Matcher m = pattern.matcher(value);
366 return m.search(offset, length, pattern.getOptions()) < 0 ? 1 : 0;
367 }
368
369 @Override
370 public byte[] toByteArray() {
371 ComparatorProtos.RegexStringComparator.Builder builder =
372 ComparatorProtos.RegexStringComparator.newBuilder();
373 builder.setPattern(regex);
374 builder.setPatternFlags(joniToPatternFlags(pattern.getOptions()));
375 builder.setCharset(encoding.getCharsetName());
376 builder.setEngine(EngineType.JONI.name());
377 return builder.build().toByteArray();
378 }
379
380 private int patternToJoniFlags(int flags) {
381 int newFlags = 0;
382 if ((flags & Pattern.CASE_INSENSITIVE) != 0) {
383 newFlags |= Option.IGNORECASE;
384 }
385 if ((flags & Pattern.DOTALL) != 0) {
386
387 newFlags |= Option.MULTILINE;
388 }
389 if ((flags & Pattern.MULTILINE) != 0) {
390
391
392 newFlags &= ~Option.SINGLELINE;
393 newFlags |= Option.NEGATE_SINGLELINE;
394 }
395 return newFlags;
396 }
397
398 private int joniToPatternFlags(int flags) {
399 int newFlags = 0;
400 if ((flags & Option.IGNORECASE) != 0) {
401 newFlags |= Pattern.CASE_INSENSITIVE;
402 }
403
404 if ((flags & Option.MULTILINE) != 0) {
405 newFlags |= Pattern.DOTALL;
406 }
407
408 if ((flags & Option.NEGATE_SINGLELINE) != 0) {
409 newFlags |= Pattern.MULTILINE;
410 }
411 return newFlags;
412 }
413
414 private void setEncoding(String name) {
415 EncodingDB.Entry e = EncodingDB.getEncodings().get(Bytes.toBytes(name));
416 if (e != null) {
417 encoding = e.getEncoding();
418 } else {
419 throw new IllegalCharsetNameException(name);
420 }
421 }
422 }
423 }