/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EmptyStackException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.regex.Pattern;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This class allows a user to specify a filter via a string. The string is parsed using the
 * methods of this class and a filter object is constructed. This filter object is then wrapped in
 * a scanner object which is then returned.
 * <p>
 * This class addresses the HBASE-4168 JIRA. More documentation on this Filter Language can be
 * found at: https://issues.apache.org/jira/browse/HBASE-4176
 */
@InterfaceAudience.Public
public class ParseFilter {
  private static final Logger LOG = LoggerFactory.getLogger(ParseFilter.class);

  /** Name of the static factory method looked up reflectively on every filter class. */
  private static final String FACTORY_METHOD_NAME = "createFilterFromArguments";

  /** Operator buffer to precedence; a smaller number means a higher priority. */
  private static final Map<ByteBuffer, Integer> operatorPrecedenceHashMap = new HashMap<>();

  /** Filter name (as written in a filter string) to fully qualified filter class name. */
  private static final Map<String, String> filterHashMap = new HashMap<>();

  static {
    // Registers all the filters supported by the Filter Language
    final String[] builtInFilters = { "KeyOnlyFilter", "FirstKeyOnlyFilter", "PrefixFilter",
      "ColumnPrefixFilter", "MultipleColumnPrefixFilter", "ColumnCountGetFilter", "PageFilter",
      "ColumnPaginationFilter", "InclusiveStopFilter", "TimestampsFilter", "RowFilter",
      "FamilyFilter", "QualifierFilter", "ValueFilter", "ColumnRangeFilter",
      "SingleColumnValueFilter", "SingleColumnValueExcludeFilter", "DependentColumnFilter",
      "ColumnValueFilter" };
    for (String name : builtInFilters) {
      filterHashMap.put(name, ParseConstants.FILTER_PACKAGE + "." + name);
    }

    // Creates the operator precedence table (see hasHigherPriority for how it is compared)
    operatorPrecedenceHashMap.put(ParseConstants.SKIP_BUFFER, 1);
    operatorPrecedenceHashMap.put(ParseConstants.WHILE_BUFFER, 1);
    operatorPrecedenceHashMap.put(ParseConstants.AND_BUFFER, 2);
    operatorPrecedenceHashMap.put(ParseConstants.OR_BUFFER, 3);
  }

  /**
   * Parses the filterString and constructs a filter using it.
   * @param filterString filter string given by the user
   * @return filter object we constructed
   * @throws IllegalArgumentException if the filter string is malformed
   */
  public Filter parseFilterString(String filterString) throws CharacterCodingException {
    return parseFilterString(Bytes.toBytes(filterString));
  }

  /**
   * Parses the filterString and constructs a filter using it.
   * <p>
   * The input is scanned once, left to right, with one stack for operators and parentheses and
   * one stack for the filters built so far.
   * @param filterStringAsByteArray filter string given by the user
   * @return filter object we constructed
   * @throws IllegalArgumentException if the filter string is malformed
   */
  public Filter parseFilterString(byte[] filterStringAsByteArray) throws CharacterCodingException {
    // stack for the operators and parenthesis
    Stack<ByteBuffer> operatorStack = new Stack<>();
    // stack for the filter objects
    Stack<Filter> filterStack = new Stack<>();

    Filter filter = null;
    for (int i = 0; i < filterStringAsByteArray.length; i++) {
      if (filterStringAsByteArray[i] == ParseConstants.LPAREN) {
        // LPAREN found
        operatorStack.push(ParseConstants.LPAREN_BUFFER);
      } else if (
        filterStringAsByteArray[i] == ParseConstants.WHITESPACE
          || filterStringAsByteArray[i] == ParseConstants.TAB
      ) {
        // WHITESPACE or TAB found
        continue;
      } else if (checkForOr(filterStringAsByteArray, i)) {
        // OR found; advance to the keyword's last byte, the loop increment steps past it
        i += ParseConstants.OR_ARRAY.length - 1;
        reduce(operatorStack, filterStack, ParseConstants.OR_BUFFER);
        operatorStack.push(ParseConstants.OR_BUFFER);
      } else if (checkForAnd(filterStringAsByteArray, i)) {
        // AND found
        i += ParseConstants.AND_ARRAY.length - 1;
        reduce(operatorStack, filterStack, ParseConstants.AND_BUFFER);
        operatorStack.push(ParseConstants.AND_BUFFER);
      } else if (checkForSkip(filterStringAsByteArray, i)) {
        // SKIP found
        i += ParseConstants.SKIP_ARRAY.length - 1;
        reduce(operatorStack, filterStack, ParseConstants.SKIP_BUFFER);
        operatorStack.push(ParseConstants.SKIP_BUFFER);
      } else if (checkForWhile(filterStringAsByteArray, i)) {
        // WHILE found
        i += ParseConstants.WHILE_ARRAY.length - 1;
        reduce(operatorStack, filterStack, ParseConstants.WHILE_BUFFER);
        operatorStack.push(ParseConstants.WHILE_BUFFER);
      } else if (filterStringAsByteArray[i] == ParseConstants.RPAREN) {
        // RPAREN found
        if (operatorStack.empty()) {
          throw new IllegalArgumentException("Mismatched parenthesis");
        }
        ByteBuffer argumentOnTopOfStack = operatorStack.peek();
        if (argumentOnTopOfStack.equals(ParseConstants.LPAREN_BUFFER)) {
          // Nothing to evaluate between the parentheses; just drop the LPAREN
          operatorStack.pop();
          continue;
        }
        // Evaluate operators until the matching LPAREN has been popped
        while (!argumentOnTopOfStack.equals(ParseConstants.LPAREN_BUFFER)) {
          filterStack.push(popArguments(operatorStack, filterStack));
          if (operatorStack.empty()) {
            throw new IllegalArgumentException("Mismatched parenthesis");
          }
          argumentOnTopOfStack = operatorStack.pop();
        }
      } else {
        // SimpleFilterExpression found
        byte[] filterSimpleExpression = extractFilterSimpleExpression(filterStringAsByteArray, i);
        i += (filterSimpleExpression.length - 1);
        filter = parseSimpleFilterExpression(filterSimpleExpression);
        filterStack.push(filter);
      }
    }

    // Finished parsing filterString
    while (!operatorStack.empty()) {
      filterStack.push(popArguments(operatorStack, filterStack));
    }
    if (filterStack.empty()) {
      throw new IllegalArgumentException("Incorrect Filter String");
    }
    filter = filterStack.pop();
    if (!filterStack.empty()) {
      // More than one filter left over means operands were not joined by an operator
      throw new IllegalArgumentException("Incorrect Filter String");
    }
    return filter;
  }

  /**
   * Extracts a simple filter expression from the filter string given by the user.
   * <p>
   * A simpleFilterExpression is of the form: FilterName('arg', 'arg', 'arg'). The user given
   * filter string can have many simpleFilterExpressions combined using operators.
   * <p>
   * This function extracts a simpleFilterExpression from the larger filterString given the start
   * offset of the simpler expression. The expression ends at the first RPAREN that is seen while
   * the number of unescaped single quotes read so far is even.
   * @param filterStringAsByteArray     filter string given by the user
   * @param filterExpressionStartOffset start index of the simple filter expression
   * @return byte array containing the simple filter expression
   * @throws IllegalArgumentException if no terminating RPAREN is found
   */
  public byte[] extractFilterSimpleExpression(byte[] filterStringAsByteArray,
    int filterExpressionStartOffset) throws CharacterCodingException {
    int quoteCount = 0;
    for (int i = filterExpressionStartOffset; i < filterStringAsByteArray.length; i++) {
      if (filterStringAsByteArray[i] == ParseConstants.SINGLE_QUOTE) {
        if (isQuoteUnescaped(filterStringAsByteArray, i)) {
          quoteCount++;
        } else {
          // To skip the next quote that has been escaped
          i++;
        }
      }
      if (filterStringAsByteArray[i] == ParseConstants.RPAREN && (quoteCount % 2) == 0) {
        int expressionLength = i - filterExpressionStartOffset + 1;
        byte[] filterSimpleExpression = new byte[expressionLength];
        Bytes.putBytes(filterSimpleExpression, 0, filterStringAsByteArray,
          filterExpressionStartOffset, expressionLength);
        return filterSimpleExpression;
      }
    }
    throw new IllegalArgumentException("Incorrect Filter String");
  }

  /**
   * Constructs a filter object given a simple filter expression.
   * <p>
   * The filter name is resolved to a class name through the filter registry, and the static
   * {@code createFilterFromArguments(ArrayList)} method of that class is invoked reflectively
   * with the parsed arguments.
   * @param filterStringAsByteArray filter string given by the user
   * @return filter object we constructed
   * @throws IllegalArgumentException if the filter name is not registered, or if the filter could
   *                                  not be created reflectively (the underlying failure is
   *                                  attached as the cause)
   */
  public Filter parseSimpleFilterExpression(byte[] filterStringAsByteArray)
    throws CharacterCodingException {

    String filterName = Bytes.toString(getFilterName(filterStringAsByteArray));
    ArrayList<byte[]> filterArguments = getFilterArguments(filterStringAsByteArray);
    String filterClassName = filterHashMap.get(filterName);
    if (filterClassName == null) {
      throw new IllegalArgumentException("Filter Name " + filterName + " not supported");
    }

    // Remember what went wrong so the caller gets it as the cause, not only in the log
    Throwable failure;
    try {
      Class<?> c = Class.forName(filterClassName);
      Method m = c.getDeclaredMethod(FACTORY_METHOD_NAME, ArrayList.class);
      return (Filter) m.invoke(null, filterArguments);
    } catch (ClassNotFoundException e) {
      LOG.error("Could not find class {}", filterClassName, e);
      failure = e;
    } catch (NoSuchMethodException e) {
      LOG.error("Could not find method {} in {}", FACTORY_METHOD_NAME, filterClassName, e);
      failure = e;
    } catch (IllegalAccessException e) {
      LOG.error("Unable to access specified class {}", filterClassName, e);
      failure = e;
    } catch (InvocationTargetException e) {
      LOG.error("Method {} threw an exception for {}", FACTORY_METHOD_NAME, filterClassName, e);
      // Surface the exception thrown by the factory method itself when there is one
      failure = e.getCause() != null ? e.getCause() : e;
    }
    throw new IllegalArgumentException(
      "Incorrect filter string " + new String(filterStringAsByteArray, StandardCharsets.UTF_8),
      failure);
  }

  /**
   * Returns the filter name given a simple filter expression.
   * <p>
   * The name is everything before the first LPAREN or WHITESPACE byte.
   * @param filterStringAsByteArray a simple filter expression
   * @return name of filter in the simple filter expression
   * @throws IllegalArgumentException if no such delimiter is found, or it is the first byte
   */
  public static byte[] getFilterName(byte[] filterStringAsByteArray) {
    int filterNameStartIndex = 0;
    int filterNameEndIndex = 0;

    for (int i = filterNameStartIndex; i < filterStringAsByteArray.length; i++) {
      if (
        filterStringAsByteArray[i] == ParseConstants.LPAREN
          || filterStringAsByteArray[i] == ParseConstants.WHITESPACE
      ) {
        filterNameEndIndex = i;
        break;
      }
    }

    if (filterNameEndIndex == 0) {
      throw new IllegalArgumentException("Incorrect Filter Name");
    }

    byte[] filterName = new byte[filterNameEndIndex - filterNameStartIndex];
    Bytes.putBytes(filterName, 0, filterStringAsByteArray, 0,
      filterNameEndIndex - filterNameStartIndex);
    return filterName;
  }

  /**
   * Returns the arguments of the filter from the filter string.
   * <p>
   * Quoted arguments are returned with their surrounding single quotes and with escaped quotes
   * collapsed (see {@link #createUnescapdArgument(byte[], int, int)}); unquoted arguments are
   * returned verbatim.
   * @param filterStringAsByteArray filter string given by the user
   * @return an ArrayList containing the arguments of the filter in the filter string
   * @throws IllegalArgumentException if there is no LPAREN or an argument is not terminated
   */
  public static ArrayList<byte[]> getFilterArguments(byte[] filterStringAsByteArray) {
    int argumentListStartIndex = Bytes.searchDelimiterIndex(filterStringAsByteArray, 0,
      filterStringAsByteArray.length, ParseConstants.LPAREN);
    if (argumentListStartIndex == -1) {
      throw new IllegalArgumentException("Incorrect argument list");
    }

    int argumentStartIndex = 0;
    int argumentEndIndex = 0;
    ArrayList<byte[]> filterArguments = new ArrayList<>();

    for (int i = argumentListStartIndex + 1; i < filterStringAsByteArray.length; i++) {

      if (
        filterStringAsByteArray[i] == ParseConstants.WHITESPACE
          || filterStringAsByteArray[i] == ParseConstants.COMMA
          || filterStringAsByteArray[i] == ParseConstants.RPAREN
      ) {
        continue;
      }

      // The argument is in single quotes - for example 'prefix'
      if (filterStringAsByteArray[i] == ParseConstants.SINGLE_QUOTE) {
        argumentStartIndex = i;
        for (int j = argumentStartIndex + 1; j < filterStringAsByteArray.length; j++) {
          if (filterStringAsByteArray[j] == ParseConstants.SINGLE_QUOTE) {
            if (isQuoteUnescaped(filterStringAsByteArray, j)) {
              argumentEndIndex = j;
              i = j + 1;
              byte[] filterArgument = createUnescapdArgument(filterStringAsByteArray,
                argumentStartIndex, argumentEndIndex);
              filterArguments.add(filterArgument);
              break;
            } else {
              // To jump over the second escaped quote
              j++;
            }
          } else if (j == filterStringAsByteArray.length - 1) {
            throw new IllegalArgumentException("Incorrect argument list");
          }
        }
      } else {
        // The argument is an integer, boolean, comparison operator like <, >, != etc
        argumentStartIndex = i;
        for (int j = argumentStartIndex; j < filterStringAsByteArray.length; j++) {
          if (
            filterStringAsByteArray[j] == ParseConstants.WHITESPACE
              || filterStringAsByteArray[j] == ParseConstants.COMMA
              || filterStringAsByteArray[j] == ParseConstants.RPAREN
          ) {
            argumentEndIndex = j - 1;
            i = j;
            byte[] filterArgument = new byte[argumentEndIndex - argumentStartIndex + 1];
            Bytes.putBytes(filterArgument, 0, filterStringAsByteArray, argumentStartIndex,
              argumentEndIndex - argumentStartIndex + 1);
            filterArguments.add(filterArgument);
            break;
          } else if (j == filterStringAsByteArray.length - 1) {
            throw new IllegalArgumentException("Incorrect argument list");
          }
        }
      }
    }
    return filterArguments;
  }

  /**
   * This function is called while parsing the filterString and an operator is parsed.
   * <p>
   * Operators on top of the operator stack are evaluated for as long as they have a higher
   * priority than the operator just read; evaluation stops at an LPAREN.
   * @param operatorStack the stack containing the operators and parenthesis
   * @param filterStack   the stack containing the filters
   * @param operator      the operator found while parsing the filterString
   */
  public void reduce(Stack<ByteBuffer> operatorStack, Stack<Filter> filterStack,
    ByteBuffer operator) {
    while (
      !operatorStack.empty() && !ParseConstants.LPAREN_BUFFER.equals(operatorStack.peek())
        && hasHigherPriority(operatorStack.peek(), operator)
    ) {
      filterStack.push(popArguments(operatorStack, filterStack));
    }
  }

  /**
   * Pops an argument from the operator stack and the number of arguments required by the operator
   * from the filterStack and evaluates them.
   * @param operatorStack the stack containing the operators
   * @param filterStack   the stack containing the filters
   * @return the evaluated filter
   * @throws IllegalArgumentException if the filter stack does not hold enough filters for the
   *                                  operator, or the top of the operator stack is not recognised
   */
  public static Filter popArguments(Stack<ByteBuffer> operatorStack, Stack<Filter> filterStack) {
    ByteBuffer argumentOnTopOfStack = operatorStack.peek();

    if (argumentOnTopOfStack.equals(ParseConstants.OR_BUFFER)) {
      // The top of the stack is an OR
      return popFilterList(operatorStack, filterStack, ParseConstants.OR_BUFFER,
        FilterList.Operator.MUST_PASS_ONE, "Incorrect input string - an OR needs two filters");

    } else if (argumentOnTopOfStack.equals(ParseConstants.AND_BUFFER)) {
      // The top of the stack is an AND
      return popFilterList(operatorStack, filterStack, ParseConstants.AND_BUFFER,
        FilterList.Operator.MUST_PASS_ALL, "Incorrect input string - an AND needs two filters");

    } else if (argumentOnTopOfStack.equals(ParseConstants.SKIP_BUFFER)) {
      // The top of the stack is a SKIP
      try {
        Filter wrappedFilter = filterStack.pop();
        Filter skipFilter = new SkipFilter(wrappedFilter);
        operatorStack.pop();
        return skipFilter;
      } catch (EmptyStackException e) {
        throw new IllegalArgumentException("Incorrect input string - a SKIP wraps a filter", e);
      }

    } else if (argumentOnTopOfStack.equals(ParseConstants.WHILE_BUFFER)) {
      // The top of the stack is a WHILE
      try {
        Filter wrappedFilter = filterStack.pop();
        Filter whileMatchFilter = new WhileMatchFilter(wrappedFilter);
        operatorStack.pop();
        return whileMatchFilter;
      } catch (EmptyStackException e) {
        throw new IllegalArgumentException("Incorrect input string - a WHILE wraps a filter", e);
      }

    } else if (argumentOnTopOfStack.equals(ParseConstants.LPAREN_BUFFER)) {
      // The top of the stack is a LPAREN
      try {
        Filter filter = filterStack.pop();
        operatorStack.pop();
        return filter;
      } catch (EmptyStackException e) {
        throw new IllegalArgumentException("Incorrect Filter String", e);
      }

    } else {
      throw new IllegalArgumentException("Incorrect arguments on operatorStack");
    }
  }

  /**
   * Pops every consecutive occurrence of {@code operator} from the operator stack together with
   * its operands from the filter stack and combines them, in their original order, into one
   * {@link FilterList}.
   */
  private static Filter popFilterList(Stack<ByteBuffer> operatorStack, Stack<Filter> filterStack,
    ByteBuffer operator, FilterList.Operator listOperator, String errorMessage) {
    try {
      ArrayList<Filter> listOfFilters = new ArrayList<>();
      while (!operatorStack.empty() && operatorStack.peek().equals(operator)) {
        // Operands come off the stack right-to-left, so insert at the front
        listOfFilters.add(0, filterStack.pop());
        operatorStack.pop();
      }
      // n operators join n + 1 operands
      listOfFilters.add(0, filterStack.pop());
      return new FilterList(listOperator, listOfFilters);
    } catch (EmptyStackException e) {
      throw new IllegalArgumentException(errorMessage, e);
    }
  }

  /**
   * Returns which operator has higher precedence.
   * <p>
   * If a has higher precedence than b, it returns true. If they have the same precedence, it
   * returns false. A smaller value in the precedence table means a higher precedence.
   * @param a the first operator
   * @param b the second operator
   * @return true if a has strictly higher precedence than b
   */
  public boolean hasHigherPriority(ByteBuffer a, ByteBuffer b) {
    return operatorPrecedenceHashMap.get(a) < operatorPrecedenceHashMap.get(b);
  }

  /**
   * Removes the single quote escaping a single quote - thus it returns an unescaped argument.
   * <p>
   * The result keeps one single quote at each end; every pair of consecutive single quotes
   * between them is collapsed into one.
   * @param filterStringAsByteArray filter string given by user
   * @param argumentStartIndex      start index of the argument (the opening quote)
   * @param argumentEndIndex        end index of the argument (the closing quote)
   * @return returns an unescaped argument
   */
  public static byte[] createUnescapdArgument(byte[] filterStringAsByteArray,
    int argumentStartIndex, int argumentEndIndex) {
    // First pass: compute the output length; 2 accounts for the surrounding quotes
    int unescapedArgumentLength = 2;
    for (int i = argumentStartIndex + 1; i <= argumentEndIndex - 1; i++) {
      unescapedArgumentLength++;
      if (
        filterStringAsByteArray[i] == ParseConstants.SINGLE_QUOTE && i != (argumentEndIndex - 1)
          && filterStringAsByteArray[i + 1] == ParseConstants.SINGLE_QUOTE
      ) {
        // An escaped quote occupies two input bytes but only one output byte
        i++;
      }
    }

    // Second pass: copy the bytes, collapsing escaped quotes
    byte[] unescapedArgument = new byte[unescapedArgumentLength];
    int count = 1;
    unescapedArgument[0] = '\'';
    for (int i = argumentStartIndex + 1; i <= argumentEndIndex - 1; i++) {
      if (
        filterStringAsByteArray[i] == ParseConstants.SINGLE_QUOTE && i != (argumentEndIndex - 1)
          && filterStringAsByteArray[i + 1] == ParseConstants.SINGLE_QUOTE
      ) {
        unescapedArgument[count++] = filterStringAsByteArray[i + 1];
        i++;
      } else {
        unescapedArgument[count++] = filterStringAsByteArray[i];
      }
    }
    unescapedArgument[unescapedArgumentLength - 1] = '\'';
    return unescapedArgument;
  }

  /**
   * Returns true if {@code index} lies inside {@code array} and the byte there equals
   * {@code expected}; an out-of-range index simply yields false.
   */
  private static boolean isByteAt(byte[] array, int index, int expected) {
    return index >= 0 && index < array.length && array[index] == expected;
  }

  /**
   * Checks if the current index of filter string we are on is the beginning of the keyword 'OR'.
   * <p>
   * The keyword must be preceded by WHITESPACE or RPAREN and followed by WHITESPACE or LPAREN;
   * if any of those positions falls outside the array the result is false.
   * @param filterStringAsByteArray filter string given by the user
   * @param indexOfOr               index at which an 'O' was read
   * @return true if the keyword 'OR' is at the current index
   */
  public static boolean checkForOr(byte[] filterStringAsByteArray, int indexOfOr)
    throws CharacterCodingException, ArrayIndexOutOfBoundsException {
    return isByteAt(filterStringAsByteArray, indexOfOr, ParseConstants.O)
      && isByteAt(filterStringAsByteArray, indexOfOr + 1, ParseConstants.R)
      && (isByteAt(filterStringAsByteArray, indexOfOr - 1, ParseConstants.WHITESPACE)
        || isByteAt(filterStringAsByteArray, indexOfOr - 1, ParseConstants.RPAREN))
      && (isByteAt(filterStringAsByteArray, indexOfOr + 2, ParseConstants.WHITESPACE)
        || isByteAt(filterStringAsByteArray, indexOfOr + 2, ParseConstants.LPAREN));
  }

  /**
   * Checks if the current index of filter string we are on is the beginning of the keyword 'AND'.
   * <p>
   * The keyword must be preceded by WHITESPACE or RPAREN and followed by WHITESPACE or LPAREN;
   * if any of those positions falls outside the array the result is false.
   * @param filterStringAsByteArray filter string given by the user
   * @param indexOfAnd              index at which an 'A' was read
   * @return true if the keyword 'AND' is at the current index
   */
  public static boolean checkForAnd(byte[] filterStringAsByteArray, int indexOfAnd)
    throws CharacterCodingException {
    return isByteAt(filterStringAsByteArray, indexOfAnd, ParseConstants.A)
      && isByteAt(filterStringAsByteArray, indexOfAnd + 1, ParseConstants.N)
      && isByteAt(filterStringAsByteArray, indexOfAnd + 2, ParseConstants.D)
      && (isByteAt(filterStringAsByteArray, indexOfAnd - 1, ParseConstants.WHITESPACE)
        || isByteAt(filterStringAsByteArray, indexOfAnd - 1, ParseConstants.RPAREN))
      && (isByteAt(filterStringAsByteArray, indexOfAnd + 3, ParseConstants.WHITESPACE)
        || isByteAt(filterStringAsByteArray, indexOfAnd + 3, ParseConstants.LPAREN));
  }

  /**
   * Checks if the current index of filter string we are on is the beginning of the keyword
   * 'SKIP'.
   * <p>
   * The keyword must start the string or be preceded by WHITESPACE, RPAREN or LPAREN, and must be
   * followed by WHITESPACE or LPAREN; if a required position falls outside the array the result
   * is false.
   * @param filterStringAsByteArray filter string given by the user
   * @param indexOfSkip             index at which an 'S' was read
   * @return true if the keyword 'SKIP' is at the current index
   */
  public static boolean checkForSkip(byte[] filterStringAsByteArray, int indexOfSkip)
    throws CharacterCodingException {
    return isByteAt(filterStringAsByteArray, indexOfSkip, ParseConstants.S)
      && isByteAt(filterStringAsByteArray, indexOfSkip + 1, ParseConstants.K)
      && isByteAt(filterStringAsByteArray, indexOfSkip + 2, ParseConstants.I)
      && isByteAt(filterStringAsByteArray, indexOfSkip + 3, ParseConstants.P)
      && (indexOfSkip == 0
        || isByteAt(filterStringAsByteArray, indexOfSkip - 1, ParseConstants.WHITESPACE)
        || isByteAt(filterStringAsByteArray, indexOfSkip - 1, ParseConstants.RPAREN)
        || isByteAt(filterStringAsByteArray, indexOfSkip - 1, ParseConstants.LPAREN))
      && (isByteAt(filterStringAsByteArray, indexOfSkip + 4, ParseConstants.WHITESPACE)
        || isByteAt(filterStringAsByteArray, indexOfSkip + 4, ParseConstants.LPAREN));
  }

  /**
   * Checks if the current index of filter string we are on is the beginning of the keyword
   * 'WHILE'.
   * <p>
   * The keyword must start the string or be preceded by WHITESPACE, RPAREN or LPAREN, and must be
   * followed by WHITESPACE or LPAREN; if a required position falls outside the array the result
   * is false.
   * @param filterStringAsByteArray filter string given by the user
   * @param indexOfWhile            index at which an 'W' was read
   * @return true if the keyword 'WHILE' is at the current index
   */
  public static boolean checkForWhile(byte[] filterStringAsByteArray, int indexOfWhile)
    throws CharacterCodingException {
    return isByteAt(filterStringAsByteArray, indexOfWhile, ParseConstants.W)
      && isByteAt(filterStringAsByteArray, indexOfWhile + 1, ParseConstants.H)
      && isByteAt(filterStringAsByteArray, indexOfWhile + 2, ParseConstants.I)
      && isByteAt(filterStringAsByteArray, indexOfWhile + 3, ParseConstants.L)
      && isByteAt(filterStringAsByteArray, indexOfWhile + 4, ParseConstants.E)
      && (indexOfWhile == 0
        || isByteAt(filterStringAsByteArray, indexOfWhile - 1, ParseConstants.WHITESPACE)
        || isByteAt(filterStringAsByteArray, indexOfWhile - 1, ParseConstants.RPAREN)
        || isByteAt(filterStringAsByteArray, indexOfWhile - 1, ParseConstants.LPAREN))
      && (isByteAt(filterStringAsByteArray, indexOfWhile + 5, ParseConstants.WHITESPACE)
        || isByteAt(filterStringAsByteArray, indexOfWhile + 5, ParseConstants.LPAREN));
  }

  /**
   * Returns a boolean indicating whether the quote was escaped or not.
   * <p>
   * A quote counts as unescaped when it is the last byte of the array or when the byte right
   * after it is not a single quote.
   * @param array      byte array in which the quote was found
   * @param quoteIndex index of the single quote
   * @return returns true if the quote was unescaped
   * @throws IllegalArgumentException if array is null
   */
  public static boolean isQuoteUnescaped(byte[] array, int quoteIndex) {
    if (array == null) {
      throw new IllegalArgumentException("isQuoteUnescaped called with a null array");
    }

    return quoteIndex == array.length - 1 || array[quoteIndex + 1] != ParseConstants.SINGLE_QUOTE;
  }

  /**
   * Takes a quoted byte array and converts it into an unquoted byte array. For example: given a
   * byte array representing 'abc', it returns a byte array representing abc.
   * @param quotedByteArray the quoted byte array
   * @return Unquoted byte array
   * @throws IllegalArgumentException if the array is null, shorter than two bytes, or does not
   *                                  start and end with a single quote
   */
  public static byte[] removeQuotesFromByteArray(byte[] quotedByteArray) {
    if (
      quotedByteArray == null || quotedByteArray.length < 2
        || quotedByteArray[0] != ParseConstants.SINGLE_QUOTE
        || quotedByteArray[quotedByteArray.length - 1] != ParseConstants.SINGLE_QUOTE
    ) {
      throw new IllegalArgumentException("removeQuotesFromByteArray needs a quoted byte array");
    }

    byte[] targetString = new byte[quotedByteArray.length - 2];
    Bytes.putBytes(targetString, 0, quotedByteArray, 1, quotedByteArray.length - 2);
    return targetString;
  }

  /**
   * Converts an int expressed in a byte array to an actual int.
   * <p>
   * This doesn't use Bytes.toInt because that assumes that there will be {@link Bytes#SIZEOF_INT}
   * bytes available.
   * @param numberAsByteArray the int value expressed as a byte array
   * @return the int value
   * @throws IllegalArgumentException if the value is not a valid number or does not fit in an int
   */
  public static int convertByteArrayToInt(byte[] numberAsByteArray) {

    long tempResult = ParseFilter.convertByteArrayToLong(numberAsByteArray);

    if (tempResult > Integer.MAX_VALUE) {
      throw new IllegalArgumentException("Integer Argument too large");
    } else if (tempResult < Integer.MIN_VALUE) {
      throw new IllegalArgumentException("Integer Argument too small");
    }

    return (int) tempResult;
  }

  /**
   * Converts a long expressed in a byte array to an actual long.
   * <p>
   * This doesn't use Bytes.toLong because that assumes that there will be
   * {@link Bytes#SIZEOF_LONG} bytes available.
   * <p>
   * The accepted form is an optional leading minus sign followed by one or more decimal digits.
   * The magnitude must not exceed {@link Long#MAX_VALUE}.
   * @param numberAsByteArray the long value expressed as a byte array
   * @return the long value
   * @throws IllegalArgumentException if the array is null, contains no digits, contains a
   *                                  non-digit byte, or the magnitude overflows a long
   */
  public static long convertByteArrayToLong(byte[] numberAsByteArray) {
    if (numberAsByteArray == null) {
      throw new IllegalArgumentException("convertByteArrayToLong called with a null array");
    }

    int i = 0;
    long result = 0;
    boolean isNegative = false;

    if (numberAsByteArray.length > 0 && numberAsByteArray[0] == ParseConstants.MINUS_SIGN) {
      i++;
      isNegative = true;
    }

    // Reject "" and "-": without this, "" would index out of bounds and "-" would parse as 0
    if (i == numberAsByteArray.length) {
      throw new IllegalArgumentException("Byte Array should contain at least one digit");
    }

    while (i != numberAsByteArray.length) {
      if (
        numberAsByteArray[i] < ParseConstants.ZERO || numberAsByteArray[i] > ParseConstants.NINE
      ) {
        throw new IllegalArgumentException("Byte Array should only contain digits");
      }
      int digit = numberAsByteArray[i] - ParseConstants.ZERO;
      // Check before multiplying: a wrapped product is not guaranteed to come out negative
      if (result > (Long.MAX_VALUE - digit) / 10) {
        throw new IllegalArgumentException("Long Argument too large");
      }
      result = result * 10 + digit;
      i++;
    }

    return isNegative ? -result : result;
  }

  /**
   * Converts a boolean expressed in a byte array to an actual boolean.
   * <p>
   * This doesn't use Bytes.toBoolean because Bytes.toBoolean(byte []) assumes that 1 stands for
   * true and 0 for false. Here, the byte array representing "true" and "false" is parsed,
   * ignoring case.
   * @param booleanAsByteArray the boolean value expressed as a byte array
   * @return the boolean value
   * @throws IllegalArgumentException if the array is null or spells neither "true" nor "false"
   */
  public static boolean convertByteArrayToBoolean(byte[] booleanAsByteArray) {
    if (booleanAsByteArray == null) {
      throw new IllegalArgumentException("convertByteArrayToBoolean called with a null array");
    }

    if (
      booleanAsByteArray.length == 4
        && (booleanAsByteArray[0] == 't' || booleanAsByteArray[0] == 'T')
        && (booleanAsByteArray[1] == 'r' || booleanAsByteArray[1] == 'R')
        && (booleanAsByteArray[2] == 'u' || booleanAsByteArray[2] == 'U')
        && (booleanAsByteArray[3] == 'e' || booleanAsByteArray[3] == 'E')
    ) {
      return true;
    } else if (
      booleanAsByteArray.length == 5
        && (booleanAsByteArray[0] == 'f' || booleanAsByteArray[0] == 'F')
        && (booleanAsByteArray[1] == 'a' || booleanAsByteArray[1] == 'A')
        && (booleanAsByteArray[2] == 'l' || booleanAsByteArray[2] == 'L')
        && (booleanAsByteArray[3] == 's' || booleanAsByteArray[3] == 'S')
        && (booleanAsByteArray[4] == 'e' || booleanAsByteArray[4] == 'E')
    ) {
      return false;
    } else {
      throw new IllegalArgumentException("Incorrect Boolean Expression");
    }
  }

  /**
   * Takes a compareOperator symbol as a byte array and returns the corresponding CompareOperator.
   * @param compareOpAsByteArray the comparatorOperator symbol as a byte array
   * @return the Compare Operator
   * @throws IllegalArgumentException if the symbol is not a known compare operator
   */
  public static CompareOperator createCompareOperator(byte[] compareOpAsByteArray) {
    ByteBuffer compareOp = ByteBuffer.wrap(compareOpAsByteArray);
    if (compareOp.equals(ParseConstants.LESS_THAN_BUFFER)) {
      return CompareOperator.LESS;
    } else if (compareOp.equals(ParseConstants.LESS_THAN_OR_EQUAL_TO_BUFFER)) {
      return CompareOperator.LESS_OR_EQUAL;
    } else if (compareOp.equals(ParseConstants.GREATER_THAN_BUFFER)) {
      return CompareOperator.GREATER;
    } else if (compareOp.equals(ParseConstants.GREATER_THAN_OR_EQUAL_TO_BUFFER)) {
      return CompareOperator.GREATER_OR_EQUAL;
    } else if (compareOp.equals(ParseConstants.NOT_EQUAL_TO_BUFFER)) {
      return CompareOperator.NOT_EQUAL;
    } else if (compareOp.equals(ParseConstants.EQUAL_TO_BUFFER)) {
      return CompareOperator.EQUAL;
    } else {
      throw new IllegalArgumentException("Invalid compare operator");
    }
  }

  /**
   * Parses a comparator of the form comparatorType:comparatorValue and returns a comparator.
   * @param comparator the comparator in the form comparatorType:comparatorValue
   * @return the parsed comparator
   * @throws IllegalArgumentException if comparator is null, has no COLON separator, or names an
   *                                  unknown comparator type
   */
  public static ByteArrayComparable createComparator(byte[] comparator) {
    if (comparator == null) {
      throw new IllegalArgumentException("Incorrect Comparator");
    }
    byte[][] parsedComparator = ParseFilter.parseComparator(comparator);
    byte[] comparatorType = parsedComparator[0];
    byte[] comparatorValue = parsedComparator[1];

    if (Bytes.equals(comparatorType, ParseConstants.binaryType)) {
      return new BinaryComparator(comparatorValue);
    } else if (Bytes.equals(comparatorType, ParseConstants.binaryPrefixType)) {
      return new BinaryPrefixComparator(comparatorValue);
    } else if (Bytes.equals(comparatorType, ParseConstants.regexStringType)) {
      return new RegexStringComparator(new String(comparatorValue, StandardCharsets.UTF_8));
    } else if (Bytes.equals(comparatorType, ParseConstants.regexStringNoCaseType)) {
      return new RegexStringComparator(new String(comparatorValue, StandardCharsets.UTF_8),
        Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
    } else if (Bytes.equals(comparatorType, ParseConstants.substringType)) {
      return new SubstringComparator(new String(comparatorValue, StandardCharsets.UTF_8));
    } else {
      throw new IllegalArgumentException("Incorrect comparatorType");
    }
  }

  /**
   * Splits a column in comparatorType:comparatorValue form into separate byte arrays.
   * <p>
   * The split happens at the delimiter index reported by {@code Bytes.searchDelimiterIndex} for
   * COLON; the delimiter itself is not part of either half.
   * @param comparator the comparator
   * @return the parsed arguments of the comparator as a 2D byte array: type at index 0, value at
   *         index 1
   * @throws IllegalArgumentException if no COLON is found
   */
  public static byte[][] parseComparator(byte[] comparator) {
    final int index =
      Bytes.searchDelimiterIndex(comparator, 0, comparator.length, ParseConstants.COLON);
    if (index == -1) {
      throw new IllegalArgumentException("Incorrect comparator");
    }

    byte[][] result = new byte[2][];
    result[0] = new byte[index];
    System.arraycopy(comparator, 0, result[0], 0, index);

    final int len = comparator.length - (index + 1);
    result[1] = new byte[len];
    System.arraycopy(comparator, index + 1, result[1], 0, len);

    return result;
  }

  /**
   * Return a Set of filters supported by the Filter Language.
   * @return an unmodifiable view of the registered filter names
   */
  public Set<String> getSupportedFilters() {
    // Read-only view so callers cannot remove entries from the shared registry
    return Collections.unmodifiableSet(filterHashMap.keySet());
  }

  /**
   * Returns all known filters.
   * @return an unmodifiable map of filters
   */
  public static Map<String, String> getAllFilters() {
    return Collections.unmodifiableMap(filterHashMap);
  }

  /**
   * Register a new filter with the parser. If a filter is already registered under the given
   * name, its class name is replaced.
   * @param name        a name for the filter
   * @param filterClass fully qualified class name
   */
  public static void registerFilter(String name, String filterClass) {
    LOG.info("Registering new filter {}", name);

    filterHashMap.put(name, filterClass);
  }
}