001/*
002 * The contents of this file are subject to the terms of the Common Development and
003 * Distribution License (the License). You may not use this file except in compliance with the License.
004 *
005 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
006 * specific language governing permission and limitations under the License.
007 *
008 * When distributing Covered Software, include this CDDL Header Notice in each file and include
009 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
010 * Header, with the fields enclosed by brackets [] replaced by your own identifying
011 * information: "Portions copyright [year] [name of copyright owner]".
012 *
013 * Copyright 2015 ForgeRock AS.
014 */
015
016package org.forgerock.util.query;
017
018import static org.forgerock.util.query.QueryFilterOperators.*;
019
020import java.util.Iterator;
021import java.util.LinkedList;
022import java.util.List;
023import java.util.NoSuchElementException;
024
025/**
026 * A query string has the following string representation:
027 * 
028 * <pre>
029 * Expr           = OrExpr
030 * OrExpr         = AndExpr ( 'or' AndExpr ) *
031 * AndExpr        = NotExpr ( 'and' NotExpr ) *
032 * NotExpr        = '!' PrimaryExpr | PrimaryExpr
033 * PrimaryExpr    = '(' Expr ')' | ComparisonExpr | PresenceExpr | LiteralExpr
034 * ComparisonExpr = Pointer OpName Value
035 * PresenceExpr   = Pointer 'pr'
036 * LiteralExpr    = 'true' | 'false'
037 * Pointer        = Case-sensitive field specification
038 * OpName         = 'eq' |  # equal to
039 *                  'co' |  # contains
040 *                  'sw' |  # starts with
041 *                  'lt' |  # less than
042 *                  'le' |  # less than or equal to
043 *                  'gt' |  # greater than
044 *                  'ge' |  # greater than or equal to
045 *                  STRING  # extended operator
046 * Value          = NUMBER | BOOLEAN | '"' UTF8STRING '"' | ''' UTF8STRING '''
047 * STRING         = ASCII string not containing white-space
048 * UTF8STRING     = UTF-8 string possibly containing white-space
049 * </pre>
050 *
051 * Note that white space, parentheses, and exclamation characters need URL
052 * when passed via HTTP query strings.  
053 * <p>
054 * ASCII and UTF-8 strings will treat the backslash character as an escape character.
055 * For an example, this will allow for the inclusion of quotes or single-quotes within 
056 * a string that is surrounded by the same type of quotes: "tes\"t".  The backslash 
057 * character itself will also need to be escaped if it is to be included in the string.
058 * <p>
059 * In addition to single valued properties (number, boolean, and string), query filters
060 * can be applied to multi-valued properties. When operating on properties that are an 
061 * array or list type the operation should be evaluated on each element in the array, 
062 * passing if any of the elements in the array or list pass the operation.
063 *
064 * @param <F> The type of field description used in parsed {@link QueryFilter} objects.
065 */
066public abstract class QueryFilterParser<F> {
067
068    // Maximum permitted query filter nesting depth.
069    private static final int VALUE_OF_MAX_DEPTH = 256;
070
071    /**
072     * Parses the field description from the current filter token into the type of field
073     * description the QueryFilter uses.
074     * @param fieldDescription The token from parsing the query string.
075     * @return The field description.
076     */
077    protected abstract F parseField(String fieldDescription);
078
079    /**
080     * Parses the provided string representation of a query filter as a
081     * {@code QueryFilter}.
082     *
083     * @param string
084     *            The string representation of a query filter .
085     *
086     * @return The parsed {@code QueryFilter}.
087     * @throws IllegalArgumentException
088     *             If {@code string} is not a valid string representation of a
089     *             query filter.
090     */
091    public QueryFilter<F> valueOf(final String string) {
092        // Use recursive descent of grammar described in class Javadoc.
093        final FilterTokenizer tokenizer = new FilterTokenizer(string);
094        final QueryFilter<F> filter = valueOfOrExpr(tokenizer, 0);
095        if (tokenizer.hasNext()) {
096            return valueOfIllegalArgument(tokenizer);
097        } else {
098            return filter;
099        }
100    }
101
102    private void checkDepth(final FilterTokenizer tokenizer, final int depth) {
103        if (depth > VALUE_OF_MAX_DEPTH) {
104            throw new IllegalArgumentException("The query filter '" + tokenizer
105                    + "' cannot be parsed because it contains more than " + VALUE_OF_MAX_DEPTH
106                    + " nexted expressions");
107        }
108    }
109
110    private QueryFilter<F> valueOfAndExpr(final FilterTokenizer tokenizer, final int depth) {
111        checkDepth(tokenizer, depth);
112        QueryFilter<F> filter = valueOfNotExpr(tokenizer, depth + 1);
113        List<QueryFilter<F>> subFilters = null;
114        while (tokenizer.hasNext() && tokenizer.peek().equalsIgnoreCase(AND)) {
115            tokenizer.next();
116            if (subFilters == null) {
117                subFilters = new LinkedList<>();
118                subFilters.add(filter);
119            }
120            subFilters.add(valueOfNotExpr(tokenizer, depth + 1));
121        }
122        if (subFilters != null) {
123            filter = QueryFilter.and(subFilters);
124        }
125        return filter;
126    }
127
128    private QueryFilter<F> valueOfIllegalArgument(final FilterTokenizer tokenizer) {
129        throw new IllegalArgumentException("Invalid query filter '" + tokenizer + "'");
130    }
131
132    private QueryFilter<F> valueOfNotExpr(final FilterTokenizer tokenizer, final int depth) {
133        checkDepth(tokenizer, depth);
134        if (tokenizer.hasNext() && tokenizer.peek().equalsIgnoreCase(NOT)) {
135            tokenizer.next();
136            final QueryFilter<F> rhs = valueOfPrimaryExpr(tokenizer, depth + 1);
137            return QueryFilter.not(rhs);
138        } else {
139            return valueOfPrimaryExpr(tokenizer, depth + 1);
140        }
141    }
142
143    private QueryFilter<F> valueOfOrExpr(final FilterTokenizer tokenizer, final int depth) {
144        checkDepth(tokenizer, depth);
145        QueryFilter<F> filter = valueOfAndExpr(tokenizer, depth + 1);
146        List<QueryFilter<F>> subFilters = null;
147        while (tokenizer.hasNext() && tokenizer.peek().equalsIgnoreCase(OR)) {
148            tokenizer.next();
149            if (subFilters == null) {
150                subFilters = new LinkedList<>();
151                subFilters.add(filter);
152            }
153            subFilters.add(valueOfAndExpr(tokenizer, depth + 1));
154        }
155        if (subFilters != null) {
156            filter = QueryFilter.or(subFilters);
157        }
158        return filter;
159    }
160
161    private QueryFilter<F> valueOfPrimaryExpr(final FilterTokenizer tokenizer, final int depth) {
162        checkDepth(tokenizer, depth);
163        if (!tokenizer.hasNext()) {
164            return valueOfIllegalArgument(tokenizer);
165        }
166        String nextToken = tokenizer.next();
167        if (nextToken.equals("(")) {
168            // Nested expression.
169            final QueryFilter<F> filter = valueOfOrExpr(tokenizer, depth + 1);
170            if (!tokenizer.hasNext() || !tokenizer.next().equals(")")) {
171                return valueOfIllegalArgument(tokenizer);
172            }
173            return filter;
174        } else if (nextToken.equalsIgnoreCase(TRUE)) {
175            return QueryFilter.alwaysTrue();
176        } else if (nextToken.equalsIgnoreCase(FALSE)) {
177            return QueryFilter.alwaysFalse();
178        } else if (nextToken.equals("\"")) {
179            return valueOfIllegalArgument(tokenizer);
180        } else {
181            // Assertion.
182            final F pointer = parseField(nextToken);
183            if (!tokenizer.hasNext()) {
184                return valueOfIllegalArgument(tokenizer);
185            }
186            final String operator = tokenizer.next();
187            if (operator.equalsIgnoreCase(PRESENT)) {
188                return QueryFilter.present(pointer);
189            } else {
190                // Read assertion value: NUMBER | BOOLEAN | '"' UTF8STRING '"'
191                if (!tokenizer.hasNext()) {
192                    return valueOfIllegalArgument(tokenizer);
193                }
194                final Object assertionValue;
195                nextToken = tokenizer.next();
196                if (nextToken.equals("\"")) {
197                    // UTF8STRING delimited by quotes
198                    if (!tokenizer.hasNext()) {
199                        return valueOfIllegalArgument(tokenizer);
200                    }
201                    assertionValue = tokenizer.next();
202                    if (!tokenizer.hasNext() || !tokenizer.next().equals("\"")) {
203                        return valueOfIllegalArgument(tokenizer);
204                    }
205                } else if (nextToken.equals("'")) {
206                    // UTF8STRING delimited by single quotes
207                    if (!tokenizer.hasNext()) {
208                        return valueOfIllegalArgument(tokenizer);
209                    }
210                    assertionValue = tokenizer.next();
211                    if (!tokenizer.hasNext() || !tokenizer.next().equals("'")) {
212                        return valueOfIllegalArgument(tokenizer);
213                    }
214                } else if (nextToken.equalsIgnoreCase(TRUE)
215                        || nextToken.equalsIgnoreCase(FALSE)) {
216                    assertionValue = Boolean.parseBoolean(nextToken);
217                } else if (nextToken.indexOf('.') >= 0) {
218                    // Floating point number.
219                    assertionValue = Double.parseDouble(nextToken);
220                } else {
221                    // Must be an integer.
222                    assertionValue = Long.parseLong(nextToken);
223                }
224                try {
225                    return comparisonFilter(pointer, operator, assertionValue);
226                } catch (final IllegalArgumentException e) {
227                    return valueOfIllegalArgument(tokenizer);
228                }
229            }
230        }
231    }
232
233    /**
234     * Creates a new generic comparison filter using the provided field name,
235     * operator, and value assertion. When the provided operator name represents
236     * a core operator, e.g. "eq", then this method is equivalent to calling the
237     * equivalent constructor, e.g. {@link QueryFilter#equalTo(Object, Object)}.
238     * Otherwise, when the operator name does not correspond to a core operator,
239     * an extended comparison filter will be returned.
240     *
241     * @param field
242     *            The name of field to be compared.
243     * @param operator
244     *            The operator to use for the comparison, which must be one of
245     *            the core operator names, or a string matching the regular
246     *            expression {@code [a-zA-Z_0-9.]+}.
247     * @param valueAssertion
248     *            The assertion value.
249     * @return The newly created generic comparison filter.
250     * @throws IllegalArgumentException
251     *             If {@code operator} is not a valid operator name.
252     */
253    private QueryFilter<F> comparisonFilter(final F field, final String operator, final Object valueAssertion) {
254        if (operator.equalsIgnoreCase(EQUALS)) {
255            return QueryFilter.equalTo(field, valueAssertion);
256        } else if (operator.equalsIgnoreCase(GREATER_THAN)) {
257            return QueryFilter.greaterThan(field, valueAssertion);
258        } else if (operator.equalsIgnoreCase(GREATER_EQUAL)) {
259            return QueryFilter.greaterThanOrEqualTo(field, valueAssertion);
260        } else if (operator.equalsIgnoreCase(LESS_THAN)) {
261            return QueryFilter.lessThan(field, valueAssertion);
262        } else if (operator.equalsIgnoreCase(LESS_EQUAL)) {
263            return QueryFilter.lessThanOrEqualTo(field, valueAssertion);
264        } else if (operator.equalsIgnoreCase(CONTAINS)) {
265            return QueryFilter.contains(field, valueAssertion);
266        } else if (operator.equalsIgnoreCase(STARTS_WITH)) {
267            return QueryFilter.startsWith(field, valueAssertion);
268        } else if (operator.matches("[a-zA-Z_0-9.]+")) {
269            return QueryFilter.extendedMatch(field, operator, valueAssertion);
270        } else {
271            throw new IllegalArgumentException("\"" + operator + "\" is not a valid filter operator");
272        }
273    }
274
275    private static final class FilterTokenizer implements Iterator<String> {
276        private static final int NEED_END_STRING = 2;
277        private static final int NEED_START_STRING = 1;
278        private static final int NEED_TOKEN = 0;
279
280        private String filterString;
281        private String nextToken;
282        private int pos;
283        private int state;
284        private char stringDelimiter;
285
286        private FilterTokenizer(final String filterString) {
287            this.filterString = filterString;
288            this.pos = 0;
289            this.state = NEED_TOKEN;
290            readNextToken();
291        }
292
293        @Override
294        public boolean hasNext() {
295            return nextToken != null;
296        }
297
298        @Override
299        public String next() {
300            final String next = peek();
301            readNextToken();
302            return next;
303        }
304
305        @Override
306        public void remove() {
307            throw new UnsupportedOperationException();
308        }
309
310        @Override
311        public String toString() {
312            return filterString;
313        }
314
315        private String peek() {
316            if (nextToken == null) {
317                throw new NoSuchElementException();
318            }
319            return nextToken;
320        }
321
322        private void readNextToken() {
323            switch (state) {
324                case NEED_START_STRING:
325                    final int stringStart = pos;
326                    for (; pos < filterString.length() && filterString.charAt(pos) != stringDelimiter; pos++) {
327                        if (filterString.charAt(pos) == '\\') {
328                                if ((pos + 1) == filterString.length()) {
329                                        throw new IllegalArgumentException("The filter string cannot end with an escape character");
330                                }
331                                // Found an escaped character, so remove the '\')
332                                filterString = new StringBuilder(filterString).deleteCharAt(pos).toString();
333                        }
334                        // Do nothing
335                    }
336                    nextToken = filterString.substring(stringStart, pos);
337                    state = NEED_END_STRING;
338                    break;
339                case NEED_END_STRING:
340                    // NEED_START_STRING guarantees that we are either at the end of the string
341                    // or the next character is a quote.
342                    if (pos < filterString.length()) {
343                        nextToken = filterString.substring(pos, ++pos);
344                    } else {
345                        nextToken = null;
346                    }
347                    state = NEED_TOKEN;
348                    break;
349                default: // NEED_TOKEN:
350                    if (!skipWhiteSpace()) {
351                        nextToken = null;
352                    } else {
353                        final int tokenStart = pos;
354                        switch (filterString.charAt(pos++)) {
355                            case '(':
356                            case ')':
357                                break;
358                            case '"':
359                                state = NEED_START_STRING;
360                                stringDelimiter = '"';
361                                break;
362                            case '\'':
363                                state = NEED_START_STRING;
364                                stringDelimiter = '\'';
365                                break;
366                            default:
367                                for (; pos < filterString.length(); pos++) {
368                                    final char c = filterString.charAt(pos);
369                                    if (c == '(' || c == ')' || c == ' ') {
370                                        break;
371                                    }
372                                }
373                                break;
374                        }
375                        nextToken = filterString.substring(tokenStart, pos);
376                    }
377            }
378        }
379
380        private boolean skipWhiteSpace() {
381            for (; pos < filterString.length() && filterString.charAt(pos) == ' '; pos++) {
382                // Do nothing
383            }
384            return pos < filterString.length();
385        }
386    }
387
388}