001/*
002 * The contents of this file are subject to the terms of the Common Development and
003 * Distribution License (the License). You may not use this file except in compliance with the
004 * License.
005 *
006 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
007 * specific language governing permission and limitations under the License.
008 *
009 * When distributing Covered Software, include this CDDL Header Notice in each file and include
010 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
011 * Header, with the fields enclosed by brackets [] replaced by your own identifying
012 * information: "Portions copyright [year] [name of copyright owner]".
013 *
014 * Copyright 2015 ForgeRock AS.
015 */
016
017package org.forgerock.util.query;
018
019import static org.forgerock.util.query.QueryFilterOperators.*;
020
021import java.util.Iterator;
022import java.util.LinkedList;
023import java.util.List;
024import java.util.NoSuchElementException;
025
026/**
027 * A query string has the following string representation:
028 *
029 * <pre>
030 * Expr           = OrExpr
031 * OrExpr         = AndExpr ( 'or' AndExpr ) *
032 * AndExpr        = NotExpr ( 'and' NotExpr ) *
033 * NotExpr        = '!' PrimaryExpr | PrimaryExpr
034 * PrimaryExpr    = '(' Expr ')' | ComparisonExpr | PresenceExpr | LiteralExpr
035 * ComparisonExpr = Pointer OpName Value
036 * PresenceExpr   = Pointer 'pr'
037 * LiteralExpr    = 'true' | 'false'
038 * Pointer        = Case-sensitive field specification
039 * OpName         = 'eq' |  # equal to
040 *                  'co' |  # contains
041 *                  'sw' |  # starts with
042 *                  'lt' |  # less than
043 *                  'le' |  # less than or equal to
044 *                  'gt' |  # greater than
045 *                  'ge' |  # greater than or equal to
046 *                  STRING  # extended operator
047 * Value          = NUMBER | BOOLEAN | '"' UTF8STRING '"' | ''' UTF8STRING '''
048 * STRING         = ASCII string not containing white-space
049 * UTF8STRING     = UTF-8 string possibly containing white-space
050 * </pre>
051 *
052 * Note that white space, parentheses, and exclamation characters need URL
053 * when passed via HTTP query strings.
054 * <p>
055 * ASCII and UTF-8 strings will treat the backslash character as an escape character.
056 * For an example, this will allow for the inclusion of quotes or single-quotes within
057 * a string that is surrounded by the same type of quotes: "tes\"t".  The backslash
058 * character itself will also need to be escaped if it is to be included in the string.
059 * <p>
060 * In addition to single valued properties (number, boolean, and string), query filters
061 * can be applied to multi-valued properties. When operating on properties that are an
062 * array or list type the operation should be evaluated on each element in the array,
063 * passing if any of the elements in the array or list pass the operation.
064 *
065 * @param <F> The type of field description used in parsed {@link QueryFilter} objects.
066 */
067public abstract class QueryFilterParser<F> {
068
069    // Maximum permitted query filter nesting depth.
070    private static final int VALUE_OF_MAX_DEPTH = 256;
071
072    /**
073     * Parses the field description from the current filter token into the type of field
074     * description the QueryFilter uses.
075     * @param fieldDescription The token from parsing the query string.
076     * @return The field description.
077     */
078    protected abstract F parseField(String fieldDescription);
079
080    /**
081     * Parses the provided string representation of a query filter as a
082     * {@code QueryFilter}.
083     *
084     * @param string
085     *            The string representation of a query filter .
086     *
087     * @return The parsed {@code QueryFilter}.
088     * @throws IllegalArgumentException
089     *             If {@code string} is not a valid string representation of a
090     *             query filter.
091     */
092    public QueryFilter<F> valueOf(final String string) {
093        // Use recursive descent of grammar described in class Javadoc.
094        final FilterTokenizer tokenizer = new FilterTokenizer(string);
095        final QueryFilter<F> filter = valueOfOrExpr(tokenizer, 0);
096        if (tokenizer.hasNext()) {
097            return valueOfIllegalArgument(tokenizer);
098        } else {
099            return filter;
100        }
101    }
102
103    private void checkDepth(final FilterTokenizer tokenizer, final int depth) {
104        if (depth > VALUE_OF_MAX_DEPTH) {
105            throw new IllegalArgumentException("The query filter '" + tokenizer
106                    + "' cannot be parsed because it contains more than " + VALUE_OF_MAX_DEPTH
107                    + " nexted expressions");
108        }
109    }
110
111    private QueryFilter<F> valueOfAndExpr(final FilterTokenizer tokenizer, final int depth) {
112        checkDepth(tokenizer, depth);
113        QueryFilter<F> filter = valueOfNotExpr(tokenizer, depth + 1);
114        List<QueryFilter<F>> subFilters = null;
115        while (tokenizer.hasNext() && tokenizer.peek().equalsIgnoreCase(AND)) {
116            tokenizer.next();
117            if (subFilters == null) {
118                subFilters = new LinkedList<>();
119                subFilters.add(filter);
120            }
121            subFilters.add(valueOfNotExpr(tokenizer, depth + 1));
122        }
123        if (subFilters != null) {
124            filter = QueryFilter.and(subFilters);
125        }
126        return filter;
127    }
128
129    private QueryFilter<F> valueOfIllegalArgument(final FilterTokenizer tokenizer) {
130        throw new IllegalArgumentException("Invalid query filter '" + tokenizer + "'");
131    }
132
133    private QueryFilter<F> valueOfNotExpr(final FilterTokenizer tokenizer, final int depth) {
134        checkDepth(tokenizer, depth);
135        if (tokenizer.hasNext() && tokenizer.peek().equalsIgnoreCase(NOT)) {
136            tokenizer.next();
137            final QueryFilter<F> rhs = valueOfPrimaryExpr(tokenizer, depth + 1);
138            return QueryFilter.not(rhs);
139        } else {
140            return valueOfPrimaryExpr(tokenizer, depth + 1);
141        }
142    }
143
144    private QueryFilter<F> valueOfOrExpr(final FilterTokenizer tokenizer, final int depth) {
145        checkDepth(tokenizer, depth);
146        QueryFilter<F> filter = valueOfAndExpr(tokenizer, depth + 1);
147        List<QueryFilter<F>> subFilters = null;
148        while (tokenizer.hasNext() && tokenizer.peek().equalsIgnoreCase(OR)) {
149            tokenizer.next();
150            if (subFilters == null) {
151                subFilters = new LinkedList<>();
152                subFilters.add(filter);
153            }
154            subFilters.add(valueOfAndExpr(tokenizer, depth + 1));
155        }
156        if (subFilters != null) {
157            filter = QueryFilter.or(subFilters);
158        }
159        return filter;
160    }
161
162    private QueryFilter<F> valueOfPrimaryExpr(final FilterTokenizer tokenizer, final int depth) {
163        checkDepth(tokenizer, depth);
164        if (!tokenizer.hasNext()) {
165            return valueOfIllegalArgument(tokenizer);
166        }
167        String nextToken = tokenizer.next();
168        if (nextToken.equals("(")) {
169            // Nested expression.
170            final QueryFilter<F> filter = valueOfOrExpr(tokenizer, depth + 1);
171            if (!tokenizer.hasNext() || !tokenizer.next().equals(")")) {
172                return valueOfIllegalArgument(tokenizer);
173            }
174            return filter;
175        } else if (nextToken.equalsIgnoreCase(TRUE)) {
176            return QueryFilter.alwaysTrue();
177        } else if (nextToken.equalsIgnoreCase(FALSE)) {
178            return QueryFilter.alwaysFalse();
179        } else if (nextToken.equals("\"")) {
180            return valueOfIllegalArgument(tokenizer);
181        } else {
182            // Assertion.
183            final F pointer = parseField(nextToken);
184            if (!tokenizer.hasNext()) {
185                return valueOfIllegalArgument(tokenizer);
186            }
187            final String operator = tokenizer.next();
188            if (operator.equalsIgnoreCase(PRESENT)) {
189                return QueryFilter.present(pointer);
190            } else {
191                // Read assertion value: NUMBER | BOOLEAN | '"' UTF8STRING '"'
192                if (!tokenizer.hasNext()) {
193                    return valueOfIllegalArgument(tokenizer);
194                }
195                final Object assertionValue;
196                nextToken = tokenizer.next();
197                if (nextToken.equals("\"")) {
198                    // UTF8STRING delimited by quotes
199                    if (!tokenizer.hasNext()) {
200                        return valueOfIllegalArgument(tokenizer);
201                    }
202                    assertionValue = tokenizer.next();
203                    if (!tokenizer.hasNext() || !tokenizer.next().equals("\"")) {
204                        return valueOfIllegalArgument(tokenizer);
205                    }
206                } else if (nextToken.equals("'")) {
207                    // UTF8STRING delimited by single quotes
208                    if (!tokenizer.hasNext()) {
209                        return valueOfIllegalArgument(tokenizer);
210                    }
211                    assertionValue = tokenizer.next();
212                    if (!tokenizer.hasNext() || !tokenizer.next().equals("'")) {
213                        return valueOfIllegalArgument(tokenizer);
214                    }
215                } else if (nextToken.equalsIgnoreCase(TRUE)
216                        || nextToken.equalsIgnoreCase(FALSE)) {
217                    assertionValue = Boolean.parseBoolean(nextToken);
218                } else if (nextToken.indexOf('.') >= 0) {
219                    // Floating point number.
220                    assertionValue = Double.parseDouble(nextToken);
221                } else {
222                    // Must be an integer.
223                    assertionValue = Long.parseLong(nextToken);
224                }
225                try {
226                    return comparisonFilter(pointer, operator, assertionValue);
227                } catch (final IllegalArgumentException e) {
228                    return valueOfIllegalArgument(tokenizer);
229                }
230            }
231        }
232    }
233
234    /**
235     * Creates a new generic comparison filter using the provided field name,
236     * operator, and value assertion. When the provided operator name represents
237     * a core operator, e.g. "eq", then this method is equivalent to calling the
238     * equivalent constructor, e.g. {@link QueryFilter#equalTo(Object, Object)}.
239     * Otherwise, when the operator name does not correspond to a core operator,
240     * an extended comparison filter will be returned.
241     *
242     * @param field
243     *            The name of field to be compared.
244     * @param operator
245     *            The operator to use for the comparison, which must be one of
246     *            the core operator names, or a string matching the regular
247     *            expression {@code [a-zA-Z_0-9.]+}.
248     * @param valueAssertion
249     *            The assertion value.
250     * @return The newly created generic comparison filter.
251     * @throws IllegalArgumentException
252     *             If {@code operator} is not a valid operator name.
253     */
254    private QueryFilter<F> comparisonFilter(final F field, final String operator, final Object valueAssertion) {
255        if (operator.equalsIgnoreCase(EQUALS)) {
256            return QueryFilter.equalTo(field, valueAssertion);
257        } else if (operator.equalsIgnoreCase(GREATER_THAN)) {
258            return QueryFilter.greaterThan(field, valueAssertion);
259        } else if (operator.equalsIgnoreCase(GREATER_EQUAL)) {
260            return QueryFilter.greaterThanOrEqualTo(field, valueAssertion);
261        } else if (operator.equalsIgnoreCase(LESS_THAN)) {
262            return QueryFilter.lessThan(field, valueAssertion);
263        } else if (operator.equalsIgnoreCase(LESS_EQUAL)) {
264            return QueryFilter.lessThanOrEqualTo(field, valueAssertion);
265        } else if (operator.equalsIgnoreCase(CONTAINS)) {
266            return QueryFilter.contains(field, valueAssertion);
267        } else if (operator.equalsIgnoreCase(STARTS_WITH)) {
268            return QueryFilter.startsWith(field, valueAssertion);
269        } else if (operator.matches("[a-zA-Z_0-9.]+")) {
270            return QueryFilter.extendedMatch(field, operator, valueAssertion);
271        } else {
272            throw new IllegalArgumentException("\"" + operator + "\" is not a valid filter operator");
273        }
274    }
275
276    private static final class FilterTokenizer implements Iterator<String> {
277        private static final int NEED_END_STRING = 2;
278        private static final int NEED_START_STRING = 1;
279        private static final int NEED_TOKEN = 0;
280
281        private String filterString;
282        private String nextToken;
283        private int pos;
284        private int state;
285        private char stringDelimiter;
286
287        private FilterTokenizer(final String filterString) {
288            this.filterString = filterString;
289            this.pos = 0;
290            this.state = NEED_TOKEN;
291            readNextToken();
292        }
293
294        @Override
295        public boolean hasNext() {
296            return nextToken != null;
297        }
298
299        @Override
300        public String next() {
301            final String next = peek();
302            readNextToken();
303            return next;
304        }
305
306        @Override
307        public void remove() {
308            throw new UnsupportedOperationException();
309        }
310
311        @Override
312        public String toString() {
313            return filterString;
314        }
315
316        private String peek() {
317            if (nextToken == null) {
318                throw new NoSuchElementException();
319            }
320            return nextToken;
321        }
322
323        private void readNextToken() {
324            switch (state) {
325            case NEED_START_STRING:
326                final int stringStart = pos;
327                for (; pos < filterString.length() && filterString.charAt(pos) != stringDelimiter; pos++) {
328                    if (filterString.charAt(pos) == '\\') {
329                        if ((pos + 1) == filterString.length()) {
330                            throw new IllegalArgumentException("The filter string cannot end with an escape character");
331                        }
332                        // Found an escaped character, so remove the '\')
333                        filterString = new StringBuilder(filterString).deleteCharAt(pos).toString();
334                    }
335                    // Do nothing
336                }
337                nextToken = filterString.substring(stringStart, pos);
338                state = NEED_END_STRING;
339                break;
340            case NEED_END_STRING:
341                // NEED_START_STRING guarantees that we are either at the end of the string
342                // or the next character is a quote.
343                if (pos < filterString.length()) {
344                    nextToken = filterString.substring(pos, ++pos);
345                } else {
346                    nextToken = null;
347                }
348                state = NEED_TOKEN;
349                break;
350            default: // NEED_TOKEN:
351                if (!skipWhiteSpace()) {
352                    nextToken = null;
353                } else {
354                    final int tokenStart = pos;
355                    switch (filterString.charAt(pos++)) {
356                    case '(':
357                    case ')':
358                        break;
359                    case '"':
360                        state = NEED_START_STRING;
361                        stringDelimiter = '"';
362                        break;
363                    case '\'':
364                        state = NEED_START_STRING;
365                        stringDelimiter = '\'';
366                        break;
367                    default:
368                        for (; pos < filterString.length(); pos++) {
369                            final char c = filterString.charAt(pos);
370                            if (c == '(' || c == ')' || c == ' ') {
371                                break;
372                            }
373                        }
374                        break;
375                    }
376                    nextToken = filterString.substring(tokenStart, pos);
377                }
378            }
379        }
380
381        private boolean skipWhiteSpace() {
382            for (; pos < filterString.length() && filterString.charAt(pos) == ' '; pos++) {
383                // Do nothing
384            }
385            return pos < filterString.length();
386        }
387    }
388
389}