001/*
002 * The contents of this file are subject to the terms of the Common Development and
003 * Distribution License (the License). You may not use this file except in compliance with the
004 * License.
005 *
006 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
007 * specific language governing permission and limitations under the License.
008 *
009 * When distributing Covered Software, include this CDDL Header Notice in each file and include
010 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
011 * Header, with the fields enclosed by brackets [] replaced by your own identifying
012 * information: "Portions Copyright [year] [name of copyright owner]".
013 *
014 * Copyright 2013-2014 Manuel Gaupp
015 */
016package org.forgerock.opendj.ldap;
017
018import java.math.BigInteger;
019import java.util.regex.Matcher;
020import java.util.regex.Pattern;
021
022import org.forgerock.util.Reject;
023import org.forgerock.i18n.LocalizableMessage;
024
025import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_IDENTIFIEDCHOICE;
026import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_IDENTIFIER;
027import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_INTEGER;
028import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_SEPARATOR;
029import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_STRING;
030import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_PATTERN_NO_MATCH;
031import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_SPACE_CHAR_EXPECTED;
032
033/**
034 * This class implements a parser for strings which are encoded using the
035 * Generic String Encoding Rules (GSER) defined in RFC 3641.
036 *
037 * @see <a href="http://tools.ietf.org/html/rfc3641">RFC 3641 - Generic String
038 * Encoding Rules (GSER) for ASN.1 Types</a>
039 */
040public final class GSERParser {
041
042    private final String gserValue;
043
044    private int pos;
045
046    private final int length;
047
048    /**
049     * Pattern to match an identifier defined in RFC 3641, section 3.4.
050     * <pre>
051     * An &lt;identifier&gt; conforms to the definition of an identifier in ASN.1
052     * notation (Clause 11.3 of X.680 [8]).  It begins with a lowercase
053     * letter and is followed by zero or more letters, digits, and hyphens.
054     * A hyphen is not permitted to be the last character, nor is it to be
055     * followed by another hyphen.  The case of letters in an identifier is
056     * always significant.
057     *
058     *    identifier    = lowercase *alphanumeric *(hyphen 1*alphanumeric)
059     *    alphanumeric  = uppercase / lowercase / decimal-digit
060     *    uppercase     = %x41-5A  ; "A" to "Z"
061     *    lowercase     = %x61-7A  ; "a" to "z"
062     *    decimal-digit = %x30-39  ; "0" to "9"
063     *    hyphen        = "-"
064     * </pre>
065     */
066    private static final Pattern GSER_IDENTIFIER = Pattern.compile("^([a-z]([A-Za-z0-9]|(-[A-Za-z0-9]))*)");
067
068    /**
069     * Pattern to match the identifier part (including the colon) of an
070     * IdentifiedChoiceValue defined in RFC 3641, section 3.12.
071     * <pre>
072     *    IdentifiedChoiceValue = identifier ":" Value
073     * </pre>
074     */
075    private static final Pattern GSER_CHOICE_IDENTIFIER = Pattern.compile("^([a-z]([A-Za-z0-9]|(-[A-Za-z0-9]))*:)");
076
077    /**
078     * Pattern to match "sp", containing zero, one or more space characters.
079     * <pre>
080     *    sp = *%x20  ; zero, one or more space characters
081     * </pre>
082     */
083    private static final Pattern GSER_SP = Pattern.compile("^( *)");
084
085    /**
086     * Pattern to match "msp", containing at least one space character.
087     * <pre>
088     *    msp = 1*%x20  ; one or more space characters
089     * </pre>
090     */
091    private static final Pattern GSER_MSP = Pattern.compile("^( +)");
092
093    /**
094     * Pattern to match an Integer value.
095     */
096    private static final Pattern GSER_INTEGER = Pattern.compile("^(\\d+)");
097
098    /**
099     * Pattern to match a GSER StringValue, defined in RFC 3641, section 3.2:
100     * <pre>
101     * Any embedded double quotes in the resulting UTF-8 character string
102     * are escaped by repeating the double quote characters.
103     *
104     * [...]
105     *
106     *    StringValue       = dquote *SafeUTF8Character dquote
107     *    dquote            = %x22 ; &quot; (double quote)
108     * </pre>
109     */
110    private static final Pattern GSER_STRING = Pattern.compile("^(\"([^\"]|(\"\"))*\")");
111
112    /**
113     * Pattern to match the beginning of a GSER encoded Sequence.
114     * <pre>
115     *    SequenceValue = ComponentList
116     *    ComponentList = "{" [ sp NamedValue *( "," sp NamedValue) ] sp "}"
117     * </pre>
118     */
119    private static final Pattern GSER_SEQUENCE_START = Pattern.compile("^(\\{)");
120
121    /**
122     * Pattern to match the end of a GSER encoded Sequence.
123     * <pre>
124     *    SequenceValue = ComponentList
125     *    ComponentList = "{" [ sp NamedValue *( "," sp NamedValue) ] sp "}"
126     * </pre>
127     */
128    private static final Pattern GSER_SEQUENCE_END = Pattern.compile("^(\\})");
129
130    /**
131     * Pattern to match the separator used in GSER encoded sequences.
132     */
133    private static final Pattern GSER_SEP = Pattern.compile("^(,)");
134
135    /**
136     * Creates a new GSER Parser.
137     *
138     * @param value the GSER encoded String value
139     */
140    public GSERParser(CharSequence value) {
141        Reject.checkNotNull(value);
142        this.gserValue = value.toString();
143        this.pos = 0;
144        this.length = value.length();
145    }
146
147    /**
148     * Determines if the GSER String contains at least one character to be read.
149     *
150     * @return <code>true</code> if there is at least one remaining character or
151     * <code>false</code> otherwise.
152     */
153    public boolean hasNext() {
154        return pos < length;
155    }
156
157    /**
158     * Determines if the remaining GSER String matches the provided pattern.
159     *
160     * @param pattern the pattern to search for
161     *
162     * @return <code>true</code> if the remaining string matches the pattern or
163     * <code>false</code> otherwise.
164     */
165    private boolean hasNext(Pattern pattern) {
166        if (!hasNext()) {
167            return false;
168        }
169
170        Matcher matcher = pattern.matcher(gserValue.substring(pos, length));
171
172        return matcher.find();
173    }
174
175    /**
176     * Returns the String matched by the first capturing group of the pattern.
177     * The parser advances past the input matched by the first capturing group.
178     *
179     * @param pattern the pattern to search for
180     *
181     * @return the String matched by the first capturing group of the pattern
182     *
183     * @throws DecodeException If no match could be found
184     */
185    private String next(Pattern pattern) throws DecodeException {
186        Matcher matcher = pattern.matcher(gserValue.substring(pos, length));
187        if (matcher.find() && matcher.groupCount() >= 1) {
188            pos += matcher.end(1);
189            return matcher.group(1);
190        } else {
191            final LocalizableMessage msg =
192                    WARN_GSER_PATTERN_NO_MATCH.get(pattern.pattern(),
193                                                   gserValue.substring(pos, length));
194            throw DecodeException.error(msg);
195        }
196    }
197
198    /**
199     * Skips the input matched by the first capturing group.
200     *
201     * @param pattern the pattern to search for
202     *
203     * @throws DecodeException If no match could be found
204     */
205    private void skip(Pattern pattern) throws DecodeException {
206        Matcher matcher = pattern.matcher(gserValue.substring(pos, length));
207
208        if (matcher.find() && matcher.groupCount() >= 1) {
209            pos += matcher.end(1);
210        } else {
211            final LocalizableMessage msg =
212                    WARN_GSER_PATTERN_NO_MATCH.get(pattern.pattern(),
213                                                   gserValue.substring(pos, length));
214            throw DecodeException.error(msg);
215        }
216    }
217
218    /**
219     * Skips the input matching zero, one or more space characters.
220     *
221     * @return reference to this GSERParser
222     *
223     * @throws DecodeException If no match could be found
224     */
225    public GSERParser skipSP() throws DecodeException {
226        skip(GSER_SP);
227        return this;
228    }
229
230    /**
231     * Skips the input matching one or more space characters.
232     *
233     * @return reference to this GSERParser
234     *
235     * @throws DecodeException If no match could be found
236     */
237    public GSERParser skipMSP() throws DecodeException {
238        skip(GSER_MSP);
239        return this;
240    }
241
242    /**
243     * Skips the input matching the start of a sequence and subsequent space
244     * characters.
245     *
246     * @return reference to this GSERParser
247     *
248     * @throws DecodeException If the input does not match the start of a
249     * sequence
250     */
251    public GSERParser readStartSequence() throws DecodeException {
252        next(GSER_SEQUENCE_START);
253        skip(GSER_SP);
254        return this;
255    }
256
257    /**
258     * Skips the input matching the end of a sequence and preceding space
259     * characters.
260     *
261     * @return reference to this GSERParser
262     *
263     * @throws DecodeException If the input does not match the end of a sequence
264     */
265    public GSERParser readEndSequence() throws DecodeException {
266        skip(GSER_SP);
267        next(GSER_SEQUENCE_END);
268        return this;
269    }
270
271    /**
272     * Skips the input matching the separator pattern (",") and subsequenct
273     * space characters.
274     *
275     * @return reference to this GSERParser
276     *
277     * @throws DecodeException If the input does not match the separator
278     * pattern.
279     */
280    public GSERParser skipSeparator() throws DecodeException {
281        if (!hasNext(GSER_SEP)) {
282            final LocalizableMessage msg =
283                    WARN_GSER_NO_VALID_SEPARATOR.get(gserValue.substring(pos, length));
284            throw DecodeException.error(msg);
285        }
286        skip(GSER_SEP);
287        skip(GSER_SP);
288        return this;
289    }
290
291    /**
292     * Returns the next element as a String.
293     *
294     * @return the input matching the String pattern
295     *
296     * @throws DecodeException If the input does not match the string pattern.
297     */
298    public String nextString() throws DecodeException {
299        if (!hasNext(GSER_STRING)) {
300            final LocalizableMessage msg =
301                    WARN_GSER_NO_VALID_STRING.get(gserValue.substring(pos, length));
302            throw DecodeException.error(msg);
303        }
304
305        String str = next(GSER_STRING);
306
307        // Strip leading and trailing dquotes; unescape double dquotes
308        return str.substring(1, str.length() - 1).replace("\"\"", "\"");
309    }
310
311    /**
312     * Returns the next element as an Integer.
313     *
314     * @return the input matching the integer pattern
315     *
316     * @throws DecodeException If the input does not match the integer pattern
317     */
318    public int nextInteger() throws DecodeException {
319        if (!hasNext(GSER_INTEGER)) {
320            final LocalizableMessage msg =
321                    WARN_GSER_NO_VALID_INTEGER.get(gserValue.substring(pos, length));
322            throw DecodeException.error(msg);
323        }
324        return Integer.valueOf(next(GSER_INTEGER)).intValue();
325    }
326
327    /**
328     * Returns the next element as a BigInteger.
329     *
330     * @return the input matching the integer pattern
331     *
332     * @throws DecodeException If the input does not match the integer pattern
333     */
334    public BigInteger nextBigInteger() throws DecodeException {
335        if (!hasNext(GSER_INTEGER)) {
336            final LocalizableMessage msg =
337                    WARN_GSER_NO_VALID_INTEGER.get(gserValue.substring(pos, length));
338            throw DecodeException.error(msg);
339        }
340        return new BigInteger(next(GSER_INTEGER));
341    }
342
343    /**
344     * Returns the identifier of the next NamedValue element.
345     *
346     * @return the identifier of the NamedValue element
347     *
348     * @throws DecodeException If the input does not match the identifier
349     * pattern of a NamedValue
350     */
351    public String nextNamedValueIdentifier() throws DecodeException {
352        if (!hasNext(GSER_IDENTIFIER)) {
353            final LocalizableMessage msg =
354                    WARN_GSER_NO_VALID_IDENTIFIER.get(gserValue.substring(pos, length));
355            throw DecodeException.error(msg);
356        }
357        String identifier = next(GSER_IDENTIFIER);
358        if (!hasNext(GSER_MSP)) {
359            final LocalizableMessage msg =
360                    WARN_GSER_SPACE_CHAR_EXPECTED.get(gserValue.substring(pos, length));
361            throw DecodeException.error(msg);
362        }
363        skipMSP();
364        return identifier;
365    }
366
367    /**
368     * Return the identifier of the next IdentifiedChoiceValue element.
369     *
370     * @return the identifier of the IdentifiedChoiceValue element
371     *
372     * @throws DecodeException If the input does not match the identifier
373     * pattern of an IdentifiedChoiceValue
374     */
375    public String nextChoiceValueIdentifier() throws DecodeException {
376        if (!hasNext(GSER_CHOICE_IDENTIFIER)) {
377            final LocalizableMessage msg =
378                    WARN_GSER_NO_VALID_IDENTIFIEDCHOICE.get(gserValue.substring(pos, length));
379            throw DecodeException.error(msg);
380        }
381        String identifier = next(GSER_CHOICE_IDENTIFIER);
382
383        // Remove the colon at the end of the identifier
384        return identifier.substring(0, identifier.length() - 1);
385    }
386
387    /**
388     * Returns the GSER encoded String value.
389     *
390     * @return The GSER encoded String value.
391     */
392    @Override
393    public String toString() {
394        return gserValue;
395    }
396}