001/*
002 * The contents of this file are subject to the terms of the Common Development and
003 * Distribution License (the License). You may not use this file except in compliance with the
004 * License.
005 *
006 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
007 * specific language governing permission and limitations under the License.
008 *
009 * When distributing Covered Software, include this CDDL Header Notice in each file and include
010 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
011 * Header, with the fields enclosed by brackets [] replaced by your own identifying
012 * information: "Portions Copyright [year] [name of copyright owner]".
013 *
014 * Copyright 2010–2011 ApexIdentity Inc.
015 * Portions Copyright 2011-2015 ForgeRock AS.
016 */
017
018package org.forgerock.http.util;
019
020import java.io.UnsupportedEncodingException;
021import java.net.URI;
022import java.net.URISyntaxException;
023import java.net.URLDecoder;
024import java.net.URLEncoder;
025import java.nio.charset.StandardCharsets;
026import java.util.BitSet;
027import java.util.Locale;
028
029import org.forgerock.http.protocol.Form;
030
031/**
032 * Utility class for performing operations on universal resource identifiers.
033 */
034public final class Uris {
035
036    /** Non-safe characters are escaped as UTF-8 octets using "%" HEXDIG HEXDIG production. */
037    private static final char URL_ESCAPE_CHAR = '%';
038
039    /** Look up table for characters which do not need URL encoding in path elements according to RFC 3986. */
040    private static final BitSet SAFE_URL_PCHAR_CHARS = new BitSet(128);
041
042    /** Look up table for characters which do not need URL encoding in query string parameters according to RFC 3986. */
043    private static final BitSet SAFE_URL_QUERY_CHARS = new BitSet(128);
044
045    /** Look up table for characters which do not need URL encoding in fragments according to RFC 3986. */
046    private static final BitSet SAFE_URL_FRAGMENT_CHARS = new BitSet(128);
047
048    /** Look up table for characters which do not need URL encoding in userInfo according to RFC 3986. */
049    private static final BitSet SAFE_URL_USERINFO_CHARS = new BitSet(128);
050
051    static {
052        /*
053         * pchar       = unreserved / pct-encoded / sub-delims / ":" / "@"
054         *
055         * pct-encoded = "%" HEXDIG HEXDIG
056         * unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
057         * sub-delims  = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
058         */
059        for (char c : "-._~!$&'()*+,;=:@".toCharArray()) {
060            SAFE_URL_PCHAR_CHARS.set(c);
061        }
062        SAFE_URL_PCHAR_CHARS.set('0', '9' + 1);
063        SAFE_URL_PCHAR_CHARS.set('a', 'z' + 1);
064        SAFE_URL_PCHAR_CHARS.set('A', 'Z' + 1);
065
066        // query = *( pchar / "/" / "?" ) - also encode ? and & since these are parameter separators,
067        //                                  as well as + which is used for encoding white space (see w3c).
068        SAFE_URL_QUERY_CHARS.or(SAFE_URL_PCHAR_CHARS);
069        SAFE_URL_QUERY_CHARS.set('/');
070        SAFE_URL_QUERY_CHARS.set('?');
071        SAFE_URL_QUERY_CHARS.clear('&');
072        SAFE_URL_QUERY_CHARS.clear('=');
073        SAFE_URL_QUERY_CHARS.clear('+');
074
075        // fragment = *( pchar / "/" / "?" )
076        SAFE_URL_FRAGMENT_CHARS.or(SAFE_URL_PCHAR_CHARS);
077        SAFE_URL_FRAGMENT_CHARS.set('/');
078        SAFE_URL_FRAGMENT_CHARS.set('?');
079
080        // userInfo = *( unreserved / pct-encoded / sub-delims / ":" )
081        SAFE_URL_USERINFO_CHARS.or(SAFE_URL_PCHAR_CHARS);
082        SAFE_URL_USERINFO_CHARS.clear('@');
083    }
084
085    /**
086     * Fast lookup for encoding octets as hex.
087     */
088    private static final String[] BYTE_TO_HEX = new String[256];
089    static {
090        for (int i = 0; i < BYTE_TO_HEX.length; i++) {
091            BYTE_TO_HEX[i] = String.format(Locale.ROOT, "%02X", i);
092        }
093    }
094
095    /** Static methods only. */
096    private Uris() {
097    }
098
099    /**
100     * Returns a hierarchical URI constructed from the given components. Differs from the URI
101     * constructor by accepting raw versions of userInfo, path, query and fragment components.
102     * <p>
103     * Unlike {@link #createNonStrict}, this method does not tolerate invalid characters, such
104     * as double-quotes, in the query string.
105     *
106     * @param scheme the scheme component of the URI or {@code null} if none.
107     * @param rawUserInfo the raw user-information component of the URI or {@code null} if none.
108     * @param host the host component of the URI or {@code null} if none.
109     * @param port the port number of the URI or {@code -1} if none.
110     * @param rawPath the raw path component of the URI or {@code null} if none.
111     * @param rawQuery the raw query component of the URI or {@code null} if none. The raw query must not contain
112     *                 characters that should have been percent encoded.
113     * @param rawFragment the raw fragment component of the URI or {@code null} if none.
114     * @return the URI constructed from the given components.
115     * @throws URISyntaxException if the resulting URI would be malformed per RFC 2396.
116     */
117    public static URI create(String scheme, String rawUserInfo, String host, int port,
118            String rawPath, String rawQuery, String rawFragment) throws URISyntaxException {
119        StringBuilder sb = new StringBuilder();
120        if (scheme != null) {
121            sb.append(scheme).append(':');
122        }
123        if (host != null) {
124            sb.append("//");
125        }
126        if (rawUserInfo != null) {
127            sb.append(rawUserInfo).append('@');
128        }
129        if (host != null) {
130            sb.append(host);
131            if (port != -1) {
132                sb.append(':').append(Integer.toString(port));
133            }
134        }
135        if (rawPath != null) {
136            sb.append(rawPath);
137        }
138        if (rawQuery != null) {
139            sb.append('?').append(rawQuery);
140        }
141        if (rawFragment != null) {
142            sb.append("#").append(rawFragment);
143        }
144        return new URI(sb.toString());
145    }
146
147    /**
148     * Returns a hierarchical URI constructed from the given components. Differs from the URI
149     * constructor by accepting raw versions of userInfo, path, query and fragment components.
150     * <p>
151     * Unlike {@link #create}, this method tolerates invalid characters, such as double-quotes,
152     * in the query string.
153     *
154     * @param scheme the scheme component of the URI or {@code null} if none.
155     * @param rawUserInfo the raw user-information component of the URI or {@code null} if none.
156     * @param host the host component of the URI or {@code null} if none.
157     * @param port the port number of the URI or {@code -1} if none.
158     * @param rawPath the raw path component of the URI or {@code null} if none.
159     * @param rawQuery the raw query component of the URI or {@code null} if none. The raw query may contain
160     *                 characters that should have been percent encoded.
161     * @param rawFragment the raw fragment component of the URI or {@code null} if none.
162     * @return the URI constructed from the given components.
163     * @throws URISyntaxException if the resulting URI would be malformed per RFC 2396.
164     */
165    public static URI createNonStrict(String scheme, String rawUserInfo, String host, int port,
166                             String rawPath, String rawQuery, String rawFragment) throws URISyntaxException {
167        return create(scheme, rawUserInfo, host, port, rawPath, asSafeQuery(rawQuery), rawFragment);
168    }
169
170    private static String asSafeQuery(final String rawQuery) throws URISyntaxException {
171        if (rawQuery == null) {
172            return null;
173        }
174        // Allocate a bit of extra padding in case a couple of characters need % encoding.
175        StringBuilder builder = new StringBuilder(rawQuery.length() + 8);
176        for (String param : rawQuery.split("&")) {
177            String[] nv = param.split("=", 2);
178            if (nv.length == 2) {
179                try {
180                    if (builder.length() > 0) {
181                        builder.append('&');
182                    }
183
184                    String name = urlDecodeQueryParameterNameOrValue(nv[0]);
185                    String value = urlDecodeQueryParameterNameOrValue(nv[1]);
186
187                    builder.append(urlEncodeQueryParameterNameOrValue(name))
188                           .append('=')
189                           .append(urlEncodeQueryParameterNameOrValue(value));
190                } catch (Exception e) {
191                    throw new URISyntaxException(rawQuery, "The URL query string could not be decoded");
192                }
193            }
194        }
195        return builder.toString();
196    }
197
198    /**
199     * Changes the base scheme, host and port of a request to that specified in a base URI,
200     * or leaves them unchanged if the base URI is {@code null}. This implementation only
201     * uses scheme, host and port. The remaining components of the URI remain intact.
202     *
203     * @param uri the URI whose base is to be changed.
204     * @param base the URI to base the other URI on.
205     * @return the the URI with the new established base.
206     */
207    public static URI rebase(URI uri, URI base)  {
208        if (base == null) {
209            return uri;
210        }
211        String scheme = base.getScheme();
212        String host = base.getHost();
213        int port = base.getPort();
214        if (scheme == null || host == null) {
215            return uri;
216        }
217        try {
218            return create(scheme, uri.getRawUserInfo(), host, port, uri.getRawPath(),
219                    uri.getRawQuery(), uri.getRawFragment());
220        } catch (URISyntaxException e) {
221            throw new IllegalStateException(e);
222        }
223    }
224
225    /**
226     * Returns a new URI having the provided query parameters. The scheme,
227     * authority, path, and fragment remain unchanged.
228     *
229     * @param uri
230     *            the URI whose query is to be changed.
231     * @param query
232     *            the form containing the query parameters.
233     * @return a new URI having the provided query parameters. The scheme,
234     *         authority, path, and fragment remain unchanged.
235     */
236    public static URI withQuery(final URI uri, final Form query) {
237        try {
238            return create(uri.getScheme(), uri.getRawUserInfo(), uri.getHost(), uri.getPort(),
239                    uri.getRawPath(), query.toQueryString(), uri.getRawFragment());
240        } catch (final URISyntaxException e) {
241            throw new IllegalStateException(e);
242        }
243    }
244
245    /**
246     * Returns a new URI having the same scheme, authority and path, but no
247     * query nor fragment.
248     *
249     * @param uri
250     *            the URI whose query and fragments are to be removed.
251     * @return a new URI having the same scheme, authority and path, but no
252     *         query nor fragment.
253     */
254    public static URI withoutQueryAndFragment(final URI uri) {
255        try {
256            return new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), null, null);
257        } catch (final URISyntaxException e) {
258            throw new IllegalStateException(e);
259        }
260    }
261
262    /**
263     * Decodes the provided form encoded parameter name or value as per application/x-www-form-urlencoded.
264     *
265     * @param nameOrValue
266     *         the form encoded parameter name or value, which may be {@code null}.
267     * @return the decoded form parameter name or value, or {@code null} if {@code nameOrValue} was {@code null}.
268     */
269    public static String formDecodeParameterNameOrValue(String nameOrValue) {
270        try {
271            return nameOrValue != null ? URLDecoder.decode(nameOrValue, "UTF-8") : null;
272        } catch (UnsupportedEncodingException e) {
273            return nameOrValue;
274        }
275    }
276
277    /**
278     * Form encodes the provided parameter name or value as per application/x-www-form-urlencoded.
279     *
280     * @param nameOrValue
281     *         the parameter name or value, which may be {@code null}.
282     * @return the form encoded parameter name or value, or {@code null} if {@code nameOrValue} was {@code null}.
283     */
284    public static String formEncodeParameterNameOrValue(String nameOrValue) {
285        try {
286            return nameOrValue != null ? URLEncoder.encode(nameOrValue, "UTF-8") : null;
287        } catch (UnsupportedEncodingException e) {
288            return nameOrValue;
289        }
290    }
291
292    /**
293     * Decodes the provided URL encoded path element as per RFC 3986.
294     *
295     * @param pathElement
296     *            the URL encoded path element, which may be {@code null}.
297     * @return the decoded path element, or {@code null} if {@code pathElement} was {@code null}.
298     */
299    public static String urlDecodePathElement(String pathElement) {
300        return urlDecode(pathElement, false);
301    }
302
303    /**
304     * URL encodes the provided path element as per RFC 3986.
305     *
306     * @param pathElement
307     *         the path element, which may be {@code null}.
308     * @return the URL encoded path element, or {@code null} if {@code pathElement} was {@code null}.
309     */
310    public static String urlEncodePathElement(String pathElement) {
311        return urlEncode(pathElement, SAFE_URL_PCHAR_CHARS);
312    }
313
314    /**
315     * Decodes the provided URL encoded query parameter name or value as per RFC 3986.
316     *
317     * @param nameOrValue
318     *            the URL encoded query parameter name or value, which may be {@code null}.
319     * @return the decoded query parameter name or value, or {@code null} if {@code nameOrValue} was {@code null}.
320     */
321    public static String urlDecodeQueryParameterNameOrValue(String nameOrValue) {
322        return urlDecode(nameOrValue, true);
323    }
324
325    /**
326     * URL encodes the provided query parameter name or value as per RFC 3986. Note that this method does not
327     * adhere to the "query" production in RFC 3986, because it is intended for encoding query parameter names or
328     * values. Therefore, this method will encode '?' and '=' characters.
329     *
330     * @param nameOrValue
331     *         the query parameter name or value, which may be {@code null}.
332     * @return the URL encoded query parameter name or value, or {@code null} if {@code nameOrValue} was {@code null}.
333     */
334    public static String urlEncodeQueryParameterNameOrValue(String nameOrValue) {
335        return urlEncode(nameOrValue, SAFE_URL_QUERY_CHARS);
336    }
337
338    /**
339     * Decodes the provided URL encoded fragment as per RFC 3986.
340     *
341     * @param fragment
342     *            the URL encoded fragment, which may be {@code null}.
343     * @return the decoded fragment, or {@code null} if {@code fragment} was {@code null}.
344     */
345    public static String urlDecodeFragment(String fragment) {
346        return urlDecode(fragment, false);
347    }
348
349    /**
350     * URL encodes the provided fragment as per RFC 3986.
351     *
352     * @param fragment
353     *         the fragment, which may be {@code null}.
354     * @return the URL encoded fragment, or {@code null} if {@code fragment} was {@code null}.
355     */
356    public static String urlEncodeFragment(String fragment) {
357        return urlEncode(fragment, SAFE_URL_FRAGMENT_CHARS);
358    }
359
360    /**
361     * Decodes the provided URL encoded userInfo as per RFC 3986.
362     *
363     * @param userInfo
364     *            the URL encoded userInfo, which may be {@code null}.
365     * @return the decoded userInfo, or {@code null} if {@code userInfo} was {@code null}.
366     */
367    public static String urlDecodeUserInfo(String userInfo) {
368        return urlDecode(userInfo, false);
369    }
370
371    /**
372     * URL encodes the provided userInfo as per RFC 3986.
373     *
374     * @param userInfo
375     *         the userInfo, which may be {@code null}.
376     * @return the URL encoded userInfo, or {@code null} if {@code userInfo} was {@code null}.
377     */
378    public static String urlEncodeUserInfo(String userInfo) {
379        return urlEncode(userInfo, SAFE_URL_USERINFO_CHARS);
380    }
381
382    private static String urlDecode(final String s, final boolean decodePlusToSpace) {
383        if (s == null) {
384            return null;
385        }
386        // First try fast-path decode of simple ASCII.
387        final int size = s.length();
388        for (int i = 0; i < size; i++) {
389            final char c = s.charAt(i);
390            if (isUrlEscapeChar(c) || (decodePlusToSpace && c == '+')) {
391                // Slow path.
392                return urlDecode0(s, decodePlusToSpace);
393            }
394        }
395        return s;
396    }
397
398    private static String urlDecode0(final String s, final boolean decodePlusToSpace) {
399        final StringBuilder builder = new StringBuilder(s.length());
400        final int size = s.length();
401        final byte[] buffer = new byte[size / 3];
402        for (int i = 0; i < size;) {
403            final char c = s.charAt(i);
404            if (decodePlusToSpace && c == '+') {
405                builder.append(' ');
406                i++;
407            } else if (!isUrlEscapeChar(c)) {
408                builder.append(c);
409                i++;
410            } else {
411                int bufferPos = 0;
412                for (; i < size && isUrlEscapeChar(s.charAt(i)); i += 3) {
413                    if ((i + 2) >= size) {
414                        throw new IllegalArgumentException(
415                                "Path contains an incomplete percent encoding");
416                    }
417                    final String hexPair = s.substring(i + 1, i + 3);
418                    try {
419                        final int octet = Integer.parseInt(hexPair, 16);
420                        if (octet < 0) {
421                            throw new IllegalArgumentException(
422                                    "Path contains an invalid percent encoding '" + hexPair + "'");
423                        }
424                        buffer[bufferPos++] = (byte) octet;
425                    } catch (NumberFormatException e) {
426                        throw new IllegalArgumentException(
427                                "Path contains an invalid percent encoding '" + hexPair + "'");
428                    }
429                }
430                builder.append(new String(buffer, 0, bufferPos, StandardCharsets.UTF_8));
431            }
432        }
433        return builder.toString();
434    }
435
436    private static String urlEncode(final String s, final BitSet safeChars) {
437        if (s == null) {
438            return null;
439        }
440        // First try fast-path encode of simple ASCII.
441        final int size = s.length();
442        for (int i = 0; i < size; i++) {
443            final int c = s.charAt(i);
444            if (!safeChars.get(c)) {
445                // Slow path.
446                return urlEncode0(s, safeChars);
447            }
448        }
449        return s;
450    }
451
452    private static String urlEncode0(String s, final BitSet safeChars) {
453        final byte[] utf8 = s.getBytes(StandardCharsets.UTF_8);
454        final int size = utf8.length;
455        final StringBuilder builder = new StringBuilder(size + 16);
456        for (final byte b : utf8) {
457            final int octet = b & 0xff;
458            if (safeChars.get(octet)) {
459                builder.append((char) octet);
460            } else {
461                builder.append(URL_ESCAPE_CHAR);
462                builder.append(BYTE_TO_HEX[octet]);
463            }
464        }
465        return builder.toString();
466    }
467
468    private static boolean isUrlEscapeChar(final char c) {
469        return c == URL_ESCAPE_CHAR;
470    }
471}