001/* 002 * The contents of this file are subject to the terms of the Common Development and 003 * Distribution License (the License). You may not use this file except in compliance with the 004 * License. 005 * 006 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the 007 * specific language governing permission and limitations under the License. 008 * 009 * When distributing Covered Software, include this CDDL Header Notice in each file and include 010 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL 011 * Header, with the fields enclosed by brackets [] replaced by your own identifying 012 * information: "Portions Copyright [year] [name of copyright owner]". 013 * 014 * Copyright 2010–2011 ApexIdentity Inc. 015 * Portions Copyright 2011-2015 ForgeRock AS. 016 */ 017 018package org.forgerock.http.util; 019 020import java.io.UnsupportedEncodingException; 021import java.net.URI; 022import java.net.URISyntaxException; 023import java.net.URLDecoder; 024import java.net.URLEncoder; 025import java.nio.charset.StandardCharsets; 026import java.util.BitSet; 027import java.util.Locale; 028 029import org.forgerock.http.protocol.Form; 030 031/** 032 * Utility class for performing operations on universal resource identifiers. 033 */ 034public final class Uris { 035 036 /** Non-safe characters are escaped as UTF-8 octets using "%" HEXDIG HEXDIG production. */ 037 private static final char URL_ESCAPE_CHAR = '%'; 038 039 /** Look up table for characters which do not need URL encoding in path elements according to RFC 3986. */ 040 private static final BitSet SAFE_URL_PCHAR_CHARS = new BitSet(128); 041 042 /** Look up table for characters which do not need URL encoding in query string parameters according to RFC 3986. */ 043 private static final BitSet SAFE_URL_QUERY_CHARS = new BitSet(128); 044 045 /** Look up table for characters which do not need URL encoding in fragments according to RFC 3986. */ 046 private static final BitSet SAFE_URL_FRAGMENT_CHARS = new BitSet(128); 047 048 /** Look up table for characters which do not need URL encoding in userInfo according to RFC 3986. */ 049 private static final BitSet SAFE_URL_USERINFO_CHARS = new BitSet(128); 050 051 static { 052 /* 053 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 054 * 055 * pct-encoded = "%" HEXDIG HEXDIG 056 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 057 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" 058 */ 059 for (char c : "-._~!$&'()*+,;=:@".toCharArray()) { 060 SAFE_URL_PCHAR_CHARS.set(c); 061 } 062 SAFE_URL_PCHAR_CHARS.set('0', '9' + 1); 063 SAFE_URL_PCHAR_CHARS.set('a', 'z' + 1); 064 SAFE_URL_PCHAR_CHARS.set('A', 'Z' + 1); 065 066 // query = *( pchar / "/" / "?" ) - also encode ? and & since these are parameter separators, 067 // as well as + which is used for encoding white space (see w3c). 068 SAFE_URL_QUERY_CHARS.or(SAFE_URL_PCHAR_CHARS); 069 SAFE_URL_QUERY_CHARS.set('/'); 070 SAFE_URL_QUERY_CHARS.set('?'); 071 SAFE_URL_QUERY_CHARS.clear('&'); 072 SAFE_URL_QUERY_CHARS.clear('='); 073 SAFE_URL_QUERY_CHARS.clear('+'); 074 075 // fragment = *( pchar / "/" / "?" ) 076 SAFE_URL_FRAGMENT_CHARS.or(SAFE_URL_PCHAR_CHARS); 077 SAFE_URL_FRAGMENT_CHARS.set('/'); 078 SAFE_URL_FRAGMENT_CHARS.set('?'); 079 080 // userInfo = *( unreserved / pct-encoded / sub-delims / ":" ) 081 SAFE_URL_USERINFO_CHARS.or(SAFE_URL_PCHAR_CHARS); 082 SAFE_URL_USERINFO_CHARS.clear('@'); 083 } 084 085 /** 086 * Fast lookup for encoding octets as hex. 087 */ 088 private static final String[] BYTE_TO_HEX = new String[256]; 089 static { 090 for (int i = 0; i < BYTE_TO_HEX.length; i++) { 091 BYTE_TO_HEX[i] = String.format(Locale.ROOT, "%02X", i); 092 } 093 } 094 095 /** Static methods only. */ 096 private Uris() { 097 } 098 099 /** 100 * Returns a hierarchical URI constructed from the given components. Differs from the URI 101 * constructor by accepting raw versions of userInfo, path, query and fragment components. 102 * <p> 103 * Unlike {@link #createNonStrict}, this method does not tolerate invalid characters, such 104 * as double-quotes, in the query string. 105 * 106 * @param scheme the scheme component of the URI or {@code null} if none. 107 * @param rawUserInfo the raw user-information component of the URI or {@code null} if none. 108 * @param host the host component of the URI or {@code null} if none. 109 * @param port the port number of the URI or {@code -1} if none. 110 * @param rawPath the raw path component of the URI or {@code null} if none. 111 * @param rawQuery the raw query component of the URI or {@code null} if none. The raw query must not contain 112 * characters that should have been percent encoded. 113 * @param rawFragment the raw fragment component of the URI or {@code null} if none. 114 * @return the URI constructed from the given components. 115 * @throws URISyntaxException if the resulting URI would be malformed per RFC 2396. 116 */ 117 public static URI create(String scheme, String rawUserInfo, String host, int port, 118 String rawPath, String rawQuery, String rawFragment) throws URISyntaxException { 119 StringBuilder sb = new StringBuilder(); 120 if (scheme != null) { 121 sb.append(scheme).append(':'); 122 } 123 if (host != null) { 124 sb.append("//"); 125 } 126 if (rawUserInfo != null) { 127 sb.append(rawUserInfo).append('@'); 128 } 129 if (host != null) { 130 sb.append(host); 131 if (port != -1) { 132 sb.append(':').append(Integer.toString(port)); 133 } 134 } 135 if (rawPath != null) { 136 sb.append(rawPath); 137 } 138 if (rawQuery != null) { 139 sb.append('?').append(rawQuery); 140 } 141 if (rawFragment != null) { 142 sb.append("#").append(rawFragment); 143 } 144 return new URI(sb.toString()); 145 } 146 147 /** 148 * Returns a hierarchical URI constructed from the given components. Differs from the URI 149 * constructor by accepting raw versions of userInfo, path, query and fragment components. 150 * <p> 151 * Unlike {@link #create}, this method tolerates invalid characters, such as double-quotes, 152 * in the query string. 153 * 154 * @param scheme the scheme component of the URI or {@code null} if none. 155 * @param rawUserInfo the raw user-information component of the URI or {@code null} if none. 156 * @param host the host component of the URI or {@code null} if none. 157 * @param port the port number of the URI or {@code -1} if none. 158 * @param rawPath the raw path component of the URI or {@code null} if none. 159 * @param rawQuery the raw query component of the URI or {@code null} if none. The raw query may contain 160 * characters that should have been percent encoded. 161 * @param rawFragment the raw fragment component of the URI or {@code null} if none. 162 * @return the URI constructed from the given components. 163 * @throws URISyntaxException if the resulting URI would be malformed per RFC 2396. 164 */ 165 public static URI createNonStrict(String scheme, String rawUserInfo, String host, int port, 166 String rawPath, String rawQuery, String rawFragment) throws URISyntaxException { 167 return create(scheme, rawUserInfo, host, port, rawPath, asSafeQuery(rawQuery), rawFragment); 168 } 169 170 private static String asSafeQuery(final String rawQuery) throws URISyntaxException { 171 if (rawQuery == null) { 172 return null; 173 } 174 // Allocate a bit of extra padding in case a couple of characters need % encoding. 175 StringBuilder builder = new StringBuilder(rawQuery.length() + 8); 176 for (String param : rawQuery.split("&")) { 177 String[] nv = param.split("=", 2); 178 if (nv.length == 2) { 179 try { 180 if (builder.length() > 0) { 181 builder.append('&'); 182 } 183 184 String name = urlDecodeQueryParameterNameOrValue(nv[0]); 185 String value = urlDecodeQueryParameterNameOrValue(nv[1]); 186 187 builder.append(urlEncodeQueryParameterNameOrValue(name)) 188 .append('=') 189 .append(urlEncodeQueryParameterNameOrValue(value)); 190 } catch (Exception e) { 191 throw new URISyntaxException(rawQuery, "The URL query string could not be decoded"); 192 } 193 } 194 } 195 return builder.toString(); 196 } 197 198 /** 199 * Changes the base scheme, host and port of a request to that specified in a base URI, 200 * or leaves them unchanged if the base URI is {@code null}. This implementation only 201 * uses scheme, host and port. The remaining components of the URI remain intact. 202 * 203 * @param uri the URI whose base is to be changed. 204 * @param base the URI to base the other URI on. 205 * @return the the URI with the new established base. 206 */ 207 public static URI rebase(URI uri, URI base) { 208 if (base == null) { 209 return uri; 210 } 211 String scheme = base.getScheme(); 212 String host = base.getHost(); 213 int port = base.getPort(); 214 if (scheme == null || host == null) { 215 return uri; 216 } 217 try { 218 return create(scheme, uri.getRawUserInfo(), host, port, uri.getRawPath(), 219 uri.getRawQuery(), uri.getRawFragment()); 220 } catch (URISyntaxException e) { 221 throw new IllegalStateException(e); 222 } 223 } 224 225 /** 226 * Returns a new URI having the provided query parameters. The scheme, 227 * authority, path, and fragment remain unchanged. 228 * 229 * @param uri 230 * the URI whose query is to be changed. 231 * @param query 232 * the form containing the query parameters. 233 * @return a new URI having the provided query parameters. The scheme, 234 * authority, path, and fragment remain unchanged. 235 */ 236 public static URI withQuery(final URI uri, final Form query) { 237 try { 238 return create(uri.getScheme(), uri.getRawUserInfo(), uri.getHost(), uri.getPort(), 239 uri.getRawPath(), query.toQueryString(), uri.getRawFragment()); 240 } catch (final URISyntaxException e) { 241 throw new IllegalStateException(e); 242 } 243 } 244 245 /** 246 * Returns a new URI having the same scheme, authority and path, but no 247 * query nor fragment. 248 * 249 * @param uri 250 * the URI whose query and fragments are to be removed. 251 * @return a new URI having the same scheme, authority and path, but no 252 * query nor fragment. 253 */ 254 public static URI withoutQueryAndFragment(final URI uri) { 255 try { 256 return new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), null, null); 257 } catch (final URISyntaxException e) { 258 throw new IllegalStateException(e); 259 } 260 } 261 262 /** 263 * Decodes the provided form encoded parameter name or value as per application/x-www-form-urlencoded. 264 * 265 * @param nameOrValue 266 * the form encoded parameter name or value, which may be {@code null}. 267 * @return the decoded form parameter name or value, or {@code null} if {@code nameOrValue} was {@code null}. 268 */ 269 public static String formDecodeParameterNameOrValue(String nameOrValue) { 270 try { 271 return nameOrValue != null ? URLDecoder.decode(nameOrValue, "UTF-8") : null; 272 } catch (UnsupportedEncodingException e) { 273 return nameOrValue; 274 } 275 } 276 277 /** 278 * Form encodes the provided parameter name or value as per application/x-www-form-urlencoded. 279 * 280 * @param nameOrValue 281 * the parameter name or value, which may be {@code null}. 282 * @return the form encoded parameter name or value, or {@code null} if {@code nameOrValue} was {@code null}. 283 */ 284 public static String formEncodeParameterNameOrValue(String nameOrValue) { 285 try { 286 return nameOrValue != null ? URLEncoder.encode(nameOrValue, "UTF-8") : null; 287 } catch (UnsupportedEncodingException e) { 288 return nameOrValue; 289 } 290 } 291 292 /** 293 * Decodes the provided URL encoded path element as per RFC 3986. 294 * 295 * @param pathElement 296 * the URL encoded path element, which may be {@code null}. 297 * @return the decoded path element, or {@code null} if {@code pathElement} was {@code null}. 298 */ 299 public static String urlDecodePathElement(String pathElement) { 300 return urlDecode(pathElement, false); 301 } 302 303 /** 304 * URL encodes the provided path element as per RFC 3986. 305 * 306 * @param pathElement 307 * the path element, which may be {@code null}. 308 * @return the URL encoded path element, or {@code null} if {@code pathElement} was {@code null}. 309 */ 310 public static String urlEncodePathElement(String pathElement) { 311 return urlEncode(pathElement, SAFE_URL_PCHAR_CHARS); 312 } 313 314 /** 315 * Decodes the provided URL encoded query parameter name or value as per RFC 3986. 316 * 317 * @param nameOrValue 318 * the URL encoded query parameter name or value, which may be {@code null}. 319 * @return the decoded query parameter name or value, or {@code null} if {@code nameOrValue} was {@code null}. 320 */ 321 public static String urlDecodeQueryParameterNameOrValue(String nameOrValue) { 322 return urlDecode(nameOrValue, true); 323 } 324 325 /** 326 * URL encodes the provided query parameter name or value as per RFC 3986. Note that this method does not 327 * adhere to the "query" production in RFC 3986, because it is intended for encoding query parameter names or 328 * values. Therefore, this method will encode '?' and '=' characters. 329 * 330 * @param nameOrValue 331 * the query parameter name or value, which may be {@code null}. 332 * @return the URL encoded query parameter name or value, or {@code null} if {@code nameOrValue} was {@code null}. 333 */ 334 public static String urlEncodeQueryParameterNameOrValue(String nameOrValue) { 335 return urlEncode(nameOrValue, SAFE_URL_QUERY_CHARS); 336 } 337 338 /** 339 * Decodes the provided URL encoded fragment as per RFC 3986. 340 * 341 * @param fragment 342 * the URL encoded fragment, which may be {@code null}. 343 * @return the decoded fragment, or {@code null} if {@code fragment} was {@code null}. 344 */ 345 public static String urlDecodeFragment(String fragment) { 346 return urlDecode(fragment, false); 347 } 348 349 /** 350 * URL encodes the provided fragment as per RFC 3986. 351 * 352 * @param fragment 353 * the fragment, which may be {@code null}. 354 * @return the URL encoded fragment, or {@code null} if {@code fragment} was {@code null}. 355 */ 356 public static String urlEncodeFragment(String fragment) { 357 return urlEncode(fragment, SAFE_URL_FRAGMENT_CHARS); 358 } 359 360 /** 361 * Decodes the provided URL encoded userInfo as per RFC 3986. 362 * 363 * @param userInfo 364 * the URL encoded userInfo, which may be {@code null}. 365 * @return the decoded userInfo, or {@code null} if {@code userInfo} was {@code null}. 366 */ 367 public static String urlDecodeUserInfo(String userInfo) { 368 return urlDecode(userInfo, false); 369 } 370 371 /** 372 * URL encodes the provided userInfo as per RFC 3986. 373 * 374 * @param userInfo 375 * the userInfo, which may be {@code null}. 376 * @return the URL encoded userInfo, or {@code null} if {@code userInfo} was {@code null}. 377 */ 378 public static String urlEncodeUserInfo(String userInfo) { 379 return urlEncode(userInfo, SAFE_URL_USERINFO_CHARS); 380 } 381 382 private static String urlDecode(final String s, final boolean decodePlusToSpace) { 383 if (s == null) { 384 return null; 385 } 386 // First try fast-path decode of simple ASCII. 387 final int size = s.length(); 388 for (int i = 0; i < size; i++) { 389 final char c = s.charAt(i); 390 if (isUrlEscapeChar(c) || (decodePlusToSpace && c == '+')) { 391 // Slow path. 392 return urlDecode0(s, decodePlusToSpace); 393 } 394 } 395 return s; 396 } 397 398 private static String urlDecode0(final String s, final boolean decodePlusToSpace) { 399 final StringBuilder builder = new StringBuilder(s.length()); 400 final int size = s.length(); 401 final byte[] buffer = new byte[size / 3]; 402 for (int i = 0; i < size;) { 403 final char c = s.charAt(i); 404 if (decodePlusToSpace && c == '+') { 405 builder.append(' '); 406 i++; 407 } else if (!isUrlEscapeChar(c)) { 408 builder.append(c); 409 i++; 410 } else { 411 int bufferPos = 0; 412 for (; i < size && isUrlEscapeChar(s.charAt(i)); i += 3) { 413 if ((i + 2) >= size) { 414 throw new IllegalArgumentException( 415 "Path contains an incomplete percent encoding"); 416 } 417 final String hexPair = s.substring(i + 1, i + 3); 418 try { 419 final int octet = Integer.parseInt(hexPair, 16); 420 if (octet < 0) { 421 throw new IllegalArgumentException( 422 "Path contains an invalid percent encoding '" + hexPair + "'"); 423 } 424 buffer[bufferPos++] = (byte) octet; 425 } catch (NumberFormatException e) { 426 throw new IllegalArgumentException( 427 "Path contains an invalid percent encoding '" + hexPair + "'"); 428 } 429 } 430 builder.append(new String(buffer, 0, bufferPos, StandardCharsets.UTF_8)); 431 } 432 } 433 return builder.toString(); 434 } 435 436 private static String urlEncode(final String s, final BitSet safeChars) { 437 if (s == null) { 438 return null; 439 } 440 // First try fast-path encode of simple ASCII. 441 final int size = s.length(); 442 for (int i = 0; i < size; i++) { 443 final int c = s.charAt(i); 444 if (!safeChars.get(c)) { 445 // Slow path. 446 return urlEncode0(s, safeChars); 447 } 448 } 449 return s; 450 } 451 452 private static String urlEncode0(String s, final BitSet safeChars) { 453 final byte[] utf8 = s.getBytes(StandardCharsets.UTF_8); 454 final int size = utf8.length; 455 final StringBuilder builder = new StringBuilder(size + 16); 456 for (final byte b : utf8) { 457 final int octet = b & 0xff; 458 if (safeChars.get(octet)) { 459 builder.append((char) octet); 460 } else { 461 builder.append(URL_ESCAPE_CHAR); 462 builder.append(BYTE_TO_HEX[octet]); 463 } 464 } 465 return builder.toString(); 466 } 467 468 private static boolean isUrlEscapeChar(final char c) { 469 return c == URL_ESCAPE_CHAR; 470 } 471}