001/* 002 * The contents of this file are subject to the terms of the Common Development and 003 * Distribution License (the License). You may not use this file except in compliance with the 004 * License. 005 * 006 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the 007 * specific language governing permission and limitations under the License. 008 * 009 * When distributing Covered Software, include this CDDL Header Notice in each file and include 010 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL 011 * Header, with the fields enclosed by brackets [] replaced by your own identifying 012 * information: "Portions Copyright [year] [name of copyright owner]". 013 * 014 * Copyright 2013-2014 Manuel Gaupp 015 */ 016package org.forgerock.opendj.ldap; 017 018import java.math.BigInteger; 019import java.util.regex.Matcher; 020import java.util.regex.Pattern; 021 022import org.forgerock.util.Reject; 023import org.forgerock.i18n.LocalizableMessage; 024 025import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_IDENTIFIEDCHOICE; 026import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_IDENTIFIER; 027import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_INTEGER; 028import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_SEPARATOR; 029import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_STRING; 030import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_PATTERN_NO_MATCH; 031import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_SPACE_CHAR_EXPECTED; 032 033/** 034 * This class implements a parser for strings which are encoded using the 035 * Generic String Encoding Rules (GSER) defined in RFC 3641. 036 * 037 * @see <a href="http://tools.ietf.org/html/rfc3641">RFC 3641 - Generic String 038 * Encoding Rules (GSER) for ASN.1 Types</a> 039 */ 040public final class GSERParser { 041 042 private final String gserValue; 043 044 private int pos; 045 046 private final int length; 047 048 /** 049 * Pattern to match an identifier defined in RFC 3641, section 3.4. 050 * <pre> 051 * An <identifier> conforms to the definition of an identifier in ASN.1 052 * notation (Clause 11.3 of X.680 [8]). It begins with a lowercase 053 * letter and is followed by zero or more letters, digits, and hyphens. 054 * A hyphen is not permitted to be the last character, nor is it to be 055 * followed by another hyphen. The case of letters in an identifier is 056 * always significant. 057 * 058 * identifier = lowercase *alphanumeric *(hyphen 1*alphanumeric) 059 * alphanumeric = uppercase / lowercase / decimal-digit 060 * uppercase = %x41-5A ; "A" to "Z" 061 * lowercase = %x61-7A ; "a" to "z" 062 * decimal-digit = %x30-39 ; "0" to "9" 063 * hyphen = "-" 064 * </pre> 065 */ 066 private static final Pattern GSER_IDENTIFIER = Pattern.compile("^([a-z]([A-Za-z0-9]|(-[A-Za-z0-9]))*)"); 067 068 /** 069 * Pattern to match the identifier part (including the colon) of an 070 * IdentifiedChoiceValue defined in RFC 3641, section 3.12. 071 * <pre> 072 * IdentifiedChoiceValue = identifier ":" Value 073 * </pre> 074 */ 075 private static final Pattern GSER_CHOICE_IDENTIFIER = Pattern.compile("^([a-z]([A-Za-z0-9]|(-[A-Za-z0-9]))*:)"); 076 077 /** 078 * Pattern to match "sp", containing zero, one or more space characters. 079 * <pre> 080 * sp = *%x20 ; zero, one or more space characters 081 * </pre> 082 */ 083 private static final Pattern GSER_SP = Pattern.compile("^( *)"); 084 085 /** 086 * Pattern to match "msp", containing at least one space character. 087 * <pre> 088 * msp = 1*%x20 ; one or more space characters 089 * </pre> 090 */ 091 private static final Pattern GSER_MSP = Pattern.compile("^( +)"); 092 093 /** 094 * Pattern to match an Integer value. 095 */ 096 private static final Pattern GSER_INTEGER = Pattern.compile("^(\\d+)"); 097 098 /** 099 * Pattern to match a GSER StringValue, defined in RFC 3641, section 3.2: 100 * <pre> 101 * Any embedded double quotes in the resulting UTF-8 character string 102 * are escaped by repeating the double quote characters. 103 * 104 * [...] 105 * 106 * StringValue = dquote *SafeUTF8Character dquote 107 * dquote = %x22 ; " (double quote) 108 * </pre> 109 */ 110 private static final Pattern GSER_STRING = Pattern.compile("^(\"([^\"]|(\"\"))*\")"); 111 112 /** 113 * Pattern to match the beginning of a GSER encoded Sequence. 114 * <pre> 115 * SequenceValue = ComponentList 116 * ComponentList = "{" [ sp NamedValue *( "," sp NamedValue) ] sp "}" 117 * </pre> 118 */ 119 private static final Pattern GSER_SEQUENCE_START = Pattern.compile("^(\\{)"); 120 121 /** 122 * Pattern to match the end of a GSER encoded Sequence. 123 * <pre> 124 * SequenceValue = ComponentList 125 * ComponentList = "{" [ sp NamedValue *( "," sp NamedValue) ] sp "}" 126 * </pre> 127 */ 128 private static final Pattern GSER_SEQUENCE_END = Pattern.compile("^(\\})"); 129 130 /** 131 * Pattern to match the separator used in GSER encoded sequences. 132 */ 133 private static final Pattern GSER_SEP = Pattern.compile("^(,)"); 134 135 /** 136 * Creates a new GSER Parser. 137 * 138 * @param value the GSER encoded String value 139 */ 140 public GSERParser(CharSequence value) { 141 Reject.checkNotNull(value); 142 this.gserValue = value.toString(); 143 this.pos = 0; 144 this.length = value.length(); 145 } 146 147 /** 148 * Determines if the GSER String contains at least one character to be read. 149 * 150 * @return <code>true</code> if there is at least one remaining character or 151 * <code>false</code> otherwise. 152 */ 153 public boolean hasNext() { 154 return pos < length; 155 } 156 157 /** 158 * Determines if the remaining GSER String matches the provided pattern. 159 * 160 * @param pattern the pattern to search for 161 * 162 * @return <code>true</code> if the remaining string matches the pattern or 163 * <code>false</code> otherwise. 164 */ 165 private boolean hasNext(Pattern pattern) { 166 if (!hasNext()) { 167 return false; 168 } 169 170 Matcher matcher = pattern.matcher(gserValue.substring(pos, length)); 171 172 return matcher.find(); 173 } 174 175 /** 176 * Returns the String matched by the first capturing group of the pattern. 177 * The parser advances past the input matched by the first capturing group. 178 * 179 * @param pattern the pattern to search for 180 * 181 * @return the String matched by the first capturing group of the pattern 182 * 183 * @throws DecodeException If no match could be found 184 */ 185 private String next(Pattern pattern) throws DecodeException { 186 Matcher matcher = pattern.matcher(gserValue.substring(pos, length)); 187 if (matcher.find() && matcher.groupCount() >= 1) { 188 pos += matcher.end(1); 189 return matcher.group(1); 190 } else { 191 final LocalizableMessage msg = 192 WARN_GSER_PATTERN_NO_MATCH.get(pattern.pattern(), 193 gserValue.substring(pos, length)); 194 throw DecodeException.error(msg); 195 } 196 } 197 198 /** 199 * Skips the input matched by the first capturing group. 200 * 201 * @param pattern the pattern to search for 202 * 203 * @throws DecodeException If no match could be found 204 */ 205 private void skip(Pattern pattern) throws DecodeException { 206 Matcher matcher = pattern.matcher(gserValue.substring(pos, length)); 207 208 if (matcher.find() && matcher.groupCount() >= 1) { 209 pos += matcher.end(1); 210 } else { 211 final LocalizableMessage msg = 212 WARN_GSER_PATTERN_NO_MATCH.get(pattern.pattern(), 213 gserValue.substring(pos, length)); 214 throw DecodeException.error(msg); 215 } 216 } 217 218 /** 219 * Skips the input matching zero, one or more space characters. 220 * 221 * @return reference to this GSERParser 222 * 223 * @throws DecodeException If no match could be found 224 */ 225 public GSERParser skipSP() throws DecodeException { 226 skip(GSER_SP); 227 return this; 228 } 229 230 /** 231 * Skips the input matching one or more space characters. 232 * 233 * @return reference to this GSERParser 234 * 235 * @throws DecodeException If no match could be found 236 */ 237 public GSERParser skipMSP() throws DecodeException { 238 skip(GSER_MSP); 239 return this; 240 } 241 242 /** 243 * Skips the input matching the start of a sequence and subsequent space 244 * characters. 245 * 246 * @return reference to this GSERParser 247 * 248 * @throws DecodeException If the input does not match the start of a 249 * sequence 250 */ 251 public GSERParser readStartSequence() throws DecodeException { 252 next(GSER_SEQUENCE_START); 253 skip(GSER_SP); 254 return this; 255 } 256 257 /** 258 * Skips the input matching the end of a sequence and preceding space 259 * characters. 260 * 261 * @return reference to this GSERParser 262 * 263 * @throws DecodeException If the input does not match the end of a sequence 264 */ 265 public GSERParser readEndSequence() throws DecodeException { 266 skip(GSER_SP); 267 next(GSER_SEQUENCE_END); 268 return this; 269 } 270 271 /** 272 * Skips the input matching the separator pattern (",") and subsequenct 273 * space characters. 274 * 275 * @return reference to this GSERParser 276 * 277 * @throws DecodeException If the input does not match the separator 278 * pattern. 279 */ 280 public GSERParser skipSeparator() throws DecodeException { 281 if (!hasNext(GSER_SEP)) { 282 final LocalizableMessage msg = 283 WARN_GSER_NO_VALID_SEPARATOR.get(gserValue.substring(pos, length)); 284 throw DecodeException.error(msg); 285 } 286 skip(GSER_SEP); 287 skip(GSER_SP); 288 return this; 289 } 290 291 /** 292 * Returns the next element as a String. 293 * 294 * @return the input matching the String pattern 295 * 296 * @throws DecodeException If the input does not match the string pattern. 297 */ 298 public String nextString() throws DecodeException { 299 if (!hasNext(GSER_STRING)) { 300 final LocalizableMessage msg = 301 WARN_GSER_NO_VALID_STRING.get(gserValue.substring(pos, length)); 302 throw DecodeException.error(msg); 303 } 304 305 String str = next(GSER_STRING); 306 307 // Strip leading and trailing dquotes; unescape double dquotes 308 return str.substring(1, str.length() - 1).replace("\"\"", "\""); 309 } 310 311 /** 312 * Returns the next element as an Integer. 313 * 314 * @return the input matching the integer pattern 315 * 316 * @throws DecodeException If the input does not match the integer pattern 317 */ 318 public int nextInteger() throws DecodeException { 319 if (!hasNext(GSER_INTEGER)) { 320 final LocalizableMessage msg = 321 WARN_GSER_NO_VALID_INTEGER.get(gserValue.substring(pos, length)); 322 throw DecodeException.error(msg); 323 } 324 return Integer.valueOf(next(GSER_INTEGER)).intValue(); 325 } 326 327 /** 328 * Returns the next element as a BigInteger. 329 * 330 * @return the input matching the integer pattern 331 * 332 * @throws DecodeException If the input does not match the integer pattern 333 */ 334 public BigInteger nextBigInteger() throws DecodeException { 335 if (!hasNext(GSER_INTEGER)) { 336 final LocalizableMessage msg = 337 WARN_GSER_NO_VALID_INTEGER.get(gserValue.substring(pos, length)); 338 throw DecodeException.error(msg); 339 } 340 return new BigInteger(next(GSER_INTEGER)); 341 } 342 343 /** 344 * Returns the identifier of the next NamedValue element. 345 * 346 * @return the identifier of the NamedValue element 347 * 348 * @throws DecodeException If the input does not match the identifier 349 * pattern of a NamedValue 350 */ 351 public String nextNamedValueIdentifier() throws DecodeException { 352 if (!hasNext(GSER_IDENTIFIER)) { 353 final LocalizableMessage msg = 354 WARN_GSER_NO_VALID_IDENTIFIER.get(gserValue.substring(pos, length)); 355 throw DecodeException.error(msg); 356 } 357 String identifier = next(GSER_IDENTIFIER); 358 if (!hasNext(GSER_MSP)) { 359 final LocalizableMessage msg = 360 WARN_GSER_SPACE_CHAR_EXPECTED.get(gserValue.substring(pos, length)); 361 throw DecodeException.error(msg); 362 } 363 skipMSP(); 364 return identifier; 365 } 366 367 /** 368 * Return the identifier of the next IdentifiedChoiceValue element. 369 * 370 * @return the identifier of the IdentifiedChoiceValue element 371 * 372 * @throws DecodeException If the input does not match the identifier 373 * pattern of an IdentifiedChoiceValue 374 */ 375 public String nextChoiceValueIdentifier() throws DecodeException { 376 if (!hasNext(GSER_CHOICE_IDENTIFIER)) { 377 final LocalizableMessage msg = 378 WARN_GSER_NO_VALID_IDENTIFIEDCHOICE.get(gserValue.substring(pos, length)); 379 throw DecodeException.error(msg); 380 } 381 String identifier = next(GSER_CHOICE_IDENTIFIER); 382 383 // Remove the colon at the end of the identifier 384 return identifier.substring(0, identifier.length() - 1); 385 } 386 387 /** 388 * Returns the GSER encoded String value. 389 * 390 * @return The GSER encoded String value. 391 */ 392 @Override 393 public String toString() { 394 return gserValue; 395 } 396}