/*
 * The contents of this file are subject to the terms of the Common Development and
 * Distribution License (the License). You may not use this file except in compliance with the
 * License.
 *
 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
 * specific language governing permission and limitations under the License.
 *
 * When distributing Covered Software, include this CDDL Header Notice in each file and include
 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
 * Header, with the fields enclosed by brackets [] replaced by your own identifying
 * information: "Portions Copyright [year] [name of copyright owner]".
 *
 * Copyright 2010-2011 ApexIdentity Inc.
 * Portions Copyright 2011-2014 ForgeRock AS.
 */

import java.util.Collection;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Iterates through multiple regular expression matches within a character sequence.
 * <p>
 * Every pattern keeps at most one pending match. {@link #next()} hands out the pending match
 * with the smallest start index and then, unless patterns are discarded after their first
 * match, looks for that pattern's following match beginning one character after the start of
 * the match just returned. Because the search resumes after the <em>start</em> rather than the
 * end of the previous match, overlapping matches of the same pattern are reported.
 */
public class StringPatternMatches {

    /** The patterns to match. */
    private final Pattern[] patterns;

    /** Matches found, with {@literal null} elements indicating no match for pattern. */
    private final Matcher[] matchers;

    /** The character sequence to search. */
    private final CharSequence input;

    /** Should patterns be discarded after they yield a match. */
    private final boolean discard;

    /**
     * Constructs a new string pattern match iterator. If {@code discard} is {@code true},
     * then a pattern is discarded after it is first matched.
     * <p>
     * The first match of every pattern is located eagerly here. {@code null} elements in
     * {@code patterns} are skipped and never produce a match.
     *
     * @param input the character sequence to match regular expression patterns against.
     * @param patterns a collection of regular expression patterns to match.
     * @param discard indicates patterns be discarded after they yield a match.
     */
    public StringPatternMatches(final CharSequence input, final Collection<Pattern> patterns, final boolean discard) {
        this.input = input;
        this.patterns = patterns.toArray(new Pattern[patterns.size()]);
        this.matchers = new Matcher[this.patterns.length];
        this.discard = discard;
        for (int n = 0; n < this.patterns.length; n++) {
            // null pattern means already discarded; simply ignore
            if (this.patterns[n] == null) {
                continue;
            }
            final Matcher matcher = this.patterns[n].matcher(input);
            // matchers without any matches are not used
            if (matcher.find()) {
                matchers[n] = matcher;
            }
        }
    }

    /**
     * Returns the next match from the character sequence. Matches are returned in the order
     * they are encountered in the character sequence, then by the order they are expressed in
     * the supplied patterns collection.
     *
     * @return the next {@link java.util.regex.Matcher} from the character sequence, positioned
     *         on the match it represents.
     * @throws NoSuchElementException if there are no more matches.
     */
    public Matcher next() {
        // index of matcher with smallest start index
        int matcherIndex = -1;
        // smallest start index encountered
        int charIndex = Integer.MAX_VALUE;
        // strict '<' keeps the earliest-declared pattern when several matches share a start
        for (int n = 0; n < matchers.length; n++) {
            if (matchers[n] != null) {
                final int start = matchers[n].start();
                if (start < charIndex) {
                    charIndex = start;
                    matcherIndex = n;
                }
            }
        }
        if (matcherIndex == -1) {
            // no active matchers found
            throw new NoSuchElementException();
        }
        // save match to return; the slot then receives the pattern's following match, if any
        final Matcher next = matchers[matcherIndex];
        matchers[matcherIndex] = discard ? null : findFollowingMatch(matcherIndex, charIndex);
        return next;
    }

    /**
     * Returns {@code true} if the character sequence has more matches.
     *
     * @return {@code true} if the character sequence has more matches.
     */
    public boolean hasNext() {
        for (final Matcher matcher : matchers) {
            if (matcher != null) {
                // any existing matcher means another match exists
                return true;
            }
        }
        return false;
    }

    /**
     * Locates the match of one pattern that follows a match starting at {@code previousStart}.
     *
     * @param index position of the pattern in {@link #patterns}.
     * @param previousStart start index of the match that was just handed out.
     * @return a fresh matcher positioned on the following match, or {@code null} if none.
     */
    private Matcher findFollowingMatch(final int index, final int previousStart) {
        // A match starting on the last character ends the pattern's iteration. The '>=' also
        // covers a zero-length match at the very end of the input (start == length), where
        // find(previousStart + 1) would be out of range and throw IndexOutOfBoundsException.
        if (previousStart >= input.length() - 1) {
            return null;
        }
        final Matcher matcher = patterns[index].matcher(input);
        return matcher.find(previousStart + 1) ? matcher : null;
    }
}