001/*
002 * The contents of this file are subject to the terms of the Common Development and
003 * Distribution License (the License). You may not use this file except in compliance with the
004 * License.
005 *
006 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
007 * specific language governing permission and limitations under the License.
008 *
009 * When distributing Covered Software, include this CDDL Header Notice in each file and include
010 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
011 * Header, with the fields enclosed by brackets [] replaced by your own identifying
012 * information: "Portions Copyright [year] [name of copyright owner]".
013 *
014 * Copyright 2010-2011 ApexIdentity Inc.
015 * Portions Copyright 2011-2014 ForgeRock AS.
016 */
017
018package org.forgerock.openig.regex;
019
020import java.util.Collection;
021import java.util.NoSuchElementException;
022import java.util.regex.Matcher;
023import java.util.regex.Pattern;
024
/**
 * Iterates through multiple regular expression matches within a character sequence.
 * <p>
 * Every supplied pattern is searched against the same input. Matches are handed out in
 * order of their start index; when several patterns match at the same start index, the
 * pattern that appears first in the supplied collection wins.
 * <p>
 * When patterns are not discarded, the search for a pattern's following match resumes one
 * character after the start of its previous match (not after its end), so successive
 * matches of the same pattern may overlap.
 */
public class StringPatternMatches {

    /** The patterns to match; {@code null} elements are ignored. */
    private final Pattern[] patterns;

    /**
     * Pending match for each pattern (same index as {@link #patterns}), with {@code null}
     * elements indicating that the pattern has no further match.
     */
    private final Matcher[] matchers;

    /** The character sequence to search. */
    private final CharSequence input;

    /** Should patterns be discarded after they yield a match. */
    private final boolean discard;

    /**
     * Constructs a new string pattern match iterator. If {@code discard} is {@code true},
     * then a pattern is discarded after it is first matched.
     * <p>
     * The first match of every pattern is located eagerly here, so that {@link #hasNext()}
     * and {@link #next()} only have to compare already-positioned matchers.
     *
     * @param input the character sequence to match regular expression patterns against.
     * @param patterns a collection of regular expression patterns to match; {@code null}
     * elements are skipped.
     * @param discard indicates patterns be discarded after they yield a match.
     */
    public StringPatternMatches(final CharSequence input, final Collection<Pattern> patterns, final boolean discard) {
        this.input = input;
        this.discard = discard;
        this.patterns = patterns.toArray(new Pattern[patterns.size()]);
        this.matchers = new Matcher[this.patterns.length];
        for (int n = 0; n < this.patterns.length; n++) {
            final Pattern pattern = this.patterns[n];
            if (pattern == null) {
                // a null pattern is treated as already discarded
                continue;
            }
            final Matcher matcher = pattern.matcher(input);
            // only keep matchers that are positioned on an actual match
            if (matcher.find()) {
                this.matchers[n] = matcher;
            }
        }
    }

    /**
     * Returns the next match from the character sequence. Matches are returned in the order
     * they are encountered in the character sequence, then by the order they are expressed in
     * the supplied patterns collection.
     * <p>
     * The returned matcher is positioned on the match and is no longer used by this object,
     * so the caller may freely query its groups.
     *
     * @return the next {@link java.util.regex.Matcher} from the character sequence.
     * @throws NoSuchElementException if there are no more matches.
     */
    public Matcher next() {
        // index of matcher with smallest start index
        int matcherIndex = -1;
        // smallest start index encountered
        int charIndex = Integer.MAX_VALUE;
        // strict '<' keeps the earliest-declared pattern when start indexes tie
        for (int n = 0; n < matchers.length; n++) {
            if (matchers[n] != null) {
                final int start = matchers[n].start();
                if (start < charIndex) {
                    charIndex = start;
                    matcherIndex = n;
                }
            }
        }
        if (matcherIndex == -1) {
            // no active matchers found
            throw new NoSuchElementException();
        }
        // save match to return
        final Matcher next = matchers[matcherIndex];
        // a fresh matcher is used for the following search so that the returned one keeps
        // its match state (Matcher.find(int) resets the matcher it is called on)
        Matcher following = null;
        if (!discard) {
            // A zero-length match at the very end of the input starts at input.length();
            // Matcher.find(int) throws IndexOutOfBoundsException for a start index beyond
            // the input length, so '>=' (rather than '==') is required here. As before, no
            // further search is attempted once the match starts on the last character.
            if (charIndex < input.length() - 1) {
                final Matcher candidate = patterns[matcherIndex].matcher(input);
                if (candidate.find(charIndex + 1)) {
                    following = candidate;
                }
            }
        }
        // null marks the pattern as exhausted (or discarded)
        matchers[matcherIndex] = following;
        return next;
    }

    /**
     * Returns {@code true} if the character sequence has more matches.
     *
     * @return {@code true} if the character sequence has more matches.
     */
    public boolean hasNext() {
        for (final Matcher matcher : matchers) {
            if (matcher != null) {
                // any existing matcher means another match exists
                return true;
            }
        }
        return false;
    }
}