001/*
002 * The contents of this file are subject to the terms of the Common Development and
003 * Distribution License (the License). You may not use this file except in compliance with the
004 * License.
005 *
006 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
007 * specific language governing permission and limitations under the License.
008 *
009 * When distributing Covered Software, include this CDDL Header Notice in each file and include
010 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
011 * Header, with the fields enclosed by brackets [] replaced by your own identifying
012 * information: "Portions Copyright [year] [name of copyright owner]".
013 *
014 * Copyright 2010-2011 ApexIdentity Inc.
015 * Portions Copyright 2011-2015 ForgeRock AS.
016 */
017
018package org.forgerock.openig.text;
019
020import java.io.IOException;
021import java.io.Reader;
022import java.util.ArrayList;
023import java.util.List;
024
025/**
026 * Reads records with delimiter-separated values from a character stream.
027 */
028public class SeparatedValuesReader {
029
030    private static final int CR = '\r';
031    private static final int LF = '\n';
032
033    /** The character stream to read from. */
034    private final Reader input;
035
036    /** The separator specification to parse the file with. */
037    private final Separator separator;
038
039    /** The number of  expected in the record; adjusted and used to set the ArrayList initial capacity. */
040    private int fields = 1;
041
042    /** Read-ahead of next character (needed to check separator escapes). */
043    private int next = -1;
044
045    /** Flag indicating that the parse state is currently within quotations. */
046    private boolean quoted;
047
048    /**
049     * Constructs a new separated values reader, to read a character stream from the
050     * specified reader and use the specified separator specification.
051     *
052     * @param input the character stream to read from.
053     * @param separator the separator specification to parse the file with.
054     */
055    public SeparatedValuesReader(Reader input, Separator separator) {
056        this.input = input;
057        this.separator = separator;
058    }
059
060    /**
061     * Reads the next record from the character input stream.
062     *
063     * @return a list of fields contained in the next record, or {@code null} if the end of stream has been reached.
064     * @throws IOException if an I/O exception occurs.
065     */
066    public List<String> next() throws IOException {
067        ArrayList<String> list = new ArrayList<>(this.fields);
068        StringBuilder sb = new StringBuilder();
069        int c;
070        boolean escaped = false;
071        while ((c = read()) != -1) {
072            if (escaped) {
073                sb.append((char) c);
074                escaped = false;
075            } else if (c == separator.getEscape()) {
076                escaped = true;
077            } else if (c == separator.getQuote() && sb.length() == 0) {
078                quoted = true;
079            } else if (c == separator.getQuote() && quoted) {
080                c = read();
081                if (c == separator.getQuote()) {
082                    sb.append((char) c);
083                } else {
084                    next = c;
085                    quoted = false;
086                }
087            } else if (c == separator.getCharacter() && !quoted) {
088                list.add(sb.toString());
089                sb.setLength(0);
090            } else if (c == LF && !quoted) {
091                break;
092            } else {
093                sb.append((char) c);
094            }
095        }
096        if (list.size() > 0 || sb.length() > 0) {
097            list.add(sb.toString());
098        }
099        if (list.size() == 0 && c == -1) {
100            // end of stream
101            return null;
102        } else {
103            // more efficient array allocation for next record
104            this.fields = Math.max(this.fields, list.size());
105            return list;
106        }
107    }
108
109    /**
110     * Closes the reader and releases any system resources associated with it. Once the
111     * reader has been closed, further {@code next()} invocations will throw an
112     * {@code IOException}. Closing a previously closed reader has no effect.
113     */
114    public void close() {
115        try {
116            input.close();
117        } catch (IOException ioe) {
118            // exceptions closing the reader are not reported
119        }
120    }
121
122    private int read() throws IOException {
123        int c;
124        if (next != -1) {
125            c = next;
126            next = -1;
127        } else {
128            c = input.read();
129        }
130        if (c == CR && !quoted) {
131            int n = input.read();
132            if (n == LF) {
133                // translate unquoted CR+LF into LF
134                c = LF;
135            } else {
136                // CR not followed by LF; remember read value and return CR
137                next = n;
138            }
139        }
140        return c;
141    }
142}