/*
 * The contents of this file are subject to the terms of the Common Development and
 * Distribution License (the License). You may not use this file except in compliance with the
 * License.
 *
 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
 * specific language governing permission and limitations under the License.
 *
 * When distributing Covered Software, include this CDDL Header Notice in each file and include
 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
 * Header, with the fields enclosed by brackets [] replaced by your own identifying
 * information: "Portions Copyright [year] [name of copyright owner]".
 *
 * Copyright 2010-2011 ApexIdentity Inc.
 * Portions Copyright 2011-2015 ForgeRock AS.
 */

package org.forgerock.openig.text;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

/**
 * Reads records with delimiter-separated values from a character stream.
 * <p>
 * Records are terminated by an unquoted line feed (an unquoted CR+LF pair is treated as a
 * single line feed). Fields are split on the separator character when it appears outside of
 * quotations. A field may be enclosed in quote characters; within a quoted field, two
 * consecutive quote characters yield one literal quote character. The escape character causes
 * the character that follows it to be taken literally.
 */
public class SeparatedValuesReader {

    private static final int CR = '\r';
    private static final int LF = '\n';

    /** The character stream to read from. */
    private final Reader input;

    /** The separator specification to parse the file with. */
    private final Separator separator;

    /**
     * The number of fields expected in the record; adjusted after each record and used to set
     * the ArrayList initial capacity.
     */
    private int fields = 1;

    /** Read-ahead of next character (needed to check separator escapes). */
    private int next = -1;

    /** Flag indicating that the parse state is currently within quotations. */
    private boolean quoted;

    /**
     * Constructs a new separated values reader, to read a character stream from the
     * specified reader and use the specified separator specification.
     *
     * @param input the character stream to read from.
     * @param separator the separator specification to parse the file with.
     */
    public SeparatedValuesReader(Reader input, Separator separator) {
        this.input = input;
        this.separator = separator;
    }

    /**
     * Reads the next record from the character input stream.
     *
     * @return a list of fields contained in the next record, or {@code null} if the end of stream has been reached.
     * @throws IOException if an I/O exception occurs.
     */
    public List<String> next() throws IOException {
        List<String> list = new ArrayList<>(this.fields);
        StringBuilder sb = new StringBuilder();
        int c;
        boolean escaped = false;
        while ((c = read()) != -1) {
            if (escaped) {
                // character following the escape is always taken literally
                sb.append((char) c);
                escaped = false;
            } else if (c == separator.getEscape()) {
                escaped = true;
            } else if (c == separator.getQuote() && quoted) {
                // Must be tested before the "opening quote" case below: a quote seen while
                // already inside quotations is either the first half of a doubled quote or the
                // closing quote, even when no characters have been collected yet (e.g. the
                // empty quoted field "" or a value that begins with a doubled quote).
                c = read();
                if (c == separator.getQuote()) {
                    // doubled quote inside quotations: one literal quote character
                    sb.append((char) c);
                } else {
                    // closing quote: push the look-ahead character back for the next read
                    next = c;
                    quoted = false;
                }
            } else if (c == separator.getQuote() && sb.length() == 0) {
                // quote at the very start of a field opens a quotation
                quoted = true;
            } else if (c == separator.getCharacter() && !quoted) {
                // unquoted separator ends the current field
                list.add(sb.toString());
                sb.setLength(0);
            } else if (c == LF && !quoted) {
                // unquoted line feed ends the record
                break;
            } else {
                sb.append((char) c);
            }
        }
        // flush the last field; an entirely empty line yields an empty list
        if (list.size() > 0 || sb.length() > 0) {
            list.add(sb.toString());
        }
        if (list.size() == 0 && c == -1) {
            // end of stream
            return null;
        } else {
            // more efficient array allocation for next record
            this.fields = Math.max(this.fields, list.size());
            return list;
        }
    }

    /**
     * Closes the reader and releases any system resources associated with it. Once the
     * reader has been closed, further {@code next()} invocations will throw an
     * {@code IOException}. Closing a previously closed reader has no effect.
     */
    public void close() {
        try {
            input.close();
        } catch (IOException ioe) {
            // exceptions closing the reader are not reported
        }
    }

    /**
     * Reads a single character, honouring any pushed-back read-ahead character, and
     * translates an unquoted CR+LF sequence into a single LF.
     *
     * @return the character read, or {@code -1} if the underlying reader reports end of stream.
     * @throws IOException if an I/O exception occurs.
     */
    private int read() throws IOException {
        int c;
        if (next != -1) {
            // consume the pushed-back character first
            c = next;
            next = -1;
        } else {
            c = input.read();
        }
        if (c == CR && !quoted) {
            int n = input.read();
            if (n == LF) {
                // translate unquoted CR+LF into LF
                c = LF;
            } else {
                // CR not followed by LF; remember read value and return CR
                next = n;
            }
        }
        return c;
    }
}