import java.util.*;
import com.darwinsys.util.Debug;
/** Parse comma-separated values (CSV), a common Windows file format.
* Sample input: "LU",86.25,"11/4/1998","2:19PM",+4.0625
* <p>
* Inner logic adapted from a C++ original that was
* Copyright (C) 1999 Lucent Technologies
* Excerpted from 'The Practice of Programming'
* by Brian W. Kernighan and Rob Pike.
* <p>
* Included by permission of the http://tpop.awl.com/ web site,
* which says:
* "You may use this code for any purpose, as long as you leave
* the copyright notice and book citation attached." I have done so.
* @author Brian W. Kernighan and Rob Pike (C++ original)
* @author Ian F. Darwin (translation into Java and removal of I/O)
* @author Ben Ballard (rewrote advQuoted to handle '""' and for readability)
*/
public class CSV {

    /** Separator used when the no-argument constructor is called. */
    public static final char DEFAULT_SEP = ',';

    /** Construct a CSV parser, with the default separator (','). */
    public CSV() {
        this(DEFAULT_SEP);
    }

    /** Construct a CSV parser with a given separator.
     * @param sep The single char for the separator (not a list of
     * separator characters)
     */
    public CSV(char sep) {
        fieldSep = sep;
    }

    /** The fields of the most recently parsed line; cleared and refilled
     * by every call to parse().
     */
    protected List list = new ArrayList();

    /** the separator char for this parser */
    protected char fieldSep;

    /** parse: break the input String into fields.
     * <p>
     * An empty line yields a single empty field. A separator at the very
     * end of the line is followed by one final empty field, so "a,b,"
     * yields three fields.
     * <p>
     * The returned List is this parser's internal list: it is cleared and
     * reused by the next call to parse(), so copy it if the fields must
     * outlive that call.
     * @param line one line of input, without its line terminator
     * @return java.util.List containing each field
     * from the original as a String, in order.
     * @throws NullPointerException if line is null
     */
    public List parse(String line)
    {
        StringBuffer sb = new StringBuffer();
        list.clear();           // recycle to initial state

        if (line.length() == 0) {
            list.add(line);
            return list;
        }

        int len = line.length();
        int i = 0;              // index where the next field starts
        int sepPos;             // index of the separator ending the field, or len
        do {
            sb.setLength(0);
            if (i < len && line.charAt(i) == '"')
                sepPos = advQuoted(line, sb, i + 1);    // skip opening quote
            else
                sepPos = advPlain(line, sb, i);
            list.add(sb.toString());
            Debug.println("csv", sb.toString());
            i = sepPos + 1;     // step over the separator
            // Test the separator position, not the next start index: when the
            // line ends with a separator, sepPos == len - 1 and one more pass
            // is needed to emit the final empty field.
        } while (sepPos < len);

        return list;
    }

    /** advQuoted: quoted field; return index of next separator.
     * <p>
     * A doubled quote ("") inside the field stands for one literal quote.
     * A quote followed by anything other than a quote, the separator, or
     * end of line is kept as a literal character.
     * @param s the line being parsed
     * @param sb receives the field's text, without the enclosing quotes
     * @param i index of the first character after the opening quote
     * @return index of the separator that follows the closing quote, or
     * s.length() if the field runs to the end of the line
     */
    protected int advQuoted(String s, StringBuffer sb, int i)
    {
        int len = s.length();
        int j;
        for (j = i; j < len; j++) {
            if (s.charAt(j) == '"') {
                if (j + 1 == len) {
                    // closing quote is the last char of the line: there is
                    // no separator, so report end of line
                    return len;
                }
                char next = s.charAt(j + 1);
                if (next == '"') {
                    j++;            // skip escape char; the second quote is appended below
                } else if (next == fieldSep) {
                    return j + 1;   // closing quote; j + 1 is the separator
                }
            }
            sb.append(s.charAt(j)); // regular character.
        }
        return j;                   // unterminated quoted field: j == len
    }

    /** advPlain: unquoted field; return index of next separator.
     * @param s the line being parsed
     * @param sb receives the field's text
     * @param i index of the first character of the field
     * @return index of the next separator at or after i, or s.length()
     * if there is none
     */
    protected int advPlain(String s, StringBuffer sb, int i)
    {
        int j = s.indexOf(fieldSep, i);     // look for separator
        Debug.println("csv", "i = " + i + ", j = " + j);
        if (j == -1) {                      // none found
            sb.append(s.substring(i));
            return s.length();
        }
        sb.append(s.substring(i, j));
        return j;
    }
}
But what about the CSV class itself? The code in Example 3-10 started as a translation of a CSV program written in C++ by Brian W. Kernighan and Rob Pike that appeared in their book The Practice of Programming (Addison Wesley). Their version commingled the input processing with the parsing; my CSV class does only the parsing, since the input could be coming from any of a variety of sources. And it has been substantially rewritten over time. The main work is done in parse( ), which delegates handling of individual fields to advQuoted( ) in cases where the field begins with a quote; otherwise, to advPlain( ).
Be the first to comment
You can use [html][/html], [css][/css], [php][/php] and more to embed the code. Urls are automatically hyperlinked. Line breaks and paragraphs are automatically generated.