View Javadoc

1   /*
2    * Copyright 2004-2006 the Seasar Foundation and the Others.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
13   * either express or implied. See the License for the specific language
14   * governing permissions and limitations under the License.
15   */
16  package com.isenshi.util;
17  
18  import java.io.BufferedReader;
19  import java.io.IOException;
20  import java.io.InputStream;
21  import java.io.InputStreamReader;
22  import java.io.Reader;
23  import java.io.UnsupportedEncodingException;
24  import java.util.ArrayList;
25  import java.util.Iterator;
26  import java.util.List;
27  
28  import org.seasar.tuigwaa.system.Constants;
29  
30  
31  /***
32   * @author someda
33   */
34  public class CSVParser implements Iterator{
35  	
36  	private BufferedReader br;
37  	private String currentLine;
38  	
39  	private static final char QUOTEMARK = '\"';
40  	private static final char DELIMITER = ',';	
41  	
42  	private static final String DEFAULT_ENCODING = "Windows-31J";
43  	
44  	public CSVParser(InputStream is){
45  		try{
46  			br = new BufferedReader(new InputStreamReader(is,DEFAULT_ENCODING));
47  		}catch(UnsupportedEncodingException uee){// should not occur			
48  		}
49  	}	
50  	
51  	public CSVParser(Reader reader){
52  		br = new BufferedReader(reader);
53  	}
54  
55  	// ----- [Start] interface methods -----
56  	public void remove() {
57  		// do nothing		
58  	}
59  
60  	public boolean hasNext() {
61  		return getNextLine() != null;
62  	}
63  
64  	public Object next() {						
65  		return parse(currentLine);
66  	}
67  	// ----- [End] interface methods -----
68  	
69  	private String getNextLine(){		
70  		try{
71  			currentLine = br.readLine();
72  		}catch(IOException ioe){
73  			currentLine = null;
74  		}		
75  		return currentLine;		
76  	}
77  	
78  	private String[] parse(String line){
79  		if(line == null) return null;
80  		
81  		List wordList = new ArrayList();
82  		StringBuffer buf = new StringBuffer();
83  		boolean inword = false;
84  		// for the column contains QUOTEMARKs only
85  		// in that case, quotemark appended withouth changing inword state flag 
86  		boolean appendCharacter = false; 
87  				
88  		do {
89  			if (buf.length() > 0) {
90  				buf.append(Constants.LINEBREAK_CODE);
91  				line = getNextLine();
92  				if (line == null)
93  					break;
94  			}
95  			
96  			int size = line.length();
97  			
98  			for (int i = 0; i < size; i++) {
99  				char c = line.charAt(i);
100 				if (c == QUOTEMARK) {
101 					int num = lookahead(i,line);
102 					i = i + num;
103 					if(num >= 0 && num % 2 == 0){ // odd number of QUOTEMARKs, looked as quotation mark
104 						inword = !inword;
105 					}
106 					for(int j=0; j<(num+1)/2 ; j++){
107 						buf.append(QUOTEMARK);
108 					}
109 				} else if (c == DELIMITER && !inword) {
110 					wordList.add(getAppendString(appendCharacter,buf.toString()));
111 					appendCharacter = false;
112 					buf = new StringBuffer();
113 				} else {
114 					buf.append(c);
115 					appendCharacter = true;
116 				}
117 			}
118 			
119 		} while (inword);
120 		wordList.add(getAppendString(appendCharacter,buf.toString())); // last word
121 			
122 		return (String[]) wordList.toArray(new String[wordList.size()]);
123 	}
124 	
125 	/***
126 	 * shows how many QUOTEMARK exists, from current i position,
127 	 */
128 	private static int lookahead(int i, String line){
129 		
130 		int nextidx = i + 1;
131 		
132 		if(nextidx == line.length()){
133 			return 0;
134 		}else if(line.charAt(nextidx) == QUOTEMARK){
135 			return 1+ lookahead(nextidx,line);
136 		}else{
137 			return 0;
138 		}
139 	}
140 	
141 	private String getAppendString(boolean appendCharacter, String str){
142 		String ret = str;
143 		
144 		if(!appendCharacter){
145 			if(str.length() <= 1){
146 				ret = "";
147 			}else{
148 				ret = str.substring(1);
149 			}
150 		}
151 		return ret;		
152 	}
153 	
154 }