通过org.apache.commons.lang.Tokenizer解析上传的txt文件

最近在看我们项目中的一个对上传的txt文件的解析,发现这个txt文件比较复杂,虽然字段之间都是用“|”分隔,但是,这个文件又包含了不同的长度的块,需要对每一块进行解析。这个可以使用apache的一些工具包common-lang-2.1.jar。 这个txt文件如下:

cardcode|cardname|appl_no|classcode|classname|appf|begdate|enddate|cardtype|risktype|feetype|feeamt|pieces|dcdm|timetype|num|saleattr|regdate|regname|prelmt|operno|utime|planno|headocno|dbvalue|prntype|dbvalue1|dbvalue2|prntno
R12034|*****a1||EA3611|test1|1|2013/06/01|2020/12/31|6|1|8|0.00|1.000000||4|0|010111|2013/06/01|CIPA0006|0|CIPA0005|2013060110:30:23|09|000000|1.00|08|1.00|1.00|09
R12034|****a2||AA4301|test2|2|2013/06/01|2020/12/31|6|2|8|0.00|1.000000||4|0|010111|2013/06/01|CIPA0006|0|CIPA0005|2013060110:30:23|09|000000|1.00|08|1.00|1.00|09
cardcode|saleattr
R12034|20
R12034|32
R12034|33
R12034|40
cardcode|classcode|pre|clmcode|typeno|planno|responame
R12034|EA3611|300000.000000|112|00|09|XXX
R12034|EA3611|200000.000000|112|00|09|YYY
R12034|EA3611|150000.000000|112|01|09|ZZZ
R12034|EA3611|200000.000000|112|02|09|AAA
cardcode|sex|begage|endage|minpieces|maxpieces|movedate|planno
R12034|0|18|70|0.000000|1.000000||09
cardcode|planno|issign|cardtype|activedays|isunion|unionrate|begday|endday|iscont|proc
R12034|09|1|6|0|0|0.000000|1|90|0|00100
comp
NAJ
cardcode|planno|specagr
cardcode|planno|classcode|feetype|preamt|risktype|feeamt
R12034|9|AA4301|9|7.00|2|13.0000
R12034|9|AA4301|9|10.00|2|18.0000
R12034|9|AA4301|9|14.00|2|25.0000
R12034|9|AA4301|9|17.00|2|29.0000
R12034|9|AA4301|9|21.00|2|35.0000
R12034|9|AA4301|9|24.00|2|39.0000
R12034|9|AA4301|9|28.00|2|45.0000
R12034|9|EA3611|9|7.00|2|54.0000
cardcode|planno|is_insure_tgtr|insrnc_code|rate
cardcode|classcode|code|dbvalue|chvalue|attr|type|planno
cardcode|planno|type|feetxt
cardcode|planno|classcode|maxpre
cardcode|planno|feeftup|feeftdown|count
public HashMap unmarshal(BufferedReader reader) throws Exception {
		reader = new BufferedReader(
					new InputStreamReader(new FileInputStream(new File(
							productFilePath))));
		String line = reader.readLine();
		HashMap productInfo = new HashMap();
		List saleAttributeList = new ArrayList();
		List insuranceLiabilityList = new ArrayList();
		List productInfoList = new ArrayList();
		List productpremiumsetList = new ArrayList();

		List productGradeList = new ArrayList();
		List unionlist = new ArrayList();

		int index = 0;
		int floatFlag = 0;

		String productcode = "";
		// String pieces=null;
		Map pieces = new HashMap();

		while (line != null && StringUtils.isNotBlank(line)) {
			Tokenizer tokenizer = Tokenizer.getCSVInstance(line);
			tokenizer
					.setDelimiterMatcher(new org.apache.commons.lang.Tokenizer.CharMatcher(
							'|'));
			String columns[] = tokenizer.getAllTokens();
			if (columns.length == 29) {
				if (columns[1].trim().equals("cardname")) {
					line = reader.readLine();
					continue;
				}
				ProductBaseInfo productbaseinfo = new ProductBaseInfo();
				if (columns[5].trim().equals("1"))
					productcode = columns[0].trim() + columns[3].trim()
							+ columns[22].trim() + "LF";
				productbaseinfo.setCardCode(columns[0].trim()
						+ columns[3].trim() + columns[22].trim() + "LF");
				productbaseinfo.setPlanNo(columns[22].trim());
				productbaseinfo.setCardName(columns[1].trim());
				productbaseinfo.setHeadOcno(columns[23].trim());
				productbaseinfo.setMessageSource("2"); // 默认值
				// agreementSpecMain.setExternalReference(columns[0].trim() +
				// "_" + columns[22].trim());
				productbaseinfo.setClassCode(columns[3].trim());
				productbaseinfo.setClassName(columns[4].trim());
				productbaseinfo.setAppf(columns[5].trim()); // 1 是 其他 否
				productbaseinfo.setBegDate(columns[6].trim());
				productbaseinfo
						.setEndDate(StringUtil.isNull(columns[7]) ? "2099/01/01"
								: columns[7].trim());
				......
				productInfoList.add(productbaseinfo);
				productInfo.put("productbaseinfo", productInfoList);
			}
			if (columns.length == 2) {
				if (columns[1].trim().equals("saleattr")) {
					line = reader.readLine();
					continue;
				}
				SaleAttribute saleattri = new SaleAttribute();
				saleattri.setCardcode(productcode);
				saleattri.setSaleAttr(columns[1].trim());
				saleAttributeList.add(saleattri);
				productInfo.put("saleattri", saleAttributeList);
			}
			if (columns.length == 4&&columns[2].length()==6) {
				if (columns[3].trim().equals("maxpre")) {
					line = reader.readLine();
					continue;
				}
				String prodcode=columns[0]+columns[2]+columns[1]+"LF";
				productInfo.put(prodcode, columns[3]);
			}
			//读取 cardcode|planno|feeftup|feeftdown|count
			//NJJG12|16|800.00| 20.00|1
			if (columns.length == 5) {
				if (columns[3].trim().equals("feeftdown")) {
					line = reader.readLine();
					floatFlag=1;
					continue;
				}
				if(productInfoList.size()>0 && floatFlag==1) {
					ProductBaseInfo productbaseinfo = (ProductBaseInfo) productInfoList.get(0);
					productbaseinfo.setUpFloatRate(columns[2]);
					productbaseinfo.setDownFloatRate(columns[3]);
					productbaseinfo.setFloatRateFlag("true");
				}
				
			}
			if (columns.length == 7 && columns[1].length() > 2
					&& !columns[1].equals("planno")) {
				if (columns[1].trim().equals("classcode")) {
					line = reader.readLine();
					continue;
				}
				InsuranceLiability insuranceliability = new InsuranceLiability();
				insuranceliability.setCardCode(columns[0].trim()
						+ columns[1].trim()
						+ this.FormatPlanno(columns[5].trim()) + "LF");
				insuranceliability.setClassCode(columns[1].trim());
				....
				insuranceliability
						.setRespoName(StringUtil.isNull(columns[6]) ? "XX金额"
								: columns[6].trim());
				insuranceliability.setTypeNo(columns[4].trim());
				insuranceLiabilityList.add(insuranceliability);
				productInfo.put("insuranceliability", insuranceLiabilityList);

			}
			if (columns.length == 8) {
				if (columns[1].trim().equals("sex")) {
					line = reader.readLine();
					index = 1;
					continue;
				} else if (columns[5].trim().equals("attr")) {
					line = reader.readLine();
					index = 2;
					continue;
				}

				if (index == 1) {
					ProductOther productother = new ProductOther();
					productother.setBegage(columns[2].trim());
					productother.setCardcode(productcode);
					....
					productInfo.put("productother", productother);
				} else if (index == 2) {
					TProductGrade productGrade = new TProductGrade();
					productGrade.setProductCode(columns[0].trim()
							+ columns[1].trim() + columns[7].trim() + "LF");
					productGrade.setCode(columns[2].trim());
					productGrade.setDbvalue(Double.valueOf(columns[3].trim()));
					productGrade.setChvalue(columns[4].trim());
					productGrade.setAttr(columns[5].trim());
					productGrade.setType(columns[6].trim());
					productGradeList.add(productGrade);
					productInfo.put("productGrade", productGradeList);
				}
			}
			if (columns.length == 11) {
				if (columns[1].trim().equals("planno")) {
					line = reader.readLine();
					continue;
				}
				ProductOther1 productother1 = new ProductOther1();
				productother1.setActiveDays(columns[4].trim());
				productother1.setBegDay(columns[7].trim());
				productother1.setCardCode(productcode);
				.......
				productInfo.put("productother1", productother1);
			}
			if (columns.length == 1) {
				if (columns[0].trim().equals("comp")) {
					line = reader.readLine();
					continue;
				}
				ProductComp productcomp = new ProductComp();
				productcomp.setCardcode(productcode);
				productcomp.setComp(columns[0].trim());
				productInfo.put("productcomp", productcomp);
			}
		
			line = reader.readLine();
		}
		return productInfo;

	}

引入apache的工具类Tokenizer.java

/*
 * Copyright 2003-2004 The Apache Software Foundation.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.ListIterator;

/**
 * Tokenizes a string based based on delimiters (separators)
 * and supporting quoting and ignored character concepts.
 * 

* This class can split a String into many smaller strings. * It aims to do a similar job to java util StringTokenizer, however it offers * much more control and flexibility. By default, it is setup like StringTokenizer. *

* The input String is split into a number of tokens. * Each token is separated from the next String by a delimiter. * One or more delimiter characters must be specified. *

* The processing then strips all the ignored characters from each side of the token. * The token may also have quotes to mark an area not to be stripped or tokenized. * Empty tokens may be removed or returned as null. * This example is based on the CSV tokenizer. *

 * "a,b,c"       - Three tokens "a","b","c"   (comma delimiter)
 * "a, b , c"    - Three tokens "a","b","c"   (ignored space characters stripped)
 * "a, " b ", c" - Three tokens "a"," b ","c" (quoted text untouched)
 * 
*

* * This tokenizer has the following properties and options: * *

* * * * * * * * * * * * * * * * * * *
PropertyBmfClassDefault
delimCharSetMatcher{ \t\n\r\f}
quoteNoneMatcher{}
ignoreNoneMatcher{}
emptyTokenAsNullbooleanfalse
ignoreEmptyTokensbooleantrue
* * @author Matthew Inger * @author Stephen Colebourne * @author Gary D. Gregory * @since 2.1 * @version $Id: Tokenizer.java,v 1.1 2006/07/30 06:21:10 wangxun Exp $ */ public class Tokenizer implements ListIterator, Cloneable { /** * A Matcher which matches the comma character. * Best used for delimiter. */ public static final Matcher COMMA_MATCHER = new CharMatcher(';'); /** * A Matcher which matches the tab character. * Best used for delimiter. */ public static final Matcher TAB_MATCHER = new CharMatcher('\t'); /** * A Matcher which matches the space character. * Best used for delimiter. */ public static final Matcher SPACE_MATCHER = new CharMatcher(' '); /** * A Matcher which matches the same characters as StringTokenizer, * namely space, tab, newline, formfeed. * Best used for delimiter. */ public static final Matcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f"); /** * A Matcher which matches the double quote character. * Best used for quote. */ public static final Matcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"'); /** * A Matcher which matches the String trim() whitespace characters. * Best used for ignored. */ public static final Matcher TRIM_MATCHER = new TrimMatcher(); /** * A Matcher that matches no characters. Don't use this for delimiters! * Best used for ignored. */ public static final Matcher NONE_MATCHER = new NoMatcher(); private static final Tokenizer CSV_TOKENIZER_PROTOTYPE; private static final Tokenizer TSV_TOKENIZER_PROTOTYPE; static { CSV_TOKENIZER_PROTOTYPE = new Tokenizer(StringUtils.EMPTY); CSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(COMMA_MATCHER); CSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(DOUBLE_QUOTE_MATCHER); CSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(TRIM_MATCHER); CSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false); CSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false); TSV_TOKENIZER_PROTOTYPE = new Tokenizer(StringUtils.EMPTY); TSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(TAB_MATCHER); TSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(DOUBLE_QUOTE_MATCHER); TSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(TRIM_MATCHER); TSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false); TSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false); } /** The text to work on */ private char chars[]; /** The input text, null if char[] input */ private String text; /** The parsed tokens */ private String tokens[]; /** The current iteration position */ private int tokenPos; /** The delimiter matcher */ private Matcher delim = SPLIT_MATCHER; /** The quote matcher */ private Matcher quote = NONE_MATCHER; /** The ignored matcher */ private Matcher ignored = NONE_MATCHER; /** Whether to return empty tokens as null */ private boolean emptyAsNull = false; /** Whether to ignore empty tokens */ private boolean ignoreEmptyTokens = true; //----------------------------------------------------------------------- /** * Get a tokenizer instance which parses Comma Seperated Value * strings. You must call a "reset" method to set the string which * you want to parse. */ public static final Tokenizer getCSVInstance() { return (Tokenizer)(CSV_TOKENIZER_PROTOTYPE.clone()); } /** * Get a tokenizer instance which parses Comma Seperated Value * strings, initializing it with the given input. * * @param input the string to parse */ public static final Tokenizer getCSVInstance(String input) { Tokenizer tok = (Tokenizer)(CSV_TOKENIZER_PROTOTYPE.clone()); tok.reset(input); return tok; } /** * Get a tokenizer instance which parses Comma Seperated Value * strings, initializing it with the given input. * * @param input the text to parse */ public static final Tokenizer getCSVInstance(char[] input) { Tokenizer tok = (Tokenizer)(CSV_TOKENIZER_PROTOTYPE.clone()); tok.reset(input); return tok; } /** * Get a tokenizer instance which parses Tab Seperated Value * strings. You must call a "reset" method to set the string which * you want to parse. */ public static final Tokenizer getTSVInstance() { return (Tokenizer)(TSV_TOKENIZER_PROTOTYPE.clone()); } /** * Get a tokenizer instance which parses Tab Seperated Value * strings, initializing it with the given input. * * @param input the string to parse */ public static final Tokenizer getTSVInstance(String input) { Tokenizer tok = (Tokenizer)(TSV_TOKENIZER_PROTOTYPE.clone()); tok.reset(input); return tok; } /** * Get a tokenizer instance which parses Tab Seperated Value * strings, initializing it with the given input. * * @param input the text to parse */ public static final Tokenizer getTSVInstance(char[] input) { Tokenizer tok = (Tokenizer)(TSV_TOKENIZER_PROTOTYPE.clone()); tok.reset(input); return tok; } //----------------------------------------------------------------------- /** * Constructs a tokenizer splitting on space, tab, newline and formfeed * as per StringTokenizer. * * @param input the string which is to be parsed */ public Tokenizer(String input) { super(); this.text = input; this.chars = input.toCharArray(); // no clone as toCharArray() clones } /** * Constructs a tokenizer splitting on space, tab, newline and formfeed * as per StringTokenizer. * * @param input the string which is to be parsed * @param delim the field delimiter character */ public Tokenizer(String input, char delim) { this(input); setDelimiterChar(delim); } /** * Constructs a tokenizer splitting on space, tab, newline and formfeed * as per StringTokenizer. * * @param input the string which is to be parsed * @param delim the field delimiter character */ public Tokenizer(String input, CharSetMatcher delim) { this(input); setDelimiterMatcher(delim); } /** * Constructs a tokenizer splitting on space, tab, newline and formfeed * as per StringTokenizer. * * @param input the string which is to be parsed * @param delim the field delimiter character * @param quote the field quoted string character */ public Tokenizer(String input, char delim, char quote) { this(input, delim); setQuoteChar(quote); } /** * Constructs a tokenizer splitting on space, tab, newline and formfeed * as per StringTokenizer. * * @param input the string which is to be parsed * @param delim the field delimiter character * @param quote the field quoted string character */ public Tokenizer(String input, CharSetMatcher delim, CharSetMatcher quote) { this(input, delim); setQuoteMatcher(quote); } /** * Constructs a tokenizer splitting on space, tab, newline and formfeed * as per StringTokenizer. * * @param input the string which is to be parsed, cloned */ public Tokenizer(char[] input) { super(); this.text = null; this.chars = (char[]) input.clone(); } /** * Constructs a tokenizer splitting on space, tab, newline and formfeed * as per StringTokenizer. * * @param input the string which is to be parsed, cloned * @param delim the field delimiter character */ public Tokenizer(char[] input, char delim) { this(input); setDelimiterChar(delim); } /** * Constructs a tokenizer splitting on space, tab, newline and formfeed * as per StringTokenizer. * * @param input the string which is to be parsed, cloned * @param delim the field delimiter character */ public Tokenizer(char[] input, CharSetMatcher delim) { this(input); setDelimiterMatcher(delim); } /** * Constructs a tokenizer splitting on space, tab, newline and formfeed * as per StringTokenizer. * * @param input the string which is to be parsed, cloned * @param delim the field delimiter character * @param quote the field quoted string character */ public Tokenizer(char[] input, char delim, char quote) { this(input, delim); setQuoteChar(quote); } /** * Constructs a tokenizer splitting on space, tab, newline and formfeed * as per StringTokenizer. * * @param input the string which is to be parsed, cloned * @param delim the field delimiter character * @param quote the field quoted string character */ public Tokenizer(char[] input, CharSetMatcher delim, CharSetMatcher quote) { this(input, delim); setQuoteMatcher(quote); } // API //----------------------------------------------------------------------- /** * Gets the number of tokens found in the String. * * @return the number of matched tokens */ public int size() { tokenize(); return tokens.length; } /** * Gets the next token from the String. * * @return the next sequential token, or null when no more tokens are found */ public String nextToken() { if (hasNext()) { return tokens[tokenPos++]; } else { return null; } } /** * Gets the previous token from the String. * * @return the previous sequential token, or null when no more tokens are found */ public String previousToken() { if (hasPrevious()) { return tokens[--tokenPos]; } else { return null; } } /** * Gets a copy of the full token list. * * @return the tokens as a String array */ public String[] getAllTokens() { tokenize(); return (String[]) tokens.clone(); } /** * Resets this tokenizer, forgetting all parsing and iteration already completed. *

* This method allows the same tokenizer to be reused for the same String. */ public void reset() { tokenPos = 0; tokens = null; } /** * Reset this tokenizer, giving it a new input string to parse. * In this manner you can re-use a tokenizer with the same settings * on multiple input lines. * * @param input the new string to tokenize */ public void reset(String input) { reset(); this.text = input; chars = input.toCharArray(); // no clone as toCharArray() clones } /** * Reset this tokenizer, giving it a new input string to parse. * In this manner you can re-use a tokenizer with the same settings * on multiple input lines. * * @param input the new character array to tokenize, cloned */ public void reset(char [] input) { reset(); this.text = null; chars = (char[]) input.clone(); } // ListIterator //----------------------------------------------------------------------- /** * Checks whether there are any more tokens. * * @return true if there are more tokens */ public boolean hasNext() { tokenize(); return (tokenPos < tokens.length); } /** * Gets the next token. This method is equivalent to {@link #nextToken()}. * * @return the next String token */ public Object next() { return nextToken(); } /** * Gets the index of the next token to return. * * @return the next token index */ public int nextIndex() { return tokenPos; } /** * Checks whether there are any previous tokens that can be iterated to. * * @return true if there are previous tokens */ public boolean hasPrevious() { tokenize(); return (tokenPos > 0); } /** * Gets the token previous to the last returned token. * * @return the previous token */ public Object previous() { return previousToken(); } /** * Gets the index of the previous token. * * @return the previous token index */ public int previousIndex() { return (tokenPos - 1); } /** * Unsupported ListIterator operation. * * @throws UnsupportedOperationException always */ public void remove() { throw new UnsupportedOperationException("remove() is unsupported"); } /** * Unsupported ListIterator operation. * * @throws UnsupportedOperationException always */ public void set(Object obj) { throw new UnsupportedOperationException("set() is unsupported"); } /** * Unsupported ListIterator operation. * * @throws UnsupportedOperationException always */ public void add(Object obj) { throw new UnsupportedOperationException("add() is unsupported"); } // Implementation //----------------------------------------------------------------------- /** * Performs the tokenization if it hasn't already been done. */ private void tokenize() { if (tokens == null) { this.tokens = readTokens(); } } /** * Read all the tokens. */ private String[] readTokens() { int len = chars.length; char cbuf[] = new char[len]; StringBuffer token = new StringBuffer(); int start = 0; List tokens = new ArrayList(); String tok = null; // Keep going until we run out of characters while (start < len) { // read the next token start = readNextToken(start, cbuf, token); tok = token.toString(); // Add the token, following the rules // in this object addToken(tokens, tok); // Reset the string buffer to zero length token.setLength(0); // Handle the special case where the very last // character is a delimiter, in which case, we // need another empty string if (start == len && delim.isMatch(chars[start - 1])) { // Add the token, following the rules // in this object addToken(tokens, new String()); } } return (String[]) tokens.toArray(new String[tokens.size()]); } /** * Adds a token to a list, paying attention to the parameters we've set. * * @param list the list to add to * @param tok the token to add */ private void addToken(List list, String tok) { if (StringUtils.isEmpty(tok)) { if (ignoreEmptyTokens) { return; } if (emptyAsNull) { tok = null; } } list.add(tok); } /** * Reads character by character through the String to get the next token. * * @param start the first character of field * @param cbuf a character buffer for temporary computations (so we * don't have to keep recreating one) * @param token a StringBuffer where the output token will go * @return the starting position of the next field (the character * immediately after the delimiter, or if end of string found, * then the length of string */ private int readNextToken(int start, char cbuf[], StringBuffer token) { token.setLength(0); int len = chars.length; // Skip all leading whitespace, unless it is the // field delimiter or the quote character int current = start; while (current < len && ignored.isMatch(chars[current]) && !delim.isMatch(chars[current]) && !quote.isMatch(chars[current])) { current++; } start = current; // Read the token depending on what the first // character is like if (delim.isMatch(chars[start])) { start = readEmpty(start, token); } else if (quote.isMatch(chars[start])) { start = readQuoted(start, cbuf, token); } else { start = readUnquoted(start, token); } return start; } /** * Reads a quoted string token. * * @param start The first character of field (this will be the quote * character) * @param cbuf A character buffer for temporary computations (so we * don't have to keep recreating one) * @param token A StringBuffer where the output token will go. * @return The starting position of the next field (the character * immediately after the delimiter, or if end of string found, * then the length of string. */ private int readQuoted(int start, char cbuf[], StringBuffer token) { // Loop until we've found the end of the quoted // string or the end of the input int cbufcnt = 0; int nd = start + 1; boolean done = false; boolean quoting = true; int len = chars.length; while (nd < len && !done) { // Quoting mode can occur several times throughout // a given string, so must switch between quoting // and non-quoting until we encounter a non-quoted // delimiter, or end of string, which inidicates end // of token. if (quoting) { // If we've found a quote character, see if it's // followed by a second quote. If so, then we need // to actually put the quote character into the token // rather than end the token. if (quote.isMatch(chars[nd]) && nd + 1 < len && chars[nd + 1] == chars[nd]) { cbuf[cbufcnt++] = chars[nd]; nd++; } // End the quoting if we get to this condition else if (quote.isMatch(chars[nd])) { quoting = false; } // Otherwise, just put the character into the token else { cbuf[cbufcnt++] = chars[nd]; } nd++; } // If we're not in quoting mode, if we encounter // a delimiter, the token is ended. If we encounter // a quote, we start quoting mode, otherwise, just append // the character else { // If we're if (delim.isMatch(chars[nd])) { done = true; } else { if (quote.isMatch(chars[nd])) { quoting = true; } else { cbuf[cbufcnt++] = chars[nd]; } nd++; } } } token.append(cbuf, 0, cbufcnt); return nd + 1; } /** * Read an unquoted string until a delimiter is found. * * @param start the first character of field * @param token a StringBuffer where the output token will go. * @return the starting position of the next field (the character * immediately after the delimiter, or if end of string found, * then the length of string. */ private int readUnquoted(int start, StringBuffer token) { int len = chars.length; // Skip ahead until we get to a delimiter character, or // the end of the input int nd = start + 1; while (nd < len && !delim.isMatch(chars[nd])) { nd++; } token.append(chars, start, Math.min(nd, len) - start); return nd + 1; } /** * Read an empty string (basically, if a delimiter is found right * after another delimiter). * * @param start the first character of field (this will be the delimiter * character) * @param token a StringBuffer where the output token will go. * @return The starting position of the next field (the character * immediately after the delimiter, or if end of string found, * then the length of string. */ private int readEmpty(int start, StringBuffer token) { token.setLength(0); return start + 1; } // Delimiter //----------------------------------------------------------------------- /** * Gets the field delimiter matcher. * * @return the delimiter matcher in use */ public Matcher getDelimiterMatcher() { return delim; } /** * Sets the field delimiter matcher. *

* The delimitier is used to separate one token from another. * * @param delim the delimiter matcher to use, null ignored */ public void setDelimiterMatcher(Matcher delim) { if (delim != null) { this.delim = delim; } } /** * Sets the field delimiter character * * @param delim the delimiter character to use */ public void setDelimiterChar(char delim) { setDelimiterMatcher(new CharMatcher(delim)); } // Quote //----------------------------------------------------------------------- /** * Gets the quote matcher currently in use. *

* The quote character is used to wrap data between the tokens. * This enables delimiters to be entered as data. * The default value is '"' (double quote). * * @return the quote matcher in use */ public Matcher getQuoteMatcher() { return quote; } /** * Set the quote matcher to use. *

* The quote character is used to wrap data between the tokens. * This enables delimiters to be entered as data. * * @param quote the quote matcher to use, null ignored */ public void setQuoteMatcher(Matcher quote) { if (quote != null) { this.quote = quote; } } /** * Sets the quote character to use. *

* The quote character is used to wrap data between the tokens. * This enables delimiters to be entered as data. * * @param quote the quote character to use */ public void setQuoteChar(char quote) { setQuoteMatcher(new CharMatcher(quote)); } // Ignored //----------------------------------------------------------------------- /** * Gets the ignored character matcher. *

* These characters are ignored when parsing the String, unless they are * within a quoted region. * The default value is space (' ') and all char control characters (32 and less). * * @return the ignored matcher in use */ public Matcher getIgnoredMatcher() { return ignored; } /** * Set the matcher for characters to ignore. *

* These characters are ignored when parsing the String, unless they are * within a quoted region. * * @param ignored the ignored matcher to use, null ignored */ public void setIgnoredMatcher(Matcher ignored) { if (ignored != null) { this.ignored = ignored; } } /** * Set the character to ignore. *

* This character is ignored when parsing the String, unless it is * within a quoted region. * * @param ignored the ignored character to use */ public void setIgnoredChar(char ignored) { setIgnoredMatcher(new CharMatcher(ignored)); } //----------------------------------------------------------------------- /** * Gets whether the tokenizer currently returns empty tokens as null. * The default for this property is false. * * @return true if empty tokens are returned as null */ public boolean isEmptyTokenAsNull() { return emptyAsNull; } /** * Sets whether the tokenizer should return empty tokens as null. * The default for this property is false. * * @param emptyAsNull whether empty tokens are returned as null */ public void setEmptyTokenAsNull(boolean emptyAsNull) { this.emptyAsNull = emptyAsNull; } //----------------------------------------------------------------------- /** * Gets whether the tokenizer currently ignores empty tokens. * The default for this property is false. * * @return true if empty tokens are not returned */ public boolean isIgnoreEmptyTokens() { return ignoreEmptyTokens; } /** * Sets whether the tokenizer should ignore and not return empty tokens. * The default for this property is false. * * @param ignoreEmptyTokens whether empty tokens are not returned */ public void setIgnoreEmptyTokens(boolean ignoreEmptyTokens) { this.ignoreEmptyTokens = ignoreEmptyTokens; } //----------------------------------------------------------------------- /** * Gets the String content that the tokenizer is parsing. * * @return the string content being parsed */ public String getContent() { if (text == null) { text = new String(chars); } return text; } //----------------------------------------------------------------------- /** * Create a new instance of this Tokenizer. * The new instance is reset so that it will be at the start of the token list. */ public Object clone() { try { Tokenizer cloned = (Tokenizer) super.clone(); // chars[] does not need additional clone as it is treated as immutable cloned.reset(); return cloned; } catch (CloneNotSupportedException ex) { return null; } } //----------------------------------------------------------------------- /** * Class used to define a set of characters for matching purposes. */ public static interface Matcher { /** * Returns true if the specified character matches. * * @param ch the character to check for * @return true if matches */ boolean isMatch(char ch); } //----------------------------------------------------------------------- /** * Class used to define a set of characters for matching purposes. */ public static final class CharSetMatcher implements Matcher { private char chars[]; /** * Constructor that creates a matcher from a character array. * * @param chars the characters to match, must not be null */ public CharSetMatcher(char chars[]) { super(); this.chars = (char[]) chars.clone(); Arrays.sort(this.chars); } /** * Constructor that creates a matcher from a String. * * @param chars the characters to match, must not be null */ public CharSetMatcher(String chars) { super(); this.chars = chars.toCharArray(); Arrays.sort(this.chars); } /** * Gets the characters being matched. * * @return the characters being matched */ public char[] getChars() { return (char[]) chars.clone(); } /** * Returns whether or not the given charatcer matches. * * @param ch the character to match. * @return whether or not the given charatcer matches. */ public boolean isMatch(char ch) { return (Arrays.binarySearch(chars, ch) >= 0); } } //----------------------------------------------------------------------- /** * Class used to define a character for matching purposes. */ public static final class CharMatcher implements Matcher { private char ch; /** * Constructor that creates a matcher that matches a single character. * * @param ch the character to match */ public CharMatcher(char ch) { super(); this.ch = ch; } /** * Gets the character being matched. * * @return the character being matched */ public char getChar() { return this.ch; } /** * Returns whether or not the given charatcer matches. * * @param ch the character to match. * @return whether or not the given charatcer matches. */ public boolean isMatch(char ch) { return (this.ch == ch); } } //----------------------------------------------------------------------- /** * Class used to match no characters. */ static final class NoMatcher implements Matcher { NoMatcher() { super(); } /** * Always returns false. * * @param ch the character to match. * @return Always returns false. */ public boolean isMatch(char ch) { return false; } } //----------------------------------------------------------------------- /** * Class used to match whitespace as per trim(). */ static final class TrimMatcher implements Matcher { TrimMatcher() { super(); } /** * Returns whether or not the given charatcer matches. * * @param ch the character to match. * @return whether or not the given charatcer matches. */ public boolean isMatch(char ch) { return (ch <= 32); } } }


 

 

你可能感兴趣的:(工具类,技术学习)