读取CSV文件,并解析为list

package com.lhx.test.utils;

import com.alibaba.dubbo.common.utils.CollectionUtils;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

/**
 * Created by lhx on 2019/6/18
 * Desc : Reads CSV text line by line from a {@link Reader} and splits every
 * line into a list of field values.
 *
 * <p>Parsing rules applied to each line:
 * <ul>
 *   <li>Fields are separated by the configured separator character.</li>
 *   <li>A field may be wrapped in the quote character; separators inside a
 *       quoted field belong to the value.</li>
 *   <li>Inside a quoted field, a doubled quote character, or the escape
 *       character followed by a quote/escape character, yields one literal
 *       character.</li>
 *   <li>An escape character that does not escape anything is kept literally.</li>
 * </ul>
 *
 * <p>Input is consumed one physical line at a time, so a quoted field cannot
 * span several lines. Instances hold reading state and are not designed to be
 * shared between threads.
 */
public class CSVRead implements AutoCloseable {

    private static final char DEFAULT_SEPARATOR = ',';

    // Initial capacity of the buffer that accumulates one field value.
    private static final int INITIAL_READ_SIZE = 64;

    private static final char DEFAULT_QUOTE_CHARACTER = '"';

    private static final char DEFAULT_ESCAPE_CHARACTER = '\\';

    private static final int DEFAULT_SKIP_LINES = 0;

    private final BufferedReader br;

    // Character that separates two fields.
    private final char elementSeparator;

    // Character that wraps a quoted field.
    private final char quoteChar;

    // Character that escapes a quote/escape character inside a quoted field.
    private final char escape;

    // Number of leading lines to discard before the first parsed line.
    private final int skipLines;

    // False once readLine() has returned null, i.e. the input is exhausted.
    private boolean hasNext = true;

    // True until the leading lines have been discarded (done on the first read).
    private boolean linesSkipped = true;

    /**
     * Creates a reader with every parsing option given explicitly.
     *
     * @param reader           source of the CSV text; wrapped in a {@link BufferedReader}
     * @param elementSeparator character separating fields
     * @param quoteChar        character wrapping quoted fields
     * @param escape           character escaping a quote/escape character inside quotes
     * @param lines            number of leading lines to skip; values &lt;= 0 skip nothing
     */
    public CSVRead(Reader reader, char elementSeparator, char quoteChar, char escape, int lines) {
        this.br = new BufferedReader(reader);
        this.elementSeparator = elementSeparator;
        this.quoteChar = quoteChar;
        this.escape = escape;
        this.skipLines = lines;
    }

    /** Same as the full constructor, skipping no leading lines. */
    public CSVRead(Reader reader, char elementSeparator, char quoteChar, char escape) {
        this(reader, elementSeparator, quoteChar, escape, DEFAULT_SKIP_LINES);
    }

    /** Uses the default escape character {@code '\\'} and skips no leading lines. */
    public CSVRead(Reader reader, char elementSeparator, char quoteChar) {
        this(reader, elementSeparator, quoteChar, DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES);
    }

    /** Uses the default quote {@code '"'} and escape {@code '\\'} characters and skips no leading lines. */
    public CSVRead(Reader reader, char elementSeparator) {
        this(reader, elementSeparator, DEFAULT_QUOTE_CHARACTER, DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES);
    }

    /**
     * Uses all defaults: separator {@code ','}, quote {@code '"'},
     * escape {@code '\\'} and no skipped lines.
     */
    public CSVRead(Reader reader) {
        this(reader, DEFAULT_SEPARATOR, DEFAULT_QUOTE_CHARACTER, DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES);
    }

    /**
     * Reads all remaining lines and parses each one into its field values.
     * Blank lines are skipped. Once the input is exhausted, further calls
     * return an empty list.
     *
     * @return one list of field values per non-blank line, in input order
     * @throws IOException if reading from the underlying reader fails
     */
    public List<List<String>> readFile() throws IOException {
        List<List<String>> outer = new ArrayList<>();
        while (hasNext) {
            List<String> inner = read();
            if (!inner.isEmpty()) {
                outer.add(inner);
            }
        }
        return outer;
    }

    /**
     * Closes the underlying reader.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public void close() throws IOException {
        br.close();
    }

    // Reads one line; returns its fields, or an empty list for a blank line or end of input.
    private List<String> read() throws IOException {
        String nextLine = readNextLine();
        if (!hasNext || isBlank(nextLine)) {
            return new ArrayList<>();
        }
        return parseNextLine(nextLine);
    }

    // Reads the next physical line, discarding the configured leading lines on the first call.
    private String readNextLine() throws IOException {
        if (this.linesSkipped) {
            for (int i = 0; i < skipLines; i++) {
                // Read and discard.
                br.readLine();
            }
            this.linesSkipped = false;
        }
        String nextLine = br.readLine();
        // Only null marks the end of the stream; a blank line must not stop the reading loop.
        hasNext = nextLine != null;
        return nextLine;
    }

    // Splits one line into its field values according to the rules in the class comment.
    private List<String> parseNextLine(String nextLine) {
        List<String> fields = new ArrayList<>();
        StringBuilder sb = new StringBuilder(INITIAL_READ_SIZE);
        // True while the characters being read are inside a quoted field.
        boolean within = false;
        for (int index = 0; index < nextLine.length(); index++) {
            char chr = nextLine.charAt(index);
            if (escape == chr && this.escaped(nextLine, within, index)) {
                // Escaped quote/escape character: keep the escaped character only.
                sb.append(nextLine.charAt(index + 1));
                index++;
            } else if (quoteChar == chr) {
                if (within) {
                    if (this.quoteEscaped(nextLine, within, index)) {
                        // Doubled quote inside a quoted field stands for one literal quote.
                        sb.append(chr);
                        index++;
                    } else if (this.endsField(nextLine, index)) {
                        // Closing quote: must be followed by a separator or the end of the line.
                        within = false;
                    } else {
                        // Stray quote in the middle of a quoted value is kept as text.
                        sb.append(chr);
                    }
                } else if (index == 0 || this.elementSeparator == nextLine.charAt(index - 1)) {
                    // Opening quote: only recognised at the very start of a field.
                    within = true;
                } else {
                    // Quote in the middle of an unquoted value is kept as text.
                    sb.append(chr);
                }
            } else if (this.elementSeparator == chr && !within) {
                // Separator outside quotes ends the current field.
                fields.add(sb.toString());
                sb.setLength(0);
            } else {
                sb.append(chr);
            }
        }
        // The last field has no trailing separator, so it is flushed here.
        fields.add(sb.toString());
        return fields;
    }

    // True when the character after index is a quote character and we are inside a quoted field.
    private boolean quoteEscaped(String nextLine, boolean inQuote, int index) {
        int targetIndex = index + 1;
        return inQuote
                && nextLine.length() > targetIndex
                && this.quoteChar == nextLine.charAt(targetIndex);
    }

    // True when the character after index is an escape or quote character and we are inside a quoted field.
    private boolean escaped(String nextLine, boolean inQuote, int index) {
        int targetIndex = index + 1;
        return inQuote
                && nextLine.length() > targetIndex
                && (this.escape == nextLine.charAt(targetIndex) || this.quoteChar == nextLine.charAt(targetIndex));
    }

    // True when index is the last character of the line or is directly followed by a separator.
    private boolean endsField(String nextLine, int index) {
        int targetIndex = index + 1;
        return targetIndex >= nextLine.length()
                || this.elementSeparator == nextLine.charAt(targetIndex);
    }

    // True when the line contains nothing but whitespace.
    private static boolean isBlank(String line) {
        return line.trim().isEmpty();
    }
}

你可能感兴趣的:(随笔)