java csv - 读写及其操作.

  今天帮同学处理数据, 主要是从1w多条记录中随机抽取8k条, 然后再从这8k条记录中随机抽取2k条. 最后将这2k条记录随机分成10组, 使得各组之间的记录互不重复.

  下面将我的代码都贴上来, 方便以后处理csv文件.

  1.   首先使用第三方的jar文件 javacsv.jar : 链接: http://pan.baidu.com/s/1qW5b3u0 密码: qjmx
  2.   虽然该类库已经提供了比较方便的操作, 但是为了便于处理, 我将要处理的字段都放在配置文件中, 并将每一条记录都封装为Map<String, String>对象. 读写的基础类封装为 CSVBasic:
    package spt.csv;
    
    
    
    import java.io.Serializable;
    
    import java.nio.charset.Charset;
    
    
    
    import spt.util.PropertyConfig;
    
    
    
    /**
     * Base class for CSV file operations.
     *
     * <p>Holds the settings shared by the CSV reader and writer classes: the
     * file name, the field delimiter and the character set. The defaults for
     * delimiter and character set are read from the application configuration
     * through {@link PropertyConfig}.
     *
     * <p>NOTE(review): the class is declared {@link Serializable}, but
     * {@link Charset} does not implement {@code Serializable}, so serializing
     * an instance whose charset is non-null is expected to fail.
     */
    public abstract class CSVBasic implements Serializable {

        private static final long serialVersionUID = 7916808982930771124L;

        /** Name (path) of the CSV file. */
        private String fileName;

        /** Field delimiter. */
        private char delimiter;

        /** Character set of the file. */
        private Charset charset;

        /** Creates an instance with no file name, delimiter or charset set. */
        public CSVBasic() {
        }

        /**
         * Creates an instance that uses the configured default delimiter and
         * charset.
         *
         * @param fileName name of the CSV file
         */
        public CSVBasic(String fileName) {
            this(fileName, getDefaultDelimiter(), getDefaultCharset());
        }

        /**
         * Creates an instance with explicit settings.
         *
         * @param fileName  name of the CSV file
         * @param delimiter field delimiter
         * @param charset   character set of the file
         */
        public CSVBasic(String fileName, char delimiter, Charset charset) {
            setFileName(fileName);
            setDelimiter(delimiter);
            setCharset(charset);
        }

        /**
         * Returns the default charset, looked up by the name stored under the
         * configuration key {@code "charset"}.
         *
         * @return the configured default charset
         */
        public static Charset getDefaultCharset() {
            String charsetName = PropertyConfig.getProperty("charset");
            return Charset.forName(charsetName);
        }

        /**
         * Returns the default delimiter: the first character of the value
         * stored under the configuration key {@code "delimiter"}.
         *
         * @return the configured default delimiter
         */
        public static char getDefaultDelimiter() {
            String configured = PropertyConfig.getProperty("delimiter");
            return configured.charAt(0);
        }

        /** @return name of the CSV file */
        public String getFileName() {
            return fileName;
        }

        /** @param fileName name of the CSV file */
        public void setFileName(String fileName) {
            this.fileName = fileName;
        }

        /** @return field delimiter */
        public char getDelimiter() {
            return delimiter;
        }

        /** @param delimiter field delimiter */
        public void setDelimiter(char delimiter) {
            this.delimiter = delimiter;
        }

        /** @return character set of the file */
        public Charset getCharset() {
            return charset;
        }

        /** @param charset character set of the file */
        public void setCharset(Charset charset) {
            this.charset = charset;
        }
    }
    View Code

      3.读取csv文件,并映射记录为List<Map<String, String>> 对象:

    package spt.csv;
    
    
    
    import java.io.FileNotFoundException;
    
    import java.io.IOException;
    
    import java.nio.charset.Charset;
    
    import java.util.ArrayList;
    
    import java.util.HashMap;
    
    import java.util.List;
    
    import java.util.Map;
    
    
    
    import spt.util.PropertyConfig;
    
    
    
    import com.csvreader.CsvReader;
    
    
    
    /**
     * Reads a CSV file and maps every record to a {@code Map<String, String>}.
     *
     * <p>The underlying {@link CsvReader} is created in the constructor and is
     * closed by {@link #getResult(List)}, so {@code getResult} is meant to be
     * called once per instance.
     */
    public class Reader extends CSVBasic {

        private static final long serialVersionUID = -1712774594374451546L;

        private CsvReader reader;

        /**
         * Opens the file using the configured default delimiter and charset.
         *
         * @param fileName path of the CSV file to read
         * @throws FileNotFoundException declared by the {@link CsvReader}
         *         constructor
         */
        public Reader(String fileName) throws FileNotFoundException {
            this(fileName, getDefaultDelimiter(), getDefaultCharset());
        }

        /**
         * Opens the file with explicit settings.
         *
         * @param fileName  path of the CSV file to read
         * @param delimiter field delimiter
         * @param charset   character set of the file
         * @throws FileNotFoundException declared by the {@link CsvReader}
         *         constructor
         */
        public Reader(String fileName, char delimiter, Charset charset)
                throws FileNotFoundException {
            // The base class stores the settings before the reader is created.
            super(fileName, delimiter, charset);
            setReader(new CsvReader(fileName, delimiter, charset));
        }

        public CsvReader getReader() {
            return reader;
        }

        public void setReader(CsvReader reader) {
            this.reader = reader;
        }

        /**
         * Reads all records and maps each one to a {@code Map} from column name
         * to value.
         *
         * <p>Each entry of {@code fieldNames} is a configuration key; the
         * column name used both for the CSV lookup and as the map key is the
         * value {@link PropertyConfig#getProperty(String)} returns for it.
         *
         * @param fieldNames configuration keys of the columns to extract
         * @return one map per record in file order, or {@code null} if an
         *         {@link IOException} occurred while reading (the stack trace
         *         is printed)
         */
        public List<Map<String, String>> getResult(List<String> fieldNames) {
            List<Map<String, String>> lines = new ArrayList<Map<String, String>>();

            CsvReader r = null;
            try {
                r = getReader();
                r.readHeaders(); // consume the header row

                // Resolve the column names once instead of once per field per
                // record: PropertyConfig reloads the properties file on every
                // call, which made the original loop do records x fields loads.
                List<String> columns = new ArrayList<String>(fieldNames.size());
                for (String fieldName : fieldNames) {
                    columns.add(PropertyConfig.getProperty(fieldName));
                }

                while (r.readRecord()) {
                    Map<String, String> itemMap = new HashMap<String, String>();
                    for (String column : columns) {
                        itemMap.put(column, r.get(column));
                    }
                    lines.add(itemMap);
                }
                return lines;
            } catch (IOException e) {
                e.printStackTrace();
                return null;
            } finally {
                // Always release the file handle, also on failure.
                if (r != null) {
                    r.close();
                }
            }
        }

        /** @return the name of the file being read */
        @Override
        public String toString() {
            return getFileName();
        }
    }
    View Code

    4.将List<Map<String, String>>输出为csv文件的类:

    package spt.csv;
    
    
    
    import java.io.IOException;
    
    import java.nio.charset.Charset;
    
    import java.util.List;
    
    import java.util.Map;
    
    
    
    import com.csvreader.CsvWriter;
    
    
    
    /**
     * Writes a list of records, each a {@code Map<String, String>}, to a CSV
     * file.
     *
     * <p>The underlying {@link CsvWriter} is created in the constructor and is
     * closed at the end of {@link #write(List, List)}.
     */
    public class Writer extends CSVBasic {

        private static final long serialVersionUID = -9141083858975437622L;

        private CsvWriter writer = null;

        /**
         * Creates a writer with no file name, using the configured default
         * delimiter and charset.
         *
         * <p>NOTE(review): this passes a {@code null} file name on to the
         * {@link CsvWriter} constructor; confirm that the library accepts it.
         */
        public Writer() {
            this(null, getDefaultDelimiter(), getDefaultCharset());
        }

        /**
         * Creates a writer for the given file, using the configured default
         * delimiter and charset.
         *
         * @param fileName path of the CSV file to write
         */
        public Writer(String fileName) {
            this(fileName, getDefaultDelimiter(), getDefaultCharset());
        }

        /**
         * Creates a writer with explicit settings.
         *
         * @param fileName  path of the CSV file to write
         * @param delimiter field delimiter
         * @param charset   character set of the file
         */
        public Writer(String fileName, char delimiter, Charset charset) {
            super(fileName, delimiter, charset);
            writer = new CsvWriter(fileName, delimiter, charset);
        }

        public CsvWriter getWriter() {
            return writer;
        }

        public void setWriter(CsvWriter writer) {
            this.writer = writer;
        }

        /**
         * Writes a header row followed by one row per record, then closes the
         * underlying writer.
         *
         * <p>The entries of {@code fieldNames} are written verbatim as the
         * header and are also the keys used to look values up in each map.
         *
         * <p>NOTE(review): {@code Reader.getResult} keys its maps by the value
         * of {@code PropertyConfig.getProperty(fieldName)}, whereas this method
         * uses {@code fieldName} itself. The two only agree when each
         * configuration key maps to an identical column name; verify against
         * the properties file.
         *
         * @param fieldNames column keys, in output order
         * @param mapList    records to write
         * @return {@code true} on success, {@code false} if an
         *         {@link IOException} occurred (the stack trace is printed)
         */
        public boolean write(List<String> fieldNames,
                List<Map<String, String>> mapList) {
            CsvWriter out = null;
            try {
                out = getWriter();

                // Header row.
                String[] header = fieldNames.toArray(new String[fieldNames.size()]);
                out.writeRecord(header);

                // One row per record, columns in the order of fieldNames.
                for (Map<String, String> row : mapList) {
                    String[] values = new String[fieldNames.size()];
                    for (int col = 0; col < fieldNames.size(); col++) {
                        String key = fieldNames.get(col);
                        values[col] = row.get(key);
                    }
                    out.writeRecord(values);
                }
                return true;
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
                return false;
            } finally {
                if (out != null) {
                    out.close();
                }
            }
        }
    }
    View Code

    5.表中有一个字段NYR, 表示时间, 由于需要将结果按照时间的先后顺序排序, 所以定义一个比较器:

    package spt.csv;
    
    
    
    import java.text.ParseException;
    
    import java.util.Comparator;
    
    import java.util.Map;
    
    
    
    import spt.util.DateService;
    
    import spt.util.PropertyConfig;
    
    
    
    /**
     * Orders records (each a {@code Map<String, String>}) chronologically by
     * their date column.
     *
     * <p>The name of the date column is the value stored under the
     * configuration key {@code "NYR"}. Records whose date is empty or cannot be
     * parsed sort before all records with a valid date and compare equal to
     * each other, which keeps the ordering consistent for
     * {@code Collections.sort}.
     */
    public class RecordDateComparator implements Comparator<Map<String, String>> {

        /**
         * Name of the date column, resolved once per comparator rather than
         * twice per comparison.
         */
        private final String dateColumn = PropertyConfig.getProperty("NYR");

        /**
         * Compares two records by their date column.
         *
         * @param m1 first record
         * @param m2 second record
         * @return -1, 0 or 1 as the date of {@code m1} is before, equal to or
         *         after the date of {@code m2}; a missing/unparseable date is
         *         treated as earlier than any valid date
         */
        @Override
        public int compare(Map<String, String> m1, Map<String, String> m2) {
            java.util.Date d1 = parseOrNull(m1.get(dateColumn));
            java.util.Date d2 = parseOrNull(m2.get(dateColumn));

            if (d1 == null || d2 == null) {
                if (d1 == null && d2 == null) {
                    return 0;
                }
                return d1 == null ? -1 : 1;
            }
            // Date.compareTo compares the millisecond values directly, so no
            // long-to-int narrowing is involved.
            return d1.compareTo(d2);
        }

        /**
         * Parses a date string, mapping a parse failure to {@code null} so
         * that the record is ordered like one without a date.
         */
        private static java.util.Date parseOrNull(String text) {
            try {
                // DateService.getDate itself returns null for empty input.
                return DateService.getDate(text);
            } catch (ParseException e) {
                e.printStackTrace();
                return null;
            }
        }
    }
    View Code

    6.在main类中:

    package spt.csv;
    
    
    
    import java.io.File;
    
    import java.io.FileNotFoundException;
    
    import java.util.ArrayList;
    
    import java.util.Collections;
    
    import java.util.List;
    
    import java.util.Map;
    
    import java.util.Random;
    
    
    
    import spt.util.PropertyConfig;
    
    
    
    /**
     * Draws a random subset from the input CSV (e.g. 8k out of 10k+ records),
     * draws a second random subset from the first (e.g. 2k), splits that into
     * equally sized, non-overlapping groups (e.g. 10) and writes each group,
     * sorted by date, to its own CSV file in the working directory.
     *
     * <p>All sizes and the input path are read from the configuration keys
     * {@code first_size}, {@code second_size}, {@code groupCount} and
     * {@code input_file}.
     */
    public class ReadWriteDemo {

        /** Configuration keys of the columns that are read and written, in output order. */
        private static final String[] FIELD_KEYS = {
            "id", "AJMC", "JYAQ", "AJLB", "AJFAB", "AJZT", "BASJ",
            "FXSJ", "FASJSX", "FASJXX", "AJBH", "ZBX", "ZBY", "NYR"
        };

        /**
         * Program entry point.
         *
         * @param args not used
         */
        public static void main(String[] args) {
            System.out.println("执行中...执行过程请不要关闭此窗口!");
            final int firstSize = Integer.parseInt(PropertyConfig
                    .getProperty("first_size")); // size of the first sample
            final int secondSize = Integer.parseInt(PropertyConfig
                    .getProperty("second_size")); // size of the second sample
            final int groupCount = Integer.parseInt(PropertyConfig
                    .getProperty("groupCount")); // number of groups
            String file = PropertyConfig.getProperty("input_file"); // source file path

            try {
                List<String> fieldNames = initFields();
                Reader csv = new Reader(file);

                // All records of the input file.
                List<Map<String, String>> totalList = csv.getResult(fieldNames);
                if (totalList == null) {
                    // getResult has already printed the I/O error; stop here
                    // instead of failing with a NullPointerException below.
                    System.out.println("读取文件失败: " + file);
                    return;
                }

                List<Map<String, String>> firstTaken = random(totalList, firstSize);
                List<Map<String, String>> secondTaken = random(firstTaken, secondSize);

                for (int i = 0; i < groupCount; i++) {
                    final int groupSize = secondSize / groupCount;
                    if (groupSize <= 0) {
                        throw new IndexOutOfBoundsException("新列表的长度错误!");
                    }
                    // secondTaken is already in random order, so consecutive
                    // slices form a random partition. Slicing by position
                    // (rather than removeAll, which is equality based) keeps
                    // every group at full size even when two records have
                    // identical content.
                    List<Map<String, String>> group = new ArrayList<Map<String, String>>(
                            secondTaken.subList(i * groupSize, (i + 1) * groupSize));

                    String outputFile = nextFreeOutputFile();
                    Writer writer = new Writer(outputFile);
                    Collections.sort(group, new RecordDateComparator());
                    if (!writer.write(fieldNames, group)) {
                        System.out.println("写入文件失败: " + outputFile);
                    }
                }
                System.out.println("done!");
            } catch (FileNotFoundException e) {
                System.out.println("请指定正确的文件路径!");
                e.printStackTrace();
            }
        }

        /**
         * Returns a new list holding {@code new_size} elements chosen uniformly
         * at random, without repetition, from {@code originalList}. The result
         * is in random order and the input list is not modified.
         *
         * <p>The previous implementation drew indices with
         * {@code nextInt(new_size)}, so it could only ever return the first
         * {@code new_size} elements of the input.
         *
         * @param originalList input list
         * @param new_size     number of elements to pick; must be in
         *                     {@code 1..originalList.size()}
         * @return a new, independent list of the picked elements
         * @throws IndexOutOfBoundsException if {@code new_size} is out of range
         */
        public static List<Map<String, String>> random(
                List<Map<String, String>> originalList, int new_size) {
            if (new_size <= 0 || new_size > originalList.size()) {
                throw new IndexOutOfBoundsException("新列表的长度错误!");
            }
            // Shuffle a copy and keep its first new_size elements.
            List<Map<String, String>> shuffled =
                    new ArrayList<Map<String, String>>(originalList);
            Collections.shuffle(shuffled, new Random());
            return new ArrayList<Map<String, String>>(shuffled.subList(0, new_size));
        }

        /**
         * Finds the first name of the form {@code result<N>.csv}, N counting up
         * from 0, that does not yet exist in the working directory.
         */
        private static String nextFreeOutputFile() {
            int fileCount = 0;
            String outputFile;
            do {
                outputFile = "result" + fileCount++ + ".csv";
            } while (new File(outputFile).exists());
            return outputFile;
        }

        /** Returns a new mutable list of the configuration keys of all columns. */
        private static List<String> initFields() {
            List<String> fieldNames = new ArrayList<String>(FIELD_KEYS.length);
            for (String key : FIELD_KEYS) {
                fieldNames.add(key);
            }
            return fieldNames;
        }
    }
    View Code

    7.用到的自定义工具类为:

    package spt.util;
    
    
    
    import java.text.DateFormat;
    
    import java.text.ParseException;
    
    import java.text.SimpleDateFormat;
    
    import java.util.Calendar;
    
    import java.util.Date;
    
    
    
    /**
     * 2015-2-27 Utility class for date conversion.
     *
     * <p>Parsing and formatting use the pattern stored under the configuration
     * key {@code "date_format"}.
     */
    public class DateService {

        /**
         * Single formatter shared by all callers so that it is not re-created
         * on every call. {@link SimpleDateFormat} is not thread-safe, so every
         * access is synchronized on this instance.
         */
        private static final DateFormat FORMATTER =
                new SimpleDateFormat(PropertyConfig.getProperty("date_format"));

        /**
         * Converts a date string to a {@link Date}.
         *
         * @param strDate date text in the configured format
         * @return the parsed date, or {@code null} if the input is
         *         {@code null} or empty
         * @throws ParseException if the text cannot be parsed
         */
        public static Date getDate(String strDate) throws ParseException {
            if (Str.isEmpty(strDate)) {
                return null;
            }
            synchronized (FORMATTER) {
                return FORMATTER.parse(strDate);
            }
        }

        /**
         * Converts a {@code java.util.Date} to a {@code java.sql.Date}, e.g.
         * for use with {@code PreparedStatement.setDate}.
         *
         * @param utilDate date to convert
         * @return the converted date, or {@code null} if the input is
         *         {@code null}
         */
        public static java.sql.Date getSQLDate(java.util.Date utilDate) {
            if (utilDate == null) {
                return null;
            }
            return new java.sql.Date(utilDate.getTime());
        }

        /**
         * Formats a date as a string using the configured format.
         *
         * @param date date to format
         * @return the formatted text, or {@code null} if the input is
         *         {@code null}
         */
        public static String getDateStr(java.util.Date date) {
            if (date == null) {
                return null;
            }
            synchronized (FORMATTER) {
                return FORMATTER.format(date);
            }
        }

        /**
         * Tells whether a date falls on the calendar day that is
         * {@code disDay} days away from today (0: today, 1: tomorrow,
         * -1: yesterday). Every call compares against the current time.
         *
         * @param anotherDate date to test
         * @param disDay      offset in days relative to today
         * @return {@code true} if year, month and day of month match;
         *         {@code false} otherwise or if {@code anotherDate} is
         *         {@code null}
         */
        public static boolean isSpecifiedDay(Date anotherDate, int disDay) {
            if (anotherDate == null) {
                return false;
            }
            // Calendar.getInstance() is already set to the current time.
            Calendar target = Calendar.getInstance();
            target.add(Calendar.DAY_OF_MONTH, disDay);

            Calendar other = Calendar.getInstance();
            other.setTime(anotherDate);

            return target.get(Calendar.YEAR) == other.get(Calendar.YEAR)
                    && target.get(Calendar.MONTH) == other.get(Calendar.MONTH)
                    && target.get(Calendar.DAY_OF_MONTH) == other.get(Calendar.DAY_OF_MONTH);
        }
    }
    View Code
    package spt.util;
    
    
    
    import java.io.IOException;
    
    import java.net.URL;
    
    import java.util.Properties;
    
    
    
    
    
    /**
     * 2015-2-27 Access to the application configuration stored in the
     * classpath resource {@code raw/properties.properties}.
     *
     * <p>The resource is loaded anew on every call; nothing is cached.
     */
    public class PropertyConfig {

        /** Classpath-relative location of the configuration file. */
        private static final String CONFIG_FILE_PATH = "raw/properties.properties";

        /**
         * Returns the value configured for a key.
         *
         * @param key configuration key
         * @return the configured value, or {@code null} if the key is absent
         * @throws IllegalStateException if the configuration resource is
         *         missing or cannot be read (previously this surfaced as a
         *         bare {@code NullPointerException})
         */
        public static String getProperty(String key) {
            Properties properties = getProperties();
            if (properties == null) {
                throw new IllegalStateException(
                        "Cannot load configuration resource: " + CONFIG_FILE_PATH);
            }
            return properties.getProperty(key);
        }

        /**
         * Loads the configuration resource.
         *
         * @return the loaded properties, or {@code null} if the resource is
         *         not on the classpath or an {@link IOException} occurred
         *         while reading it (the problem is reported on standard error)
         */
        public static Properties getProperties() {
            URL url = PropertyConfig.class.getClassLoader().getResource(CONFIG_FILE_PATH);
            if (url == null) {
                System.err.println(
                        "Configuration resource not found on classpath: " + CONFIG_FILE_PATH);
                return null;
            }

            Properties props = new Properties();
            java.io.InputStream in = null;
            try {
                in = url.openStream();
                props.load(in);
                return props;
            } catch (IOException e) {
                e.printStackTrace();
                return null;
            } finally {
                // The stream was previously never closed, leaking one handle
                // per lookup.
                if (in != null) {
                    try {
                        in.close();
                    } catch (IOException ignored) {
                        // Nothing useful can be done if closing fails.
                    }
                }
            }
        }
    }
    View Code
    package spt.util;
    
    
    
    /**
     * String helper methods.
     */
    public class Str {

        /**
         * Tells whether a string has content.
         *
         * @param str string to test, may be {@code null}
         * @return {@code true} if the string is neither {@code null} nor
         *         empty, {@code false} otherwise
         */
        public static boolean hasLength(String str) {
            return str != null && str.length() > 0;
        }

        /**
         * Tells whether a string is {@code null} or empty.
         *
         * @param str string to test, may be {@code null}
         * @return {@code true} if the string is {@code null} or has length 0
         */
        public static boolean isEmpty(String str) {
            if (str == null) {
                return true;
            }
            return str.length() == 0;
        }
    }
    View Code

    其中,配置文件"raw/properties.properties"是放置在src目录下.

你可能感兴趣的:(java)