java 大文件拆分成小文件

由于需要读取大文件中的数据,而程序可用内存太小,无法一次性读入,因此先将大文件拆分成若干小文件,再分别读取。

package cn.jado.ctt_check.test;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;

import org.apache.log4j.Logger;

import cn.jado.ctt_check.util.IOUtil;

/**
 * Splits a large line-oriented text file into several smaller files so that
 * each piece can later be processed with a limited amount of memory.
 *
 * <p>The class name keeps its original spelling so existing references keep
 * working.
 */
public class SpiltFile {
    private static final Logger logger = Logger.getLogger(SpiltFile.class);

    /**
     * Number of lines written to each output file. The original notes spoke of
     * "one million" lines, but the code has always used 100000; the value is
     * kept so existing output layouts do not change.
     */
    private static final int LINES_PER_FILE = 100000;

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT =
            "F:/usr/local/统计/utf-8_20170221_wechat_article.DATA";

    /**
     * Command-line entry point.
     *
     * @param args optional; {@code args[0]} is the input file (defaults to
     *             {@link #DEFAULT_INPUT}) and {@code args[1]} is the input
     *             charset (defaults to {@code utf-8})
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String charset = (args != null && args.length > 1) ? args[1] : "utf-8";
        readFileByLineToList(path, charset);
    }

    /**
     * Reads {@code fileName} line by line and writes the lines out in chunks of
     * {@link #LINES_PER_FILE} lines each. Output files are named
     * {@code <lineCount>_wechat_article.DATA}, where {@code lineCount} is the
     * number of input lines read when the chunk was written, and are placed in
     * a {@code wechat_msg/} directory under the first element returned by
     * {@code IOUtil.PathParser(fileName)} (presumably the input file's parent
     * directory; confirm against {@code IOUtil}). Output is written as UTF-8.
     *
     * <p>A final, shorter chunk is also written, so no trailing lines are
     * lost. I/O and encoding errors are logged rather than thrown.
     *
     * @author jado
     * @param fileName    path of the file to split
     * @param charsetName charset of the input file; when {@code null} the
     *                    {@code file.encoding} system property is used
     * @return the lines of the last, partial chunk (empty when the line count
     *         is an exact multiple of {@link #LINES_PER_FILE}, or when reading
     *         failed right after a full chunk was written)
     */
    public static List<String> readFileByLineToList(String fileName, String charsetName) {
        List<String> lineList = new ArrayList<String>();
        String path = IOUtil.PathParser(fileName)[0] + "/wechat_msg/";
        IOUtil.mkDirs(path);
        if (charsetName == null) {
            charsetName = System.getProperty("file.encoding");
        }

        FileInputStream in = null;
        BufferedReader br = null;
        try {
            // Open the stream separately so it can still be closed if the
            // reader cannot be created (e.g. unsupported charset).
            in = new FileInputStream(fileName);
            br = new BufferedReader(new InputStreamReader(in, charsetName));

            long lineCount = 0;
            String line;
            while ((line = br.readLine()) != null) {
                lineCount++;
                lineList.add(line);
                // A full chunk has been collected: write it out and start over.
                if (lineCount % LINES_PER_FILE == 0) {
                    writeChunk(path + lineCount + "_wechat_article.DATA", lineList);
                    lineList.clear();
                }
            }

            // Write the trailing partial chunk too; it is deliberately not
            // cleared because callers receive it as the return value.
            if (!lineList.isEmpty()) {
                writeChunk(path + lineCount + "_wechat_article.DATA", lineList);
            }
        } catch (UnsupportedEncodingException e1) {
            logger.error("解析文件编码异常", e1);
        } catch (FileNotFoundException e2) {
            logger.error("文件没有找到异常", e2);
        } catch (IOException e3) {
            logger.error("Io操作异常", e3);
        } finally {
            // Closing the BufferedReader also closes the wrapped stream; fall
            // back to the raw stream only when the reader was never created.
            if (br != null) {
                closeQuietly(br);
            } else {
                closeQuietly(in);
            }
        }
        return lineList;
    }

    /** Appends every line of one chunk, newline-terminated, to {@code targetFile}. */
    private static void writeChunk(String targetFile, List<String> lines) throws IOException {
        for (String line : lines) {
            IOUtil.writeFile(targetFile, line + "\n", "utf-8", true);
        }
    }

    /** Closes {@code resource} if it is non-null, logging instead of propagating a failure. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            logger.error("文件关闭异常", e);
        }
    }
}

你可能感兴趣的:(java)