JAVA实现多线程分段读取文件

大致思路:

     1:读取文件行数

     2:为每个线程分配读取行数

       分配算法:近似平均分配(每个线程分得「总行数 / 线程数」行,余数全部分给最后一个线程)

       

线程数 文件总行数 线程分配的行数
2 10 thread1 = 5,thread2 = 5
2 9 thread1 = 4,thread2 = 5
3 10 thread1 = 3,thread2 = 3,thread3 = 4

     

     3:启动线程读取文件

     4:合并文件内容

     5:校验文件完整性

 核心文件已贴出,UploadService、ReadFileThread,欢迎大家交流讨论

UploadService

package cn.spring.ssm.service.impl;

import cn.spring.ssm.job.ReadFileThread;
import cn.spring.ssm.model.FileThreadVO;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang.StringUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;

import javax.annotation.Resource;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Future;

/**
 * Created with IntelliJ IDEA.
 * Package: cn.spring.ssm.service.impl
 * User: 25414
 * Date: 2019/11/14
 * Time: 16:10
 * Description:
 */
@Service
@Slf4j
public class UploadService {

    @Value("${file.thread.num}")
    private Integer threadNum; //线程数

    @Resource(name = "asyncServiceExecutor")
    private ThreadPoolTaskExecutor executor;  //线程池

    /**
     * 启用多个线程分段读取文件
     * 

* PS:若文件行数小于线程数会造成线程浪费 * 适用于读取一行一行的数据报文 * * @return */ public String uploadByThread(MultipartFile file) throws Exception { if (file.isEmpty()) { return null; } InputStream is = file.getInputStream(); List threadVOS = new ArrayList<>(threadNum); //自定义线程实体对象 //为线程分配读取行数 Integer lines = getLineNum(is); //文件总行数 Integer line; //每个线程分配行数 Integer start_line; //线程读取文件开始行数 Integer end_line; //线程读取文件结束行数 StringBuffer data = new StringBuffer(); //根据文件行数和线程数计算分配的行数,这里有点繁琐了,待优化 if (lines < threadNum) { for (int i = 1; i <= lines; i++) { FileThreadVO fileThreadVO = new FileThreadVO(); start_line = end_line = i; InputStream stream = file.getInputStream(); ReadFileThread readFileThread = new ReadFileThread(start_line, end_line, stream); fileThreadVO.setStart_line(start_line); fileThreadVO.setIs(stream); fileThreadVO.setEnd_line(end_line); fileThreadVO.setResult(executor.submit(readFileThread).get()); threadVOS.add(fileThreadVO); } } else { for (int i = 1, tempLine = 0; i <= threadNum; i++, tempLine = ++end_line) { InputStream stream = file.getInputStream(); FileThreadVO fileThreadVO = new FileThreadVO(); Integer var1 = lines / threadNum; Integer var2 = lines % threadNum; line = (i == threadNum) ? (var2 == 0 ? var1 : var1 + var2) : var1; start_line = (i == 1) ? 1 : tempLine; end_line = (i == threadNum) ? 
lines : start_line + line - 1; ReadFileThread readFileThread = new ReadFileThread(start_line, end_line, stream); fileThreadVO.setStart_line(start_line); fileThreadVO.setIs(stream); fileThreadVO.setEnd_line(end_line); fileThreadVO.setResult(executor.submit(readFileThread).get()); threadVOS.add(fileThreadVO); } } threadVOS.forEach(record -> data.append(record.getResult()).append("\r\n")); String mergeStr = data.toString().trim(); boolean isComplete = isComplete(file, mergeStr); if (!isComplete) { log.error("###uploadByThread### 文件完整性校验失败!"); throw new Exception("The file is incomplete!"); } else { return mergeStr; } } /** * 获取文件行数 * * @param is * @return * @throws IOException */ public int getLineNum(InputStream is) throws IOException { int line = 0; BufferedReader reader = new BufferedReader(new InputStreamReader(is)); while (reader.readLine() != null) { line++; } reader.close(); is.close(); return line; } /** * 校验文件完整性 * * @param file * @param data * @return */ public boolean isComplete(MultipartFile file, String data) throws IOException { long originSize = file.getBytes().length; long resultSize = data.getBytes(Charset.forName("utf-8")).length; return StringUtils.equals(String.valueOf(originSize), String.valueOf(resultSize)); } }

ReadFileThread 

package cn.spring.ssm.job;

import lombok.extern.slf4j.Slf4j;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.concurrent.Callable;

/**
 * Callable that reads one inclusive, 1-based range of lines from a stream.
 *
 * <p>Package: cn.spring.ssm.job, User: 25414, Date: 2019/11/14.
 */
@Slf4j
public class ReadFileThread implements Callable<String> {

    private final int start_index;    // first line to read (1-based)
    private final int end_index;      // last line to read (inclusive)
    private final InputStream is;     // input stream; closed by call()

    /**
     * @param start_index first line to read (1-based)
     * @param end_index   last line to read (inclusive)
     * @param is          stream to read from; closed when {@link #call()} finishes
     */
    public ReadFileThread(int start_index, int end_index, InputStream is) {
        this.start_index = start_index;
        this.end_index = end_index;
        this.is = is;
    }

    /**
     * Skips the lines before {@code start_index}, then reads through
     * {@code end_index} and joins the lines with "\r\n".
     *
     * <p>At least one line is read even when {@code end_index} is less than
     * {@code start_index}. If the stream ends early, reading stops there, so
     * no "null" text is appended.
     *
     * @return the joined lines; empty if the stream ends before
     *         {@code start_index}
     * @throws Exception if reading the stream fails
     */
    @Override
    public String call() throws Exception {
        StringBuilder result = new StringBuilder();
        // try-with-resources closes both the reader and the stream, even on failure.
        try (InputStream in = is;
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(in, Charset.forName("utf-8")))) {
            int loc = 1;
            // Skip the lines that belong to earlier segments.
            while (loc < start_index && reader.readLine() != null) {
                loc++;
            }

            boolean first = true;
            String line = reader.readLine();
            while (line != null) {
                if (!first) {
                    result.append("\r\n");
                }
                result.append(line);
                first = false;
                if (loc >= end_index) {
                    break; // the last line of this segment has been read
                }
                loc++;
                line = reader.readLine();
            }
        }
        String strResult = result.toString();
        // NOTE(review): this logs the full segment content at INFO level.
        log.info("###ReadFileThread###FILE {} IS COMPLETE result = {} size = {}", Thread.currentThread().getName(), strResult, strResult
                .getBytes(Charset.forName("utf-8")).length);
        return strResult;
    }
}

存在的问题

1:若文件行数小于线程数,多出来的线程分不到任何行,会造成线程浪费;因此本方案更适用于行数较多的数据报文

2:文件分段的方式:目前的方案是按文件行数分段,若改成按字节分段会更合理;但按字节分段存在多字节字符被截断的问题——例如 UTF-8 编码下一个汉字通常占 3 个字节(GBK 编码下占 2 个字节),如果分段边界恰好落在某个汉字的字节中间,读出来就是乱码

 

你可能感兴趣的:(JAVA,文件,多线程)