多线程分段导库 ---线程模型

concurrent导库.jpg

2019-07-03 21:33:00 修复

从MySql向InfluxDB导库过程中

遇到一个问题

单线程导库效率太低
目标是把晚上时间充分利用起来,多线程导入

计划如下:

比如 数据量

2019-02-12 00:00:00~2019-04-04 17:39:09
共 70207964 条(约 7 千万条)

为了避免一口吞进来,或者吞得太大,造成导入方和导出方内存压力

细化切割成 按时间分段,5分钟一段

比如

start:2019-02-12 00:15:00 end:2019-02-12 00:20:00
start:2019-02-12 00:05:00 end:2019-02-12 00:10:00
start:2019-02-12 00:10:00 end:2019-02-12 00:15:00
start:2019-02-12 00:00:00 end:2019-02-12 00:05:00

也方便一旦中途出错,例如断电,断网,可以根据日志找到最后成功时间节点,续接上

不要求必须连续
基本思路是,比如 创建 Thread pool 5 个线程
你们去吧,从0开始 各自取5分钟的数据 导入InfluxDB,不关心顺序,但不能重复

当某个线程完成后,再去取5分钟,分钟数一直在累加,不能重复

就这点需求,折腾一下午....

经过抽象,问题分割为两部分:
1,按时间段从MySql导入到 InfluxDB (之前已完成)
2, 如何用Thread pool 让多个线程轮流领取互不重复的时间段(先用打印验证),每段长度为5分钟

实现:

/**
 * ClassName: DBTransporter
 * Function:  TODO
 * Date:      2019-07-02 17:13
 * author     daguang
 * version    V1.0
 */
@Slf4j
@Service
public class DBTransporter {
    private static AtomicInteger curMinis = new AtomicInteger(0);
    private static final int stepM = 5;//分钟
    private static final int CORE_NUM = Runtime.getRuntime().availableProcessors() * 2;
    private static Semaphore semaphore = new Semaphore(CORE_NUM);
    private static Lock rLock = new ReentrantLock(true);
    private static final int THREAD_COUNTS = CORE_NUM;
    @Value("${db.time_start}")
    private String allBeginTime;//总起点
    @Value("${db.time_end}")
    private String allEndTime;
    private static long totalMinutes;
    private static final DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
    //总传输条数
    private static AtomicInteger totalTransCnts = new AtomicInteger();
    private static volatile boolean isAllFinshed;
    @Autowired
    DBExporter dbExporter;


    /**
     * 并发传输
     * 5分钟一段
     */
    public void doTransConcurrent() {
        printEnv();
        log.info("THREAD_COUNTS: " + THREAD_COUNTS);

        LocalDateTime beginTime = LocalDateTime.parse(allBeginTime, formatter);
        LocalDateTime endTime = LocalDateTime.parse(allEndTime, formatter);
        totalMinutes = ChronoUnit.MINUTES.between(beginTime, endTime);
        log.info("allBeginTime:{} allEndTime:{} totalMinutes:{}", allBeginTime, allEndTime, totalMinutes);
        scheduleTrans(totalMinutes, dbExporter);
    }

    public void scheduleTrans(long totalMinutes, TransTask transTask) {
        LocalDateTime allTimeBegin = LocalDateTime.parse(allBeginTime, formatter);
        Executors.newFixedThreadPool(2);
        ThreadPoolExecutor threadPool = new ThreadPoolExecutor(THREAD_COUNTS, THREAD_COUNTS, 0, TimeUnit.MILLISECONDS, new LinkedBlockingDeque<>());
        while (!isAllFinshed) {
            try {
                semaphore.acquire();//控制提交的任务数量 超过则阻塞等待(外层仅一个主线程)
                log.info("queueSize:{}", threadPool.getQueue().size());
                /**
                 * 当前thread自身起止时间
                 * 按5分钟步进
                 *
                 * lock 保证当前只有一个线程修改 计数
                 *
                 */
                rLock.lock();
//              log.info("======={} curMinis:{}",Thread.currentThread().getName(), curMinis.get());
//              从头加到当前分钟
                LocalDateTime startTime = allTimeBegin.plusMinutes(curMinis.get());
//                      步进5分钟
                LocalDateTime endTime = startTime.plusMinutes(stepM);
                curMinis.addAndGet(stepM);//其他线程再取是下一段了
                //起点大于总分钟数,则当前执行即最后一段了  例如 :total 12   cur 10~15
                isAllFinshed = curMinis.get() >= totalMinutes;
                rLock.unlock();
                threadPool.submit(new Runnable() {
                    @Override
                    public void run() {
                        log.info("---thread:{} started", Thread.currentThread().getName());
                        semaphore.release();//直到启动才释放手里的 启动权 通行证
                        long _st = System.currentTimeMillis();
                        int transed = transTask.doTrans(startTime, endTime);
                        totalTransCnts.addAndGet(transed);
                        log.info("---done tans:{}~{} time:{}ms cnt:{}", startTime.format(formatter), endTime.format(formatter), (System.currentTimeMillis() - _st), transed);
                    }
                });
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
        threadPool.shutdown();
        //wait until all finished
        try {
            boolean isFinished = threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.HOURS);
            log.info("isFinished: {}", isFinished);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        log.info("all finished: {}条", totalTransCnts.get());
    }

    private void printEnv() {
        log.info("host: {} db: {} Influx measure: {}", DBConstants.DB_HOST, DBConstants.DB_NAME, DBConstants.MEASUREMENT_NAME);
    }
}

输出:

THREAD_COUNTS: 16
totalMinutes:20
21:02:22.970 [pool-1-thread-4] INFO  com.testinflux.export.DBTransporter - pool-1-thread-4 curMinis:20 start:2019-02-12 00:15:00 end:2019-02-12 00:20:00
21:02:22.970 [pool-1-thread-2] INFO  com.testinflux.export.DBTransporter - pool-1-thread-2 curMinis:10 start:2019-02-12 00:05:00 end:2019-02-12 00:10:00
21:02:22.970 [pool-1-thread-3] INFO  com.testinflux.export.DBTransporter - pool-1-thread-3 curMinis:15 start:2019-02-12 00:10:00 end:2019-02-12 00:15:00
21:02:22.970 [pool-1-thread-1] INFO  com.testinflux.export.DBTransporter - pool-1-thread-1 curMinis:5 start:2019-02-12 00:00:00 end:2019-02-12 00:05:00

Process finished with exit code 0

你可能感兴趣的:(多线程分段导库 ---线程模型)