2019-07-03 21:33:00 修复
从 MySQL 向 InfluxDB 导库的过程中
遇到一个问题
单线程导库效率太低
目标是把晚上时间充分利用起来,多线程导入
计划如下:
比如 数据量
2019-02-12 00:00:00~2019-04-04 17:39:09
共 70207964 条(约 7000 万条)
为了避免一口吞进来,或者吞的太大,造成导入方和导出方内存压力
细化切割成 按时间分段,5分钟一段
比如
start:2019-02-12 00:15:00 end:2019-02-12 00:20:00
start:2019-02-12 00:05:00 end:2019-02-12 00:10:00
start:2019-02-12 00:10:00 end:2019-02-12 00:15:00
start:2019-02-12 00:00:00 end:2019-02-12 00:05:00
这样也方便在中途出错(例如断电、断网)时,根据日志找到最后一个成功的时间节点,从该处续接上
不要求必须连续
基本思路是,比如 创建 Thread pool 5 个线程
你们去吧,从0开始 各自取5分钟的数据 导入InfluxDB,不关心顺序,但不能重复
当某个线程完成后,再去取5分钟,分钟数一直在累加,不能重复
就这点需求,折腾一下午....
经过抽象,问题分割为两部分:
1,按时间段从 MySQL 导入到 InfluxDB(之前已完成)
2, 如何用Thread pool 交替打印时间段,长度为5分钟
实现:
/**
* ClassName: DBTransporter
* Function: TODO
* Date: 2019-07-02 17:13
* author daguang
* version V1.0
*/
@Slf4j
@Service
public class DBTransporter {
private static AtomicInteger curMinis = new AtomicInteger(0);
private static final int stepM = 5;//分钟
private static final int CORE_NUM = Runtime.getRuntime().availableProcessors() * 2;
private static Semaphore semaphore = new Semaphore(CORE_NUM);
private static Lock rLock = new ReentrantLock(true);
private static final int THREAD_COUNTS = CORE_NUM;
@Value("${db.time_start}")
private String allBeginTime;//总起点
@Value("${db.time_end}")
private String allEndTime;
private static long totalMinutes;
private static final DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
//总传输条数
private static AtomicInteger totalTransCnts = new AtomicInteger();
private static volatile boolean isAllFinshed;
@Autowired
DBExporter dbExporter;
/**
* 并发传输
* 5分钟一段
*/
public void doTransConcurrent() {
printEnv();
log.info("THREAD_COUNTS: " + THREAD_COUNTS);
LocalDateTime beginTime = LocalDateTime.parse(allBeginTime, formatter);
LocalDateTime endTime = LocalDateTime.parse(allEndTime, formatter);
totalMinutes = ChronoUnit.MINUTES.between(beginTime, endTime);
log.info("allBeginTime:{} allEndTime:{} totalMinutes:{}", allBeginTime, allEndTime, totalMinutes);
scheduleTrans(totalMinutes, dbExporter);
}
public void scheduleTrans(long totalMinutes, TransTask transTask) {
LocalDateTime allTimeBegin = LocalDateTime.parse(allBeginTime, formatter);
Executors.newFixedThreadPool(2);
ThreadPoolExecutor threadPool = new ThreadPoolExecutor(THREAD_COUNTS, THREAD_COUNTS, 0, TimeUnit.MILLISECONDS, new LinkedBlockingDeque<>());
while (!isAllFinshed) {
try {
semaphore.acquire();//控制提交的任务数量 超过则阻塞等待(外层仅一个主线程)
log.info("queueSize:{}", threadPool.getQueue().size());
/**
* 当前thread自身起止时间
* 按5分钟步进
*
* lock 保证当前只有一个线程修改 计数
*
*/
rLock.lock();
// log.info("======={} curMinis:{}",Thread.currentThread().getName(), curMinis.get());
// 从头加到当前分钟
LocalDateTime startTime = allTimeBegin.plusMinutes(curMinis.get());
// 步进5分钟
LocalDateTime endTime = startTime.plusMinutes(stepM);
curMinis.addAndGet(stepM);//其他线程再取是下一段了
//起点大于总分钟数,则当前执行即最后一段了 例如 :total 12 cur 10~15
isAllFinshed = curMinis.get() >= totalMinutes;
rLock.unlock();
threadPool.submit(new Runnable() {
@Override
public void run() {
log.info("---thread:{} started", Thread.currentThread().getName());
semaphore.release();//直到启动才释放手里的 启动权 通行证
long _st = System.currentTimeMillis();
int transed = transTask.doTrans(startTime, endTime);
totalTransCnts.addAndGet(transed);
log.info("---done tans:{}~{} time:{}ms cnt:{}", startTime.format(formatter), endTime.format(formatter), (System.currentTimeMillis() - _st), transed);
}
});
} catch (InterruptedException e) {
e.printStackTrace();
}
}
threadPool.shutdown();
//wait until all finished
try {
boolean isFinished = threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.HOURS);
log.info("isFinished: {}", isFinished);
} catch (InterruptedException e) {
e.printStackTrace();
}
log.info("all finished: {}条", totalTransCnts.get());
}
private void printEnv() {
log.info("host: {} db: {} Influx measure: {}", DBConstants.DB_HOST, DBConstants.DB_NAME, DBConstants.MEASUREMENT_NAME);
}
}
输出:
THREAD_COUNTS: 16
totalMinutes:20
21:02:22.970 [pool-1-thread-4] INFO com.testinflux.export.DBTransporter - pool-1-thread-4 curMinis:20 start:2019-02-12 00:15:00 end:2019-02-12 00:20:00
21:02:22.970 [pool-1-thread-2] INFO com.testinflux.export.DBTransporter - pool-1-thread-2 curMinis:10 start:2019-02-12 00:05:00 end:2019-02-12 00:10:00
21:02:22.970 [pool-1-thread-3] INFO com.testinflux.export.DBTransporter - pool-1-thread-3 curMinis:15 start:2019-02-12 00:10:00 end:2019-02-12 00:15:00
21:02:22.970 [pool-1-thread-1] INFO com.testinflux.export.DBTransporter - pool-1-thread-1 curMinis:5 start:2019-02-12 00:00:00 end:2019-02-12 00:05:00
Process finished with exit code 0