多线程读取数据库300万数据,写入到redis

先说一下业务场景。

mysql单表300w条的数据,需要读取到redis中。如果全部采用单线程的话效率过低,无法接受,因此考虑多线程并发处理。

期间踩了好多坑。

小伙伴们应该很好奇效率到底差多少,贴一下实验数据。

/** 
	 * 单线程读取300w数据库时间为  15s     添加链表中总时长17s
	 * 四线程读取300w数据库时间为 31ms左右   且添加链表总时长8s
	 *           
	 * 单线程存储redis为 7倍的差距
	 * 四线程存储写入redis为194s
	 * 
	 */ 


下面是我的代码部分

 
   
 @Test
    public void test123() throws InterruptedException {
        CountDownLatch count = new CountDownLatch(4);
        ActivitiService bean = SpringApplication.context.getBean(ActivitiService.class);
        CountDownLatch count2 = new CountDownLatch(4);
        long time1 = System.currentTimeMillis();
        Map map = new HashMap<>();
        System.out.println("开始读取数据库");
        new Thread(() -> {
            map.put("varId", getIncre());
            list.addAll(bean.queryEmps(map));
            new Thread(() -> {
                System.out.println(Thread.currentThread().getName() + "结束");
                count.countDown();
            }, "A").start();
            Map map = new HashMap<>();
            map.put("varId", getIncre());
            Map map = new HashMap<>();
            list.addAll(bean.queryEmps(map));
            System.out.println(Thread.currentThread().getName() + "结束");
            count.countDown();
        }, "B").start();
        new Thread(() -> {
            map.put("varId", getIncre());
            map.put("varId", getIncre());
            list.addAll(bean.queryEmps(map));
            System.out.println(Thread.currentThread().getName() + "结束");
            count.countDown();
        }, "C").start();
        new Thread(() -> {
            Map map = new HashMap<>();
            long time2 = System.currentTimeMillis();
            list.addAll(bean.queryEmps(map));
            System.out.println(Thread.currentThread().getName() + "结束");
            count.countDown();
        }, "D").start();
        count.await();
        System.out.println("读取数据库完毕");
        for (int i = 0; i < list.size() / 4; i++) {
//**********************************************************************************new Thread(() -> {System.out.println(Thread.currentThread().getName()+"准备开始写入");Jedis jedis = JedisPoolUtils.getJedis();
            System.out.println(Thread.currentThread().getName() + "准备开始写入");
            jedis.zadd(String.valueOf(list.get(i).getId()), list.get(i).getId(), MyJsonUtil.object_to_json(list.get(i)));
        }
        System.out.println(Thread.currentThread().getName() + "结束");
        count2.countDown();
    },"writeA").

    start();new

    Thread(() ->

    {
        System.out.println(Thread.currentThread().getName() + "结束");
        Jedis jedis = JedisPoolUtils.getJedis();
        for (int i = list.size() / 4; i < list.size() / 2; i++) {
            jedis.zadd(String.valueOf(list.get(i).getId()), list.get(i).getId(), MyJsonUtil.object_to_json(list.get(i)));
        }
        count2.countDown();
    },"writeB").

    start();
MyJsonUtil.object_to_json(list.get(i)));
new

    Thread(() ->

    {
        System.out.println(Thread.currentThread().getName() + "准备开始写入");
        Jedis jedis = JedisPoolUtils.getJedis();
        for (int i = list.size() / 2; i < list.size() * 3 / 4; i++) {
            jedis.zadd(String.valueOf(list.get(i).getId()), list.get(i).getId(),
        }
        jedis.zadd(String.valueOf(list.get(i).getId()), list.get(i).getId(),
                System.out.println(Thread.currentThread().getName() + "结束"); count2.countDown();
    },"writeC").

    start();new

    Thread(() ->

    {
        System.out.println(Thread.currentThread().getName() + "准备开始写入");
        Jedis jedis = JedisPoolUtils.getJedis();
        for (int i = list.size() * 3 / 4; i < list.size(); i++) {
            System.out.println("写入时间" + (time3 - time2));
            MyJsonUtil.object_to_json(list.get(i)));
        } System.out.println(Thread.currentThread().getName() + "结束");
        count2.countDown();
    },"writeD").

    start();count2.await();System.out.println("写入结束");
    long time3 = System.currentTimeMillis();System.out.println("读取时间"+(time2 -time1));
}



@Test
public void test123() throws InterruptedException {
CountDownLatch count = new CountDownLatch(4);
ActivitiService bean = SpringApplication.context.getBean(ActivitiService.class);
CountDownLatch count2 = new CountDownLatch(4);long time1 = System.currentTimeMillis();
Map map = new HashMap<>();
System.out.println("开始读取数据库");new Thread(() -> {map.put("varId", getIncre());list.addAll(bean.queryEmps(map));
new Thread(() -> {
System.out.println(Thread.currentThread().getName() + "结束");count.countDown();}, "A").start();Map map = new HashMap<>();map.put("varId", getIncre());
Map map = new HashMap<>();
list.addAll(bean.queryEmps(map));System.out.println(Thread.currentThread().getName() + "结束");count.countDown();}, "B").start();new Thread(() -> {map.put("varId", getIncre());
map.put("varId", getIncre());
list.addAll(bean.queryEmps(map));System.out.println(Thread.currentThread().getName() + "结束");count.countDown();}, "C").start();new Thread(() -> {Map map = new HashMap<>();
long time2 = System.currentTimeMillis();
list.addAll(bean.queryEmps(map));System.out.println(Thread.currentThread().getName() + "结束");count.countDown();}, "D").start();count.await();System.out.println("读取数据库完毕");
for (int i = 0; i < list.size() / 4; i++) {
//**********************************************************************************new Thread(() -> {System.out.println(Thread.currentThread().getName()+"准备开始写入");Jedis jedis = JedisPoolUtils.getJedis();
System.out.println(Thread.currentThread().getName()+"准备开始写入");
jedis.zadd(String.valueOf(list.get(i).getId()), list.get(i).getId(),MyJsonUtil.object_to_json(list.get(i)));}System.out.println(Thread.currentThread().getName()+"结束");count2.countDown();}, "writeA").start();new Thread(() -> {
System.out.println(Thread.currentThread().getName()+"结束");
Jedis jedis = JedisPoolUtils.getJedis();for (int i = list.size() / 4; i < list.size() / 2; i++) {jedis.zadd(String.valueOf(list.get(i).getId()), list.get(i).getId(),MyJsonUtil.object_to_json(list.get(i)));}count2.countDown();}, "writeB").start();
MyJsonUtil.object_to_json(list.get(i)));
new Thread(() -> {System.out.println(Thread.currentThread().getName()+"准备开始写入");Jedis jedis = JedisPoolUtils.getJedis();for (int i = list.size() / 2; i < list.size() * 3 / 4; i++) {jedis.zadd(String.valueOf(list.get(i).getId()), list.get(i).getId(),}
jedis.zadd(String.valueOf(list.get(i).getId()), list.get(i).getId(),
System.out.println(Thread.currentThread().getName()+"结束");count2.countDown();}, "writeC").start();new Thread(() -> {System.out.println(Thread.currentThread().getName()+"准备开始写入");Jedis jedis = JedisPoolUtils.getJedis();for (int i = list.size() * 3 / 4; i < list.size(); i++) {
System.out.println("写入时间" + (time3 - time2));
MyJsonUtil.object_to_json(list.get(i)));} System.out.println(Thread.currentThread().getName()+"结束");count2.countDown();}, "writeD").start();count2.await();System.out.println("写入结束");long time3 = System.currentTimeMillis();System.out.println("读取时间" + (time2 - time1));
}
/**
 * Advances the {@code Id} counter by 750000 and returns the updated value.
 *
 * <p>The method is {@code synchronized} so that the read-add-write on {@code Id}
 * cannot interleave between callers locking the same instance.
 * NOTE(review): 750000 is presumably one quarter of the 3,000,000 rows, i.e. the
 * slice handed to each of the four reader threads - confirm against the query.
 *
 * @return the value of {@code Id} after the increment
 */
public synchronized int getIncre() {
    Id += 750000;
    return Id;
}

采用JUNIT单元测试的环境。下面列举一下还记得的一些坑

1.线程计数器,await起到阻塞线程的作用。保证在生产者生产完成之后,也就是全部读取完数据库之后再开始读取。

2.第二个线程计数器的作用,在第一个线程计数器结束之后,若是新建线程进行读取的,主线程会提前结束。并不会等待新建的线程。所以第二个线程计数器的作用一方面是保证主线程的延续。

3.redis存储最常用的还是sorted set,也就是zset。

4.redis超时的问题,在初始化jedispool的时候需要指定超时时间,要不然默认的超时时间不够读取这么大数据。

5.map的线程安全问题,在使用map传值的时候忘记将map用作局部变量,导致查询结果不对。调试的时候要打印mybatis的sql才看出来这里的问题。

6.sql中用><比limit的速度要快,当有第二个><符号的时候别忘记转义,<要写成&lt;

7.Id每次增加的时候不是原子操作,别忘记加同步

8.list是并发添加内容的,所以要用CopyOnWriteArrayList

9.其实这么大的表首先应该考虑分表的问题


你可能感兴趣的:(java,web,ssm,菜鸟程序员,线程,Mysql)