大文件导出

关于大文件导出的优化迭代情况如下:
计算机配置:四核16G内存
初始版本为单线程单文件导出,使用 MyBatis 读取、opencsv 写入,耗时将近三小时;
第一轮优化改为多线程单文件,提高读数据效率,时间仅缩减十分钟;
第二轮改为多线程多文件,提高写文件效率,时间缩减一个半小时;
第三轮使用 Mybatis 流式查询,并改用 Map 封装数据,提高内存利用率,时间缩减十分钟;
第四轮弃用 Mybatis ,改用原生 JDBC 获取数据并直接拼接,时间缩减十分钟;
第五轮弃用 opencsv ,改用 BufferedWriter 直接写数据,时间缩减十分钟;

2023-04-23 22:01:30 [main] INFO  WriteData - 单线程单文件 total time in 258s
2023-04-23 22:02:44 [main] INFO  WriteData - 固定线程单文件 total time in 74s
2023-04-23 22:03:40 [main] INFO  WriteData - 固定线程多文件 total time in 55s
2023-04-23 22:04:18 [main] INFO  WriteData - concurrentWrite total time in 37s
2023-04-23 22:26:28 [Thread-1] INFO  WriteData - query in 42s
2023-04-23 22:26:28 [Thread-3] INFO  WriteData - query in 42s
2023-04-23 22:26:28 [Thread-4] INFO  WriteData - query in 42s
2023-04-23 22:26:28 [Thread-6] INFO  WriteData - query in 42s
2023-04-23 22:26:28 [Thread-7] INFO  WriteData - query in 42s
2023-04-23 22:26:28 [Thread-2] INFO  WriteData - query in 42s
2023-04-23 22:26:28 [Thread-5] INFO  WriteData - query in 42s
2023-04-23 22:26:30 [Thread-0] INFO  WriteData - query in 44s

2023-04-23 22:27:00 [Thread-5] INFO  WriteData - write in 31s
2023-04-23 22:27:00 [Thread-1] INFO  WriteData - write in 31s
2023-04-23 22:27:00 [Thread-7] INFO  WriteData - write in 31s
2023-04-23 22:27:00 [Thread-2] INFO  WriteData - write in 31s
2023-04-23 22:27:00 [Thread-3] INFO  WriteData - write in 32s
2023-04-23 22:27:00 [Thread-6] INFO  WriteData - write in 32s
2023-04-23 22:27:00 [Thread-4] INFO  WriteData - write in 32s
2023-04-23 22:27:01 [Thread-0] INFO  WriteData - write in 31s

2023-04-23 22:27:01 [main] INFO  WriteData - 固定线程单文件 total time in 75s

2023-04-23 22:27:24 [Thread-14] INFO  WriteData - query in 22s
2023-04-23 22:27:24 [Thread-13] INFO  WriteData - query in 22s
2023-04-23 22:27:24 [Thread-12] INFO  WriteData - query in 22s
2023-04-23 22:27:24 [Thread-9] INFO  WriteData - query in 22s
2023-04-23 22:27:24 [Thread-11] INFO  WriteData - query in 22s
2023-04-23 22:27:24 [Thread-10] INFO  WriteData - query in 22s
2023-04-23 22:27:24 [Thread-15] INFO  WriteData - query in 22s
2023-04-23 22:27:25 [Thread-8] INFO  WriteData - query in 23s

2023-04-23 22:27:55 [Thread-12] INFO  WriteData - write in 31s
2023-04-23 22:27:55 [Thread-14] INFO  WriteData - write in 31s
2023-04-23 22:27:55 [Thread-9] INFO  WriteData - write in 31s
2023-04-23 22:27:55 [Thread-11] INFO  WriteData - write in 31s
2023-04-23 22:27:55 [Thread-13] INFO  WriteData - write in 31s
2023-04-23 22:27:56 [Thread-15] INFO  WriteData - write in 31s
2023-04-23 22:27:56 [Thread-10] INFO  WriteData - write in 31s
2023-04-23 22:27:56 [Thread-8] INFO  WriteData - write in 31s

2023-04-23 22:27:56 [main] INFO  WriteData - 固定线程多文件 total time in 54s
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.sql.*;
import java.time.Duration;
import java.time.LocalDate;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

/**
 * Benchmark harness that exports the {@code student} table to pipe-delimited text files
 * using plain JDBC and {@link BufferedWriter}, comparing four strategies:
 * single thread / single file, fixed threads / single file, fixed threads / one file per
 * thread, and a thread pool writing many small files.
 *
 * <p>Each strategy logs its wall-clock duration in whole seconds. Column values are written
 * verbatim: a value containing {@code '|'} or a line break is NOT escaped, and SQL NULL is
 * written as the text {@code null}.
 */
public class WriteData {

    static final Logger log = LoggerFactory.getLogger(WriteData.class);
    public static final String PARENT_PATH = "C:\\Users\\qiu01\\Desktop\\server\\docker\\mysql\\master\\data\\stu_data\\";
    public static final String URL = "jdbc:mysql://localhost:3307/stu?allowPublicKeyRetrieval=TRUE&useCursorFetch=true";
    public static final String USERNAME = "root";
    // NOTE(review): credentials are hard-coded in source; acceptable only for a local benchmark.
    public static final String PASSWORD = "123456";
    /** Range query; the lower bound is exclusive and the upper bound inclusive. */
    public static final String SQL = "SELECT * FROM student WHERE id > ? AND id <= ?";
    /** Upper id bound of the export; ids in (0, TOTAL] are read. */
    public static final int TOTAL = 10000000;

    // The work queue is unbounded, so the pool never grows past its core size of 8;
    // the maximum of 9 is effectively unused.
    public static final ThreadPoolExecutor POOL = new ThreadPoolExecutor(8, 9, 3, TimeUnit.SECONDS, new LinkedBlockingDeque<>());
    public static final HikariDataSource DS;

    /** JDBC fetch-size hint used by the streaming strategies. */
    private static final int FETCH_SIZE = 10000;
    /** Formatted rows are handed to the writer once this many chars have accumulated. */
    private static final int ROW_BATCH_CHARS = 1 << 16;

    static {
        HikariConfig config = new HikariConfig();

        config.setJdbcUrl(URL);
        config.setUsername(USERNAME);
        config.setPassword(PASSWORD);

        DS = new HikariDataSource(config);
    }

    /**
     * Runs the four export strategies in sequence and closes the connection pool afterwards,
     * whether or not a strategy failed.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            // Baseline: one thread, one file.
            singleThreadWrite();
            // Fixed number of threads, all writing the same file.
            concurrentWriteWithFixedThread(true);
            // Fixed number of threads, one file per thread.
            concurrentWriteWithFixedThread(false);
            // Thread pool, many small files.
            concurrentWrite();
        } finally {
            DS.close();
        }
    }

    /**
     * Exports ids in (0, {@link #TOTAL}] to {@code file.csv} on the calling thread, logs the
     * elapsed time, then empties the output folder.
     *
     * @throws RuntimeException wrapping any {@link SQLException} or {@link IOException}
     */
    public static void singleThreadWrite() {
        String file = PARENT_PATH + "file.csv";
        long start = System.currentTimeMillis();
        try (BufferedWriter writer = openWriter(file);
             Connection connection = DS.getConnection();
             PreparedStatement stmt = connection.prepareStatement(SQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) {

            // A positive fetch size together with useCursorFetch=true in the URL is meant to
            // make the driver fetch rows in batches instead of buffering the whole result.
            stmt.setFetchSize(FETCH_SIZE);
            stmt.setInt(1, 0);
            stmt.setInt(2, TOTAL);

            try (ResultSet rs = stmt.executeQuery()) {
                writeToFile(writer, rs);
            }
        } catch (SQLException | IOException e) {
            throw new RuntimeException(e);
        }
        log.info("单线程单文件 total time in {}s", getSeconds(start));
        emptyFolder();
    }

    /**
     * Splits the id range into chunks of 1,250,000 and exports each chunk on its own thread.
     * Output files are left in place when the method returns.
     *
     * @param writeInOneFile {@code true} to write every chunk into the shared {@code file.csv}
     *                       (row order across chunks is then unspecified), {@code false} to
     *                       write chunk {@code n} to {@code file_n.csv}
     * @throws RuntimeException if any worker fails, if the shared file cannot be opened or
     *                          closed, or if the calling thread is interrupted while waiting
     */
    private static void concurrentWriteWithFixedThread(boolean writeInOneFile) {
        final int batchSize = 1250000;
        final int workers = (TOTAL + batchSize - 1) / batchSize;
        Thread[] threads = new Thread[workers];
        // One slot per worker; each worker writes only its own slot and join() makes the
        // value visible to this thread.
        Exception[] failures = new Exception[workers];
        long start = System.currentTimeMillis();

        // In single-file mode all workers share ONE writer. Giving every thread its own
        // buffered writer on the same file lets buffers flush mid-row and interleave rows.
        try (BufferedWriter sharedWriter = writeInOneFile ? openWriter(PARENT_PATH + "file.csv") : null) {
            for (int no = 0; no < workers; no++) {
                final int slot = no;
                final int from = no * batchSize;
                final int to = Math.min(from + batchSize, TOTAL);

                threads[no] = new Thread(() -> {
                    try {
                        if (sharedWriter != null) {
                            exportRangeTimed(sharedWriter, from, to);
                        } else {
                            try (BufferedWriter own = openWriter(PARENT_PATH + "file_" + slot + ".csv")) {
                                exportRangeTimed(own, from, to);
                            }
                        }
                    } catch (SQLException | IOException | RuntimeException e) {
                        failures[slot] = e;
                    }
                });
                threads[no].start();
            }
            joinAll(threads);
            rethrowFailures(failures);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        if (writeInOneFile) {
            log.info("固定线程单文件 total time in {}s", getSeconds(start));
        } else {
            log.info("固定线程多文件 total time in {}s", getSeconds(start));
        }
    }

    /**
     * Splits the id range into chunks of 10,000 rows, exports each chunk to its own
     * {@code file_n.csv} on {@link #POOL}, logs the elapsed time and empties the output folder.
     * {@link #POOL} is shut down before returning, even on failure, so this method can run
     * only once per JVM.
     *
     * @throws java.util.concurrent.CompletionException if any chunk export fails
     */
    private static void concurrentWrite() {
        final int batchSize = 10000;
        final int chunks = (TOTAL + batchSize - 1) / batchSize;
        CompletableFuture<?>[] futures = new CompletableFuture<?>[chunks];
        long start = System.currentTimeMillis();
        try {
            for (int no = 0; no < chunks; no++) {
                final int chunk = no;
                final int from = no * batchSize;
                final int to = Math.min(from + batchSize, TOTAL);

                futures[no] = CompletableFuture.runAsync(() -> {
                    String file = PARENT_PATH + "file_" + chunk + ".csv";

                    try (BufferedWriter writer = openWriter(file);
                         Connection connection = DS.getConnection();
                         PreparedStatement stmt = connection.prepareStatement(SQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) {
                        stmt.setInt(1, from);
                        stmt.setInt(2, to);
                        try (ResultSet rs = stmt.executeQuery()) {
                            writeToFile(writer, rs);
                        }
                    } catch (SQLException | IOException e) {
                        throw new RuntimeException(e);
                    }
                }, POOL);
            }
            CompletableFuture.allOf(futures).join();
        } finally {
            // The pool's core threads are non-daemon; without this a failed run would
            // keep the JVM alive.
            POOL.shutdown();
        }
        log.info("多线程多文件 total time in {}s", getSeconds(start));
        emptyFolder();
    }

    /**
     * Deletes every direct entry of {@link #PARENT_PATH}. Entries that cannot be deleted
     * (for example non-empty sub-directories) are logged and skipped.
     */
    private static void emptyFolder() {
        File[] entries = new File(PARENT_PATH).listFiles();
        if (entries == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            log.warn("cannot list {}; nothing deleted", PARENT_PATH);
            return;
        }
        for (File entry : entries) {
            if (!entry.delete()) {
                log.warn("could not delete {}", entry);
            }
        }
    }

    /**
     * Writes every remaining row of {@code rs} to {@code writer} as one {@code '|'}-separated,
     * {@code '\n'}-terminated line.
     *
     * <p>Rows are formatted into a local buffer and handed to the writer in batches of whole
     * rows while holding the writer's monitor, so several threads may call this method with
     * the same writer without tearing a row.
     *
     * @param writer destination; not flushed or closed here
     * @param rs     result set positioned before the first row to export; not closed here
     * @throws SQLException if reading a column fails
     * @throws IOException  if writing fails
     */
    private static void writeToFile(BufferedWriter writer, ResultSet rs) throws SQLException, IOException {
        StringBuilder rows = new StringBuilder(ROW_BATCH_CHARS + 1024);
        while (rs.next()) {
            appendRow(rows, rs);
            if (rows.length() >= ROW_BATCH_CHARS) {
                writeRows(writer, rows);
            }
        }
        writeRows(writer, rows);
    }

    /** Returns the whole seconds elapsed since {@code start} (epoch milliseconds). */
    private static long getSeconds(long start) {
        return Duration.ofMillis(System.currentTimeMillis() - start).getSeconds();
    }

    /** Opens {@code file} for writing as UTF-8 text, creating it or truncating existing content. */
    private static BufferedWriter openWriter(String file) throws IOException {
        return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, false), StandardCharsets.UTF_8));
    }

    /** Exports ids in (fromExclusive, toInclusive] to {@code writer}, logging query and write durations. */
    private static void exportRangeTimed(BufferedWriter writer, int fromExclusive, int toInclusive)
            throws SQLException, IOException {
        try (Connection connection = DS.getConnection();
             PreparedStatement stmt = connection.prepareStatement(SQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) {
            stmt.setFetchSize(FETCH_SIZE);
            stmt.setInt(1, fromExclusive);
            stmt.setInt(2, toInclusive);
            long queryStart = System.currentTimeMillis();
            try (ResultSet rs = stmt.executeQuery()) {
                log.info("query in {}s", getSeconds(queryStart));
                long writeStart = System.currentTimeMillis();
                writeToFile(writer, rs);
                log.info("write in {}s", getSeconds(writeStart));
            }
        }
    }

    /** Waits for every thread to finish; restores the interrupt flag if interrupted. */
    private static void joinAll(Thread[] threads) {
        for (Thread t : threads) {
            try {
                t.join();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            }
        }
    }

    /** Throws a RuntimeException caused by the first non-null failure, with the rest suppressed. */
    private static void rethrowFailures(Exception[] failures) {
        RuntimeException combined = null;
        for (Exception failure : failures) {
            if (failure == null) {
                continue;
            }
            if (combined == null) {
                combined = new RuntimeException("export worker failed", failure);
            } else {
                combined.addSuppressed(failure);
            }
        }
        if (combined != null) {
            throw combined;
        }
    }

    /** Appends the current row of {@code rs} to {@code row} as one delimited, newline-terminated line. */
    private static void appendRow(StringBuilder row, ResultSet rs) throws SQLException {
        row.append(rs.getString("first_name")).append('|');
        row.append(rs.getString("last_name")).append('|');
        // A NULL date prints as "null", like every other NULL column, instead of throwing.
        java.sql.Date birth = rs.getDate("date_of_birth");
        row.append(birth == null ? null : birth.toLocalDate()).append('|');
        row.append(rs.getString("gender")).append('|');
        row.append(rs.getString("email")).append('|');
        row.append(rs.getString("phone_number")).append('|');
        row.append(rs.getString("address")).append('|');
        row.append(rs.getString("city")).append('|');
        row.append(rs.getString("state")).append('|');
        row.append(rs.getString("zip_code")).append('|');
        row.append(rs.getString("country")).append('|');
        row.append(rs.getString("nationality")).append('|');
        row.append(rs.getString("religion")).append('|');
        row.append(rs.getString("emergency_contact_name")).append('|');
        row.append(rs.getString("emergency_contact_phone_number")).append('|');
        row.append(rs.getString("guardian_name")).append('|');
        row.append(rs.getString("guardian_phone_number")).append('|');
        row.append(rs.getString("high_school_name")).append('|');
        row.append(rs.getDouble("high_school_gpa")).append('|');
        row.append(rs.getInt("high_school_graduation_year")).append('|');
        row.append(rs.getString("major")).append('|');
        row.append(rs.getString("degree_level")).append('|');
        row.append(rs.getString("enrollment_status")).append('\n');
    }

    /** Writes the buffered rows to {@code writer} as one atomic unit and clears the buffer. */
    private static void writeRows(BufferedWriter writer, StringBuilder rows) throws IOException {
        if (rows.length() == 0) {
            return;
        }
        String chunk = rows.toString();
        // Every write in this class goes through here, so locking on the writer keeps a
        // batch of whole rows contiguous when the writer is shared between threads.
        synchronized (writer) {
            writer.write(chunk);
        }
        rows.setLength(0);
    }
}

你可能感兴趣的:(java,mysql,mybatis)