关于大文件导出的优化迭代情况如下:
计算机配置:四核16G内存
初始版本为单线程单文件导出,使用 MyBatis 读取数据、opencsv 写入文件,耗时将近三小时;
第一轮优化改为多线程单文件,提高读数据效率,时间仅缩减十分钟;
第二轮改为多线程多文件,提高写文件效率,时间缩减一个半小时;
第三轮使用 Mybatis 流式查询,并改用 Map 封装数据,提高内存利用率,时间缩减十分钟;
第四轮弃用 Mybatis ,改用原生 JDBC 获取数据并直接拼接,时间缩减十分钟;
第五轮弃用 opencsv,改用 BufferedWriter 直接写数据,时间缩减十分钟;
2023-04-23 22:01:30 [main] INFO WriteData - 单线程单文件 total time in 258s
2023-04-23 22:02:44 [main] INFO WriteData - 固定线程单文件 total time in 74s
2023-04-23 22:03:40 [main] INFO WriteData - 固定线程多文件 total time in 55s
2023-04-23 22:04:18 [main] INFO WriteData - concurrentWrite total time in 37s
2023-04-23 22:26:28 [Thread-1] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-3] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-4] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-6] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-7] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-2] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-5] INFO WriteData - query in 42s
2023-04-23 22:26:30 [Thread-0] INFO WriteData - query in 44s
2023-04-23 22:27:00 [Thread-5] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-1] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-7] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-2] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-3] INFO WriteData - write in 32s
2023-04-23 22:27:00 [Thread-6] INFO WriteData - write in 32s
2023-04-23 22:27:00 [Thread-4] INFO WriteData - write in 32s
2023-04-23 22:27:01 [Thread-0] INFO WriteData - write in 31s
2023-04-23 22:27:01 [main] INFO WriteData - 固定线程单文件 total time in 75s
2023-04-23 22:27:24 [Thread-14] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-13] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-12] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-9] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-11] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-10] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-15] INFO WriteData - query in 22s
2023-04-23 22:27:25 [Thread-8] INFO WriteData - query in 23s
2023-04-23 22:27:55 [Thread-12] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-14] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-9] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-11] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-13] INFO WriteData - write in 31s
2023-04-23 22:27:56 [Thread-15] INFO WriteData - write in 31s
2023-04-23 22:27:56 [Thread-10] INFO WriteData - write in 31s
2023-04-23 22:27:56 [Thread-8] INFO WriteData - write in 31s
2023-04-23 22:27:56 [main] INFO WriteData - 固定线程多文件 total time in 54s
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.sql.*;
import java.time.Duration;
import java.time.LocalDate;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
/**
 * Benchmark that exports rows of the {@code student} table to pipe-delimited text files
 * using several strategies (single thread, fixed threads into one or many files, and a
 * thread pool with many small batches) and logs the elapsed time of each strategy.
 *
 * <p>All strategies read through plain JDBC on a HikariCP pool and write each row as one
 * line via {@link #writeToFile(BufferedWriter, ResultSet)}. Output is always encoded as
 * UTF-8 so the result does not depend on the platform default charset.
 */
public class WriteData {
    static final Logger log = LoggerFactory.getLogger(WriteData.class);

    /** Directory that receives the exported files; {@link #emptyFolder()} deletes its contents. */
    public static final String PARENT_PATH = "C:\\Users\\qiu01\\Desktop\\server\\docker\\mysql\\master\\data\\stu_data\\";
    public static final String URL = "jdbc:mysql://localhost:3307/stu?allowPublicKeyRetrieval=TRUE&useCursorFetch=true";
    public static final String USERNAME = "root";
    // NOTE(review): credentials are hard-coded; acceptable for a local benchmark only.
    public static final String PASSWORD = "123456";
    /** Selects the rows whose id lies in the half-open range (lower, upper]. */
    public static final String SQL = "SELECT * FROM student WHERE id > ? AND id <= ?";
    /** Upper bound of the id range exported by every strategy. */
    public static final int TOTAL = 10000000;
    /** Pool used by {@link #concurrentWrite()}: 8 core threads, unbounded work queue. */
    public static final ThreadPoolExecutor POOL = new ThreadPoolExecutor(8, 9, 3, TimeUnit.SECONDS, new LinkedBlockingDeque<>());
    public static final HikariDataSource DS;

    /** Fetch-size hint used by the single-thread and fixed-thread strategies. */
    private static final int FETCH_SIZE = 10000;

    static {
        HikariConfig config = new HikariConfig();
        config.setJdbcUrl(URL);
        config.setUsername(USERNAME);
        config.setPassword(PASSWORD);
        DS = new HikariDataSource(config);
    }

    /**
     * Runs every export strategy in sequence and closes the connection pool afterwards.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            // Single thread, single file.
            singleThreadWrite();
            // Fixed number of threads writing into the same file.
            concurrentWriteWithFixedThread(true);
            // Fixed number of threads, one file per thread.
            concurrentWriteWithFixedThread(false);
            // Thread pool, many small batches, one file per batch.
            concurrentWrite();
        } finally {
            DS.close();
        }
    }

    /**
     * Exports ids (0, TOTAL] on the calling thread into {@code file.csv}, logs the elapsed
     * time and then empties the output directory.
     *
     * @throws RuntimeException wrapping any {@link SQLException} or {@link IOException}
     */
    public static void singleThreadWrite() {
        String file = PARENT_PATH + "file.csv";
        long start = System.currentTimeMillis();
        try (BufferedWriter writer = openWriter(file, false)) {
            exportRange(writer, 0, TOTAL, FETCH_SIZE, false);
        } catch (SQLException | IOException e) {
            throw new RuntimeException("Single-threaded export to " + file + " failed", e);
        }
        log.info("单线程单文件 total time in {}s", getSeconds(start));
        emptyFolder();
    }

    /**
     * Splits the id range into batches of 1,250,000 rows and exports each batch on its own
     * thread, either into one shared file or into one file per thread.
     *
     * <p>In single-file mode all threads share ONE writer. Giving each thread its own
     * buffered writer on the same file lets buffers flush at arbitrary positions, which
     * interleaves partial rows of different threads in the output.
     *
     * @param writeInOneFile {@code true} to append every batch to {@code file.csv},
     *                       {@code false} to append batch {@code n} to {@code file_n.csv}
     * @throws RuntimeException if the shared file cannot be opened or closed, if any worker
     *                          fails, or if the calling thread is interrupted while waiting
     */
    private static void concurrentWriteWithFixedThread(boolean writeInOneFile) {
        final int batchSize = 1250000;
        // Ceiling division so a TOTAL that is not a multiple of batchSize is still covered.
        final int threadCount = (TOTAL + batchSize - 1) / batchSize;
        final Thread[] threads = new Thread[threadCount];
        // First failure raised by any worker; rethrown on the calling thread after the join.
        final AtomicReference<Throwable> failure = new AtomicReference<>();
        long start = System.currentTimeMillis();

        // A null resource is legal in try-with-resources and is simply not closed.
        try (BufferedWriter sharedWriter = writeInOneFile ? openWriter(PARENT_PATH + "file.csv", true) : null) {
            for (int no = 0; no < threadCount; no++) {
                final int fileNo = no;
                final int from = no * batchSize;
                final int to = Math.min(from + batchSize, TOTAL);
                Thread worker = new Thread(() -> {
                    try {
                        if (sharedWriter != null) {
                            exportRange(sharedWriter, from, to, FETCH_SIZE, true);
                        } else {
                            String file = PARENT_PATH + "file_" + fileNo + ".csv";
                            try (BufferedWriter ownWriter = openWriter(file, true)) {
                                exportRange(ownWriter, from, to, FETCH_SIZE, true);
                            }
                        }
                    } catch (SQLException | IOException | RuntimeException e) {
                        failure.compareAndSet(null, e);
                    }
                });
                threads[no] = worker;
                worker.start();
            }
            // Join inside the try block so the shared writer is closed only after all workers finish.
            joinAll(threads);
        } catch (IOException e) {
            throw new RuntimeException("Could not open or close the shared output file", e);
        }

        Throwable cause = failure.get();
        if (cause != null) {
            throw new RuntimeException("At least one export thread failed", cause);
        }
        if (writeInOneFile) {
            log.info("固定线程单文件 total time in {}s", getSeconds(start));
        } else {
            log.info("固定线程多文件 total time in {}s", getSeconds(start));
        }
        // Cleanup is disabled here (the emptyFolder() call was left commented out),
        // so the files written by this strategy stay in the output directory.
    }

    /**
     * Splits the id range into batches of 10,000 rows, submits one task per batch to
     * {@link #POOL} and writes each batch to its own {@code file_n.csv}. Shuts the pool down
     * when done, logs the elapsed time and empties the output directory.
     *
     * @throws java.util.concurrent.CompletionException if any batch fails
     */
    private static void concurrentWrite() {
        final int batchSize = 10000;
        final int taskCount = (TOTAL + batchSize - 1) / batchSize;
        final CompletableFuture<?>[] futures = new CompletableFuture<?>[taskCount];
        long start = System.currentTimeMillis();
        try {
            for (int no = 0; no < taskCount; no++) {
                final int from = no * batchSize;
                final int to = Math.min(from + batchSize, TOTAL);
                final String file = PARENT_PATH + "file_" + no + ".csv";
                futures[no] = CompletableFuture.runAsync(() -> {
                    try (BufferedWriter writer = openWriter(file, false)) {
                        // Fetch size 0: keep the driver default, as no hint is wanted for small batches.
                        exportRange(writer, from, to, 0, false);
                    } catch (SQLException | IOException e) {
                        throw new RuntimeException("Export of ids (" + from + ", " + to + "] failed", e);
                    }
                }, POOL);
            }
            CompletableFuture.allOf(futures).join();
        } finally {
            // Always release the non-daemon pool threads, otherwise a failure keeps the JVM alive.
            POOL.shutdown();
        }
        log.info("多线程多文件 total time in {}s", getSeconds(start));
        emptyFolder();
    }

    /**
     * Queries the rows with id in (fromExclusive, toInclusive] and writes them to {@code writer}.
     *
     * @param writer        destination; not closed by this method
     * @param fromExclusive lower id bound, exclusive
     * @param toInclusive   upper id bound, inclusive
     * @param fetchSize     fetch-size hint for the statement; values {@code <= 0} leave the driver default
     * @param logTimings    whether to log the query and write durations
     */
    private static void exportRange(BufferedWriter writer, int fromExclusive, int toInclusive,
                                    int fetchSize, boolean logTimings) throws SQLException, IOException {
        try (Connection connection = DS.getConnection();
             PreparedStatement stmt = connection.prepareStatement(
                     SQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) {
            if (fetchSize > 0) {
                stmt.setFetchSize(fetchSize);
            }
            stmt.setInt(1, fromExclusive);
            stmt.setInt(2, toInclusive);
            long queryStart = System.currentTimeMillis();
            try (ResultSet rs = stmt.executeQuery()) {
                if (logTimings) {
                    log.info("query in {}s", getSeconds(queryStart));
                }
                long writeStart = System.currentTimeMillis();
                writeToFile(writer, rs);
                if (logTimings) {
                    log.info("write in {}s", getSeconds(writeStart));
                }
            }
        }
    }

    /**
     * Opens a buffered UTF-8 writer on {@code file}.
     *
     * @param file   path of the file to write
     * @param append {@code true} to create the file if needed and append to it,
     *               {@code false} to create or truncate it
     */
    private static BufferedWriter openWriter(String file, boolean append) throws IOException {
        OutputStream out = append
                ? Files.newOutputStream(Paths.get(file), StandardOpenOption.CREATE, StandardOpenOption.APPEND)
                : Files.newOutputStream(Paths.get(file));
        return new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8));
    }

    /**
     * Waits for every thread to finish.
     *
     * @throws RuntimeException if the calling thread is interrupted; the interrupt flag is restored first
     */
    private static void joinAll(Thread[] threads) {
        for (Thread t : threads) {
            try {
                t.join();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new RuntimeException("Interrupted while waiting for export threads", e);
            }
        }
    }

    /** Deletes every entry directly inside {@link #PARENT_PATH}; logs entries that cannot be removed. */
    private static void emptyFolder() {
        File[] files = new File(PARENT_PATH).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            log.warn("Cannot list {}; nothing deleted", PARENT_PATH);
            return;
        }
        for (File f : files) {
            if (!f.delete()) {
                log.warn("Could not delete {}", f);
            }
        }
    }

    /**
     * Writes every remaining row of {@code rs} to {@code writer} as one '|'-separated line
     * terminated by '\n'. SQL NULL strings and a NULL {@code date_of_birth} are written as
     * the text {@code null}.
     *
     * <p>Each row is emitted with a single {@code write} call while holding the writer's
     * monitor, so several threads may share one writer without mixing partial rows.
     *
     * @param writer destination; neither flushed nor closed here
     * @param rs     result set positioned before the first row to export
     */
    private static void writeToFile(BufferedWriter writer, ResultSet rs) throws SQLException, IOException {
        StringBuilder line = new StringBuilder(512);
        while (rs.next()) {
            line.setLength(0);
            line.append(rs.getString("first_name")).append('|');
            line.append(rs.getString("last_name")).append('|');
            // getDate returns null for SQL NULL; guard it instead of failing with an NPE.
            Date dob = rs.getDate("date_of_birth");
            LocalDate birthDate = dob == null ? null : dob.toLocalDate();
            line.append(birthDate).append('|');
            line.append(rs.getString("gender")).append('|');
            line.append(rs.getString("email")).append('|');
            line.append(rs.getString("phone_number")).append('|');
            line.append(rs.getString("address")).append('|');
            line.append(rs.getString("city")).append('|');
            line.append(rs.getString("state")).append('|');
            line.append(rs.getString("zip_code")).append('|');
            line.append(rs.getString("country")).append('|');
            line.append(rs.getString("nationality")).append('|');
            line.append(rs.getString("religion")).append('|');
            line.append(rs.getString("emergency_contact_name")).append('|');
            line.append(rs.getString("emergency_contact_phone_number")).append('|');
            line.append(rs.getString("guardian_name")).append('|');
            line.append(rs.getString("guardian_phone_number")).append('|');
            line.append(rs.getString("high_school_name")).append('|');
            line.append(rs.getDouble("high_school_gpa")).append('|');
            line.append(rs.getInt("high_school_graduation_year")).append('|');
            line.append(rs.getString("major")).append('|');
            line.append(rs.getString("degree_level")).append('|');
            line.append(rs.getString("enrollment_status")).append('\n');
            String row = line.toString();
            synchronized (writer) {
                writer.write(row);
            }
        }
    }

    /**
     * Returns the whole seconds elapsed since {@code start}.
     *
     * @param start a timestamp obtained from {@link System#currentTimeMillis()}
     */
    private static long getSeconds(long start) {
        return Duration.ofMillis(System.currentTimeMillis() - start).getSeconds();
    }
}