记-从Doris大表整成csv文件到本地的过程

任务明细

任务是把服务器上一张大表的数据导出并整理成csv文件,再同步到本地,表数据量约1.7亿行。

跑数

因为连接公司内网不稳定,故使用公司服务器跑数
因为数据量和csv大小不成线性关系,更像指数递增,故切分为小文件来做

代码

java代码如下

import java.io.FileWriter;
import java.io.IOException;
import java.sql.*;

/**
 * Exports rows of the {@code hbyc_ddxx1} table to a series of CSV files, one file per
 * query page of {@link #PAGE_SIZE} rows.
 *
 * <p>Each page is fetched with an ordered {@code LIMIT offset, count} query and written to
 * {@code data<N>.csv} with a header line. The loop stops at the first page that returns no
 * rows.
 */
public class hello1 {
    private static final int PAGE_SIZE = 1000000; // rows fetched per query and written per file

    /** Header line written at the top of every CSV file. */
    private static final String CSV_HEADER = "cust_id,data_mth,prd_id,xq_cnt,cl_cnt,dd_cnt";

    /**
     * Runs the export. Connection settings, the starting page and the output directory are
     * hard-coded below.
     *
     * <p>SQL and I/O failures are printed to stderr and end the export; files completed
     * before the failure are left in place.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String jdbcUrl = "jdbc:mysql://A.B.C.D:9034/aps_hbyc";
        String username = "root";
        String password = "123456";

        String sql = "SELECT * FROM hbyc_ddxx1  ORDER BY cust_id ,data_mth,prd_id ASC LIMIT ?, ?";

        try (Connection connection = DriverManager.getConnection(jdbcUrl, username, password);
             PreparedStatement statement = connection.prepareStatement(sql)) {

            // Hard-coded starting point: page 30 is written to data31.csv.
            // NOTE(review): presumably a resume point from an earlier, interrupted run;
            // reset both to 0 and 1 for a full export -- confirm.
            int page = 30;
            int fileCount = 31;

            while (true) {
                // Compute the offset in long so that large page numbers cannot overflow int.
                long start = (long) page * PAGE_SIZE;
                statement.setLong(1, start);
                statement.setInt(2, PAGE_SIZE);

                try (ResultSet resultSet = statement.executeQuery()) {
                    if (!resultSet.next()) {
                        break; // no more data, stop paging
                    }

                    String fileName = "/Users/songhuaitang/Downloads/result/data" + fileCount + ".csv";

                    // try-with-resources flushes and closes the file even when reading a row
                    // or writing fails part-way through the page.
                    try (FileWriter writer = new FileWriter(fileName)) {
                        writer.append(CSV_HEADER).append("\n");

                        // The cursor is already on the first row, hence do/while.
                        do {
                            writeRow(resultSet, writer);
                        } while (resultSet.next());
                    }

                    fileCount++;
                }

                page++;
            }

            System.out.println("Data exported successfully.");
        } catch (SQLException | IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Appends the current row of {@code row} to {@code out} as one comma-separated line.
     *
     * <p>Values are written verbatim, without CSV quoting or escaping. A SQL NULL in a
     * string column is written as the text {@code null}, and a SQL NULL {@code cust_id}
     * is written as {@code 0}.
     *
     * @param row result set positioned on the row to write
     * @param out destination for the CSV line
     * @throws SQLException if a column cannot be read
     * @throws IOException if writing fails
     */
    private static void writeRow(ResultSet row, Appendable out) throws SQLException, IOException {
        out.append(Long.toString(row.getLong("cust_id")))
                .append(",")
                .append(row.getString("data_mth"))
                .append(",")
                .append(row.getString("prd_id"))
                .append(",")
                .append(row.getString("xq_cnt"))
                .append(",")
                .append(row.getString("cl_cnt"))
                .append(",")
                .append(row.getString("dd_cnt"))
                .append("\n");
    }
}

代码输出的结果为177个csv文件,每个文件100万行数据
然后打包输出到本地
在本地电脑上打开终端(Terminal)
输入

cd 到目录下执行
awk '(NR == 1) || (FNR > 1)' data*.csv > merged.csv

然后就得到一个大的csv文件

文件验证

数据量验证
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

/**
 * Row-count check for the merged CSV file: prints how many lines the file contains.
 */
public class datarollback {
    /**
     * Counts the lines of the hard-coded CSV file and prints the total.
     *
     * <p>Every line is counted, so a header line, if the file has one, is included in the
     * printed number. An I/O failure while opening the file is printed to stderr.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        String csvFile = "/Users/songhuaitang/Downloads/root/result/merged.csv";

        try (BufferedReader br = new BufferedReader(new FileReader(csvFile))) {

            // Keep the count as a long: narrowing to int would silently truncate for files
            // with more than Integer.MAX_VALUE lines. The stream is left sequential because
            // its source is a single sequential reader.
            long rowCount = br.lines().count();

            System.out.println("Total number of data rows: " + rowCount);

        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

文件内容验证

import java.io.BufferedReader;
        import java.io.FileReader;
        import java.io.IOException;

/**
 * Content spot-check for the merged CSV file: prints its leading lines to stdout.
 */
public class csv_print {
    /**
     * Prints up to the first 10 lines of the hard-coded CSV file, followed by the number
     * of lines that were printed. An I/O failure is printed to stderr.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        final String path = "/Users/songhuaitang/Downloads/root/result/merged.csv";
        int printed = 0;

        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            // Read first, then test the limit, so the loop ends on end-of-file or once
            // 10 lines have been printed.
            for (String row = reader.readLine();
                 row != null && printed < 10;
                 row = reader.readLine()) {
                System.out.println(row);
                printed += 1;
            }

            System.out.println("Total number of data rows: " + printed);

        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}

ok

你可能感兴趣的:(java,doris,sql)