【Hadoop-COS】Getting file counts and total data size from COS object storage via the Java SDK

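The utility class below reads two local text files (a list of sample_date values to measure, and a list of Hive partition paths), then uses the Tencent Cloud COS Java SDK to list every object under each matching partition prefix. It pages through results with the marker/isTruncated mechanism and writes one line per date with the total file count and data size in bytes.
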
package gaei.cn.x5l.x5lhive2cos.utils;

import com.qcloud.cos.COSClient;
import com.qcloud.cos.ClientConfig;
import com.qcloud.cos.auth.BasicCOSCredentials;
import com.qcloud.cos.auth.COSCredentials;
import com.qcloud.cos.exception.CosClientException;
import com.qcloud.cos.exception.CosServiceException;
import com.qcloud.cos.http.HttpProtocol;
import com.qcloud.cos.model.COSObjectSummary;
import com.qcloud.cos.model.ListObjectsRequest;
import com.qcloud.cos.model.ObjectListing;
import com.qcloud.cos.region.Region;

import java.io.*;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;

public class CosFileCountAndDataSize {
    // Hive database/table whose partitions are being measured
    static String dbName = "database";
    static String tableName = "table_name";
    // COS key prefix that the Hive warehouse lives under
    static String preDir = "/user/x5l/hive/";
    static String sufFile = ".txt";
    static String separator = "/";
    // Local directory holding the input lists and the output report
    static String localPreDir = "C:/Users/MECHREVO/Desktop/新建文件夹/history_partitions/";

    // MySQL connection info (declared here but not used in main() below)
    private final static String MYSQL_URL = "jdbc:mysql://10.1.1.1:3316/test?useSSL=false";
    private final static String MYSQL_USERNAME = "root";
    private final static String MYSQL_PASSWORD = "123456";
    // MySQL target table
    private final static String MYSQL_TABLE = "`test`.`test`";

    private static Connection conn = null;
    private static ResultSet rs = null;
    private static PreparedStatement ps = null;

    private final static String BUCKET_NAME = "pro-x5l-1111111111";
    private final static String SECRET_ID = "****************************";
    private final static String SECRET_KEY = "****************************";
    private final static String REGION = "ap-guangzhou";
    // Page size for listObjects; a single request returns at most 1000 keys regardless
    private final static int MAXKEYS = 10000;
    // Encoding of the local input files
    private final static String CHART_SET = "GBK";

    static COSClient cosClient = null;
    static ListObjectsRequest listObjectsRequest = null;

    public static void main(String[] args) {

        BufferedReader reader = null;
        FileWriter fw = null;
        String partitionDir = null;
        String sampleDate = null;
        long startTime = 0L;
        List<String> sampleDataList = new ArrayList<>();
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(localPreDir + "需要统计的sample_date" + separator + tableName + sufFile)), CHART_SET));
            while ((sampleDate = reader.readLine()) != null) {
                sampleDataList.add(sampleDate);
            }
            reader.close(); // done with the date list; the same variable is reused below
            System.out.println(sampleDataList);
            fw = new FileWriter(new File(localPreDir + "cos数据量统计" + separator + tableName + sufFile));
            // 1. Initialize user credentials (secretId, secretKey).
            // The prefix restricts the listing to object keys that start with it
            // (e.g. every absolute path in the bucket under /user/x5l/hive/ods_x5l/ods_hive_gb_and_bms_gb/);
            // all file information below is relative to the directory given there.
            COSCredentials cred = new BasicCOSCredentials(SECRET_ID, SECRET_KEY);
            // 2. Set the bucket's region.
            Region region = new Region(REGION);
            ClientConfig clientConfig = new ClientConfig(region);
            // HTTPS is recommended here (and has been the default since SDK version 5.6.54).
            clientConfig.setHttpProtocol(HttpProtocol.https);
            // 3. Create the COS client.
            cosClient = new COSClient(cred, clientConfig);
            // 4. Walk the bucket directories.
            listObjectsRequest = new ListObjectsRequest();
            // Set the bucket name.
            listObjectsRequest.setBucketName(BUCKET_NAME);
            for (String date : sampleDataList) {
                startTime = System.currentTimeMillis();
                long fileSize = 0L;
                // File counter; the directory placeholder object is itself returned by the listing, so start at -1
                int count = -1;
                reader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(localPreDir + "分区详情" + separator + tableName + sufFile)), CHART_SET));
                while ((partitionDir = reader.readLine()) != null) {
                    if (partitionDir.contains(date)) {
                        // e.g. /user/x5l/hive/ods_x5l/ods_original_data/sample_date=20220101/datatype=2/sourcetopic=can_packet/partition_name=0/
                        listObjectsRequest.setPrefix(preDir + dbName + separator + tableName + separator + partitionDir + separator);
//                        System.out.println(preDir + dbName + separator + tableName + separator + partitionDir + separator);
                        // The delimiter: "/" lists only the objects directly under the current directory; empty lists every object recursively
                        listObjectsRequest.setDelimiter(separator);
                        // Max keys per request; a single listObjects call returns at most 1000 keys, so the marker loop below pages through the rest
                        listObjectsRequest.setMaxKeys(MAXKEYS);
                        ObjectListing objectListing = null;
                        do {
                            try {
                                objectListing = cosClient.listObjects(listObjectsRequest);
                            } catch (CosServiceException e) {
                                e.printStackTrace();
                                return;
                            } catch (CosClientException e) {
                                e.printStackTrace();
                                return;
                            }
                            // The object summaries are the objects listed on this page (at most 1000)
                            List<COSObjectSummary> cosObjectSummaries = objectListing.getObjectSummaries();
//                            System.out.println("objects on this page: " + cosObjectSummaries.size());
                            for (COSObjectSummary cosObjectSummary : cosObjectSummaries) {
                                // Accumulate the object's size in bytes
                                fileSize += cosObjectSummary.getSize();
                                count++;
//                                System.out.println(date + ": object " + count);
                            }
                            // nextMarker is where the next page of results resumes
                            String nextMarker = objectListing.getNextMarker();
                            listObjectsRequest.setMarker(nextMarker);
                        } while (objectListing.isTruncated());
                    }
                }
                reader.close(); // close this date's partition-list reader before the next iteration
                fw.write(date + separator + count + separator + fileSize + "\n");
                fw.flush();
                System.out.println("计算日期:" + date + " - 文件总数:" + count + " - 数据量大小:" + fileSize + " Bytes " + "(" + new DecimalFormat("0.00").format((double) fileSize / 1024 / 1024 / 1024) + " Gb - " + new DecimalFormat("0.00").format((double) fileSize / 1024 / 1024 / 1024 / 1024) + " Tb" + ")" + " - 用时:" + new DecimalFormat("0.00").format((double) (System.currentTimeMillis() - startTime) / 1000) + "s");
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (reader != null) {
                    reader.close();
                    System.out.println("reader closed");
                }
                if (fw != null) {
                    fw.close();
                    System.out.println("fw closed");
                }
                if (cosClient != null) { // guard against an exception thrown before the client was created
                    cosClient.shutdown();
                    System.out.println("cosClient shut down");
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
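
If you only need the core technique, here is a minimal, self-contained sketch of the same prefix-plus-marker pagination. The class name, bucket, credentials, and the sumPrefix helper are illustrative placeholders, not part of the tool above:

import com.qcloud.cos.COSClient;
import com.qcloud.cos.ClientConfig;
import com.qcloud.cos.auth.BasicCOSCredentials;
import com.qcloud.cos.model.COSObjectSummary;
import com.qcloud.cos.model.ListObjectsRequest;
import com.qcloud.cos.model.ObjectListing;
import com.qcloud.cos.region.Region;

public class CosPrefixSummer {

    // Returns {objectCount, totalBytes} for every object whose key starts with prefix.
    static long[] sumPrefix(COSClient client, String bucket, String prefix) {
        ListObjectsRequest req = new ListObjectsRequest();
        req.setBucketName(bucket);
        req.setPrefix(prefix);   // no delimiter: nested "directories" are included
        req.setMaxKeys(1000);    // one page is capped at 1000 keys by the service
        long count = 0L;
        long bytes = 0L;
        ObjectListing page;
        do {
            page = client.listObjects(req);
            for (COSObjectSummary s : page.getObjectSummaries()) {
                count++;
                bytes += s.getSize();
            }
            // Resume the next page where this one ended
            req.setMarker(page.getNextMarker());
        } while (page.isTruncated());
        return new long[]{count, bytes};
    }

    public static void main(String[] args) {
        // Placeholder credentials, bucket, and prefix; substitute your own.
        COSClient client = new COSClient(
                new BasicCOSCredentials("SECRET_ID", "SECRET_KEY"),
                new ClientConfig(new Region("ap-guangzhou")));
        try {
            long[] r = sumPrefix(client, "example-bucket-1250000000", "/user/x5l/hive/db/table/");
            System.out.println("files=" + r[0] + " bytes=" + r[1]);
        } finally {
            client.shutdown();
        }
    }
}

Unlike the tool above, the sketch sets no delimiter, so a single call covers nested partition directories; add req.setDelimiter("/") to restrict the listing to one level, as the original does.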
