org.apache.poi
poi-ooxml
5.2.1
org.apache.poi
poi
5.2.1
jxl
jxl
1.0
commons-io
commons-io
2.11.0
注意:当前excel和commons-io版本都是较较新版本,而commons-io在spark的jars安装目录下也在commons-io的包,如版本冲突,找不到org\apache\commons\io\output\ByteArrayOutputStream.class。如果spark的是2.4或者更低版本,则找不到org\apache\commons\io\output\UnsynchronizedByteArrayOutputStream.class,请同步spark的版本的当前应用包的即可!
当前是使用spark的本地聚集方法,Iterator
/**
* 生成文件导出
*
* @param sparkSession sparkSession
* @param paramsMap 参数
*/
public static void sqlAccessFileExport(SparkSession sparkSession, Map paramsMap) {
Long downloadTaskId = MapParamUtil.getLongValue(paramsMap, "downloadTaskId");
String taskName = MapParamUtil.getStringValue(paramsMap, "taskName");
String fileType = MapParamUtil.getStringValue(paramsMap, "fileType");
String waterMark = MapParamUtil.getStringValue(paramsMap, "waterMark");
String tmpDir = MapParamUtil.getStringValue(paramsMap, "tmpDir");
String receiveClient = MapParamUtil.getStringValue(paramsMap, "receiveClient");
Map metaData = (Map) paramsMap.get("metaData");
// 文件名称
String fileName = UUID.randomUUID().toString().concat(".").concat(fileType);
if (!tmpDir.endsWith("/")) {
tmpDir = tmpDir.concat("/");
}
String fileLocation = tmpDir.concat(fileName);
Map returnMap = new HashMap(10);
returnMap.put("downloadTaskId", downloadTaskId);
returnMap.put("method", "sqlAccessFileExportResult");
try {
long start = System.currentTimeMillis();
Dataset dataset = SqlAccessFile.getDataset(sparkSession, paramsMap);
// 如果是excel文件,创建方式不同
if ("xlsx".equalsIgnoreCase(fileType) || "xls".equalsIgnoreCase(fileType)) {
SqlAccessFile.createExcel(dataset, fileLocation, taskName, waterMark, metaData);
}
else {
// 创建csv文件
SqlAccessFile.createCsvOrTxt(dataset, fileLocation, metaData);
}
long end = System.currentTimeMillis();
returnMap.put("resultCode", KeyValues.SUCCESS);
returnMap.put("resultDesc", "PTO label run success:" + (end - start) + "ms");
returnMap.put("total", dataset.count());
returnMap.put("fileLocation", fileLocation);
SendMessageUtil.sendMessage(receiveClient, returnMap);
}
catch (Exception e) {
logger.error(e.getMessage(), e);
returnMap.put("method", "sqlAccessFileExportResult");
returnMap.put("resultCode", KeyValues.FAIL);
returnMap.put("resultDesc", e.getMessage());
SendMessageUtil.sendMessage(receiveClient, returnMap);
}
}
/**
* 获取数据集
*
* @param sparkSession sparkSession
* @param paramsMap 参数
* @return Dataset
*/
private static Dataset getDataset(SparkSession sparkSession, Map paramsMap) {
String datasourceType = MapParamUtil.getStringValue(paramsMap, "datasourceType");
String url = MapParamUtil.getStringValue(paramsMap, "url");
String username = MapParamUtil.getStringValue(paramsMap, "username");
String password = MapParamUtil.getStringValue(paramsMap, "password");
String driver = MapParamUtil.getStringValue(paramsMap, "driver");
String sql = MapParamUtil.getStringValue(paramsMap, "sql");
Long downloadTaskId = MapParamUtil.getLongValue(paramsMap, "downloadTaskId");
if ("hive".equalsIgnoreCase(datasourceType)) {
Dataset dataset = sparkSession.sql(sql);
return dataset;
}
else {
Dataset dataset = sparkSession.read().format("jdbc")
// 地址
.option("url", url)
// 用户名
.option("user", username)
// 密码
.option("password", new DesCryptUtil(DesCryptUtil.simpleKey).decrypt(password))
// 驱动
.option("driver", driver)
// 生成临时表查询逻辑中,查询逻辑
.option("dbtable", "(" + sql + ") as temp_" + downloadTaskId).load();
return dataset;
}
}
/**
* 创建excel文件
*
* @param dataset 数据集
* @param fileLocation 文件路径
* @param metaData 元数据翻译
*/
private static void createExcel(Dataset dataset, String fileLocation, String taskName, String waterMark,
Map metaData) {
// 自动关闭流
try (Workbook workbook = new XSSFWorkbook();
FileOutputStream fileOutputStream = new FileOutputStream(fileLocation);) {
CellStyle headStyle = workbook.createCellStyle();
headStyle.setAlignment(HorizontalAlignment.CENTER);
headStyle.setVerticalAlignment(VerticalAlignment.CENTER);
headStyle.setFillPattern(FillPatternType.SOLID_FOREGROUND);
headStyle.setFillForegroundColor(IndexedColors.GREY_25_PERCENT.getIndex());
// 创建工作薄,写入列头
Sheet sheet = workbook.createSheet(taskName);
org.apache.poi.ss.usermodel.Row excelRow = sheet.createRow(0);
String[] headersNames = dataset.columns();
for (int i = 0; i < headersNames.length; i++) {
Cell cell = excelRow.createCell(i);
String headersName = headersNames[i];
cell.setCellValue(MapParamUtil.getStringValue(metaData, headersName, headersName));
}
// 抽样数据到本地
Iterator localIterator = dataset.toLocalIterator();
while (localIterator.hasNext()) {
Row row = localIterator.next();
excelRow = sheet.createRow(sheet.getLastRowNum() + 1);
for (int i = 0; i < headersNames.length; i++) {
Cell cell = excelRow.createCell(i);
cell.setCellValue(String.valueOf(row.get(i)));
}
}
// 水印输出处理
if (StringUtil.isNotEmpty(waterMark)) {
ExcelWaterRemarkUtils.painWaterMarkToWorld((XSSFWorkbook) workbook, waterMark);
}
// 生成文件
workbook.write(fileOutputStream);
fileOutputStream.flush();
}
catch (Exception e) {
logger.error(e.getMessage(), e);
throw new SparkException(e.getMessage(), e);
}
}
/**
* 生成csv文件或者txt
*
* @param dataset 数据集
* @param fileLocation 文件路径
* @param metaData 元数据翻译
*/
private static void createCsvOrTxt(Dataset dataset, String fileLocation, Map metaData) {
try (FileOutputStream fileOutputStream = new FileOutputStream(fileLocation);
OutputStreamWriter outputStreamWriter = new OutputStreamWriter(fileOutputStream, "UTF-8");
BufferedWriter bufferedWriter = new BufferedWriter(outputStreamWriter);) {
// 获取列头,进行元数据翻译操作
String[] columns = dataset.columns();
List headersNames = new ArrayList<>(20);
for (int i = 0; i < columns.length; i++) {
String columnCode = columns[i];
headersNames.add(MapParamUtil.getStringValue(metaData, columnCode, columnCode));
}
// 写入列头,用逗号分隔
bufferedWriter.write(StringUtil.join(headersNames, ","));
bufferedWriter.newLine();
// 抽样数据到本地
Iterator localIterator = dataset.toLocalIterator();
while (localIterator.hasNext()) {
Row row = localIterator.next();
// 竖线分隔
String mkString = row.mkString(",");
bufferedWriter.write(mkString);
bufferedWriter.newLine();
}
bufferedWriter.flush();
}
catch (Exception e) {
logger.error(e.getMessage(), e);
throw new SparkException(e.getMessage(), e);
}
}
{
"method": "sqlAccessFileExport",
"appName": "xxx分析_develop_6924_2_20230420",
"receiveClient": "http://xxx.xx.xx.xxx:8559/dataservice/",
"url": "jdbc:postgresql://xxx.xx.xx.xx:5432/bigdata?gssEncMode\u003ddisable\u0026reWriteBatchedInserts\u003dtrue",
"sql": "select product_name,charge_zy_sum,calculate979,product_name_count from smart_test.app_1681284022118 limit 100",
"downloadTaskId": 42,
"datasourceType": "gp",
"tmpDir": "/home/xxxxx/xxxx/xxxxx/shuxkaiftp",
"metaData": {
"charge_zy_sum": "主营收入(万)",
"product_name_count": "产品名称",
"calculate979": "计算字段979",
"product_name": "产品名称(产品维度)"
},
"password": "2047d192e697a909",
"driver": "org.postgresql.Driver",
"taskName": "xxx分析_develop_6924_2_20230420",
"waterMark": "xxxx分析-(xxxuser) 2023-04-29 23:06:02",
"fileType": "xlsx",
"username": "gpxxxx"
}