spark-submit --master local[*] --packages org.postgresql:postgresql:42.2.24 demo8-pgsql-jdbc.py
spark-submit --master yarn --packages org.postgresql:postgresql:42.2.24 demo8-pgsql-jdbc.py
hadoop fs -mkdir -p /aaa/bbb
This happens because the NameNode is in safe mode; you can force it to leave safe mode with the following commands:
hdfs dfsadmin -safemode leave
hdfs dfsadmin -safemode get
If safe mode cannot be turned off, restart Hadoop and then try leaving safe mode again.
spark-submit --master local --packages com.databricks:spark-xml_2.12:0.16.0,org.postgresql:postgresql:42.2.24 --py-files submit.zip csv_jdbc_two.py
import json
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, create_map, udf, lit, array, to_json
from pyspark.sql.types import FloatType, StructType, StructField, StringType, IntegerType
from derivedParameter._VRTG_STD import _VRTG
import time
import os
from utils.index import getValueFromXml, getKeyValue, getKeyValueTuple
# Point the Spark/YARN client at the Hadoop cluster configuration directory
# so HDFS URIs and `--master yarn` submission resolve correctly.
os.environ["HADOOP_CONF_DIR"]="/bigdata/server/hadoop/etc/hadoop"
os.environ["YARN_CONF_DIR"]="/bigdata/server/hadoop/etc/hadoop"
if __name__ == '__main__':
    # Submit with:
    # spark-submit --master local --packages com.databricks:spark-xml_2.12:0.16.0,org.postgresql:postgresql:42.2.24 --py-files submit.zip csv_jdbc_two.py

    # Build the session. The connector packages are also pinned here so a run
    # without the --packages flag still resolves spark-xml and the
    # PostgreSQL JDBC driver.
    spark = (
        SparkSession.builder
        .appName("测试生成导出参数")
        .config("spark.jars.packages",
                "com.databricks:spark-xml_2.12:0.16.0,org.postgresql:postgresql:42.2.24")
        .getOrCreate()
    )

    # Key-value XML sources on HDFS; each entry pairs a kyvCode with its path.
    # NOTE(review): the second URI uses host "ecs-0002" while every other HDFS
    # URI in this script uses "ecs-qar1-0002" — confirm this is not a typo.
    key_value_sources = [
        {
            "kyvCode": "#KP_VRTG_BNDRY",
            "path": "hdfs://ecs-qar1-0002:8020/qar/keyValues/KP_VRTG_BNDRY_STD.xml",
        },
        {
            "kyvCode": "#KP_VRTG_PCT",
            "path": "hdfs://ecs-0002:8020/qar/keyValues/KP_VRTG_PCT_STD.xml",
        },
    ]
    key_value_tuple = getKeyValueTuple(spark=spark, dictList=key_value_sources)

    # Load the raw QAR CSV from HDFS.
    # NOTE(review): "comment" is set to the same character as the separator,
    # so any line whose first field is empty (i.e. starts with ",") is
    # silently dropped — confirm this is intentional.
    raw_df = (
        spark.read
        .option("sep", ",")
        .option("header", True)
        .option("encoding", "utf-8")
        .option("comment", ",")
        .csv('hdfs://ecs-qar1-0002:8020/qar/B5671_20230504064024.qar.csv')
    )

    # Attach an integer row id and the key-value map needed by the UDF.
    enriched_df = raw_df \
        .withColumn("rownumid", raw_df['rownum'].cast(IntegerType())) \
        .withColumn("keyValue", create_map(*key_value_tuple))

    # Collect the columns the derived-parameter UDF needs and ship the whole
    # set to executors as a single JSON string literal.
    collected_rows = enriched_df.rdd.map(
        lambda x: {'rownumid': x.rownumid, 'aVRTG': x.aVRTG,
                   'ASCV': x.ASCV, 'aSELASPD1': x.aSELASPD1}).collect()
    vrtg_json = json.dumps(collected_rows)

    vrtg_udf = udf(_VRTG, StringType())
    result_df = enriched_df.withColumn(
        "_VRTG",
        vrtg_udf(enriched_df['rownumid'], lit(vrtg_json), enriched_df['keyValue']))
    result_df = result_df.drop("keyValue").select("rownumid", "_VRTG")

    # JDBC sink configuration for the PostgreSQL target table.
    jdbc_options = {
        "url": "jdbc:postgresql://ecs-qar1-0002:5432/qardb",
        "driver": "org.postgresql.Driver",
        "dbtable": "qar_demo",
        "user": "qardb",
        "password": "qaruser",
        "createTableColumnTypes": "rownumid numeric(20, 0), _VRTG varchar(120)",
    }
    # Write the result into PostgreSQL, replacing any existing table content.
    result_df.write.format("jdbc").options(**jdbc_options).mode("overwrite").save()