1. Define the MySQL configuration
from sqlalchemy import create_engine

# MySQL connection settings shared by the Spark JDBC reader/writer and SQLAlchemy.
self.db_config = {
    "driver": "com.mysql.jdbc.Driver",  # use "com.mysql.cj.jdbc.Driver" with Connector/J 8+
    "user": "poctest",
    "password": "123",
    "host": "0.0.0.0",
    "port": "3306",
    "database": "db"
}
# Build the JDBC URL from the values above; the placeholders must actually be
# filled in, otherwise Spark receives the literal string "{host}:{port}".
self.db_config["url"] = "jdbc:mysql://{host}:{port}/{database}".format(
    host=self.db_config["host"],
    port=self.db_config["port"],
    database=self.db_config["database"]
)
# SQLAlchemy engine (pymysql driver) for plain-SQL / pandas access.
self.sql_engine = create_engine(
    "mysql+pymysql://{user}:{pwd}@{host}:{port}/{database}".format(
        user=self.db_config["user"],
        pwd=self.db_config["password"],
        host=self.db_config["host"],
        port=self.db_config["port"],
        database=self.db_config["database"]
    )
)
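To check the configuration outside of Spark, the SQLAlchemy engine can be exercised directly, for example through pandas. This is a minimal sketch: the Config class name and the test query are illustrative assumptions, not part of the original code.

import pandas as pd

# Hypothetical class wrapping the configuration shown above.
cfg = Config()

# Run a trivial query through the SQLAlchemy/pymysql engine to verify connectivity.
sample = pd.read_sql("SELECT 1 AS ok", con=cfg.sql_engine)
print(sample)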
2. Read a MySQL table with PySpark
def load_table_myspark(sparkSession, comm, table_name):
    """
    Load a MySQL table into the program as a Spark DataFrame.
    :param sparkSession: an active SparkSession
    :param comm: the common configuration object that exposes db_config
    :param table_name: table to query; either a plain table name or a derived
                       table such as "(select * from t) as t"
    :return: Spark DataFrame, or None if the load fails
    """
    df = None
    db_config = comm.db_config
    try:
        df = sparkSession.read.format("jdbc").options(
            url=db_config["url"],
            driver=db_config["driver"],
            dbtable=table_name,
            user=db_config["user"],
            password=db_config["password"]
        ).load()
    except Exception as e:
        print("----- Failed to load data, exception:", e)
    return df
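A minimal sketch of how this loader might be called. The spark.jars path, the Config class, and the table names are assumptions for illustration; the MySQL Connector/J jar just has to be on Spark's classpath for the JDBC source to work.

from pyspark.sql import SparkSession

# The jar path and app name below are placeholders.
spark = SparkSession.builder \
    .appName("mysql-demo") \
    .config("spark.jars", "/path/to/mysql-connector-java.jar") \
    .getOrCreate()

comm = Config()  # hypothetical instance of the configuration class from step 1

# Load a whole table...
df_users = load_table_myspark(spark, comm, "users")

# ...or push a filter down to MySQL by passing a derived table.
df_active = load_table_myspark(spark, comm, "(select * from users where status = 1) as t")
if df_active is not None:
    df_active.show(5)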
3. Write a MySQL table with PySpark
def save_table_myspark(sparkSession, comm, table_name, df, mode="append"):
    """
    Write a Spark DataFrame into a MySQL table.
    :param sparkSession: an active SparkSession
    :param comm: the common configuration object that exposes db_config
    :param table_name: target table name
    :param df: Spark DataFrame to write
    :param mode: save mode, "append" by default
    :return: True if the write succeeded, otherwise False
    """
    db_config = comm.db_config
    flag = False
    try:
        df.write.mode(mode) \
            .format("jdbc") \
            .option("url", db_config["url"]) \
            .option("driver", db_config["driver"]) \
            .option("dbtable", table_name) \
            .option("user", db_config["user"]) \
            .option("password", db_config["password"]) \
            .save()
        flag = True
    except Exception as e:
        print("----- Failed to write data, exception:", e)
    return flag
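A matching sketch for the writer, continuing from the read example above; result_df and "result_table" are placeholder names. Note that Spark's JDBC "overwrite" mode drops and recreates the target table unless the truncate option is also set.

# Derive something to persist (placeholder aggregation).
result_df = df_active.groupBy("status").count()

# Append rows to an existing table (default mode).
ok = save_table_myspark(spark, comm, "result_table", result_df)

# Or replace the table contents instead.
ok = save_table_myspark(spark, comm, "result_table", result_df, mode="overwrite")
print("write succeeded:", ok)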