import psycopg2
def establish_connection(host, port, dbname, user, password):
    """
    Open a PostgreSQL connection through psycopg2.

    The settings are handed to ``psycopg2.connect`` as keyword arguments
    instead of being interpolated into a DSN string, so values that contain
    spaces, quotes or backslashes (typically passwords) are not mis-parsed.

    :param host: database server host
    :param port: database server port
    :param dbname: name of the database to connect to
    :param user: user name used to authenticate
    :param password: password used to authenticate
    :return: the connection object, or None when connecting fails
        (the error is printed)
    """
    try:
        connect_obj = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password,
        )
    except Exception as e:
        # Callers receive None rather than an exception when connecting fails.
        print(e)
        return None
    return connect_obj
from sqlalchemy import create_engine
def get_engine(host, port, dbname, user, password):
    """
    Build a SQLAlchemy engine (psycopg2 driver) in AUTOCOMMIT mode.

    The user name and password are percent-encoded before being placed in
    the URL, so characters such as ``@``, ``/`` or ``:`` inside a password
    do not corrupt it. Pass the raw, un-encoded credentials.

    :param host: database server host
    :param port: database server port
    :param dbname: name of the database to connect to
    :param user: user name used to authenticate
    :param password: password used to authenticate
    :return: the engine object, or None when engine creation fails
        (the error is printed)
    """
    from urllib.parse import quote

    try:
        # safe='' forces every reserved character (including '/') to be escaped.
        safe_user = quote(str(user), safe='')
        safe_password = quote(str(password), safe='')
        conn_string = (
            f'postgresql+psycopg2://{safe_user}:{safe_password}'
            f'@{host}:{port}/{dbname}'
        )
        engine = create_engine(conn_string, isolation_level="AUTOCOMMIT")
    except Exception as e:
        # Callers receive None rather than an exception on failure.
        print(e)
        return None
    return engine
import pandas as pd
def select_data(sql_command, conn):
    """
    Run a SQL query and load its result into a DataFrame.

    The connection (or engine) is released once the query has finished,
    whether it succeeded or failed, so it must not be reused by the caller
    as an open connection afterwards.

    :param sql_command: SQL query text
    :param conn: database connection object or SQLAlchemy engine
    :return: DataFrame holding the query result; an empty DataFrame when the
        query fails (the error is printed)
    """
    try:
        return pd.read_sql(sql_command, conn)
    except Exception as e:
        print(e)
        return pd.DataFrame()
    finally:
        # Connections expose close(); a SQLAlchemy Engine has no close() and
        # releases its pooled connections through dispose() instead.
        release = getattr(conn, "close", None) or getattr(conn, "dispose", None)
        if release is not None:
            release()
from psycopg2 import extras
def insert_data(sql_command, data, conn):
    """
    Bulk-write the rows of a DataFrame with psycopg2's ``execute_values``.

    The statement is committed on success and rolled back on failure; the
    cursor and the connection are closed in both cases.

    :param sql_command: SQL statement with a single ``%s`` placeholder for
        the rows, for example:
            insert into table_name(column1, column2, column3, ...) values %s
            on conflict(primary_key) do update
            set
                column1 = excluded.column1,
                column2 = excluded.column2,
                column3 = excluded.column3,
                ...
    :param data: DataFrame with the rows to write; its column order must
        match the column order of the insert statement exactly
    :param conn: database connection object
    :return: write status: 1 when the rows were committed, 0 when the write
        failed (the error is printed)
    """
    # One tuple per DataFrame row, in column order.
    rows = [tuple(row) for row in data.to_numpy()]
    status = 0
    curs = conn.cursor()
    try:
        extras.execute_values(curs, sql_command, rows)
        conn.commit()
        status = 1
    except Exception as error:
        print('Error: %s' % error)
        try:
            conn.rollback()
        except Exception as rollback_error:
            # A broken connection can refuse the rollback; still report status 0.
            print('Error: %s' % rollback_error)
    finally:
        # Cleanup is best-effort so that it never masks the write status.
        for resource in (curs, conn):
            try:
                resource.close()
            except Exception as close_error:
                print('Error: %s' % close_error)
    # Returning after (not inside) ``finally`` lets KeyboardInterrupt and
    # SystemExit propagate instead of being silently discarded.
    return status
from io import StringIO
def copy_into_db_v2(save_df, conn, table_name):
    """
    Write a DataFrame into a table through the cursor's ``copy_from``.

    The DataFrame is serialised as tab-separated text without header or
    index and streamed to the table. The transaction is committed on
    success and rolled back on failure; the cursor and the connection are
    closed in both cases.

    :param save_df: DataFrame to store; column names and order must match
        the database table exactly, and the rows must not duplicate data
        already present in the table
    :param conn: database connection object
    :param table_name: name of the target table
    :return: 1 when the data was committed, 0 when the write failed
        (the error is printed)
    """
    signal = 0
    curs = None
    try:
        curs = conn.cursor()
        # Serialise the DataFrame into an in-memory text buffer.
        buffer = StringIO()
        save_df.to_csv(buffer, sep='\t', index=False, header=False)
        buffer.seek(0)  # rewind so copy_from reads from the first row
        # The column list must follow the column order of the table.
        curs.copy_from(buffer, table_name, columns=list(save_df.columns))
        conn.commit()
        signal = 1
    except Exception as e:
        print(e)
        try:
            conn.rollback()
        except Exception as rollback_error:
            # A broken connection can refuse the rollback; still report 0.
            print(rollback_error)
    finally:
        # Cleanup is best-effort so that it never masks the result flag.
        for resource in (curs, conn):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception as close_error:
                print(close_error)
    # Returning after (not inside) ``finally`` lets KeyboardInterrupt and
    # SystemExit propagate instead of being silently discarded.
    return signal
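# Note: the COPY FROM approach is best suited to very large data volumes where the target table is empty or the incoming rows cannot cause primary-key conflicts.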
def execute_sql(sql_command, conn):
    """
    Execute one SQL statement (insert, update or delete) and commit it.

    The statement is rolled back when it fails; the cursor and the
    connection are closed in both cases.

    :param sql_command: complete, executable SQL statement without
        parameters; may be an insert, update or delete
    :param conn: database connection object
    :return: execution status: 1 when the statement was committed, 0 when
        it failed (the error is printed)
    """
    status = 0
    curs = conn.cursor()
    try:
        curs.execute(sql_command)
        # Commit after every execution to keep the database consistent.
        conn.commit()
        status = 1
    except Exception as e:
        print(str(e))
        try:
            conn.rollback()
        except Exception as rollback_error:
            # A broken connection can refuse the rollback; still report status 0.
            print(str(rollback_error))
    finally:
        # Cleanup is best-effort so that it never masks the execution status.
        for resource in (curs, conn):
            try:
                resource.close()
            except Exception as close_error:
                print(str(close_error))
    if status:
        print('successfully execute sql by cursor')
    # Returning after (not inside) ``finally`` lets KeyboardInterrupt and
    # SystemExit propagate instead of being silently discarded.
    return status
class SQLEngine:
    """
    Thin wrapper around a SQLAlchemy engine created in AUTOCOMMIT mode.

    Every method forwards the statement to ``self.engine.execute``.
    """

    # NOTE(review): Engine.execute() is SQLAlchemy's legacy connectionless
    # API (deprecated in 1.4, removed in 2.0) - confirm the SQLAlchemy
    # version this project pins before upgrading.

    def __init__(self, conf):
        """
        Create the engine from a mapping of URL components.

        :param conf: keyword arguments describing the database URL, e.g.
            drivername, username, password, host, port and database
        """
        # Imported here so the class does not rely on a module-level
        # ``import sqlalchemy`` being present.
        import sqlalchemy
        from sqlalchemy.engine.url import URL

        # URL.create() is the public constructor since SQLAlchemy 1.4;
        # older releases only support constructing URL directly.
        make_url = getattr(URL, "create", URL)
        db_string = make_url(**conf)
        self.engine = sqlalchemy.create_engine(
            db_string,
            isolation_level="AUTOCOMMIT",
            connect_args={'connect_timeout': 10},
        )

    def insert(self, sql, data=()):
        """Execute an insert statement with optional parameters."""
        self.engine.execute(sql, data)

    def update(self, sql, data=()):
        """Execute an update statement with optional parameters."""
        self.engine.execute(sql, data)

    def delete(self, sql, data=()):
        """Execute a delete statement with optional parameters."""
        self.engine.execute(sql, data)

    def select(self, sql, data=()):
        """Execute a query and return all rows from ``fetchall()``."""
        return self.engine.execute(sql, data).fetchall()

    def execute(self, sql):
        """Execute a statement without parameters and return the raw result."""
        return self.engine.execute(sql)
# Usage:
# Example: build an SQLEngine from a configuration mapping and run a query.
# The 'xxx' values are placeholders for real connection settings.
db_conf = dict(
    database='xxx',
    username='xxx',
    password='xxx',
    drivername='postgresql',
    host='xxxxx',
    port=5432,
)
engine = SQLEngine(db_conf)
# Query text, spelled out line by line.
sql_command = (
    "\n"
    "select *\n"
    "from\n"
    "table_name\n"
    "limit 200;\n"
)
res = engine.select(sql_command)
print(res)