from urllib.parse import quote

import duckdb
import pandas as pd
from sqlalchemy import create_engine
# Export one table from a MySQL or PostgreSQL database to a Parquet file:
# pandas reads the rows through SQLAlchemy, then an in-memory DuckDB database
# writes them out as 'output.parquet' in the current working directory.

# Connection parameters for the source MySQL or PostgreSQL database.
# Give the raw (unescaped) user name and password; they are encoded below.
db_type = 'mysql'  # or 'postgresql'
user = 'your_username'
password = 'your_password'
host = 'your_host'
port = 'your_port'
database = 'your_database'
table_name = 'your_table'

# Create the SQLAlchemy engine.
# The user name and password are percent-encoded (safe='' also encodes '/')
# so that characters such as '@', ':', '/' or '%' in the credentials cannot
# be misread as URL delimiters.
credentials = f"{quote(user, safe='')}:{quote(password, safe='')}"
if db_type == 'mysql':
    engine = create_engine(
        f'mysql+pymysql://{credentials}@{host}:{port}/{database}'
    )
else:  # postgresql
    engine = create_engine(
        f'postgresql+psycopg2://{credentials}@{host}:{port}/{database}'
    )

# Read the whole table from MySQL/PostgreSQL into a DataFrame.
# NOTE: table_name is interpolated into the SQL text as-is, so it must come
# from trusted configuration, never from user input.
try:
    with engine.connect() as conn:
        query = f'SELECT * FROM {table_name}'
        df = pd.read_sql(query, conn)
finally:
    # Release the pooled connections once the one-off read is finished,
    # even when the read fails.
    engine.dispose()

# Use DuckDB to write the data as Parquet.
# `FROM df` refers to the pandas DataFrame bound to the Python variable `df`
# above; no DuckDB table of that name is created beforehand.
con = duckdb.connect(database=':memory:')
con.execute(f'CREATE TABLE {table_name} AS SELECT * FROM df')
con.execute(f"COPY {table_name} TO 'output.parquet' (FORMAT 'parquet')")
print("数据已成功导出为 Parquet 格式")