Python3在hadoop集群上进行sqoop迁移任务


废话不多说,直接呈现脚本(从mysql迁移单表到hive)

#!/bin/python3

"""
创建人:howieshi
创建时间:2019-12-12 19:08:00
"""

import pandas as pd
import subprocess

def run_cmd(cmd):
    """Run *cmd* through the shell and echo its combined stdout/stderr.

    Args:
        cmd: Shell command line to execute on the cluster node.

    Returns:
        Tuple ``(status, output)`` where ``status`` is the process exit
        code and ``output`` is the captured stdout+stderr text, so callers
        can detect command failures instead of silently ignoring them.
    """
    # getstatusoutput runs via the shell and merges stderr into stdout.
    status, result = subprocess.getstatusoutput(cmd)
    print(result)
    return status, result


def sqoop_mysql_to_hive(conf):
    """Migrate one MySQL table into Hive via ``sqoop import``.

    Reads connection/table settings from the *conf* dict (keys: DB_HOST,
    USER, PASS, SOURCE_DATABASE, SOURCE_TABLE, TARGET_DATABASE,
    TARGET_TABLE), creates the target Hive database if missing, drops any
    pre-existing target table, then shells out to sqoop for the import.
    """
    DB_HOST = conf['DB_HOST']
    USER = conf['USER']
    PASS = conf['PASS']
    SOURCE_DATABASE = conf['SOURCE_DATABASE']
    SOURCE_TABLE = conf['SOURCE_TABLE']
    TARGET_DATABASE = conf['TARGET_DATABASE']
    TARGET_TABLE = conf['TARGET_TABLE']

    # Assemble the sqoop command. The trailing backslash-newlines inside the
    # f-string collapse the literal into one long shell command line.
    # NOTE(review): the JDBC query string uses "&&" between parameters —
    # JDBC URLs conventionally separate parameters with a single "&"; confirm
    # the driver tolerates this.
    # NOTE(review): --target-dir is given "db.table" rather than an HDFS
    # path; verify this is intentional and works on this cluster.
    sqoop_cmd = f"""
    sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true \
    --connect "jdbc:mysql://{DB_HOST}/{SOURCE_DATABASE}?tinyInt1isBit=false&&characterEncoding=utf8" \
    --username "{USER}" \
    --password "{PASS}" \
    --table "{SOURCE_TABLE}" \
    --m 1 \
    --hive-import \
    --hive-overwrite \
    --compress --hive-drop-import-delims \
    --delete-target-dir \
    --target-dir {TARGET_DATABASE}.{TARGET_TABLE} \
    --hive-table {TARGET_DATABASE}.{TARGET_TABLE} 2>&1
    """

    create_db = f"CREATE DATABASE IF NOT EXISTS {TARGET_DATABASE};"      # Create the target database if it does not exist yet.
    create_target_db = run_cmd(f'hive -e "{create_db}"')

    hive_sql = f"DROP TABLE IF EXISTS {TARGET_DATABASE}.{TARGET_TABLE};" # Drop the target table if it already exists.
    drop_target_table = run_cmd(f'hive -e "{hive_sql}"')
    
    mysql_to_hive = run_cmd(sqoop_cmd)                                   # Run the sqoop migration on the cluster.
    # NOTE(review): run_cmd's return value is never checked, so this success
    # message prints even when the sqoop/hive commands failed.
    print(f"{TARGET_DATABASE}.{TARGET_TABLE}迁移成功!!!!!!")

# Placeholder connection/table settings for the migration job — replace
# each value with the real MySQL/Hive details before running.
db_conf = {
    'DB_HOST': 'mysql的host',
    'USER': 'mysql的账号',
    'PASS': 'mysql的密码',
    'SOURCE_DATABASE': 'mysql的源库',
    'SOURCE_TABLE': 'mysql的源表',
    'TARGET_DATABASE': 'hive的目标库',
    'TARGET_TABLE': 'hive的目标表',
}

if __name__ == '__main__':
    # Kick off the migration with the configured settings; surface any
    # failure as a printed message rather than a raw traceback.
    try:
        sqoop_mysql_to_hive(db_conf)
    except Exception as error:
        print(str(error))
        print("任务执行失败!!!!!!")

你可能感兴趣的:(python,sqoop,hive,mysql)