CentOS 环境下 Python3 使用 impyla 连接 Hive

pip install pure-sasl==0.5.1

pip install thrift_sasl==0.2.1 --no-deps

pip install thrift==0.9.3

pip install bitarray==0.8.3

pip install cython thriftpy==0.3.9

pip install impyla

 

def _error_frame(error):
    """Wrap *error* in the one-row, one-column ``error`` DataFrame used for failures."""
    import pandas as pd

    return pd.DataFrame([str(error)], columns=['error'])


def _parse_properties(raw):
    """Parse a ``key=value&key=value`` string into a dict of strings."""
    parsed = {}
    for pair in raw.split('&'):
        if not pair:
            continue  # tolerate a leading, trailing or doubled '&'
        # Split on the first '=' only, so values may themselves contain '='.
        key, _, value = pair.partition('=')
        parsed[key] = value
    return parsed


def _first_text(df, column):
    """Return the first row's value of *column* as a stripped string."""
    # Positional access: works even when the index has no label 0.
    return str(df[column].iloc[0]).strip()


def query_hive(df, p=""):
    """Run the SQL statement *p* against Hive and return the result as a DataFrame.

    Connection settings are read from the first row of *df*, which must have
    the columns ``host``, ``port``, ``database``, ``username`` and
    ``properties`` (plus ``password`` when ``authType`` is 1).

    ``properties`` is a ``key=value&key=value`` string. Its ``authType`` entry
    selects the authentication mode:

    * ``0`` -- PLAIN without credentials
    * ``1`` -- PLAIN with ``username`` / ``password``
    * ``2`` -- Kerberos (GSSAPI); also requires ``kbsKeyTab`` (keytab path)
      and ``kbsUser`` (principal)

    Returns:
        The query result converted with ``impala.util.as_pandas``. On a
        configuration, connection or query failure, a DataFrame with a single
        ``error`` column holding the message is returned instead.

    Raises:
        ImportError: if ``impyla`` is not installed.
        KeyError / ValueError: if a required column of *df* is missing or
            ``port`` is not an integer.
    """
    import contextlib
    import subprocess

    host = _first_text(df, 'host')
    port = int(df['port'].iloc[0])
    database = _first_text(df, 'database')
    username = _first_text(df, 'username')
    sql = p.strip()

    # Work out which authentication mode (possibly Kerberos) this Hive uses.
    # Validation happens before the driver is imported so that configuration
    # mistakes are reported without touching the network.
    properties = _parse_properties(_first_text(df, 'properties'))
    if 'authType' not in properties:
        return _error_frame("connection properties are missing 'authType'")
    try:
        auth_type = int(properties['authType'])
    except ValueError:
        return _error_frame(
            'authType must be an integer, got {!r}'.format(properties['authType'])
        )
    if auth_type not in (0, 1, 2):
        return _error_frame('unsupported authType: {}'.format(auth_type))

    from impala.dbapi import connect
    from impala.util import as_pandas

    try:
        if auth_type == 1:
            password = _first_text(df, 'password')
            conn = connect(host=host, port=port, auth_mechanism='PLAIN',
                           user=username, password=password, database=database)
        elif auth_type == 0:
            conn = connect(host=host, port=port, auth_mechanism='PLAIN',
                           database=database)
        else:
            # Only Kerberos needs krbcontext, so PLAIN users need not install it.
            from krbcontext import krbcontext

            keytab_file = properties['kbsKeyTab']
            principal = properties['kbsUser']
            # Argument list instead of a shell string: the keytab path and
            # principal come from configuration and must not be shell-parsed.
            # A non-zero exit status is ignored, as os.system did before.
            subprocess.run(['kinit', '-kt', keytab_file, principal], check=False)
            with krbcontext(using_keytab=True, principal=principal,
                            keytab_file=keytab_file):
                conn = connect(host=host, port=port, auth_mechanism='GSSAPI',
                               kerberos_service_name='hive', database=database)
    except Exception as e:
        return _error_frame(e)

    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute(sql)
        return as_pandas(cursor)
    except Exception as e:
        return _error_frame(e)
    finally:
        # Best-effort cleanup: a failing close() must not mask the result.
        for resource in (cursor, conn):
            if resource is not None:
                with contextlib.suppress(Exception):
                    resource.close()

你可能感兴趣的:(HIVE,python)