获取hive表信息

获取hive的表结构信息:表owner、表location,以及表在S3上占用的存储大小(结果写入size.txt)

一、boto3方式

import os
import subprocess

import boto3

# Collect owner, S3 location and total S3 size for every Hive table listed in
# res.log (one "<database>\t<table>" entry per line) and write one
# "<db>.<table>#<owner>#<location>#<size>" record per table to size.txt.

# Create the AWS Glue client used to look up table metadata.
glue = boto3.client('glue')


def _parse_table_line(line):
    """Split one input line into ``(db_name, tbl_name)``.

    The line is expected to be tab-separated with the database name in the
    first field and the table name in the second. The trailing line break is
    removed first; otherwise it would end up inside the table name.

    Returns ``None`` when the line does not provide both fields.
    """
    fields = line.rstrip('\r\n').split('\t')
    if len(fields) < 2:
        return None
    db_name = fields[0].strip()
    tbl_name = fields[1].strip()
    if not db_name or not tbl_name:
        return None
    return db_name, tbl_name


def _s3_total_size(location):
    """Return the "Total Size" value printed by ``aws s3 ls --summarize``.

    The command is run with an argument list (no shell), so a location that
    contains spaces or shell metacharacters is passed through verbatim.

    Raises ``ValueError`` when the CLI output has no "Total Size:" line.
    """
    # NOTE(review): S3 listing is prefix based, so a location without a
    # trailing "/" presumably also counts sibling prefixes that share the
    # same leading characters - confirm before relying on exact sizes.
    result = subprocess.run(
        ['aws', 's3', 'ls', location, '--recursive', '--summarize'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    # The exit status is deliberately not checked: the summary line is the
    # only thing needed, and its absence is reported below with the CLI's
    # own error output.
    for out_line in result.stdout.splitlines():
        stripped = out_line.strip()
        # Match only the summary line; a plain search for "Size" would also
        # hit object keys that happen to contain that word.
        if stripped.startswith('Total Size:'):
            return stripped.split(':', 1)[1].replace(' ', '')
    raise ValueError(
        'no "Total Size" in aws s3 ls output for %s: %s'
        % (location, result.stderr.strip())
    )


# Read the table names.
with open('res.log', encoding='utf-8') as src:
    lines = src.readlines()

# Fetch the table information and write it out; the file is closed even if
# the loop is interrupted.
with open('size.txt', 'w') as file_w:
    for line in lines:
        parsed = _parse_table_line(line)
        if parsed is None:
            # Blank lines are skipped silently, anything else is reported.
            if line.strip():
                print('skip malformed line: %r' % line)
            continue
        db_name, tbl_name = parsed
        try:
            response = glue.get_table(
                DatabaseName=db_name,
                Name=tbl_name
            )
            table = response['Table']
            tbl_loc = table['StorageDescriptor']['Location']
            tbl_owner = table['Owner']
            size = _s3_total_size(tbl_loc)
        except Exception as e:
            # Best effort per table: report the failure (missing table,
            # missing owner/location, CLI problem) and carry on with the
            # remaining tables instead of aborting the whole run.
            print(db_name+'.'+tbl_name+' 报错')
            print(e)
            continue
        file_w.write(
            '%s.%s#%s#%s#%s\n' % (db_name, tbl_name, tbl_owner, tbl_loc, size)
        )

你可能感兴趣的:(hive,大数据,python)