# Fetch Hive table metadata: table owner and table location.
import os
import subprocess

import boto3
# Create the AWS Glue client used to look up table metadata.
glue = boto3.client('glue')


def _s3_total_size(location):
    """Return the value printed on the ``Total Size:`` summary line.

    Runs ``aws s3 ls <location> --recursive --summarize`` and extracts the
    text that follows ``Total Size:``, with all spaces removed.

    Args:
        location: Storage location string taken from the Glue table.

    Returns:
        The size value as a string, exactly as the CLI printed it.

    Raises:
        ValueError: If the CLI output contains no ``Total Size:`` line.
        OSError: If the ``aws`` executable cannot be started.
    """
    # Pass an argument list (no shell) so a location containing spaces or
    # shell metacharacters reaches the CLI verbatim instead of being
    # re-interpreted by the shell.
    result = subprocess.run(
        ['aws', 's3', 'ls', location, '--recursive', '--summarize'],
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    # The summary follows the object listing, so scan from the end. Requiring
    # the marker at the start of the (stripped) line avoids matching object
    # keys that merely contain the word "Size".
    for out_line in reversed(result.stdout.splitlines()):
        prefix, marker, value = out_line.partition('Total Size:')
        if marker and not prefix.strip():
            return value.replace(' ', '')
    raise ValueError('no "Total Size:" line in aws s3 ls output for %s' % location)


# Read the table list: one table per line, tab-separated, with the database
# name in the first column and the table name in the second.
with open('res.log', encoding='utf-8') as file:
    lines = file.readlines()

# Look up every table and write one '#'-separated record per table:
#   <db>.<table>#<owner>#<location>#<size>
# The context manager guarantees the output file is closed even if the loop
# is interrupted.
with open('size.txt', 'w', encoding='utf-8') as file_w:
    for line in lines:
        # readlines() keeps the line terminator; drop it so it does not end
        # up inside the table name.
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) < 2:
            # Blank lines are skipped silently; anything else is reported.
            if line.strip():
                print('skipping malformed line: %r' % line)
            continue
        db_name = fields[0].strip()
        tbl_name = fields[1].strip()

        try:
            response = glue.get_table(
                DatabaseName=db_name,
                Name=tbl_name
            )
            tbl_loc = response['Table']['StorageDescriptor']['Location']
            tbl_owner = response['Table']['Owner']
            size = _s3_total_size(tbl_loc)
        except Exception as e:
            # Best-effort batch job: report the failing table and move on to
            # the next one rather than aborting the whole run.
            print(db_name+'.'+tbl_name+' 报错')
            print(e)
            continue

        file_w.write('%s.%s#%s#%s#%s\n' % (db_name, tbl_name, tbl_owner, tbl_loc, size))