1.利用python将excel转成txt文件
#!/usr/bin/env python
import pandas as pd
import sys
def excel_to_txt(dt):
    """Convert ``data/excel/<dt>.xlsx`` to the tab-separated ``data/txt/<dt>.txt``.

    Args:
        dt: Base file name (without extension) shared by the input workbook
            and the output text file.
    """
    # header=None: no row is treated as a header, so every row is read as data.
    df = pd.read_excel('data/excel/%s.xlsx' % dt, header=None)
    print('开始写入txt文件')
    # Write tab-separated text without a header row or index column.
    # to_csv takes a bool (or a list of names) for `header`, so pass False
    # explicitly instead of relying on None being falsy.
    df.to_csv('data/txt/%s.txt' % dt, header=False, sep='\t', index=False)
    print('文件写入成功!')
if __name__ == '__main__':
    # Take the workbook base name from the first command-line argument when
    # one is given; otherwise fall back to the original hard-coded name.
    excel_to_txt(sys.argv[1] if len(sys.argv) > 1 else 'IS_GS_Recruitment_Data_20231211')
2.上传到hdfs,例如:hdfs dfs -put data/txt/IS_GS_Recruitment_Data_20231211.txt /origin_data/test.txt
3.在hive中创建表
-- Drop any previous definition of ticket.test_text, then define it again as an
-- external, tab-delimited text table with empty fields read as NULL.
-- NOTE(review): per the Hive docs, dropping an EXTERNAL table should leave the
-- files under LOCATION in place -- confirm that is the intent here.
DROP TABLE IF EXISTS ticket.test_text;

CREATE EXTERNAL TABLE IF NOT EXISTS ticket.test_text (
    name STRING,
    age  INT
)
COMMENT ''
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
    NULL DEFINED AS ''
STORED AS TEXTFILE
LOCATION '/warehouse/ticket/ods/test_text';
4.将hdfs数据写入hive
-- Load the uploaded HDFS file into ticket.test_text; OVERWRITE replaces the
-- data currently in the table.
-- NOTE(review): per the Hive docs, LOAD DATA INPATH without LOCAL moves the
-- source file rather than copying it -- confirm the file under /origin_data
-- is expendable.
LOAD DATA INPATH '/origin_data/test.txt'
OVERWRITE INTO TABLE ticket.test_text;