使用 pandas 参照一个文件:根据 id 判断记录是否存在,并据此修改另一个文件中对应的字段

import pandas as pd

# --- Configuration -----------------------------------------------------------
data_file = ''      # delimited file whose rows are to be updated
compare_file = ''   # delimited reference file supplying the new values
out_file = ''       # destination for the rows that were actually changed

# Column keys.  Use header names (str) for files with a header row, or
# integer positions for headerless files.
id_line_name = '标题'
update_line_name = '价格'
sep_line = '|'


def load_table(path, id_column, sep=sep_line, has_header=True):
    """Read a delimited file and keep only the first row for each id.

    Args:
        path: File path (or file-like object) handed to ``pandas.read_csv``.
        id_column: Column that identifies a row; later duplicates are dropped.
        sep: Field delimiter.
        has_header: When True the first line supplies the column names;
            when False columns are labelled 0..n-1.

    Returns:
        A DataFrame with unique ``id_column`` values and a fresh 0..n-1 index.
    """
    frame = pd.read_csv(
        path,
        header=0 if has_header else None,
        sep=sep,
        low_memory=False,
        on_bad_lines='skip',  # malformed lines are skipped instead of raising
    )
    # drop_duplicates returns a new frame; the result must be kept.
    frame = frame.drop_duplicates(id_column, keep='first')
    return frame.reset_index(drop=True)


def apply_reference_updates(data_df, compare_df, id_column, update_column):
    """Overwrite stale ``update_column`` values in ``data_df`` from ``compare_df``.

    For every row of ``data_df`` whose id also occurs in ``compare_df``, the
    value in ``update_column`` is replaced by the reference value when the two
    differ.  ``data_df`` is modified in place.

    Reference values that are missing (NaN/None) or falsy (empty string, 0)
    are treated as "no value available" and never written.

    Args:
        data_df: Frame to update; its index labels are expected to be unique.
        compare_df: Reference frame; for duplicated ids the first row wins.
        id_column: Column used to match rows between the two frames.
        update_column: Column whose value is compared and overwritten.

    Returns:
        A new DataFrame holding the rows of ``data_df`` that were changed
        (with their updated values), in their original order.
    """
    # Turn the two reference columns into an {id: value} lookup.
    reference = (
        compare_df.drop_duplicates(id_column, keep='first')
        .set_index(id_column)[update_column]
        .to_dict()
    )

    changed_labels = []
    for label, row_id in zip(data_df.index, data_df[id_column]):
        new_value = reference.get(row_id, '')
        # NaN is truthy and never equal to anything, so it has to be
        # filtered explicitly or it would overwrite good data.
        if pd.isna(new_value) or not new_value:
            continue
        if data_df.at[label, update_column] != new_value:
            # Single-step .at on the frame: guaranteed to write through
            # (chained Series.at assignment is not).
            data_df.at[label, update_column] = new_value
            changed_labels.append(label)
            print('已经加入第{0}数据'.format(len(changed_labels)))

    # One selection at the end instead of growing a frame row by row.
    return data_df.loc[changed_labels]


def main():
    """Load both files, apply the updates and write the changed rows."""
    # Integer column keys can only refer to a headerless file; anything else
    # needs the header row to resolve the column names.
    headerless = isinstance(id_line_name, int) and isinstance(update_line_name, int)

    data_df = load_table(data_file, id_line_name, sep_line, has_header=not headerless)
    compare_df = load_table(compare_file, id_line_name, sep_line, has_header=not headerless)

    out_df = apply_reference_updates(data_df, compare_df, id_line_name, update_line_name)
    # Output carries data rows only: no header line and no index column.
    out_df.to_csv(out_file, sep=sep_line, header=False, index=False)


if __name__ == '__main__':
    main()

你可能感兴趣的:(pandas,机器学习,人工智能)