使用 pandas 比较两个文件的差异（找出只存在于其中一个文件中的行）

import pandas as pd
# Column names applied to both pipe-delimited input files.
_USERINFO_COLUMNS = [
    'width_id', 'userid', 'usertype', 'areaid', 'usergroup', 'status', 'datecreated',
]


def _load_userinfo(path):
    """Read one pipe-delimited file and apply the shared column names.

    Args:
        path: Path of the '|'-separated, UTF-8 encoded text file.

    Returns:
        A DataFrame whose columns are renamed to ``_USERINFO_COLUMNS``.

    Raises:
        ValueError: If the file does not have exactly seven columns.
    """
    # NOTE(review): with read_csv's default header handling the first line of
    # the file is consumed as a header row and then overwritten below.  If the
    # files carry no header line, pass header=None so the first record is not
    # dropped -- confirm against the real files.
    frame = pd.read_csv(path, sep='|', encoding='utf-8', low_memory=False)
    frame.columns = _USERINFO_COLUMNS
    return frame


def _rows_missing_from(left, right):
    """Return the rows of ``left`` that have no identical row in ``right``.

    Rows are compared as whole records across all shared columns.  A
    column-by-column ``isin`` test is not sufficient: it reports a row as
    present whenever each of its values occurs *somewhere* in the matching
    column of ``right``, even if no single row of ``right`` is equal to it.

    Args:
        left: DataFrame whose rows are filtered.
        right: DataFrame to look the rows up in.

    Returns:
        The subset of ``left`` (original index and columns kept) whose rows
        do not occur in ``right``.
    """
    # De-duplicating ``right`` guarantees the left join yields exactly one
    # output row per row of ``left``, in the same order, so the indicator
    # column can be used as a positional mask on ``left``.
    marked = left.merge(right.drop_duplicates(), how='left', indicator=True)
    only_left = (marked['_merge'] == 'left_only').to_numpy()
    return left[only_left]


base815 = "00000000.00000002.base_userinfo.20190815.0001.txt"
pd815 = _load_userinfo(base815)
base901 = "00000000.00000002.base_userinfo.20190901.0001.txt"
pd901 = _load_userinfo(base901)

# Rows present in the 0815 file but absent from the 0901 file, and vice versa.
notin901 = _rows_missing_from(pd815, pd901)
notin815 = _rows_missing_from(pd901, pd815)

你可能感兴趣的:(Python,pandas)