待我学有所成,结发与蕊可好。@夏瑾墨 by Jooey
1.数据库风格的DataFrame合并
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# Many-to-one merge: df1 repeats key values, df2 lists each key once.
df1 = pd.DataFrame({
    'key': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],
    'data1': range(7),
})
df2 = pd.DataFrame({
    'key': ['a', 'b', 'd'],
    'data2': range(3),
})
# merge defaults to an inner join, so the keys in the result are the
# intersection of the keys on both sides.
pd.merge(df1, df2)
# Same merge with the join column named explicitly.
pd.merge(df1, df2, on='key')
# The key columns have different names here, so each side is named separately.
df3 = pd.DataFrame({
    'lkey': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],
    'data1': range(7),
})
df4 = pd.DataFrame({
    'rkey': ['a', 'b', 'd'],
    'data2': range(3),
})
pd.merge(df3, df4, left_on='lkey', right_on='rkey')
# Outer join of the first pair of frames.
pd.merge(df1, df2, how='outer')
# A many-to-many join yields the Cartesian product of the matching rows.
df1 = pd.DataFrame({
    'key': ['b', 'b', 'a', 'c', 'a', 'b'],
    'data1': range(6),
})
df2 = pd.DataFrame({
    'key': ['a', 'b', 'a', 'b', 'd'],
    'data2': range(5),
})
pd.merge(df1, df2, how='left')
pd.merge(df1, df2, how='inner')
# Merging on several keys at once.
left = pd.DataFrame({
    'key1': ['foo', 'foo', 'bar'],
    'key2': ['one', 'two', 'one'],
    'lval': [1, 2, 3],
})
right = pd.DataFrame({
    'key1': ['foo', 'foo', 'bar', 'bar'],
    'key2': ['one', 'one', 'one', 'two'],
    'rval': [4, 5, 6, 7],
})
# An outer join takes the union of the keys, combining the effect of a left
# join and a right join.
pd.merge(left, right, on=['key1', 'key2'], how='outer')
# Handling duplicate column names: both frames have a 'key2' column that is
# not part of the join key here.
pd.merge(left, right, on='key1')
pd.merge(left, right, on='key1', suffixes=('_left', '_right'))
pandas.merge 可根据一个或者多个键将不同的DataFrame的行连接起来,实现的效果同数据库SQL语句的连接操作。
pandas.concat 沿一条轴将多个对象拼接在一起。
suffixes用于指定附加到左右两个DataFrame对象重叠列名上的字符串。
2.索引上的合并
# The join key is a column on the left side and the index on the right side.
left1 = pd.DataFrame({
    'key': ['a', 'b', 'a', 'a', 'b', 'c'],
    'value': range(6),
})
right1 = pd.DataFrame(
    {'group_val': [3.5, 7]},
    index=['a', 'b'],
)
pd.merge(left1, right1, left_on='key', right_index=True)
pd.merge(left1, right1, left_on='key', right_index=True, how='outer')
# Hierarchical index: the right-hand frame is indexed by two levels, so the
# left-hand side lists the columns to match against those levels.
# NOTE: this section needs numpy bound to `np` (`import numpy as np` at the
# top of the file); without it the np.arange calls raise NameError.
lefth = pd.DataFrame({
    'key1': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'key2': [2000, 2001, 2002, 2001, 2002],
    'data': np.arange(5),
})
righth = pd.DataFrame(
    np.arange(12).reshape((6, 2)),
    index=[
        ['Nevada', 'Nevada', 'Ohio', 'Ohio', 'Ohio', 'Ohio'],
        [2001, 2000, 2000, 2000, 2001, 2002],
    ],
    columns=['event1', 'event2'],
)
# The index entry ('Ohio', 2000) appears twice in righth, so the matching
# left row is repeated in the result.
pd.merge(lefth, righth, left_on=['key1', 'key2'], right_index=True)
pd.merge(lefth, righth, left_on=['key1', 'key2'], right_index=True, how='outer')
# Using the indexes of both sides as the merge keys.
left2 = pd.DataFrame(
    [[1, 2], [3, 4], [5, 6]],
    index=['a', 'c', 'e'],
    columns=['Ohio', 'Nevada'],
)
right2 = pd.DataFrame(
    [[7, 8], [9, 10], [11, 12], [13, 14]],
    index=['b', 'c', 'd', 'e'],
    columns=['Missouri', 'Alabama'],
)
pd.merge(left2, right2, how='outer', left_index=True, right_index=True)
# The join instance method.
# Outer join of left2 with right2.
left2.join(other=right2, how='outer')
# `on='key'` names the column of left1 to join on.
left1.join(other=right1, on='key')
向join传入一组DataFrame
# A third frame, passed to join together with right2 as a list.
another = pd.DataFrame(
    data=[[7, 8], [9, 10], [11, 12], [16, 17]],
    index=['a', 'c', 'e', 'f'],
    columns=['New York', 'Oregon'],
)
left2.join(other=[right2, another])
left2.join(other=[right2, another], how='outer')
待我学有所成,结发与蕊可好。@夏瑾墨 by Jooey