# 来源: https://blog.csdn.net/weixin_39778570/article/details/81157884
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
# NumPy数组(ndarray)上的concatenate
arr1 = np.arange(9).reshape(3,3)
arr1
Out[6]:
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]])
arr2 = np.arange(9).reshape(3,3)
arr2
Out[9]:
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]])
# 进行concatenate,axis参数表示拼接方向:默认axis=0是纵向(按行)拼接,axis=1是横向(按列)拼接
np.concatenate([arr1,arr2])
Out[10]:
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8],
[0, 1, 2],
[3, 4, 5],
[6, 7, 8]])
np.concatenate([arr1,arr2], axis=1)
Out[11]:
array([[0, 1, 2, 0, 1, 2],
[3, 4, 5, 3, 4, 5],
[6, 7, 8, 6, 7, 8]])
# Series上的concatenate
s1 = Series([1,2,3], index=['X','Y','Z'])
S2 = Series([4,5], index=['A','B'])
S2
Out[15]:
A 4
B 5
dtype: int64
pd.concat([s1,S2])
Out[16]:
X 1
Y 2
Z 3
A 4
B 5
dtype: int64
# axis=1时按索引对齐后横向拼接,某一方没有的索引对应的值会补齐为NaN
pd.concat([s1,S2], axis=1)
Out[17]:
0 1
A NaN 4.0
B NaN 5.0
X 1.0 NaN
Y 2.0 NaN
Z 3.0 NaN
# DataFrame上的concatenate
df1 = DataFrame(np.random.rand(4,3), columns=['X','Y','Z'])
df1
Out[20]:
X Y Z
0 0.093816 0.087879 0.539844
1 0.087522 0.012905 0.446522
2 0.269924 0.213385 0.900469
3 0.004105 0.437186 0.817560
df2 = DataFrame(np.random.rand(3,3), columns=['X','Y','A'])
df2
Out[22]:
X Y A
0 0.938714 0.122255 0.189125
1 0.592859 0.459991 0.596478
2 0.337845 0.977800 0.401993
pd.concat([df1,df2])
Out[24]:
A X Y Z
0 NaN 0.093816 0.087879 0.539844
1 NaN 0.087522 0.012905 0.446522
2 NaN 0.269924 0.213385 0.900469
3 NaN 0.004105 0.437186 0.817560
0 0.189125 0.938714 0.122255 NaN
1 0.596478 0.592859 0.459991 NaN
2 0.401993 0.337845 0.977800 NaN
pd.concat([df1,df2],axis=1)
Out[25]:
X Y Z X Y A
0 0.093816 0.087879 0.539844 0.938714 0.122255 0.189125
1 0.087522 0.012905 0.446522 0.592859 0.459991 0.596478
2 0.269924 0.213385 0.900469 0.337845 0.977800 0.401993
3 0.004105 0.437186 0.817560 NaN NaN NaN
# Combine(combine_first):用后一个对象的值补齐前一个对象中的缺失值
# Series
s1 = Series([2,np.nan,4,np.nan], index=['A','B','C','D'])
s1
Out[29]:
A 2.0
B NaN
C 4.0
D NaN
dtype: float64
s2 = Series([1,2,3,4], index=['A','B','C','D'])
s2
Out[31]:
A 1
B 2
C 3
D 4
dtype: int64
# s1中的缺失值(NaN)被s2中相同索引的值补齐了
s1.combine_first(s2)
Out[32]:
A 2.0
B 2.0
C 4.0
D 4.0
dtype: float64
# DataFrame,和Series类似:按行索引和列名对齐,df1中的缺失值以及df1没有的列由df2补齐
df1 = DataFrame({'X':[1,np.nan,3,np.nan], 'Y':[5,np.nan,7,np.nan], 'Z':[9,np.nan,11,np.nan]})
df1
Out[36]:
X Y Z
0 1.0 5.0 9.0
1 NaN NaN NaN
2 3.0 7.0 11.0
3 NaN NaN NaN
df2 = DataFrame({'Z':[np.nan,10,np.nan,12], 'A':[1,2,3,4]})
df2
Out[38]:
A Z
0 1 NaN
1 2 10.0
2 3 NaN
3 4 12.0
df1.combine_first(df2)
Out[39]:
A X Y Z
0 1.0 1.0 5.0 9.0
1 2.0 NaN NaN 10.0
2 3.0 3.0 7.0 11.0
3 4.0 NaN NaN 12.0