Pandas玩转数据(五) -- Concatenate和Combine

数据分析汇总学习

https://blog.csdn.net/weixin_39778570/article/details/81157884

import pandas as pd
import numpy as np
from pandas import Series, DataFrame

# array(ndarray)上的Concatenate
arr1 = np.arange(9).reshape(3,3)
arr1
Out[6]: 
array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])
arr2 = np.arange(9).reshape(3,3)
arr2
Out[9]: 
array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])
# 进行concatenate,axis参数表示结合方向,默认0是纵向结合
np.concatenate([arr1,arr2])
Out[10]: 
array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])
np.concatenate([arr1,arr2], axis=1)
Out[11]: 
array([[0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5],
       [6, 7, 8, 6, 7, 8]])

# Series上的concatenate
s1 = Series([1,2,3], index=['X','Y','Z'])
S2 = Series([4,5], index=['A','B'])
S2
Out[15]: 
A    4
B    5
dtype: int64

pd.concat([s1,S2])
Out[16]: 
X    1
Y    2
Z    3
A    4
B    5
dtype: int64

# 缺失值会补齐为NaN
pd.concat([s1,S2], axis=1)
Out[17]: 
     0    1
A  NaN  4.0
B  NaN  5.0
X  1.0  NaN
Y  2.0  NaN
Z  3.0  NaN

# DataFrame上的concatenate
df1 = DataFrame(np.random.rand(4,3), columns=['X','Y','Z'])
df1
Out[20]: 
          X         Y         Z
0  0.093816  0.087879  0.539844
1  0.087522  0.012905  0.446522
2  0.269924  0.213385  0.900469
3  0.004105  0.437186  0.817560

df2 = DataFrame(np.random.rand(3,3), columns=['X','Y','A'])
df2
Out[22]: 
          X         Y         A
0  0.938714  0.122255  0.189125
1  0.592859  0.459991  0.596478
2  0.337845  0.977800  0.401993

pd.concat([df1,df2])
Out[24]: 
          A         X         Y         Z
0       NaN  0.093816  0.087879  0.539844
1       NaN  0.087522  0.012905  0.446522
2       NaN  0.269924  0.213385  0.900469
3       NaN  0.004105  0.437186  0.817560
0  0.189125  0.938714  0.122255       NaN
1  0.596478  0.592859  0.459991       NaN
2  0.401993  0.337845  0.977800       NaN

pd.concat([df1,df2],axis=1)
Out[25]: 
          X         Y         Z         X         Y         A
0  0.093816  0.087879  0.539844  0.938714  0.122255  0.189125
1  0.087522  0.012905  0.446522  0.592859  0.459991  0.596478
2  0.269924  0.213385  0.900469  0.337845  0.977800  0.401993
3  0.004105  0.437186  0.817560       NaN       NaN       NaN

# Combine(combine_first),用后一个对象的值填补前一个对象中的缺失值(NaN)
# Series
s1 = Series([2,np.nan,4,np.nan], index=['A','B','C','D'])
s1
Out[29]: 
A    2.0
B    NaN
C    4.0
D    NaN
dtype: float64

s2 = Series([1,2,3,4], index=['A','B','C','D'])
s2
Out[31]: 
A    1
B    2
C    3
D    4
dtype: int64

# s1中的缺失值(NaN)被s2中相同索引位置的值补齐了
s1.combine_first(s2)
Out[32]: 
A    2.0
B    2.0
C    4.0
D    4.0
dtype: float64

# DataFrame,和Series类似
df1 = DataFrame({'X':[1,np.nan,3,np.nan], 'Y':[5,np.nan,7,np.nan], 'Z':[9,np.nan,11,np.nan]})
df1
Out[36]: 
     X    Y     Z
0  1.0  5.0   9.0
1  NaN  NaN   NaN
2  3.0  7.0  11.0
3  NaN  NaN   NaN

df2 = DataFrame({'Z':[np.nan,10,np.nan,12], 'A':[1,2,3,4]})
df2
Out[38]: 
   A     Z
0  1   NaN
1  2  10.0
2  3   NaN
3  4  12.0

df1.combine_first(df2)
Out[39]: 
     A    X    Y     Z
0  1.0  1.0  5.0   9.0
1  2.0  NaN  NaN  10.0
2  3.0  3.0  7.0  11.0
3  4.0  NaN  NaN  12.0

你可能感兴趣的:(python数据科学)