pandas中concat用法

import numpy as np
import pandas as pd
arr=np.arange(12).reshape((3,4))
In[2]: arr
Out[2]: 
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In[3]: np.concatenate([arr,arr])#numpy合并
Out[3]: 
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])
s1=pd.Series([0,1],index=['a','b'])
s2=pd.Series([2,3,4],index=['c','d','e'])
s3=pd.Series([5,6],index=['f','g'])
In[4]: pd.concat([s1,s2,s3])#Series行合并
Out[4]: 
a    0
b    1
c    2
d    3
e    4
f    5
g    6
dtype: int64

In[5]: pd.concat([s1,s2,s3],axis=1)#Series列合并
Out[5]: 
     0    1    2
a  0.0  NaN  NaN
b  1.0  NaN  NaN
c  NaN  2.0  NaN
d  NaN  3.0  NaN
e  NaN  4.0  NaN
f  NaN  NaN  5.0
g  NaN  NaN  6.0

s4=pd.Series([0,5,5,6],index=['a','b','f','g'])
In[6]: pd.concat([s1,s4],axis=1,join='inner')#交集(join='inner'只保留两者共有的索引)
Out[6]: 
   0  1
a  0  0
b  1  5
In[7]: pd.concat([s1,s4],axis=1,join_axes=[['a','c','b','e']])#指定索引(注意:join_axes参数已在pandas 1.0中移除,新版本请改用 pd.concat([s1,s4],axis=1).reindex(['a','c','b','e']))
Out[7]: 
     0    1
a  0.0  0.0
c  NaN  NaN
b  1.0  5.0
e  NaN  NaN
In[9]:  pd.concat([s1,s2,s3],keys=['one','two','three'])#层次化索引
Out[9]: 
one    a    0
       b    1
two    c    2
       d    3
       e    4
three  f    5
       g    6
dtype: int64

In[10]: pd.concat([s1,s2,s3],axis=1,keys=['one','two','three'])#作为列
Out[10]: 
   one  two  three
a  0.0  NaN    NaN
b  1.0  NaN    NaN
c  NaN  2.0    NaN
d  NaN  3.0    NaN
e  NaN  4.0    NaN
f  NaN  NaN    5.0
g  NaN  NaN    6.0

df1=pd.DataFrame(np.arange(6).reshape(3,2),index=['a','b','c'],columns=['one','two'])
df2=pd.DataFrame(5 + np.arange(4).reshape(2,2),index=['a','c'],columns=['three','four'])
In[11]: pd.concat([df1,df2],axis=1,keys=['level1','level2'])#DataFrame列外索引
Out[11]: 
  level1     level2     
     one two  three four
a      0   1    5.0  6.0
b      2   3    NaN  NaN
c      4   5    7.0  8.0

In[12]: pd.concat({'level1':df1,'level2':df2},axis=1)#字典键做列外索引
Out[12]: 
  level1     level2     
     one two  three four
a      0   1    5.0  6.0
b      2   3    NaN  NaN
c      4   5    7.0  8.0

In[13]: pd.concat([df1,df2],axis=1,keys=['level1','level2'],names=['upper','lower'])#外内索引名字
Out[13]: 
upper level1     level2     
lower    one two  three four
a          0   1    5.0  6.0
b          2   3    NaN  NaN
c          4   5    7.0  8.0

In[14]: pd.concat([df1,df2],keys=['level1','level2'],names=['upper','lower'])#按行合并,keys作为行的外层索引(旧版pandas默认按列名排序;pandas 1.0及以后需传sort=True才会得到下面的列顺序)
Out[14]: 
              four  one  three  two
upper  lower                       
level1 a       NaN  0.0    NaN  1.0
       b       NaN  2.0    NaN  3.0
       c       NaN  4.0    NaN  5.0
level2 a       6.0  NaN    5.0  NaN
       c       8.0  NaN    7.0  NaN



 
  


 
 

你可能感兴趣的:(python数据挖掘)