六、Pandas——合并 merge

 

import pandas as pd
import numpy as np
# merge
print("***************根据指定的单个列进行合并*******************")
left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                     'B': ['B0', 'B1', 'B2', 'B3']})
right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                     'C': ['C0', 'C1', 'C2', 'C3'],
                      'D': ['D0', 'D1', 'D2', 'D3']})
print(left)
#   key   A   B
# 0  K0  A0  B0
# 1  K1  A1  B1
# 2  K2  A2  B2
# 3  K3  A3  B3
print(right)
#   key   C   D
# 0  K0  C0  D0
# 1  K1  C1  D1
# 2  K2  C2  D2
# 3  K3  C3  D3
print("===========merge 参数 on ,指定按照哪一列合并========")
res = pd.merge(left, right, on='key')
print(res)
#   key   A   B   C   D
# 0  K0  A0  B0  C0  D0
# 1  K1  A1  B1  C1  D1
# 2  K2  A2  B2  C2  D2
# 3  K3  A3  B3  C3  D3
print("***************根据指定的两个列进行合并*******************")
left1 = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],
                      'key2': ['K0', 'K1', 'K0', 'K1'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                      'B': ['B0', 'B1', 'B2', 'B3']})
right1 = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],
                       'key2': ['K0', 'K0', 'K0', 'K0'],
                       'C': ['C0', 'C1', 'C2', 'C3'],
                       'D': ['D0', 'D1', 'D2', 'D3']})
print(left1)
#   key1 key2   A   B
# 0   K0   K0  A0  B0
# 1   K0   K1  A1  B1
# 2   K1   K0  A2  B2
# 3   K2   K1  A3  B3
print(right1)
#   key1 key2   C   D
# 0   K0   K0  C0  D0
# 1   K1   K0  C1  D1
# 2   K1   K0  C2  D2
# 3   K2   K0  C3  D3
print("===========merge 参数 on ,指定按照哪两列合并===========")
# 指定合并的两个列有顺序???
# 默认 inner join ,即 how='inner'
# how = {'left', 'right', 'inner', 'outer'}
res = pd.merge(left1, right1, on=['key1','key2'], how='inner')
print(res)
#   key1 key2   A   B   C   D
# 0   K0   K0  A0  B0  C0  D0
# 1   K1   K0  A2  B2  C1  D1
# 2   K1   K0  A2  B2  C2  D2
print("*****************merge 参数 indicator **************")
df1 = pd.DataFrame({'col1': [0, 1],'col_left': ['a', 'b']})
df2 = pd.DataFrame({'col1': [1, 2, 2],'col_right': [2, 2, 2]})
print(df1)
#    col1 col_left
# 0     0        a
# 1     1        b
print(df2)
#    col1  col_right
# 0     1          2
# 1     2          2
# 2     2          2
print("===============是否显示结果数据来源=================")
# indicator 默认False,True时会增加一列来表示该行数据来自哪一个df
res = pd.merge(df1, df2, on='col1', how='outer', indicator=True)
print(res)
#    col1 col_left  col_right      _merge
# 0     0        a        NaN   left_only
# 1     1        b        2.0        both
# 2     2      NaN        2.0  right_only
# 3     2      NaN        2.0  right_only
print("===============显示来源时,指定来源列的名字================")
# indicator 默认False,True时会增加一列来表示该行数据来自哪一个df
res = pd.merge(df1, df2, on='col1', how='outer', indicator='source')
print(res)
#    col1 col_left  col_right      source
# 0     0        a        NaN   left_only
# 1     1        b        2.0        both
# 2     2      NaN        2.0  right_only
# 3     2      NaN        2.0  right_only
print("*****************merge 根据行名或列名合并*****************")
left2 = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
                     'B': ['B0', 'B1', 'B2']},
                     index=['K0', 'K1', 'K2'])
right2 = pd.DataFrame({'C': ['C0', 'C2', 'C3'],
                      'D': ['D0', 'D2', 'D3']},
                      index=['K0', 'K2', 'K3'])
print(left2)
#      A   B
# K0  A0  B0
# K1  A1  B1
# K2  A2  B2
print(right2)
#      C   D
# K0  C0  D0
# K2  C2  D2
# K3  C3  D3
print("===============是否显示结果数据来源=================")
# left_index 默认NaN, True 时 指的是 根据行名 合并
# right_index 默认NaN , True时 指的是 根据列名 合并
res = pd.merge(left2, right2, left_index=True, right_index=True, how='outer')
print(res)
#       A    B    C    D
# K0   A0   B0   C0   D0
# K1   A1   B1  NaN  NaN
# K2   A2   B2   C2   D2
# K3  NaN  NaN   C3   D3
print("*****************merge suffixes 解决两个df 重复列问题*****************")
boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]})
print(boys)
#     k  age
# 0  K0    1
# 1  K1    2
# 2  K2    3
print(girls)
#     k  age
# 0  K0    4
# 1  K0    5
# 2  K3    6
print("===========suffixes修改合并后列名===========")
# 在重复列名后 追加字符串,避免df中常出现相同的列名
res = pd.merge(boys, girls, on='k', suffixes=['_boys', '_girls'], how='inner')
print(res)
#     k  age_boys  age_girls
# 0  K0         1          4
# 1  K0         1          5


 

 

 

你可能感兴趣的:(Pandas)