Pandas处理DataFrame数据拼接


import pandas as pd

# Column-wise concatenation (axis=1): the frames are placed side by side.
data1 = [
    {'col1': 1, 'col2': 2, 'col3': 4},
    {'col1': 5, 'col2': 10, 'col3': 20},
]  # 2 rows x 3 cols
df1 = pd.DataFrame(data=data1)
print(df1)
# Expected output:
#    col1  col2  col3
# 0     1     2     4
# 1     5    10    20

data2 = [
    {'col4': 41, 'col5': 25, 'col6': 34},
    {'col4': 25, 'col5': 16, 'col6': 70},
]  # 2 rows x 3 cols
df2 = pd.DataFrame(data=data2)
print(df2)
# Expected output:
#    col4  col5  col6
# 0    41    25    34
# 1    25    16    70

# Both frames carry the same default row labels (0, 1), so the result has
# 2 rows and the 6 columns of df1 followed by those of df2.
df3 = pd.concat(objs=[df1, df2], axis="columns")
print(df3)
# Expected output:
#    col1  col2  col3  col4  col5  col6
# 0     1     2     4    41    25    34
# 1     5    10    20    25    16    70

# Row-wise concatenation (axis=0): the frames are stacked on top of each other.
data4 = [
    {'col1': 1, 'col2': 2, 'col3': 4},
    {'col1': 5, 'col2': 10, 'col3': 20},
]  # 2 rows x 3 cols
df4 = pd.DataFrame(data=data4, index=[1, 2])  # row labels 1 and 2
print(df4)
# Expected output:
#    col1  col2  col3
# 1     1     2     4
# 2     5    10    20

data5 = [
    {'col1': 41, 'col2': 25, 'col3': 34},
    {'col1': 25, 'col2': 16, 'col3': 70},
]  # 2 rows x 3 cols
df5 = pd.DataFrame(data=data5, index=[3, 4])  # row labels 3 and 4
print(df5)
# Expected output:
#    col1  col2  col3
# 3    41    25    34
# 4    25    16    70

# The two frames share the same column names, so the result keeps the three
# columns and holds the rows of df4 followed by the rows of df5.
df6 = pd.concat(objs=[df4, df5], axis="index")
print(df6)
# Expected output:
#    col1  col2  col3
# 1     1     2     4
# 2     5    10    20
# 3    41    25    34
# 4    25    16    70

# Append per-column range (max - min), mean and sum as extra rows.
df7 = df6.copy()  # work on a copy so df6 keeps only its original rows
# Each statistic is computed from df6, not df7, so a row appended here never
# feeds into the statistics appended after it.
df7.loc[5] = df6.max() - df6.min()  # new row labelled 5: per-column range
df7.loc[6] = df6.mean()  # new row labelled 6: per-column mean
df7.loc[7] = df6.sum()  # new row labelled 7: per-column sum
print(df7)
# Expected output:
#    col1   col2   col3
# 1   1.0   2.00    4.0
# 2   5.0  10.00   20.0
# 3  41.0  25.00   34.0
# 4  25.0  16.00   70.0
# 5  40.0  23.00   66.0
# 6  18.0  13.25   32.0
# 7  72.0  53.00  128.0

# Overwrite a single cell addressed by (row label, column name).
# 'col1' is a float column at this point. Writing a string straight into it
# relies on pandas silently upcasting the column, which pandas >= 2.1
# deprecates with a FutureWarning (PDEP-6). Converting the column to object
# dtype first makes the mixed-type column explicit and avoids the warning.
df7['col1'] = df7['col1'].astype(object)
df7.loc[7, 'col1'] = 'a'  # row label 7, column 'col1' -> 'a'
print(df7)
# Expected output (how the numbers in the object column 'col1' are rendered,
# e.g. "1" vs "1.0", depends on the pandas version):
#   col1   col2   col3
# 1    1   2.00    4.0
# 2    5  10.00   20.0
# 3   41  25.00   34.0
# 4   25  16.00   70.0
# 5   40  23.00   66.0
# 6   18  13.25   32.0
# 7    a  53.00  128.0

你可能感兴趣的:(python,python,数据挖掘)