Python:以字典形式分组循环计算实验组(A组)与对照组(B组)的相关系数矩阵

import pandas as pd
import numpy as np

# Label in the 'ab_group' column that marks a control row; every other label
# is treated as a treatment row.
CONTROL_LABEL = "对照组"
# Columns whose values are correlated between a treated row and a control row.
FEATURE_COLS = ['a1', 'a2', 'a3']
# Columns that define a stratum; matching only happens inside one stratum.
# NOTE(review): the original grouped by ['ab_group', 'id', 'x1', 'x2'].
# Grouping by 'ab_group' puts treated and control rows in different groups,
# so no group could ever contain both and nothing was ever matched. Grouping
# by x1/x2 only is the presumed intent - confirm against the data.
STRATA_COLS = ['x1', 'x2']
# At most this many control rows per treated row are considered per stratum.
CONTROLS_PER_TREATED = 10
# Leading columns of the result; 'id_y' is the id of the matched control row.
RESULT_COLS = ['ab_group', 'id', 'x1', 'x2', 'a1', 'a2', 'a3', 'id_y']


def match_stratum(df_tp):
    """Match each treated row of one stratum to its best-correlated control.

    The Pearson correlation is computed row-against-row over FEATURE_COLS.
    Only the first ``CONTROLS_PER_TREATED * n_treated`` control rows of the
    stratum are candidates.

    Args:
        df_tp: DataFrame holding the rows of a single stratum. Must contain
            'ab_group', 'id' and the FEATURE_COLS columns.

    Returns:
        A copy of the treated rows with an extra 'id_y' column holding the
        'id' of the control row with the highest correlation, or ``None``
        when the stratum has no treated rows, no control rows, or no defined
        correlation at all. Treated rows whose correlation with every
        candidate is NaN (e.g. constant features) are left out.
    """
    is_control = df_tp['ab_group'].isin([CONTROL_LABEL])
    df_treat = df_tp[~is_control]
    n_treat = len(df_treat)
    df_control = df_tp[is_control].iloc[0:CONTROLS_PER_TREATED * n_treat]
    if df_treat.empty or df_control.empty:
        return None

    # Stack treated rows on top of control rows with positional labels so the
    # correlation matrix can be sliced by position even if the original index
    # contains duplicates.
    stacked = pd.concat(
        [df_treat[FEATURE_COLS], df_control[FEATURE_COLS]],
        axis=0,
        ignore_index=True,
    )
    corr = stacked.T.corr()
    # Rows: treated, columns: control candidates.
    cross = corr.iloc[:n_treat, n_treat:].to_numpy(dtype=float)

    # Skip treated rows that have no defined correlation with any candidate;
    # nanargmax would raise on an all-NaN row.
    has_match = ~np.isnan(cross).all(axis=1)
    if not has_match.any():
        return None
    best_pos = np.nanargmax(cross[has_match], axis=1)

    # Copy before adding a column so the caller's frame is never written to.
    matched = df_treat[has_match].copy()
    matched['id_y'] = df_control['id'].to_numpy()[best_pos]
    return matched


def match_all(df):
    """Match treated rows to control rows within every x1/x2 stratum.

    Args:
        df: DataFrame with columns
            ['ab_group', 'id', 'x1', 'x2', 'a1', 'a2', 'a3'].

    Returns:
        DataFrame with one row per matched treated row. RESULT_COLS come
        first, followed by any additional columns of ``df``. The frame is
        empty (with RESULT_COLS as columns) when nothing could be matched.
    """
    matches = []
    for _, df_tp in df.groupby(STRATA_COLS):
        matched = match_stratum(df_tp)
        if matched is not None:
            matches.append(matched)
    if not matches:
        return pd.DataFrame(columns=RESULT_COLS)

    # Concatenate once instead of growing a frame inside the loop.
    result = pd.concat(matches, axis=0, ignore_index=True)
    extra_cols = [col for col in result.columns if col not in RESULT_COLS]
    return result.reindex(columns=RESULT_COLS + extra_cols)


if __name__ == "__main__":
    # Raw string keeps the Windows backslash literal.
    path = r'D:\下载.xlsx'
    df = pd.read_excel(path)
    # For every treated object, the control object whose a1/a2/a3 values
    # correlate most strongly with it.
    df_emp = match_all(df)

你可能感兴趣的:(矩阵,python)