多分类绘制ROC,PR曲线

def PR(y_test, pred_y): # pred
    # pred_y 为softmax
    # 转为标签
    preds_t = np.array(pred_y).argmax(axis=1)
    a = np.array(pred_y)
    a = np.around(a, 3)
    thresholds = set(a.reshape(-1))
    thresholds = sorted(thresholds)  # 记录所有得分情况,并去重从小到大排序,寻找各个阈值点
    macro_precis = []
    macro_recall = []
    for threshold in thresholds:


        cls_n = pred_y[0].shape[0]
        true_p = [0 for _ in range(cls_n)]
        true_n = [0 for _ in range(cls_n)]
        false_p = [0 for _ in range(cls_n)]
        false_n = [0 for _ in range(cls_n)]

        for j in range(y_test.shape[0]):
            # cls, pd, [n0, n1, n2] = file.strip().split(" ")       # 分别计算比较各个类别的得分,分开计算,各自为二分类,
            # 最后求平均,得出宏pr

            cls, pd = y_test[j], preds_t[j]  # 最后求平均,得出宏pr
            n = pred_y[j]

            for c in range(len(n)):  ## 循环类别
                # 遍历所有样本,第0类为正样本,,其他类为负样本,
                if float(n[c]) >= float(threshold) and int(cls) == c: # 大于等于阈值,并且真实为正样本,即为真阳,
                    true_p[c] += 1
                elif float(n[c]) >= float(threshold) and int(cls) != c:  # 大于等于阈值,真实为负样本,即为假阳;
                    false_p[c] += 1
                elif float(n[c]) < float(threshold) and int(cls) == c:# 小于阈值,真实为正样本,即为假阴
                    false_n[c] += 1

        precsions = []
        # 计算各类别的精确率,小数防止分母为0
        for k in range(cls_n):
            precsion = (true_p[k] + 0.00000000001) / (true_p[k] + false_p[k] + 0.00000000001)
            precsions.append(precsion)

        # 计算各类别的召回率,小数防止分母为0
        recalls = []
        for k in range(cls_n):
            recall = (true_p[k] + 0.00000000001) / (true_p[k] + false_n[k] + 0.00000000001)
            recalls.append(recall)

        precision1 = sum(precsions) / cls_n
        recall = sum(recalls) / cls_n  # 多分类求得平均精确度和平均召回率,即宏macro_pr
        macro_precis.append(precision1)
        macro_recall.append(recall)

    macro_precis.append(1)
    macro_recall.append(0)
    # print(macro_precis)
    # print(macro_recall)

    x = np.array(macro_recall)
    y = np.array(macro_precis)
    plt.figure()
    plt.xlim([-0.01, 1.01])
    plt.ylim([-0.01, 1.01])
    plt.xlabel('recall')
    plt.ylabel('precision')
    plt.title('PR curve')
    plt.plot(x, y)
    # plt.show()


def ROC(y_test, pred_y):
    # pred 为softmax形式
    a = np.array(pred_y)
    a = np.around(a, 3)
    preds_t = np.array(pred_y).argmax(axis=1)
    thresholds = set(a.reshape(-1))
    thresholds = sorted(thresholds)  # 记录所有得分情况,并去重从小到大排序,寻找各个阈值点
    macro_FPR = []
    macro_TPR = []
    for threshold in thresholds:

        # true_p0 = 0         # 真阳
        # true_n0 = 0         # 真阴
        # false_p0 = 0        # 假阳
        # false_n0 = 0        # 假阴

        cls_n = pred_y[0].shape[0]
        true_p = [0 for _ in range(cls_n)]
        true_n = [0 for _ in range(cls_n)]
        false_p = [0 for _ in range(cls_n)]
        false_n = [0 for _ in range(cls_n)]

        for j in range(y_test.shape[0]):
          # cls, pd, [n0, n1, n2] = file.strip().split(" ")       # 分别计算比较各个类别的得分,分开计算,各自为二分类,
            # 最后求平均,得出宏pr

            cls, pd = y_test[j], preds_t[j]  # 最后求平均,得出宏pr
            n = pred_y[j]

            for c in range(len(n)):  ## 循环类别

                if float(n[c]) >= float(threshold) and int(cls) == c:  # 遍历所有样本,第0类为正样本,其他类为负样本,
                    true_p[c] += 1  # 大于等于阈值,并且真实为正样本,即为真阳,
                elif float(n[c]) >= float(threshold) and int(cls) != c:  # 大于等于阈值,真实为负样本,即为假阳;
                    false_p[c] += 1  # 小于阈值,真实为正样本,即为假阴
                elif float(n[c]) < float(threshold) and int(cls) == c:
                    false_n[c] += 1
                elif float(n[c]) <= float(threshold) and int(cls) != c: # 真实为负样本, 预测也为负样本
                    true_n[c] += 1


        # TPR = tp / (tp + fn)
        # FPR = fp / (fp + tn)

        TPRS = []
        # 计算各类别的精确率,小数防止分母为0
        for k in range(cls_n):
            TPR = (true_p[k] + 0.00000000001) / (true_p[k] + false_n[k] + 0.00000000001)
            TPRS.append(TPR)

        # 计算各类别的召回率,小数防止分母为0
        FPRS = []
        for k in range(cls_n):
            FPR = (false_p[k] + 0.00000000001) / (false_p[k] + true_n[k] + 0.00000000001)
            FPRS.append(FPR)

        TPR1 = sum(TPRS) / cls_n
        FPR1 = sum(FPRS) / cls_n  # 多分类求得平均精确度和平均召回率,即宏macro_pr
        macro_TPR.append(TPR1)
        macro_FPR.append(FPR1)
    x = np.array(macro_FPR)
    y = np.array(macro_TPR)
    plt.figure()
    plt.xlim([-0.01, 1.01])
    plt.ylim([-0.01, 1.01])
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.title('ROC curve')
    plt.plot(x, y)
    plt.show()

 

 

你可能感兴趣的:(分类,python,机器学习)