# AUC evaluation
#
# An approximate, rank-based way to calculate AUC (area under the ROC curve):

 

import fire
import numpy as np
import pandas as pd
try:
    # Current home of ``comb``; the ``scipy.misc`` alias was deprecated and
    # later removed, so importing it first breaks on modern SciPy.
    from scipy.special import comb
except ImportError:
    from scipy.misc import comb
from sklearn.metrics import roc_auc_score


def auc_1(y_true, y_pred):
    """Compute ROC AUC from the rank sum of the positive samples.

    Uses the Mann-Whitney U identity::

        AUC = (sum of ranks of positives - n_pos * (n_pos + 1) / 2)
              / (n_pos * n_neg)

    where ranks are assigned in ascending score order starting at 1.  Tied
    scores share their average rank, so a positive/negative pair with equal
    scores contributes 0.5 instead of an arbitrary 0 or 1 that depends on
    the sort order.

    Args:
        y_true: Binary labels; 1 marks a positive sample, 0 a negative one.
        y_pred: Predicted scores, same length as ``y_true``; a higher score
            means "more likely positive".

    Returns:
        The AUC as a float in ``[0, 1]``.

    Raises:
        ValueError: If ``y_true`` holds anything other than 0/1, or if one
            of the two classes is absent (AUC is undefined in that case).
    """
    df = pd.DataFrame({
        'y_true': y_true,
        'y_pred': y_pred
    })

    is_pos = df['y_true'] == 1
    n_pos = int(is_pos.sum())
    n_neg = int((df['y_true'] == 0).sum())

    if n_pos + n_neg != len(df):
        raise ValueError('y_true must contain only the labels 0 and 1')
    if n_pos == 0 or n_neg == 0:
        raise ValueError(
            'AUC is undefined unless y_true contains both classes 0 and 1'
        )

    # Ascending average ranks: lowest score -> 1, highest -> len(df),
    # ties -> mean of the positions they occupy.
    ranks = df['y_pred'].rank(method='average')
    pos_rank_sum = ranks[is_pos].sum()

    # n_pos * (n_pos + 1) / 2 is the smallest possible rank sum for the
    # positives, i.e. comb(n_pos + 1, 2), computed without SciPy.
    min_rank_sum = n_pos * (n_pos + 1) / 2.0

    return float(pos_rank_sum - min_rank_sum) / (n_pos * n_neg)


def work(file_name):
    """Read labelled scores from ``file_name`` and print two AUC values.

    The file is parsed as space-separated text with no header row: the first
    column is read as the integer label ``y_true`` and the second as the
    float score ``y_pred``.  ``auc1`` is the value returned by ``auc_1`` and
    ``auc2`` the value returned by scikit-learn's ``roc_auc_score``, printed
    one after the other so they can be compared.

    Args:
        file_name: Path of the input file (anything ``pandas.read_csv``
            accepts as its first argument).
    """
    df = pd.read_csv(
        file_name,
        sep=' ',
        index_col=None,
        header=None,
        names=['y_true', 'y_pred'],
        # Builtin types rather than the ``np.int`` / ``np.float`` aliases:
        # those were plain aliases of the builtins and no longer exist in
        # current NumPy releases.
        dtype={
            'y_true': int,
            'y_pred': float
        }
    )

    y_true, y_pred = df['y_true'], df['y_pred']

    auc1 = auc_1(y_true, y_pred)
    print('auc1 =', auc1, flush=True)

    auc2 = roc_auc_score(y_true, y_pred)
    print('auc2 =', auc2, flush=True)


if __name__ == '__main__':
    # Expose ``work`` as the command-line entry point; Fire maps the CLI
    # argument onto its ``file_name`` parameter.
    fire.Fire(component=work)

 

# You may also be interested in: (python)