# A rank-based way to calculate AUC (area under the ROC curve):
import fire
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score

try:
    # `comb` lives in scipy.special on current SciPy releases.
    from scipy.special import comb
except ImportError:
    # Fallback for very old SciPy versions that only expose scipy.misc.comb.
    from scipy.misc import comb
def auc_1(y_true, y_pred):
    """Compute ROC AUC from the rank sum of the positive samples.

    Uses the Mann-Whitney U identity::

        AUC = (R_pos - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg)

    where ``R_pos`` is the sum of the ascending ranks (1 = lowest score) of
    the samples labelled 1. Tied scores share their average rank, so a
    positive/negative pair with equal scores counts as half a correct pair
    instead of being decided by the sort order.

    Args:
        y_true: Labels, one per sample; 1 marks a positive, 0 a negative.
        y_pred: Predicted scores, one per label; higher means more positive.

    Returns:
        The AUC as a float.

    Raises:
        ValueError: If no sample is labelled 1 or no sample is labelled 0,
            since the AUC is undefined in that case.
    """
    df = pd.DataFrame.from_dict({
        'y_true': y_true,
        'y_pred': y_pred,
    })

    # Ascending ranks: the highest score gets the largest rank. Averaging
    # ranks within ties makes the result independent of input ordering.
    ranks = df['y_pred'].rank(method='average')

    is_pos = df['y_true'] == 1
    n_pos = int(is_pos.sum())
    n_neg = int((df['y_true'] == 0).sum())
    if n_pos == 0 or n_neg == 0:
        raise ValueError(
            'AUC is undefined: y_true must contain both 0 and 1 labels'
        )

    pos_rank = float(ranks[is_pos].sum())
    # n_pos * (n_pos + 1) / 2 is the smallest possible positive rank sum,
    # i.e. C(n_pos + 1, 2); subtracting it leaves the count of correctly
    # ordered (positive, negative) pairs.
    return (pos_rank - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg)
def work(file_name):
    """Read labelled scores from a file and print two AUC values.

    The file is parsed as space-separated text with no header row: the
    first column is read as the integer label ``y_true`` and the second as
    the float score ``y_pred``. The value from ``auc_1`` is printed as
    ``auc1`` and the value from ``sklearn.metrics.roc_auc_score`` as
    ``auc2``.

    Args:
        file_name: Input passed straight to ``pandas.read_csv``.
    """
    df = pd.read_csv(
        file_name,
        sep=' ',
        index_col=None,
        header=None,
        names=['y_true', 'y_pred'],
        # Use the builtin types: the ``np.int`` / ``np.float`` aliases were
        # removed in NumPy 1.24 and were plain aliases of these anyway.
        dtype={
            'y_true': int,
            'y_pred': float,
        },
    )
    y_true, y_pred = df['y_true'], df['y_pred']

    auc1 = auc_1(y_true, y_pred)
    print('auc1 =', auc1, flush=True)

    auc2 = roc_auc_score(y_true, y_pred)
    print('auc2 =', auc2, flush=True)
# Script entry point: expose `work` as a command-line interface via Fire.
if '__main__' == __name__:
    fire.Fire(
        work
    )