python利用numpy、tensorflow计算ks

KS 的理解:按预测分数从高到低依次取不同的阈值,分别计算好样本与坏样本的累计召回率(即两类样本各自的累计概率分布,对应 TPR 与 FPR),两条曲线差值的最大值就是 KS。

1、numpy

def calc_ks_np(y_true, y_pred):
    """Compute the KS statistic: the largest gap between TPR and FPR.

    Samples are ranked by descending score; at every distinct score value the
    cumulative share of positives (TPR) and of negatives (FPR) captured so far
    is measured, and the maximum of ``TPR - FPR`` is returned.

    Args:
        y_true: Array-like of binary labels (1 counts as positive, 0 as
            negative). Any shape; it is flattened to 1-D.
        y_pred: Array-like of scores with the same number of elements as
            ``y_true``. Any shape; it is flattened to 1-D.

    Returns:
        The maximum of ``tpr - fpr`` over all thresholds, including the
        (0, 0) origin, so the result is never negative. Returns 0.0 when the
        inputs are empty.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` differ in element count.
    """
    # asarray lets callers pass plain lists as well as ndarrays.
    y_true = np.asarray(y_true).reshape(-1)
    y_pred = np.asarray(y_pred).reshape(-1)

    if y_true.size != y_pred.size:
        raise ValueError(
            "y_true and y_pred must have the same number of elements, "
            f"got {y_true.size} and {y_pred.size}"
        )
    if y_pred.size == 0:
        # No samples: only the (0, 0) origin exists, so the gap is zero.
        return 0.0

    # Rank samples from highest to lowest score.
    order = np.argsort(y_pred, kind="mergesort")[::-1]
    y_pred = y_pred[order]
    y_true = y_true[order]

    # A threshold sits at the last sample of each run of equal scores, so
    # tied scores are always counted together; the final sample is always a
    # threshold.
    distinct_value_indices = np.where(np.diff(y_pred))[0]
    threshold_idxs = np.r_[distinct_value_indices, y_pred.size - 1]

    # Cumulative positives / negatives at each threshold, with the origin
    # prepended so that "nothing selected yet" is part of the curve.
    tps = np.r_[0, np.cumsum(y_true)[threshold_idxs]]
    fps = np.r_[0, np.cumsum(1 - y_true)[threshold_idxs]]

    # The tiny epsilon keeps the division defined when a class is absent.
    tpr = tps / (tps[-1] + 1e-32)
    fpr = fps / (fps[-1] + 1e-32)
    return np.max(tpr - fpr)

 

2、tensorflow

def calc_ks_tf(y_true, y_pred):
    """Compute the KS statistic (max of TPR - FPR) with TensorFlow ops.

    Samples are ranked by descending score; at every distinct score value the
    cumulative share of positives (TPR) and of negatives (FPR) is measured,
    and the maximum of ``TPR - FPR`` is returned.

    Args:
        y_true: Tensor of binary labels (1 counts as positive, 0 as
            negative). Any shape; it is flattened to 1-D. Its dtype must
            support ``tf.cumsum`` and ``1 - y_true``.
        y_pred: Tensor of scores with the same number of elements as
            ``y_true``. Any shape; it is flattened to 1-D.

    Returns:
        A scalar float32 tensor holding the maximum of ``tpr - fpr`` over all
        thresholds, including the (0, 0) origin.

    Note:
        The inputs must contain at least one sample: the last threshold index
        is ``size - 1``, which is not a valid gather index for empty input.
    """
    y_true = tf.reshape(y_true, shape=[-1])
    y_pred = tf.reshape(y_pred, shape=[-1])

    # Index of the last sample. Taken from the flattened size so it is
    # correct whatever the original shape of the inputs was.
    last_idx = tf.size(y_pred, out_type=tf.int64) - 1

    # Rank samples from highest to lowest score.
    order = tf.argsort(y_pred, direction='DESCENDING', axis=0)
    y_pred = tf.gather(y_pred, order)
    y_true = tf.gather(y_true, order)

    # A threshold sits at the last sample of each run of equal scores, so
    # tied scores are counted together; the final sample is always a
    # threshold. tf.where on a boolean mask yields int64 indices.
    score_changes = tf.not_equal(y_pred[1:], y_pred[:-1])
    distinct_value_indices = tf.reshape(tf.where(score_changes), shape=[-1])
    threshold_idxs = tf.concat(
        [distinct_value_indices, tf.reshape(last_idx, shape=[1])], axis=0
    )

    # Cumulative positives / negatives captured at each threshold.
    tps = tf.cast(tf.gather(tf.cumsum(y_true), threshold_idxs), tf.float32)
    fps = tf.cast(tf.gather(tf.cumsum(1 - y_true), threshold_idxs), tf.float32)

    # Prepend the (0, 0) origin so "nothing selected yet" is part of the
    # curve and the result cannot go negative when a class is absent.
    origin = tf.zeros([1], dtype=tf.float32)
    tps = tf.concat([origin, tps], axis=0)
    fps = tf.concat([origin, fps], axis=0)

    # The tiny epsilon keeps the division defined when a class is absent.
    tpr = tps / (tps[-1] + tf.constant(1e-32))
    fpr = fps / (fps[-1] + tf.constant(1e-32))
    return tf.reduce_max(tpr - fpr)

你可能感兴趣的:(python利用numpy、tensorflow计算ks)