用tensorflow画ROC曲线


1. 先准备好你的数据文件,csv格式,该文件共3列,第一列是数据id,第2列是预测分数(0到1),第3列是数据的label(0或1)

2. 运行下面的python程序:python tf_roc.py /tmp/predict_label.csv 200 /tmp/tb_roc

3. 其中第2个参数200表示画ROC曲线的精度,越大,曲线越精细。

4. 启动tensorboard: tensorboard --logdir=/tmp/tb_roc

5. 打开浏览器http://127.0.0.1:6006/, 结果如下图

用tensorflow画ROC曲线_第1张图片


6. 关于画图方法。其实tensorboard无法画散点图。因此这里采用SummaryWriter的add_summary中第2个参数global_step来替代ROC中的横坐标fpr。global_step原来是用来表示训练时的迭代次数的,只能取整数,而fpr的取值范围是0到1。因此为了能在tb中显示出来,需要把fpr放大100倍,或是1000倍。

7. 代码如下

import sys
import tensorflow as tf

class tf_roc(object):
    """Compute ROC points from a prediction file and log them for TensorBoard.

    The input is a CSV file whose rows are ``dataid,predict_score,label``.
    ``predict_score`` is expected to lie in [0, 1] and ``label`` to be 0 or 1.
    For each threshold the true-positive rate is written as a scalar summary
    tagged ``"roc"``, with the (scaled) false-positive rate used as the step.
    """

    def __init__(self, predict_label_file, threshold_num, save_dir):
        """Load scores and labels, and open a summary writer.

        Args:
            predict_label_file: path to the ``dataid,predict_score,label`` CSV.
            threshold_num: number of threshold steps; ``calc`` evaluates
                ``threshold_num + 1`` thresholds going from 1 down to 0.
            save_dir: directory handed to the TensorFlow summary writer.

        Raises:
            IndexError / ValueError: if a non-blank row is malformed.
        """
        self.predicts = []  # predicted scores, one per data row
        self.labels = []    # True where the label column equals 1
        # `with` guarantees the file is closed even if a row fails to parse.
        with open(predict_label_file) as fd:
            for line in fd:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                val = line.split(',')
                self.predicts.append(float(val[1]))
                self.labels.append(int(val[2]) == 1)
        self.total = len(self.predicts)  # number of data rows loaded
        self.threshold_num = threshold_num
        self.trues = 0  # total of True labels (filled in by calc)
        self.fpr = []   # false positive rate per threshold
        self.tpr = []   # true positive rate per threshold
        self.ths = []   # thresholds
        self.save_dir = save_dir
        # Keep using tf.train.SummaryWriter where it exists; later
        # TensorFlow 1.x releases expose the writer as tf.summary.FileWriter.
        writer_cls = getattr(tf.train, 'SummaryWriter', None)
        if writer_cls is None:
            writer_cls = tf.summary.FileWriter
        self.writer = writer_cls(self.save_dir)

    def calc(self):
        """Evaluate every threshold, store the results and log each point.

        Fills ``self.fpr``, ``self.tpr`` and ``self.ths`` (one entry per
        threshold), prints the three lists, and writes one summary per point.

        Raises:
            ValueError: if ``threshold_num`` is smaller than 1.
        """
        if self.threshold_num < 1:
            raise ValueError("threshold_num must be >= 1")
        # Reset accumulated state so calling calc() twice does not
        # double-count positives or append duplicate points.
        self.trues = sum(1 for label in self.labels if label)
        self.fpr = []
        self.tpr = []
        self.ths = []
        steps = float(self.threshold_num)
        for t in range(self.threshold_num + 1):
            # (n - t) / n hits exactly 1.0 and 0.0 at the two ends, whereas
            # 1 - (1/n) * t can leave the last threshold slightly above 0.
            th = (self.threshold_num - t) / steps
            tn, tp, fp, fpr, tpr = self._calc_once(th)
            self.fpr.append(fpr)
            self.tpr.append(tpr)
            self.ths.append(th)
            self._save(fpr, tpr)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(self.fpr)
        print(self.tpr)
        print(self.ths)

    def _save(self, fpr, tpr):
        """Write one ROC point: ``tpr`` as the value, scaled ``fpr`` as step."""
        summt = tf.Summary()
        summt.value.add(tag="roc", simple_value=tpr)
        # The step is an integer, so fpr in [0, 1] is scaled by 100 and
        # truncated; points whose fpr differs by less than 0.01 may share a step.
        self.writer.add_summary(summt, int(fpr * 100))
        self.writer.flush()

    def _calc_once(self, t):
        """Count outcomes at threshold ``t`` and derive the two rates.

        A row is predicted positive when its score is ``>= t``.

        Args:
            t: decision threshold.

        Returns:
            Tuple ``(tn, tp, fp, fpr, tpr)`` where ``fpr = fp / (fp + tn)``
            and ``tpr = tp / (tp + fn)``. A rate whose denominator is zero
            (no negatives / no positives in the data) is reported as 0.0.
        """
        fp = 0
        tp = 0
        tn = 0
        fn = 0
        for score, label in zip(self.predicts, self.labels):
            predicted_positive = score >= t
            if label:
                if predicted_positive:
                    tp += 1
                else:
                    fn += 1
            elif predicted_positive:
                fp += 1
            else:
                tn += 1
        negatives = fp + tn
        positives = tp + fn
        # ROC's x-axis is FP / (FP + TN). The previous fp / (fp + tp) is the
        # false discovery rate and divided by zero whenever no score reached t.
        fpr = fp / float(negatives) if negatives else 0.0
        tpr = tp / float(positives) if positives else 0.0
        return tn, tp, fp, fpr, tpr

        
if __name__ == '__main__':
    # Usage: python tf_roc.py <predict_label_csv> <threshold_num> <save_dir>
    # Extra trailing arguments are ignored, as before.
    if len(sys.argv) < 4:
        # Fail with a readable message instead of a tuple-unpacking ValueError.
        sys.stderr.write(
            "usage: python tf_roc.py <predict_label_csv> <threshold_num> <save_dir>\n")
        sys.exit(1)
    predict_label_file, threshold_num, save_dir = sys.argv[1:4]
    # threshold_num arrives as a string and must be an integer count.
    roc = tf_roc(predict_label_file, int(threshold_num), save_dir)
    roc.calc()


你可能感兴趣的:(深度学习)