SVM 二分类与模型评估参数

code:

# Re-wrap stdout's underlying byte buffer with a UTF-8 text layer so that
# the Chinese text printed by this script is emitted correctly.
import io  
import sys  
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding = 'utf-8')  
  
#Accuracy、AUC、Recall、Precision、F1-Score  
from sklearn.datasets import make_classification  
from sklearn import svm  
from sklearn import metrics  
import pandas as pd   
import numpy as np  

def Get_Accuracy(y_true, y_pred):
    """Return the accuracy of a set of predictions.

    Accuracy is the number of correctly classified samples divided by the
    total number of samples.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the classifier.

    Returns:
        The value computed by ``metrics.accuracy_score``.
    """
    return metrics.accuracy_score(y_true, y_pred)

def Get_Precision_score(y_true, y_pred):
    """Return the precision of a set of predictions.

    Precision is the share of samples predicted as positive (TP + FP) that
    are actually positive (TP).

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the classifier.

    Returns:
        The value computed by ``metrics.precision_score``.
    """
    return metrics.precision_score(y_true, y_pred)

def Get_Recall(y_true, y_pred):
    """Return the recall of a set of predictions.

    Recall is the share of truly positive samples (TP + FN) that were
    correctly predicted as positive (TP).

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the classifier.

    Returns:
        The value computed by ``metrics.recall_score``.
    """
    return metrics.recall_score(y_true, y_pred)

def Get_f1_score(y_true, y_pred):
    """Return the F1-score of a set of predictions.

    The F1-score is the harmonic mean of precision and recall.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the classifier.

    Returns:
        The value computed by ``metrics.f1_score``.
    """
    return metrics.f1_score(y_true, y_pred)

def Get_Auc_value(y_true, y_proba):
    """Return the area under the ROC curve (AUC).

    Args:
        y_true: Ground-truth labels.
        y_proba: Scores used to rank the samples, e.g. the predicted
            probability of the positive class for each sample.

    Returns:
        The value computed by ``metrics.roc_auc_score``.
    """
    return metrics.roc_auc_score(y_true, y_proba)

def main():
    """Train an SVM on synthetic binary data and print evaluation metrics.

    Generates 10000 samples with 500 features each, fits ``svm.SVC`` on the
    first 9000 samples and evaluates it on the last 1000, printing the
    predicted class probabilities followed by the classifier score,
    accuracy, precision, recall, F1-score and AUC.
    """
    # Synthetic two-class data set: 10000 samples, 500 features per sample.
    features, labels = make_classification(
        n_samples=10000,
        n_features=500,
        n_classes=2,
        random_state=1,
        weights=[0.15],
        flip_y=0.1,
    )

    # Train on everything except the trailing 1000 samples, which are held
    # out as the test set.
    holdout = 1000
    train_x, test_x = features[:-holdout], features[-holdout:]
    train_y, test_y = labels[:-holdout], labels[-holdout:]

    # probability=True enables predict_proba, which the AUC computation needs.
    model = svm.SVC(probability=True)
    model.fit(train_x, train_y)

    # Hard class predictions for the held-out samples.
    predicted_labels = model.predict(test_x)

    # Per-class probabilities for the held-out samples.
    class_probabilities = model.predict_proba(test_x)
    print(class_probabilities)

    # Column 1 is the predicted probability of the positive class.
    positive_proba = class_probabilities[:, 1]

    print("SVM 模型打分: Score = %f" % model.score(test_x, test_y))
    print("SVM Accuracy_Score = %f" % Get_Accuracy(test_y, predicted_labels))
    print("SVM Precision = %f" % Get_Precision_score(test_y, predicted_labels))
    print("SVM Recall = %f" % Get_Recall(test_y, predicted_labels))
    print("SVM F1-Score  = %f" % Get_f1_score(test_y, predicted_labels))
    print("SVM AUC value: AUC = %f" % Get_Auc_value(test_y, positive_proba))

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

运行截图:

SVM 二分类与模型评估参数_第1张图片

你可能感兴趣的:(机器学习)