机器学习-寻找最优超参数

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from sklearn.svm import SVC

try:
    # scikit-learn >= 0.18: the model-selection helpers live here.
    from sklearn.model_selection import GridSearchCV, train_test_split
except ImportError:
    # Older scikit-learn releases (modules removed in 0.20).
    from sklearn.cross_validation import train_test_split
    from sklearn.grid_search import GridSearchCV

from loadFile import load_data
from plot_classifier import plot_classifier

# Script: grid-search SVC hyperparameters with 5-fold cross-validation, once
# per scoring metric, then report how the selected model performs on a
# held-out test split.
input_file = 'data_multivar_imbalance.txt'
X, y = load_data(input_file)

# Keep 20% of the samples aside for the final report; the fixed seed makes
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=33)

# Candidate hyperparameters to evaluate through cross-validation. Each dict is
# an independent sub-grid, so only the parameters listed together are combined.
parameter_grid = [
    {'kernel': ['linear'], 'C': [1, 10, 50, 600]},
    {'kernel': ['poly'], 'degree': [2, 3]},
    {'kernel': ['rbf'], 'gamma': [0.01, 0.001], 'C': [1, 10, 50, 600]},
]

# Scoring metrics to optimise for.
metrics = ['precision', 'recall_weighted']

# Run one full grid search per metric.
for metric in metrics:
    print('Searching optimal hyperparameters for', metric)
    classifier = GridSearchCV(SVC(C=1), parameter_grid, cv=5, scoring=metric)
    classifier.fit(X_train, y_train)

    print("\nScores across the parameter grid:")
    if hasattr(classifier, 'cv_results_'):
        # Current API: `grid_scores_` was removed from GridSearchCV, and the
        # per-candidate mean scores are exposed through `cv_results_`.
        results = classifier.cv_results_
        scored_params = zip(results['params'], results['mean_test_score'])
    else:
        # Legacy estimator from sklearn.grid_search: only `grid_scores_`
        # is available, as (params, mean score, per-fold scores) tuples.
        scored_params = (
            (params, avg_score)
            for params, avg_score, _ in classifier.grid_scores_
        )
    for params, avg_score in scored_params:
        print(params, '-->', round(avg_score, 3))

    print("\nHighest scoring parameter set:", classifier.best_params_)

    # Evaluate the search object's predictions on the held-out test data.
    y_true, y_pred = y_test, classifier.predict(X_test)
    print("\nFull performance report:\n")
    print(classification_report(y_true, y_pred))

你可能感兴趣的:(Python机器学习及实践)