# Input data is in CSV format; the script can plot confusion matrices, and the trained model can be saved.
import sklearn.utils
from sklearn.svm import SVC
import pandas as pd
from sklearn.utils.multiclass import unique_labels
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import joblib
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """Draw the confusion matrix of ``y_pred`` against ``y_true`` and show it.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        classes: Display names for the classes. The label values found in
            ``y_true``/``y_pred`` are used as indices into this sequence, so
            labels must be valid integer positions in ``classes``.
        normalize: When true, each row is divided by its total so cells hold
            per-true-class fractions instead of raw counts.
        title: Figure title. A default that depends on ``normalize`` is used
            when this is empty or ``None``.
        cmap: Matplotlib colormap for the heat map.

    Returns:
        The Matplotlib ``Axes`` the matrix was drawn on. Note that
        ``plt.show()`` is called before returning.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Only use the labels that appear in the data. np.asarray lets callers
    # pass a plain list/tuple of names as well as an ndarray.
    classes = np.asarray(classes)[unique_labels(y_true, y_pred)]

    if normalize:
        # A label that is predicted but never occurs in y_true has a row total
        # of zero; leave such rows at 0.0 instead of producing NaN cells (which
        # would also turn the colour threshold below into NaN).
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')
    # Pin the y-limits explicitly (inverted, so row 0 stays at the top) so the
    # first and last rows are drawn at full height.
    ax.set_ylim(len(classes) - 0.5, -0.5)

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations. Cells darker than
    # half the maximum get white text so the number stays readable.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    plt.show()
    return ax
# ---------------------------------------------------------------------------
# Experiment 1: fit a linear SVM and an RBF SVM on the pre-split CSV data,
# then print the test accuracy and draw a confusion matrix for each model.
# ---------------------------------------------------------------------------

# Read the four CSV files up front: features and labels for each split.
x_1 = pd.read_csv('D:/桌面文件夹/重采样后/800维信号数据(csv)/train/train_data.csv')
y_1 = pd.read_csv('D:/桌面文件夹/重采样后/800维信号数据(csv)/train/train_label.csv')
x_2 = pd.read_csv('D:/桌面文件夹/重采样后/800维信号数据(csv)/test/test_data.csv')
y_2 = pd.read_csv('D:/桌面文件夹/重采样后/800维信号数据(csv)/test/test_label.csv')

# Features: the first 23 columns of each data file, as plain arrays.
x_train, x_test = x_1.iloc[:, :23], x_2.iloc[:, :23]
x_train_data, x_test_data = x_train.values, x_test.values

# Labels: the first column of each label file, flattened to 1-D vectors.
y_train_data = y_1.iloc[:, :1].values
y_test_data = y_2.iloc[:, :1].values
y_train = y_train_data.ravel()
y_test = y_test_data.ravel()

# Shuffle samples and labels together; the fixed seed makes the order repeatable.
x_train_data, y_train = sklearn.utils.shuffle(x_train_data, y_train, random_state=1)
x_test_data, y_test = sklearn.utils.shuffle(x_test_data, y_test, random_state=1)

# Two classifiers, both with probability estimates enabled.
clf1 = SVC(kernel='linear', probability=True)  # linear SVM (no kernel function)
clf2 = SVC(kernel='rbf', C=0.1, gamma=0.1, probability=True)  # Gaussian (RBF) kernel

# fit() returns the estimator itself, so svm_1 / svm_2 alias clf1 / clf2.
# A fitted model can be persisted with joblib.dump(model, path) if needed.
clf1.fit(x_train_data, y_train)
svm_1 = clf1
clf2.fit(x_train_data, y_train)
svm_2 = clf2

# Accuracy on the test split, followed by the raw predictions.
clf1_output = clf1.score(x_test_data, y_test)
clf2_output = clf2.score(x_test_data, y_test)
clf1_predict = clf1.predict(x_test_data)
clf2_predict = clf2.predict(x_test_data)
print(clf1_predict)

# Tick labels '0'..'9' for the confusion-matrix plots.
class_names = np.array([str(digit) for digit in range(10)])
for predicted in (clf1_predict, clf2_predict):
    plot_confusion_matrix(y_test, predicted, classes=class_names, normalize=False)

print('线性SVM的精度为{}:'.format(clf1_output))
print('高斯核函数SVM的精度为{}:'.format(clf2_output))
import sklearn.utils
from sklearn.svm import SVC
import pandas as pd
import numpy as np
# ---------------------------------------------------------------------------
# Experiment 2: brute-force grid search over C and gamma for an RBF-kernel SVM.
#
# NOTE(review): every candidate is scored on the test split, so the best score
# printed below is selected on the same data it is measured on. Consider a
# separate validation split or cross-validation before reporting it.
# ---------------------------------------------------------------------------

# Raw strings keep the backslashes literal. In a plain string literal "\桌" and
# "\重" are invalid escape sequences, which newer Python versions warn about.
x_1 = pd.read_csv(r"D:\桌面文件夹\重采样后/after_train/train_data.csv")
x_train = x_1.iloc[:, 0:23]
x_train_data = x_train.values
y_1 = pd.read_csv(r'D:\桌面文件夹\重采样后/after_train/train_label.csv')
y_train = y_1.iloc[:, 0:1]
y_train_data = y_train.values
x_2 = pd.read_csv(r'D:\桌面文件夹\重采样后/after_test/test_data.csv')
x_test = x_2.iloc[:, 0:23]
x_test_data = x_test.values
y_2 = pd.read_csv(r'D:\桌面文件夹\重采样后/after_test/test_label.csv')
y_test = y_2.iloc[:, 0:1]
y_test_data = y_test.values

# Flatten the single-column label frames into 1-D vectors.
y_train = np.ravel(y_train)
y_test = np.ravel(y_test)

# Shuffle samples and labels together; the fixed seed makes the order repeatable.
x_train_data, y_train = sklearn.utils.shuffle(x_train_data, y_train, random_state=1)
x_test_data, y_test = sklearn.utils.shuffle(x_test_data, y_test, random_state=1)

# Candidate values for C and gamma: 0.1 up to (but excluding) 2 in 0.01 steps.
c = list(np.arange(0.1, 2, 0.01))
g = list(np.arange(0.1, 2, 0.01))

# result maps an accuracy to the [C, gamma] pair that produced it. When several
# pairs reach the same accuracy, the pair evaluated last is the one kept.
result = {}
score_list = []

# x is the 1-based run number shown in the progress message.
grid = ((i, j) for i in c for j in g)
for x, (i, j) in enumerate(grid, start=1):
    clf2 = SVC(kernel='rbf', C=i, gamma=j)
    clf2.fit(x_train_data, y_train)
    clf2_output = clf2.score(x_test_data, y_test)
    result[clf2_output] = [i, j]
    score_list.append(clf2_output)
    print("第{}次,准确率为{}".format(x, clf2_output))

# Report the best accuracy, the parameters that achieved it, and the grid size.
max_key = max(score_list)
print('最高得分{}'.format(max_key))
print('参数值{}'.format(result[max_key]))
print('计算次数{}'.format(len(c)*len(g)))