机器学习算法完整版见fenghaootong-github
MNIST for SVM
导入模块
from sklearn import svm
import pandas as pd
import time
导入数据
# Load the training CSV; the 'label' column holds the targets and every other
# column is used as a feature.
df = pd.read_csv('../DATA/train.csv')
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the replacement.
# Selecting with a list keeps the labels two-dimensional (n_rows, 1), exactly as
# as_matrix(columns=['label']) did, so the later .ravel() calls still apply.
labels = df[['label']].to_numpy()
# Feature matrix: all columns except 'label'.
dataset = df.drop('label', axis=1).to_numpy()
# NOTE(review): 28.0*28.0 = 784 is presumably the pixel count of a 28x28 image,
# so this scales by the feature count rather than by a pixel-value maximum --
# confirm this is the intended normalization.
dataset = dataset / (28.0*28.0)
# Preview of the training-split size: 75% of the label count, truncated to an int.
int(len(labels.ravel()) * 0.75)
31500
数据分为训练和验证集
# Sequential hold-out split: the first 75% of the rows are used for training,
# the remaining rows for validation (no shuffling is performed here).
train_len = int(labels.size * 0.75)
train_dataset, valid_dataset = dataset[:train_len], dataset[train_len:]
train_labels, valid_labels = labels[:train_len], labels[train_len:]
# Show the training labels flattened to one dimension.
train_labels.ravel()
array([1, 0, 1, ..., 2, 9, 5])
模型训练
# Fit an RBF-kernel SVC on the training split and report the wall-clock fit time.
t0 = time.time()
clf = svm.SVC(kernel='rbf', C=10000.0)
# SVC.fit is given the labels as a 1-D array, hence the ravel().
clf.fit(train_dataset, train_labels.ravel())
elapsed = round(time.time() - t0, 3)
print("train-time:", elapsed, "s")
train-time: 115.624 s
模型预测
# Predict the held-out rows and count how many predictions match the labels.
predictions = [int(a) for a in clf.predict(valid_dataset)]
expected = valid_labels.ravel()
# Count matches under a dedicated name; the original accumulated into a
# module-level variable called `sum`, which shadowed the builtin for the rest
# of the script.
n_correct = len([1 for a, y in zip(predictions, expected) if a == y])
print("%s of %s test values correct.\ntest accuracy: %f" % (n_correct, len(expected), n_correct / len(expected)))
950 of 1050 test values correct.
test accuracy: 0.904762
换参比较
def svm_baseline(kernel):
    """Train an SVC with the given kernel and print its fit time and accuracy.

    Fits ``svm.SVC(C=10000.0, kernel=kernel)`` on the module-level
    ``train_dataset`` / ``train_labels``, predicts ``valid_dataset`` and
    prints the number of predictions that equal ``valid_labels`` together
    with the resulting accuracy.

    Args:
        kernel: Kernel specifier forwarded unchanged to ``svm.SVC``.

    Returns:
        None. Results are reported via ``print`` only.
    """
    t0 = time.time()
    clf = svm.SVC(C=10000.0, kernel=kernel)
    clf.fit(train_dataset, train_labels.ravel())
    print("train-time:", round(time.time() - t0, 3), "s")

    predictions = [int(a) for a in clf.predict(valid_dataset)]
    expected = valid_labels.ravel()
    # Explicit counter instead of a local named `sum`: avoids shadowing the
    # builtin, and avoids calling it in case the script has rebound the
    # module-level name `sum` to an integer.
    correct = 0
    for a, y in zip(predictions, expected):
        if a == y:
            correct += 1
    print("%s of %s test values correct.\ntest accuracy: %f" % (correct, len(expected), correct / len(expected)))
# Run the same train/evaluate routine once per kernel so the printed timings
# and accuracies can be compared.
kernels = ['rbf', 'linear']
for kernel in kernels:
    svm_baseline(kernel)
train-time: 2.6730403900146484 s
951 of 1050 test values correct.
test accuracy: 0.905714
train-time: 2.0404305458068848 s
950 of 1050 test values correct.
test accuracy: 0.904762
SVM参数