# Multi-class SVM classification with sklearn
from data_test_kNN import file2matrix
from sklearn import svm
def get_trian_testdata(filename):
    """Convert the data in a file into data usable for training or testing.

    Calls ``file2matrix(filename)``, which must yield exactly two values
    (unpacked here as the data matrix and its class labels), and returns
    them as a 2-tuple in that order.
    """
    samples, labels = file2matrix(filename)
    return (samples, labels)
def train_svm(trainMat, trainClassLabel, decision_function_shape, kernel='rbf'):
    """Train an SVM model and return the fitted classifier.

    Args:
        trainMat: training samples, handed to ``SVC.fit`` as the data.
        trainClassLabel: class labels for ``trainMat``, handed to
            ``SVC.fit`` as the targets.
        decision_function_shape: multi-class strategy forwarded to
            ``svm.SVC``; the script in this file uses 'ovo' (one-vs-one)
            and 'ovr' (one-vs-rest).
        kernel: kernel type forwarded to ``svm.SVC``. It must be one of
            'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or a
            callable. Defaults to 'rbf', which is also what SVC uses when
            no kernel is given, so existing three-argument calls behave
            as before. If a callable is given it is used to pre-compute
            the kernel matrix from data matrices; that matrix should be
            an array of shape ``(n_samples, n_samples)``.

    Returns:
        The ``svm.SVC`` instance after ``fit`` has been called on it.
    """
    # Pass the strategy by keyword: handed over positionally it would be
    # taken as SVC's first parameter (C) -- or rejected outright on
    # releases where the constructor is keyword-only -- and never reach
    # decision_function_shape.
    clf = svm.SVC(decision_function_shape=decision_function_shape,
                  kernel=kernel)
    clf.fit(trainMat, trainClassLabel)
    return clf
'''识别率'''
def testAcc(clf,testMat,testClassLabel):
testAcc=clf.score(testMat,testClassLabel)
return testAcc
def predict(clf, datMat):
    """Predict the data in ``datMat`` with ``clf``.

    Returns the result of ``clf.predict(datMat)`` unchanged; ``clf`` only
    needs to provide a ``predict(data)`` method.
    """
    return clf.predict(datMat)
# Main script: train SVC models on the digit data and report the
# recognition rate for every (multi-class strategy, kernel) combination.
if __name__ == "__main__":
    trainMat, trainClassLabel = get_trian_testdata('trainingDigits/train.txt')
    testMat, testClassLabel = get_trian_testdata('testDigits/test.txt')
    # Single-argument print(...) calls emit the same text as the former
    # Python 2 print statements and are also valid on Python 3.
    print('训练数据集为: %d' % len(trainClassLabel))
    print('测试数据集为: %d' % len(testClassLabel))

    # First pass: one-vs-one ('ovo') multi-class SVM.
    # Second pass: one-vs-rest ('ovr') multi-class SVM.
    # C is never set, so every model uses the SVC default (noted as c=1 in
    # the original comments).
    for shape in ('ovo', 'ovr'):
        # Same kernel order as the original copy-pasted sections. 'rbf' was
        # previously selected by omitting the kernel argument (rbf being
        # the default); passing it explicitly is equivalent.
        for kernel in ('rbf', 'linear', 'poly', 'sigmoid'):
            clf = svm.SVC(decision_function_shape=shape, kernel=kernel)
            clf.fit(trainMat, trainClassLabel)
            test = clf.score(testMat, testClassLabel)
            print('核函数为%s的识别率: %s' % (kernel, test))
# Decision tree with sklearn
from data_test_kNN import file2matrix
from sklearn import tree
def get_trian_testdata(filename):
    """Convert the data in a file into data usable for training or testing.

    Unpacks the two values produced by ``file2matrix(filename)`` (the data
    matrix and its class labels) and returns them as a 2-tuple in that
    order.
    """
    matrix, class_labels = file2matrix(filename)
    return (matrix, class_labels)
'''
max_depth : int or None, optional (default=None)
The maximum depth of the tree. If None, then nodes are expanded until
all leaves are pure or until all leaves contain less than
min_samples_split samples(default=2).
Ignored if ``max_leaf_nodes`` is not None.
'''
# Main script: fit a decision tree on the digit data and print its score
# on the test set.
if __name__ == "__main__":
    trainMat, trainClassLabel = get_trian_testdata('trainingDigits/train.txt')
    testMat, testClassLabel = get_trian_testdata('testDigits/test.txt')
    # Single-argument print(...) calls emit the same text as the former
    # Python 2 print statements and are also valid on Python 3.
    print('训练数据集为: %d' % len(trainClassLabel))
    print('测试数据集为: %d' % len(testClassLabel))
    # max_depth is not set: the classifier is built with default arguments.
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(trainMat, trainClassLabel)
    testAcc = clf.score(testMat, testClassLabel)
    print(testAcc)