基于sklearn的几种分类算法

基于sklearn的多元分类的svm

from data_test_kNN import file2matrix

from sklearn import svm
def get_trian_testdata(filename):
    """Convert the data stored in *filename* into a (data, labels) pair.

    The file is parsed by ``file2matrix`` (imported from ``data_test_kNN``);
    this wrapper unpacks the two values it yields and hands them back as a
    tuple.  The same helper is used for both the training and the test file.

    NOTE(review): the concrete types/shapes of the two values are decided by
    ``file2matrix`` and are not visible here -- confirm against that module.
    """
    (samples, labels) = file2matrix(filename)
    loaded = (samples, labels)
    return loaded


def train_svm(trainMat, trainClassLabel, decision_function_shape, kernel='rbf'):
    """Train an SVM classifier and return the fitted model.

    Args:
        trainMat: training samples, passed straight to ``SVC.fit``.
        trainClassLabel: class labels for ``trainMat``, passed to ``SVC.fit``.
        decision_function_shape: forwarded to ``svm.SVC`` under the keyword
            of the same name (the demo script uses 'ovo' and 'ovr').
        kernel: kernel type forwarded to ``svm.SVC``.  Per the library notes
            kept from the original docstring it must be one of 'linear',
            'poly', 'rbf', 'sigmoid', 'precomputed' or a callable; 'rbf' is
            the library default, so omitting it keeps the previous behavior.
            If a callable is given it is used to pre-compute the kernel
            matrix, which should have shape ``(n_samples, n_samples)``.

    Returns:
        The fitted ``svm.SVC`` instance.
    """
    # Pass the options by keyword: given positionally, the first argument of
    # SVC is not decision_function_shape, so the value would be bound to the
    # wrong parameter (or rejected outright by keyword-only signatures).
    clf = svm.SVC(decision_function_shape=decision_function_shape,
                  kernel=kernel)
    clf.fit(trainMat, trainClassLabel)
    return clf


def testAcc(clf,testMat,testClassLabel):
    """Return the recognition rate of *clf* on the given test set.

    The value is whatever ``clf.score(testMat, testClassLabel)`` reports;
    it is passed back to the caller unchanged.
    """
    return clf.score(testMat, testClassLabel)


def predict(clf,datMat):
    """Predict with *clf* for the samples in *datMat*.

    Simply forwards to ``clf.predict(datMat)`` and returns its result
    untouched.
    """
    return clf.predict(datMat)


# Main entry point: train an SVC with each kernel under both multi-class
# settings and print the recognition rate on the test set.
if __name__ == "__main__":
    trainMat, trainClassLabel = get_trian_testdata('trainingDigits/train.txt')
    testMat, testClassLabel = get_trian_testdata('testDigits/test.txt')
    # A single pre-formatted argument in print(...) produces the same text
    # on Python 2 and Python 3 ("label: value").
    print('训练数据集为: %d' % len(trainClassLabel))
    print('测试数据集为: %d' % len(testClassLabel))

    # First pass: 'ovo' (one-vs-one); second pass: 'ovr' (one-vs-rest).
    # Within each pass the kernels are tried in the original order, with C
    # left at the library default (the original notes give it as c=1).
    # 'rbf' is named explicitly here; it is the default kernel, so this
    # matches the original call that omitted the kernel argument.
    # NOTE(review): the scikit-learn docs describe decision_function_shape
    # as affecting the shape of the decision function output -- confirm
    # whether the two passes are actually expected to score differently.
    for shape in ('ovo', 'ovr'):
        for kernel in ('rbf', 'linear', 'poly', 'sigmoid'):
            clf = svm.SVC(decision_function_shape=shape, kernel=kernel)
            clf.fit(trainMat, trainClassLabel)
            test = clf.score(testMat, testClassLabel)
            print('核函数为%s的识别率: %s' % (kernel, test))


基于sklearn的决策树

from data_test_kNN import file2matrix
from sklearn import tree


def get_trian_testdata(filename):
    """Convert the data stored in *filename* into a (data, labels) pair.

    Parsing is delegated to ``file2matrix`` (imported from
    ``data_test_kNN``); the two values it yields are unpacked and returned
    as a tuple.  Used for both the training file and the test file.

    NOTE(review): what the two values look like (types, shapes) depends on
    ``file2matrix`` and cannot be seen from this script -- confirm there.
    """
    (samples, labels) = file2matrix(filename)
    loaded = (samples, labels)
    return loaded
# Library note on DecisionTreeClassifier's max_depth parameter:
#   max_depth : int or None, optional (default=None)
#       The maximum depth of the tree. If None, then nodes are expanded until
#       all leaves are pure or until all leaves contain less than
#       min_samples_split samples (default=2).
#       Ignored if ``max_leaf_nodes`` is not None.

# Main entry point: fit a decision tree on the training file and print its
# score on the test file.
if __name__ == "__main__":
    trainMat, trainClassLabel = get_trian_testdata('trainingDigits/train.txt')
    testMat, testClassLabel = get_trian_testdata('testDigits/test.txt')
    # A single pre-formatted argument in print(...) produces the same text
    # on Python 2 and Python 3 ("label: value").
    print('训练数据集为: %d' % len(trainClassLabel))
    print('测试数据集为: %d' % len(testClassLabel))
    # max_depth is deliberately left unset here (see the note above the
    # entry point for what that means).
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(trainMat, trainClassLabel)
    testAcc = clf.score(testMat, testClassLabel)
    print(testAcc)

    

你可能感兴趣的:(机器学习)