机器学习学习笔记--朴素贝叶斯算法来识别验证码

# -*- coding:utf-8 -*-

import re

import matplotlib.pyplot as plt

import os

from sklearn.feature_extraction.text import CountVectorizer

from sklearn import cross_validation

import os

from sklearn.naive_bayes import GaussianNB

import pickle#这个库可以用来持久化输入和读出

import gzip

def load_data():
    """Load the MNIST handwritten-digit dataset from a gzipped pickle file.

    The archive location is hard-coded to the author's local checkout.

    Returns:
        A ``(training_data, valid_data, test_data)`` tuple, in the order
        the three objects are stored in the pickle.
    """
    # NOTE: pickle can run arbitrary code while loading; only point this
    # at an archive you trust.
    # Binary mode is spelled out because pickle reads raw bytes.
    with gzip.open('/home/qin/code/python/web-ml/1book-master/data/MNIST/mnist.pkl.gz', 'rb') as fp:
        # MNIST is an entry-level computer-vision dataset of handwritten digits.
        training_data, valid_data, test_data = pickle.load(fp)
    return training_data, valid_data, test_data

if __name__ == '__main__':
    # Script entry point: load MNIST, fit Gaussian naive Bayes, print accuracy.
    training_data, valid_data, test_data = load_data()

    # Each split unpacks into a (samples, labels) pair; valid_data is unused.
    x1, y1 = training_data
    x2, y2 = test_data

    clf = GaussianNB()
    # Fit the Gaussian naive Bayes classifier on the training split (x1, y1).
    clf.fit(x1, y1)

    # Print the cross-validated *accuracy* (not cross-entropy) computed on the
    # test split (x2, y2); one score is printed per fold.
    # NOTE(review): cross_val_score presumably clones and refits the estimator
    # on folds of x2/y2, so the fit on x1/y1 above would not influence these
    # numbers -- confirm against the scikit-learn docs. clf.score(x2, y2)
    # would evaluate the model trained on x1/y1 instead.
    # The parenthesised single-argument print emits the same text under
    # Python 2 and is also valid Python 3 syntax.
    print(cross_validation.cross_val_score(clf, x2, y2, scoring="accuracy"))

输出（开头是 sklearn 的弃用警告，最后一行的三个数值是在测试集上做交叉验证时各折的准确率）：

qin@qin-X556UB:~/code/python/beiyesi$ python jiancheyazhengma.py

/usr/local/lib/python2.7/dist-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.

"This module will be removed in 0.20.", DeprecationWarning)

[ 0.53684841  0.58385839  0.6043857 ]

你可能感兴趣的:(机器学习学习笔记--朴素贝叶斯算法来识别验证码)