# 将自己制作的图片转换成csv可以用的格式
import scipy.misc
import numpy
import matplotlib.pyplot
# Convert one hand-made PNG into MNIST-style pixel values and preview it.
# scipy.misc.imread was removed from SciPy in 1.2.0; imageio.imread is the
# replacement and is what the batch loader further down this file uses.
# The import is local because the file-level "import imageio" comes later.
import imageio

image_file_name = "D:\\vsc_pro\\CNN\\data\\3\\0.png"
# Read the PNG as a greyscale array.
img_array = imageio.imread(image_file_name, as_gray=True)
# Flatten the 28x28 image to 784 values and invert them: per the notes in this
# file, MNIST uses the opposite of the usual "0 is black, 255 is white" scale.
img_data = 255.0 - img_array.reshape(784)
# Back to 28x28 for plotting. The values stay in the 0..255 range here; the
# rescaling to 0.01..1.0 is intentionally not applied in this snippet.
img_array = img_data.reshape((28, 28))
# Draw the array; 'Greys' is the greyscale palette, no interpolation.
matplotlib.pyplot.imshow(img_array, cmap='Greys', interpolation='None')
# Show the image drawn above.
matplotlib.pyplot.show()
# 书上作者提供的读取PNG文件的代码
# helper to load data from PNG image files
# imageio模块可以用于图片的读写
import imageio
# glob helps select multiple files using patterns
# glob模块可以查找符合特定规则的文件路径名。
import glob
import numpy
import matplotlib.pyplot
# our own image test data set
# Each entry is one record: the label followed by the 784 scaled pixel values.
our_own_dataset = []

# glob expands the "?" wildcard, so every PNG with a one-character name matches.
for image_file_name in glob.glob('D:\\vsc_pro\\CNN\\data\\5\\?.png'):
    # progress message
    print("loading ... ", image_file_name)

    # use the filename to set the correct label:
    # the single character in front of ".png" is taken as the digit
    label = int(image_file_name[-5:-4])

    # load image data from the png file into a greyscale array
    img_array = imageio.imread(image_file_name, as_gray=True)

    # reshape from 28x28 to a list of 784 values and invert them; per the notes
    # in this file, MNIST uses the opposite of "0 is black, 255 is white"
    img_data = 255.0 - img_array.reshape(784)

    # then scale data to range from 0.01 to 1.0
    img_data = (img_data / 255.0 * 0.99) + 0.01
    # print the minimum and maximum as a sanity check
    print(numpy.min(img_data))
    print(numpy.max(img_data))

    # append label and image data to test data set; numpy arrays cannot grow
    # in place, so label and pixels are joined into one new array
    record = numpy.append(label, img_data)
    # print the current record
    print(record)
    # add it to the collected data
    our_own_dataset.append(record)

# Preview one loaded image (skipping the leading label value).
# NOTE(review): the start of this statement was lost in the source; plotting
# the first record is an assumption -- confirm which record was intended.
if our_own_dataset:
    matplotlib.pyplot.imshow(
        our_own_dataset[0][1:].reshape(28, 28),
        cmap='Greys', interpolation='None')
# 用自己写的图片的数据进行测试
# make your own neural network
# code for a 3-layer neural network, and code for learning the MNIST dataset
import numpy
# scipy.special for the sigmoid function expit()
import scipy.special
# library for plotting arrays
import matplotlib.pyplot
# 这个用于判读文件是否存在
import os
# neural network class definition
class neuralNetwork:
    """Three-layer (input, hidden, output) feed-forward neural network.

    Both the hidden and the output layer apply the logistic sigmoid
    (scipy.special.expit). Training performs one backpropagation step per
    call, updating both weight matrices in place.

    Attributes:
        inodes, hnodes, onodes: node counts of the input, hidden and output
            layers.
        wih: weights between the input and hidden layers. When initialised
            randomly its shape is (hnodes, inodes).
        who: weights between the hidden and output layers. When initialised
            randomly its shape is (onodes, hnodes).
        lr: learning rate applied to every weight update.
        activation_function: callable applying the sigmoid element-wise.
    """

    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        """Set the layer sizes and learning rate and initialise the weights.

        If "wih.csv" / "who.csv" exist in the current working directory the
        corresponding weight matrix is read from that file; otherwise it is
        drawn from a normal distribution centred on 0.0.

        Args:
            inputnodes: number of input nodes.
            hiddennodes: number of hidden nodes.
            outputnodes: number of output nodes.
            learningrate: step size used by train().
        """
        # set number of nodes in each input, hidden, output layer
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes

        # Link weight matrices, wih (input -> hidden) and who (hidden -> output).
        # numpy.random.normal(a, b, (X, Y)) returns an X-by-Y array of samples
        # with mean a and standard deviation b.
        # Use weights stored on disk when present, otherwise initialise them.
        # Passing the path (rather than an open file object) lets numpy close
        # the file itself.
        # NOTE(review): loaded matrices are assumed to already have the shapes
        # (hnodes, inodes) and (onodes, hnodes); nothing here checks that.
        if os.access("wih.csv", os.F_OK):
            self.wih = numpy.loadtxt("wih.csv", delimiter=",", skiprows=0)
        else:
            self.wih = numpy.random.normal(
                0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))

        if os.access("who.csv", os.F_OK):
            self.who = numpy.loadtxt("who.csv", delimiter=",", skiprows=0)
        else:
            # NOTE(review): the spread is derived from inodes here as well,
            # although the hidden->output links come from hnodes nodes; kept
            # as in the original -- confirm whether hnodes was intended.
            self.who = numpy.random.normal(
                0.0, pow(self.inodes, -0.5), (self.onodes, self.hnodes))

        # learning rate
        self.lr = learningrate

        # activation function is the sigmoid function
        self.activation_function = lambda x: scipy.special.expit(x)

    def train(self, inputs_list, targets_list):
        """Run one forward pass and one backpropagation weight update.

        Args:
            inputs_list: input values for one training example.
            targets_list: desired output values for that example.
        """
        # convert the input and target lists to 2d column arrays
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(targets_list, ndmin=2).T

        # signals into, then out of, the hidden layer
        hidden_inputs = numpy.dot(self.wih, inputs)
        hidden_outputs = self.activation_function(hidden_inputs)

        # signals into, then out of, the final output layer
        final_inputs = numpy.dot(self.who, hidden_outputs)
        final_outputs = self.activation_function(final_inputs)

        # output layer error is (target - actual)
        output_error = targets - final_outputs
        # hidden layer error is the output error split by the weights and
        # recombined at the hidden nodes; it must be computed from who
        # *before* who is updated below
        hidden_error = numpy.dot(self.who.T, output_error)

        # update the weights for the links between the hidden and output layers
        self.who += self.lr * numpy.dot(
            output_error * final_outputs * (1.0 - final_outputs),
            numpy.transpose(hidden_outputs))

        # update the weights for the links between the input and hidden layers
        self.wih += self.lr * numpy.dot(
            hidden_error * hidden_outputs * (1.0 - hidden_outputs),
            numpy.transpose(inputs))

    def query(self, inputs_list):
        """Feed one input through the network and return the output signals.

        Args:
            inputs_list: input values for one example.

        Returns:
            A 2d column array holding the output layer's activations.
        """
        # convert inputs list to a 2d column array
        inputs = numpy.array(inputs_list, ndmin=2).T

        # signals into, then out of, the hidden layer
        hidden_inputs = numpy.dot(self.wih, inputs)
        hidden_outputs = self.activation_function(hidden_inputs)

        # signals into, then out of, the final output layer
        final_inputs = numpy.dot(self.who, hidden_outputs)
        final_outputs = self.activation_function(final_inputs)

        return final_outputs
# Score the network on the records stored in data.csv.

# number of input, hidden and output nodes
# the input layer takes 28*28 = 784 pixel values
input_nodes = 784
# chosen freely
hidden_nodes = 200
# one output node per possible answer, ten in this example
output_nodes = 10

# learning rate is 0.2
learning_rate = 0.2

# create instance of neural network
n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)

# load the test data CSV file into a list; the with-block closes the file
with open("data.csv", 'r') as test_data_file:
    test_data_list = test_data_file.readlines()

# number of records processed so far
no = 0

# test the neural network
# scorecard for how well the network performs, initially empty
scorecard = []

# go through all the records in the test data set
for record in test_data_list:
    # split the record by the ',' commas
    all_values = record.split(',')
    # correct answer is first value; it may be written as a float ("3.0")
    correct_label = int(float(all_values[0]))
    # numpy.asfarray was removed in NumPy 2.0; asarray with dtype=float is
    # the equivalent conversion
    pixels = numpy.asarray(all_values[1:], dtype=float)
    # scale and shift the inputs
    inputs = (pixels / 255.0 * 0.99) + 0.01
    # query the network
    outputs = n.query(inputs)
    # the index of the highest value corresponds to the label
    label = numpy.argmax(outputs)
    # append correct or incorrect to list
    if label == correct_label:
        # network's answer matches correct answer, add 1 to scorecard
        scorecard.append(1)
    else:
        # network's answer doesn't match correct answer, add 0 to scorecard
        scorecard.append(0)
        # NOTE(review): the branch structure was lost in the source; showing
        # the diagnostics only for wrong answers is an assumption -- confirm.
        print(correct_label, "correct label")
        print(label, "network's answer")
        image_array = pixels.reshape((28, 28))
        matplotlib.pyplot.imshow(
            image_array, cmap='Greys', interpolation='None')
    no += 1

# calculate the performance score, the fraction of correct answers
scorecard_array = numpy.asarray(scorecard)
print("performance = ", scorecard_array.sum()/scorecard_array.size)
# Train the network on the MNIST training set, then score it on the test set.

# number of input, hidden and output nodes
input_nodes = 784
hidden_nodes = 200
output_nodes = 10

# learning rate
learning_rate = 0.1

# create instance of neural network
n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)

# load the mnist training data CSV file into a list; the with-block closes it
with open("mnist_dataset/mnist_train.csv", 'r') as training_data_file:
    training_data_list = training_data_file.readlines()

# train the neural network
# epochs is the number of times the training data set is used for training
epochs = 5

for e in range(epochs):
    # go through all records in the training data set
    for record in training_data_list:
        # split the record by the ',' commas
        all_values = record.split(',')
        # scale and shift the inputs (numpy.asfarray was removed in NumPy 2.0;
        # asarray with dtype=float is the equivalent conversion)
        inputs = (numpy.asarray(all_values[1:], dtype=float)
                  / 255.0 * 0.99) + 0.01
        # create the target output values
        # (all 0.01, except the desired label which is 0.99)
        targets = numpy.zeros(output_nodes) + 0.01
        # all_values[0] is the target label for this record
        targets[int(all_values[0])] = 0.99
        n.train(inputs, targets)

# load the mnist test data CSV file into a list
with open("mnist_dataset/mnist_test.csv", 'r') as test_data_file:
    test_data_list = test_data_file.readlines()

# test the neural network
# scorecard for how well the network performs, initially empty
scorecard = []

# go through all the records in the test data set
for record in test_data_list:
    # split the record by the ',' commas
    all_values = record.split(',')
    # correct answer is first value
    correct_label = int(all_values[0])
    # scale and shift the inputs
    inputs = (numpy.asarray(all_values[1:], dtype=float)
              / 255.0 * 0.99) + 0.01
    # query the network
    outputs = n.query(inputs)
    # the index of the highest value corresponds to the label
    label = numpy.argmax(outputs)
    # append correct or incorrect to list
    if label == correct_label:
        # network's answer matches correct answer, add 1 to scorecard
        scorecard.append(1)
    else:
        # network's answer doesn't match correct answer, add 0 to scorecard
        scorecard.append(0)

# calculate the performance score, the fraction of correct answers
scorecard_array = numpy.asarray(scorecard)
print("performance = ", scorecard_array.sum() / scorecard_array.size)
# Feed a label into the output side of the network and plot the input image
# the network associates with it.
# NOTE(review): the neuralNetwork class visible in this file defines no
# backquery() method -- confirm it is provided elsewhere before running this.

# the digit to visualise
label = 0

# output signals for that digit: 0.99 at its own index, 0.01 everywhere else
targets = numpy.full(output_nodes, 0.01)
targets[label] = 0.99

# run the network backwards to obtain the image data
image_data = n.backquery(targets)

# plot the image data as a 28x28 greyscale picture
matplotlib.pyplot.imshow(
    image_data.reshape((28, 28)), cmap='Greys', interpolation='None')
# Build a network and plot, for every digit, the input image obtained by
# running the network backwards from that digit's output pattern.

# number of input, hidden and output nodes
# the input layer takes 28*28 = 784 pixel values
input_nodes = 784
# chosen freely
hidden_nodes = 200
# one output node per possible answer, ten in this example
output_nodes = 10

# learning rate is 0.2
learning_rate = 0.2

# create instance of neural network
n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)

# run the network backwards, given a label, see what image it produces
# NOTE(review): the neuralNetwork class visible in this file defines no
# backquery() method -- confirm it is provided elsewhere before running this.
for i in range(10):
    # label to test
    label = i
    # create the output signals for this label
    targets = numpy.zeros(output_nodes) + 0.01
    # the tested label gets the high target value
    targets[label] = 0.99
    # get image data
    image_data = n.backquery(targets)
    # plot image data
    # NOTE(review): the start of this call was lost in the source and has been
    # rebuilt to match the single-label version of this snippet. Without a
    # matplotlib.pyplot.show() per iteration only the last image remains
    # visible -- confirm the intended display behaviour.
    matplotlib.pyplot.imshow(
        image_data.reshape(28, 28), cmap='Greys', interpolation='None')
# 旋转图片作者的代码
import numpy
# 显示图像的操作需要用到这个模块
import matplotlib.pyplot
# scipy.ndimage for rotating image arrays
# 旋转图片的操作需要用到这个模块
import scipy.ndimage
# Rotation demo: take one MNIST test record and plot it together with copies
# rotated by +10 and -10 degrees.

# open the CSV file and read its contents into a list; the with-block closes it
with open("mnist_dataset/mnist_test.csv", 'r') as data_file:
    data_list = data_file.readlines()

# which record will be used
record = 6

# scale input to range 0.01 to 1.00 and shape it as a 28x28 image
# (numpy.asfarray was removed in NumPy 2.0; asarray with dtype=float is the
# equivalent conversion)
all_values = data_list[record].split(',')
scaled_input = (
    (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
).reshape(28, 28)

# plot the original image
matplotlib.pyplot.imshow(scaled_input, cmap='Greys', interpolation='None')

# create rotated variations
# scipy.ndimage.rotate arguments used here:
#   1st positional: the source array
#   2nd positional: angle in degrees (positive = anticlockwise per the
#       original notes in this file)
#   cval: value used to fill the areas uncovered by the rotation
#   order: spline interpolation order, 0..5
#   reshape=False: keep the output the same shape as the input
# rotated anticlockwise by 10 degrees
inputs_plus10_img = scipy.ndimage.rotate(
    scaled_input, 10.0, cval=0.01, order=1, reshape=False)
# rotated clockwise by 10 degrees
inputs_minus10_img = scipy.ndimage.rotate(
    scaled_input, -10.0, cval=0.01, order=1, reshape=False)

# plot the +10 degree rotated variation (greyscale, no interpolation)
matplotlib.pyplot.imshow(inputs_plus10_img, cmap='Greys', interpolation='None')

# plot the -10 degree rotated variation
matplotlib.pyplot.imshow(
    inputs_minus10_img, cmap='Greys', interpolation='None')
