Implementing a Simple Neural Network in Python

A simple neural network implemented with Python and the NumPy library.

The network has one input layer, one hidden layer, and one output layer.
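
Concretely, with the hyperparameters used in the script below, the tensor shapes work out as follows (a quick sketch derived from the code itself; nothing here goes beyond what the script implies):

x:  (batch_size, 784)       flattened 28x28 MNIST images
W1: (784, hidden_size)      b1: (hidden_size,)
W2: (hidden_size, 10)       b2: (10,)
y:  (batch_size, 10)        per-class probabilities from softmax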

Gradients are obtained by numerical differentiation.

Because this approach is quite inefficient, the network is trained for only 10 iterations.

The final accuracy stays at around 9%, essentially chance level for a 10-class problem.

My only takeaway is that numerical differentiation really is easy to understand, but it is far too slow!
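
As a minimal illustration of the central-difference formula f'(x) ≈ (f(x+h) - f(x-h)) / 2h used below, here is a toy check on f(x) = x**2 at x = 3 (my own example, not part of the original script; the true derivative there is 6):

h = 1e-4
f = lambda x: x ** 2
print((f(3.0 + h) - f(3.0 - h)) / (2 * h))	# prints a value extremely close to 6.0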

from dataset.mnist import load_mnist
import numpy as np

batch_size = 100
learning_rate = 0.01
hidden_size = 10

# Hidden-layer activation function
def sigmoid(x):
	return 1 / (1 + np.exp(-x))
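
# For reference (illustrative, not in the original script): sigmoid squashes any real input
# into the open interval (0, 1), e.g. sigmoid(0) = 0.5.
# print(sigmoid(np.array([-1.0, 0.0, 1.0])))	# -> roughly [0.269, 0.5, 0.731]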

# Output-layer activation function; softmax is the usual choice for classification
# exp of each input signal / sum of exp over all input signals (per sample)
def softmax(x):
	if x.ndim == 2:
		# For a batch, subtract each row's max and normalize row by row
		t = np.max(x, axis=1, keepdims=True)
		exp_a = np.exp(x - t)
		return exp_a / np.sum(exp_a, axis=1, keepdims=True)
	t = np.max(x)	# subtracting the max avoids overflow without changing the result
	exp_a = np.exp(x - t)
	sum_exp_a = np.sum(exp_a)
	return exp_a / sum_exp_a
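
# Quick sanity check for softmax (illustrative values, not part of the original script):
# the outputs are positive and sum to 1, so they can be read as class probabilities.
# print(softmax(np.array([0.3, 2.9, 4.0])))	# -> roughly [0.018, 0.245, 0.737]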

# Cross-entropy loss function (expects one-hot labels t)
def cross_entropy_loss(y, t):
	if y.ndim == 1:
		t = t.reshape(1, t.size)
		y = y.reshape(1, y.size)

	batch_size = y.shape[0]
	# 1e-7 avoids log(0); the loss is averaged over the batch
	return -np.sum(t * np.log(y + 1e-7)) / batch_size
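
# Worked example (illustrative, not part of the original script): with a one-hot label
# t = [0, 0, 1] and prediction y = [0.1, 0.3, 0.6], the loss is -log(0.6 + 1e-7) ≈ 0.51;
# as the probability assigned to the correct class approaches 1, the loss approaches 0.
# print(cross_entropy_loss(np.array([0.1, 0.3, 0.6]), np.array([0, 0, 1])))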

# Numerical gradient: central-difference estimate of df/dx for every element of x
# f'(x) = (f(x+h) - f(x-h)) / 2h
def numerical_gradient(f, x):
	h = 1e-4
	gradient = np.zeros_like(x)

	it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
	# Visit every element of x, nudge it by h in each direction, and measure the change in f
	while not it.finished:
		idx = it.multi_index
		tmp_val = x[idx]

		x[idx] = tmp_val + h
		fxh1 = f(x) # f(x+h)
		x[idx] = tmp_val - h
		fxh2 = f(x) # f(x-h)

		# Central-difference derivative
		gradient[idx] = (fxh1 - fxh2) / (2*h)
		x[idx] = tmp_val # restore the original value
		it.iternext()

	return gradient
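
# Sanity check (illustrative, not part of the original script): for f(x) = x0**2 + x1**2
# the analytic gradient at (3.0, 4.0) is (6.0, 8.0), and the numerical estimate matches closely.
# print(numerical_gradient(lambda x: np.sum(x**2), np.array([3.0, 4.0])))	# -> ~[6. 8.]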


# The neural network
class NN:

	# Initialize the parameters: weights and biases
	def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
		self.param = {}
		self.param['W1'] = np.random.randn(input_size, hidden_size) * weight_init_std
		self.param['b1'] = np.zeros(hidden_size)
		self.param['W2'] = np.random.randn(hidden_size, output_size) * weight_init_std
		self.param['b2'] = np.zeros(output_size)

	# Prediction: forward pass
	def predict(self, x):
		z = sigmoid(np.dot(x, self.param['W1']) + self.param['b1'])
		y = softmax(np.dot(z, self.param['W2']) + self.param['b2'])
		return y

	# Compute the loss
	def loss(self, x, y_):
		return cross_entropy_loss(self.predict(x), y_)

	# Training: update the parameters using numerically computed gradients
	def train(self, x, y_):
		# The lambda closes over (x, y_), so numerical_gradient can call it later
		# without running into variable-scope/lifetime issues
		loss_f = lambda W: self.loss(x, y_)
		# Plain gradient descent: param <- param - learning_rate * dLoss/dparam
		self.param['W1'] -= learning_rate * numerical_gradient(loss_f, self.param['W1'])
		self.param['b1'] -= learning_rate * numerical_gradient(loss_f, self.param['b1'])
		self.param['W2'] -= learning_rate * numerical_gradient(loss_f, self.param['W2'])
		self.param['b2'] -= learning_rate * numerical_gradient(loss_f, self.param['b2'])

	# Testing: evaluate classification accuracy
	def test(self, x, y_):
		y = self.predict(x)
		return np.sum(np.argmax(y, axis=1) == np.argmax(y_, axis=1)) / x.shape[0]

# Randomly sample a mini-batch from the dataset
def next_batch(x, t, batch_size):
	batch_mask = np.random.choice(x.shape[0], batch_size)
	x_batch = x[batch_mask]
	t_batch = t[batch_mask]
	return x_batch, t_batch


(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=True)
print(x_train.shape)	# (60000, 784)
print(t_train.shape)	# (60000, 10)
print(x_test.shape)		# (10000, 784)
print(t_test.shape)		# (10000, 10)


nn = NN(784, hidden_size, 10)

# Training is slow, so run only 10 iterations
for step in range(10):
	print("training step %i" % step)
	mini_batch = next_batch(x_train, t_train, batch_size)
	nn.train(mini_batch[0], mini_batch[1])

print(nn.test(x_test, t_test))

 
