Using a neural network to solve the XOR problem was an important early milestone for neural networks.
First, we need to know what the XOR problem is; I will not go into much detail here.
The XOR problem has 4 input samples and 1 output. The input is two-dimensional, with each dimension being either 0 or 1; the output is one-dimensional, either 0 or 1.
When the input is (0, 0) or (1, 1) the output is 0; when the input is (1, 0) or (0, 1) the output is 1.
If you plot these four points in a Cartesian coordinate system, you will see that a linear classifier cannot separate them.
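For reference, here is the full truth table written as NumPy arrays (just an illustrative sketch; the names X_xor and y_xor are mine, and the training script below builds the same data in its small_data dictionary):

import numpy as np

# The four XOR samples: the label is 1 exactly when the two input bits differ.
X_xor = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_xor = np.array([0, 1, 1, 0])
print(np.logical_xor(X_xor[:, 0], X_xor[:, 1]).astype(int))  # [0 1 1 0], matches y_xor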
This post builds a small neural network framework and trains its parameters on this problem.
The architecture is required to be configurable: the number of hidden layers, the learning rate, and so on can all be adjusted.
After training we obtain the parameters and use them for prediction.
At prediction time, the results are drawn as a 3D plot to visualize the classification.
By "3D" I mean that although each input dimension in the training data is either 0 or 1, at prediction time each dimension can be any fraction between 0 and 1.
For example, for the test point (0.2, 0.2), we know it is closest to (0, 0), so we can roughly treat it as the point (0, 0) and expect the same output as for (0, 0).
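This "snap to the nearest corner" intuition can be written as a tiny sketch (the helper nearest_corner_label is hypothetical and only illustrates the expected behavior; the actual predictions come from the trained network):

import numpy as np

def nearest_corner_label(point):
    # Round each coordinate to the nearest of {0, 1} and return the XOR of the two bits.
    corner = np.round(np.asarray(point)).astype(int)
    return int(corner[0] ^ corner[1])

print(nearest_corner_label([0.2, 0.2]))  # 0, same output as the corner (0, 0)
print(nearest_corner_label([0.9, 0.1]))  # 1, same output as the corner (1, 0)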
I treat this as a classification problem, so the last layer uses a softmax classifier.
The activation function is the plain sigmoid.
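For reference, the sigmoid and the softmax loss used below are the standard formulations:

$$\sigma(z) = \frac{1}{1 + e^{-z}}, \qquad \sigma'(z) = \sigma(z)\,(1 - \sigma(z))$$

$$p_k = \frac{e^{x_k}}{\sum_j e^{x_j}}, \qquad L = -\log p_y, \qquad \frac{\partial L}{\partial x_k} = p_k - \mathbf{1}[k = y]$$

These are exactly what sigmoid_backward (dx = out * (1 - out)) and softmax_loss (dx[np.arange(N), y] -= 1) compute in the framework code below.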
The program is split into two parts: one builds the framework, the other feeds in the data and runs the training.
The code can also be found by searching on CSDN Downloads.
Framework program:
(largely adapted from the cs231n course assignments)
# coding=utf-8
import numpy as np
def basic_forard(x, w, b):
    # Affine forward pass: flatten the input and compute x.dot(w) + b.
    x = x.reshape(x.shape[0], -1)
    out = np.dot(x, w) + b
    cache = (x, w, b)
    return out, cache


def basic_backward(dout, cache):
    # Affine backward pass: gradients w.r.t. input, weights and bias.
    x, w, b = cache
    dx = np.dot(dout, w.T)
    dw = np.dot(x.T, dout)
    db = np.reshape(np.sum(dout, axis=0), b.shape)
    return dx, dw, db


def sigmoid_forward(x):
    x = x.reshape(x.shape[0], -1)
    out = 1 / (1 + np.exp(-1 * x))
    cache = out
    return out, cache


def sigmoid_backward(dout, cache):
    # sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)), expressed via the cached output.
    out = cache
    dx = out * (1 - out)
    dx *= dout
    return dx


def basic_sigmoid_forward(x, w, b):
    # Convenience layer: affine transform followed by a sigmoid activation.
    basic_out, basic_cache = basic_forard(x, w, b)
    sigmoid_out, sigmoid_cache = sigmoid_forward(basic_out)
    cache = (basic_cache, sigmoid_cache)
    return sigmoid_out, cache


def basic_sigmoid_backward(dout, cache):
    basic_cache, sigmoid_cache = cache
    dx_sigmoid = sigmoid_backward(dout, sigmoid_cache)
    dx, dw, db = basic_backward(dx_sigmoid, basic_cache)
    return dx, dw, db


def softmax_loss(x, y):
    # Numerically stable softmax cross-entropy loss and its gradient w.r.t. the scores.
    shifted_logits = x - np.max(x, axis=1, keepdims=True)
    Z = np.sum(np.exp(shifted_logits), axis=1, keepdims=True)
    log_probs = shifted_logits - np.log(Z)
    probs = np.exp(log_probs)
    N = x.shape[0]
    loss = -np.sum(log_probs[np.arange(N), y]) / N
    dx = probs.copy()
    dx[np.arange(N), y] -= 1
    dx /= N
    return loss, dx
class muliti_layer_net(object):
    def __init__(self, hidden_dim, input_dim=2, num_classes=2, dtype=np.float32, seed=None, reg=0.0):
        self.num_layers = 1 + len(hidden_dim)
        self.dtype = dtype
        self.reg = reg
        self.params = {}
        # Initialize the weights and biases of every layer.
        layers_dims = [input_dim] + hidden_dim + [num_classes]
        for i in range(self.num_layers):
            self.params['W' + str(i + 1)] = np.random.randn(layers_dims[i], layers_dims[i + 1])
            self.params['b' + str(i + 1)] = np.zeros((1, layers_dims[i + 1]))

    def loss(self, X, y=None):
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'
        # Forward pass: keep each layer's cache for back-propagation.
        basic_sigmoid_cache = {}
        layer_input = X
        for lay in range(self.num_layers):
            layer_input, basic_sigmoid_cache[lay] = basic_sigmoid_forward(layer_input,
                                                                          self.params['W' + str(lay + 1)],
                                                                          self.params['b' + str(lay + 1)])
        score = layer_input
        if mode == 'test':
            return score
        # Backward pass: propagate the gradient from the softmax loss through every layer.
        loss, dscore = softmax_loss(score, y)
        dx = dscore
        grads = {}
        for index in range(self.num_layers):
            lay = self.num_layers - index - 1
            # L2 regularization term for this layer's weights.
            loss += 0.5 * self.reg * np.sum(self.params['W' + str(lay + 1)] ** 2)
            dx, dw, db = basic_sigmoid_backward(dx, basic_sigmoid_cache[lay])
            grads['W' + str(lay + 1)] = dw + self.reg * self.params['W' + str(lay + 1)]
            grads['b' + str(lay + 1)] = db
        return loss, grads
def sgd_momentum(w, dw, config=None):
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)
    config.setdefault('momentum', 0.9)
    v = config.get('velocity', np.zeros_like(w))
    v = config['momentum'] * v - config['learning_rate'] * dw
    next_w = w + v
    config['velocity'] = v
    return next_w, config
class Solver(object):
    def __init__(self, model, data, **kwargs):
        self.model = model
        self.X_train = data['X_train']
        self.y_train = data['y_train']
        self.X_val = data['X_val']
        self.y_val = data['y_val']
        self.update_rule = kwargs.pop('update_rule', 'sgd_momentum')
        self.optim_config = kwargs.pop('optim_config', {})
        self.lr_decay = kwargs.pop('lr_decay', 1.0)
        self.batch_size = kwargs.pop('batch_size', 100)
        self.num_epochs = kwargs.pop('num_epochs', 10)
        self.print_every = kwargs.pop('print_every', 10)
        self.verbose = kwargs.pop('verbose', True)
        if len(kwargs) > 0:
            extra = ', '.join('"%s"' % k for k in kwargs.keys())
            raise ValueError('Unrecognized arguments %s' % extra)
        self._reset()

    def _reset(self):
        """
        Set up some book-keeping variables for optimization. Don't call this
        manually.
        """
        self.epoch = 0
        self.best_val_acc = 0
        self.best_params = {}
        self.loss_history = []
        self.train_acc_history = []
        self.val_acc_history = []
        # Each parameter gets its own copy of the optimizer configuration.
        self.optim_configs = {}
        for p in self.model.params:
            d = {k: v for k, v in self.optim_config.items()}
            self.optim_configs[p] = d

    def _step(self):
        # Sample a minibatch, compute loss and gradients, then update every parameter.
        num_train = self.X_train.shape[0]
        batch_mask = np.random.choice(num_train, self.batch_size)
        X_batch = self.X_train[batch_mask]
        y_batch = self.y_train[batch_mask]
        loss, grads = self.model.loss(X_batch, y_batch)
        self.loss_history.append(loss)
        for p, w in self.model.params.items():
            dw = grads[p]
            config = self.optim_configs[p]
            next_w, next_config = sgd_momentum(w, dw, config)
            self.model.params[p] = next_w
            self.optim_configs[p] = next_config

    def check_accuracy(self, X, y, num_samples=None, batch_size=100):
        N = X.shape[0]
        if num_samples is not None and N > num_samples:
            mask = np.random.choice(N, num_samples)
            N = num_samples
            X = X[mask]
            y = y[mask]
        num_batches = N // batch_size
        if N % batch_size != 0:
            num_batches += 1
        y_pred = []
        for i in range(num_batches):
            start = i * batch_size
            end = (i + 1) * batch_size
            scores = self.model.loss(X[start:end])
            y_pred.append(np.argmax(scores, axis=1))
        y_pred = np.hstack(y_pred)
        acc = np.mean(y_pred == y)
        return acc

    def train(self):
        num_train = self.X_train.shape[0]
        iterations_per_epoch = max(num_train // self.batch_size, 1)
        num_iterations = self.num_epochs * iterations_per_epoch
        for t in range(int(num_iterations)):
            self._step()
            if self.verbose and t % self.print_every == 0:
                print('Iteration {:d} / {:d}, loss: {:f}'.format(t + 1, num_iterations, self.loss_history[-1]))
            epoch_end = (t + 1) % iterations_per_epoch == 0
            if epoch_end:
                self.epoch += 1
                # Decay the learning rate at the end of every epoch.
                for k in self.optim_configs:
                    self.optim_configs[k]['learning_rate'] *= self.lr_decay
            first_it = (t == 0)
            last_it = (t == num_iterations - 1)
            if first_it or last_it or epoch_end:
                train_acc = self.check_accuracy(self.X_train, self.y_train, num_samples=10)
                val_acc = self.check_accuracy(self.X_val, self.y_val)
                self.train_acc_history.append(train_acc)
                self.val_acc_history.append(val_acc)
                if self.verbose:
                    print('Epoch {:d} / {:d}, train_acc: {:f}, val_acc: {:f}'.format(self.epoch, self.num_epochs, train_acc, val_acc))
                # Keep a copy of the parameters that achieve the best validation accuracy.
                if val_acc > self.best_val_acc:
                    self.best_val_acc = val_acc
                    self.best_params = {}
                    for k, v in self.model.params.items():
                        self.best_params[k] = v.copy()
        self.model.params = self.best_params
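Before training, a quick numerical gradient check is a handy sanity test for the backward passes above. Here is a minimal sketch, assuming the framework is saved as layers.py (the check follows the usual central-difference recipe and is not part of the original code):

import numpy as np
import layers

np.random.seed(0)
model = layers.muliti_layer_net(hidden_dim=[2, 2], input_dim=2, num_classes=2, dtype=np.float64)
X = np.random.rand(4, 2)
y = np.array([0, 1, 0, 1])
loss, grads = model.loss(X, y)

h = 1e-5
for name in sorted(grads):
    p = model.params[name]
    num_grad = np.zeros_like(p)
    it = np.nditer(p, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        old = p[idx]
        p[idx] = old + h          # perturb one entry upwards
        loss_plus, _ = model.loss(X, y)
        p[idx] = old - h          # and downwards
        loss_minus, _ = model.loss(X, y)
        p[idx] = old              # restore the original value
        num_grad[idx] = (loss_plus - loss_minus) / (2 * h)
        it.iternext()
    # Relative error between the analytic and the numerical gradient.
    rel_err = np.max(np.abs(num_grad - grads[name]) / np.maximum(1e-8, np.abs(num_grad) + np.abs(grads[name])))
    print(name, 'relative error:', rel_err)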
Training and test program:
import layers
import numpy as np
import matplotlib.pyplot as plt
small_data = {
    'X_train': np.array([[0, 0], [0, 1], [1, 1], [1, 0]]),
    'y_train': np.array([0, 1, 0, 1]),
    'X_val': np.array([[0, 0], [0, 1], [1, 1], [1, 0]]),
    'y_val': np.array([0, 1, 0, 1]),
}
learning_rate = 0.2
reg = 0.0
model = layers.muliti_layer_net(hidden_dim=[2,2], input_dim=2, num_classes=2, reg=reg, dtype=np.float64)
solver = layers.Solver(model, small_data,
                       print_every=1, num_epochs=5000, batch_size=4,
                       update_rule='sgd_momentum',
                       optim_config={'learning_rate': learning_rate})
solver.train()
print(model.params)
best_model = model
# plt.plot(solver.loss_history, 'o')
# plt.title('Training loss history')
# plt.xlabel('Iteration')
# plt.ylabel('Training loss')
# plt.show()
x_1 = np.arange(0, 1, 0.01)
x_2 = np.arange(0, 1, 0.01)
x_test = np.zeros((len(x_1)*len(x_2), 2))
print(x_test.shape)
index = 0
for i in range(len(x_1)):
    for j in range(len(x_2)):
        x_test[index, 0] = x_1[i]
        x_test[index, 1] = x_2[j]
        index += 1
print(x_test[0])
print(x_test[903])
print(x_test[5203])
test_pred = np.argmax(best_model.loss(x_test), axis=1)
print(test_pred)
from mpl_toolkits.mplot3d import Axes3D
x_1, x_2 = np.meshgrid(x_1, x_2)
figure = plt.figure()
ax = Axes3D(figure)
test_pred = test_pred.reshape(len(x_1), len(x_2))
ax.plot_surface(x_1, x_2, test_pred, rstride=1, cstride=1, cmap='rainbow')
plt.show()
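As a quick check of the "nearest corner" behavior described at the beginning, the trained model can also be queried at a few individual fractional points (a small sketch; the exact predictions depend on the training run):

probe_points = np.array([[0.2, 0.2], [0.1, 0.9], [0.8, 0.1], [0.9, 0.9]])
probe_pred = np.argmax(best_model.loss(probe_points), axis=1)
print(probe_pred)  # ideally [0 1 1 0], matching the nearest XOR corners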
Some additional notes
1. The code does not always reach 100% accuracy.
2. If the number of training iterations is too small, the accuracy will also be low; it usually takes 3000+ epochs to reach 100% accuracy.
3. Some of the hyperparameters also affect the accuracy.
4. This experiment is a useful exercise for understanding basic neural networks.