Softmax 从零开始实现
导入必要的包
from mxnet import gluon
from mxnet import nd
from mxnet.gluon import data as gdata,loss as gloss
import d2lzh as d2l
from mxnet import autograd as ag
导入数据
# Download the Fashion-MNIST training and test sets (via the d2l helper)
# and wrap them in mini-batch iterators.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Sanity-check the import: print the number of batches in each iterator,
# then dump the first training batch only.
print(len(train_iter), len(test_iter))
for x, y in train_iter:
    print(x, y)
    break
235 40
[[[[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
...
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]]]
[[[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
...
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]]]
[[[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
...
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]]]
...
[[[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
...
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]]]
[[[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
...
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]]]
[[[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
...
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]]]]
[6 9 7 2 1 7 6 6 9 0 6 3 1 4 3 4 4 2 6 3 6 9 0 2 1 2 2 7 7 1 8 0 1 7 6 1 4
9 2 6 3 7 9 0 5 9 7 0 8 8 1 8 6 1 6 9 4 7 7 4 1 4 4 5 7 8 7 7 5 6 4 2 9 0
0 6 0 5 7 8 7 8 9 3 3 7 1 0 9 6 5 4 9 4 4 9 4 2 4 7 7 4 5 9 6 8 7 5 1 4 4
3 5 3 5 0 7 1 0 5 6 1 6 5 4 9 4 7 7 3 8 7 7 7 0 5 4 2 3 2 2 0 9 0 3 8 0 6
4 4 4 5 8 9 8 7 5 6 0 6 5 6 8 2 6 9 9 5 2 0 9 4 3 4 8 0 5 5 8 2 4 1 8 8 9
7 9 1 7 2 8 7 8 6 4 7 7 3 0 8 0 9 0 0 5 9 0 8 2 8 6 0 9 2 7 5 7 9 7 5 4 0
3 8 7 5 4 9 1 2 7 8 1 7 9 8 8 8 0 0 0 9 6 6 7 8 1 4 1 7 6 1 1 8 6 3]
初始化模型参数
num_inputs = 784   # length of the flattened input vector consumed by net()
num_outputs = 10   # number of output classes

# Weights start as small Gaussian noise; the bias starts at zero.
w = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
b = nd.zeros(shape=num_outputs)

# Allocate gradient buffers so autograd can write into w.grad and b.grad.
w.attach_grad()
b.attach_grad()
定义模型
# Softmax operation.
def softmax(X):
    """Apply a row-wise softmax to a 2-D array of scores.

    Each row is exponentiated and divided by the sum of its exponentials,
    so every output row is non-negative and sums to 1.

    The row maximum is subtracted before exponentiating. This leaves the
    mathematical result unchanged (the common factor cancels in the ratio)
    but keeps ``exp`` from overflowing to ``inf`` -- and the division from
    producing ``nan`` -- when the scores are large.

    Args:
        X: array of scores with rows along axis 0 and classes along axis 1.

    Returns:
        An array of the same shape as ``X`` holding the per-row softmax.
    """
    shifted = X - X.max(axis=1, keepdims=True)
    X_exp = shifted.exp()
    partition = X_exp.sum(axis=1, keepdims=True)
    # keepdims=True makes `partition` broadcast across each row.
    return X_exp / partition
# Forward computation of the network.
def net(X):
    """Return class probabilities for a batch of inputs.

    The batch is reshaped to rows of length ``num_inputs``, passed through
    the affine map ``rows . w + b`` and normalised with ``softmax``.
    """
    flat = X.reshape(-1, num_inputs)
    logits = nd.dot(flat, w) + b
    return softmax(logits)
定义损失函数
def cross_entropy(y_hat, y):
    """Return the per-example cross-entropy loss.

    ``nd.pick`` selects, for every row of ``y_hat``, the entry indexed by
    the corresponding label in ``y``; the loss is the negative log of that
    entry.
    """
    true_class_prob = nd.pick(y_hat, y)
    return -true_class_prob.log()
计算分类准确率
def accuracy(y_hat, y):
    """Return the fraction of rows whose arg-max prediction equals the label.

    ``y`` is cast to float32 before the comparison so it is compared
    against the arg-max result as float32 values.
    """
    predicted = y_hat.argmax(axis=1)
    hits = predicted == y.astype('float32')
    return hits.mean().asscalar()
# Accuracy of a model over a whole data iterator.
def evaluate_accuracy(data_iter, net):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: iterable yielding ``(X, y)`` batches of features and
            labels.
        net: callable mapping a feature batch ``X`` to per-class scores,
            one row per example.

    Returns:
        Number of correctly classified examples divided by the total
        number of examples seen.
    """
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        y_hat = net(X)
        # Accumulate the COUNT of correct predictions, not the per-batch
        # mean: the total is divided by the number of examples below, so
        # summing means would shrink the result by roughly the batch size.
        acc_sum += (y_hat.argmax(axis=1) == y.astype('float32')).sum().asscalar()
        n += y.size
    return acc_sum / n
优化函数
def sgd(params, lr, batch_size):
    """Apply one mini-batch stochastic gradient descent step in place.

    Each parameter is moved against its gradient, scaled by the learning
    rate ``lr`` and divided by ``batch_size``.
    """
    for param in params:
        step = lr * param.grad / batch_size
        # Slice assignment writes into the existing array instead of
        # rebinding the loop variable to a new one.
        param[:] = param - step
训练模型
# Training hyperparameters: number of passes over the data and SGD step size.
num_epochs = 5
lr = 0.1
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    """Train ``net`` and print loss and accuracy after every epoch.

    Args:
        net: callable mapping a feature batch to per-class scores.
        train_iter: iterable of ``(X, y)`` training batches.
        test_iter: iterable of ``(X, y)`` batches passed to
            ``evaluate_accuracy`` at the end of each epoch.
        loss: callable ``loss(y_hat, y)`` returning per-example losses.
        num_epochs: number of passes over ``train_iter``.
        batch_size: value forwarded to ``sgd`` / ``trainer.step``.
        params: parameters updated by ``sgd`` when ``trainer`` is None.
        lr: learning rate used by ``sgd`` when ``trainer`` is None.
        trainer: optional object with a ``step(batch_size)`` method; when
            given it is used instead of the hand-written ``sgd``.
    """
    for epoch in range(1, num_epochs + 1):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            # Record the forward pass so backward() can compute gradients.
            with ag.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            if trainer is None:
                sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)
            y = y.astype('float32')
            # Accumulate the batch LOSS. Summing the labels `y` here would
            # report the mean label value instead of the training loss.
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = evaluate_accuracy(test_iter, net)
        print("epoch %d ,loss %f ,train_acc %f ,test_acc %f" % (epoch, train_l_sum / n, train_acc_sum / n, test_acc))
# Train with the hand-written sgd on [w, b]; no trainer is supplied.
train_ch3(
    net,
    train_iter,
    test_iter,
    cross_entropy,
    num_epochs,
    batch_size,
    params=[w, b],
    lr=lr,
)
epoch 1 ,loss 4.500000 ,train_acc 0.747417 ,test_acc 0.003201
epoch 2 ,loss 4.500000 ,train_acc 0.810550 ,test_acc 0.003296
epoch 3 ,loss 4.500000 ,train_acc 0.823350 ,test_acc 0.003323
epoch 4 ,loss 4.500000 ,train_acc 0.829450 ,test_acc 0.003361
epoch 5 ,loss 4.500000 ,train_acc 0.834900 ,test_acc 0.003365
展示图片
# Take only the first batch from the test iterator.
for X, y in test_iter:
    break

# Presumably get_fashion_mnist_labels maps numeric class ids to text
# names -- confirm against d2lzh.
true_labels = d2l.get_fashion_mnist_labels(y.asnumpy())
predicted_ids = net(X).argmax(axis=1).asnumpy()
pred_labels = d2l.get_fashion_mnist_labels(predicted_ids)

# One title per image: true label on the first line, prediction on the second.
titles = ['\n'.join((actual, guessed))
          for actual, guessed in zip(true_labels, pred_labels)]

# Display the first nine images with their titles.
d2l.show_fashion_mnist(X[:9], titles[:9])