I've recently been following Mu Li's (沐神) Dive into Deep Learning videos on Bilibili, and these are my notes from working through the weight decay section: tackling overfitting in high-dimensional linear regression, first from scratch and then with Gluon.
For a cleaner, nicer layout, check out the Jupyter notebook version of this post!
%matplotlib inline
from mxnet import gluon, nd, autograd, init
from mxnet.gluon import loss as gloss, nn, data as gdata
import d2lzh as d2l
n_train, n_test, num_inputs = 20, 100, 200
features = nd.random.normal(shape=(n_train + n_test, num_inputs))
# The true model: every weight is 0.01 and the bias is 0.05.
true_w, true_b = nd.ones(shape=(num_inputs, 1)) * 0.01, 0.05
labels = nd.dot(features, true_w) + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)  # Gaussian noise
train_features, test_features = features[:n_train, :], features[n_train:, :]
train_labels, test_labels = labels[:n_train], labels[n_train:]
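The code above samples from the linear model

$$ y = 0.05 + \sum_{i=1}^{200} 0.01\,x_i + \epsilon, \qquad \epsilon \sim \mathcal{N}(0,\ 0.01^2), $$

with only 20 training examples against 200 input dimensions, a setup that is deliberately easy to overfit.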
true_w.shape, nd.dot(features, true_w).shape
((200, 1), (120, 1))
def init_params():
    """Randomly initialize the weights, zero the bias, and attach gradients."""
    w = nd.random.normal(scale=1, shape=(num_inputs, 1))
    b = nd.zeros(shape=(1,))
    w.attach_grad()
    b.attach_grad()
    return [w, b]

def l2_penalty(w):
    """Half the squared L2 norm of w; the 1/2 keeps the gradient simple."""
    return (w ** 2).sum() / 2
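This penalty is what gives weight decay its name: minimizing $\ell(w, b) + \frac{\lambda}{2}\|w\|^2$ with minibatch SGD yields the update

$$ w \leftarrow w - \eta\,(\nabla_w \ell + \lambda w) = (1 - \eta\lambda)\,w - \eta\,\nabla_w \ell, $$

so every step first shrinks, i.e. decays, $w$ by the factor $(1 - \eta\lambda)$ before taking the usual gradient step.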
batch_size, num_epochs, lr = 5, 100, 0.03
net, loss = d2l.linreg, d2l.squared_loss
train_iter = gdata.DataLoader(gdata.ArrayDataset(train_features, train_labels),
                              batch_size=batch_size, shuffle=True)
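Before training, it can help to pull one minibatch out of train_iter to confirm the shapes; this quick check is not in the original notebook:

# Peek at one minibatch: features (5, 200), labels (5, 1).
for X, y in train_iter:
    print(X.shape, y.shape)
    break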
def fit_and_plot(lambd):
    w, b = init_params()
    train_ls, test_ls, w_norm = [], [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                # Add the L2 penalty term to the data loss.
                l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l.backward()
            d2l.sgd([w, b], lr, batch_size)
        # Record the losses and the norm of w once per epoch.
        train_ls.append(loss(net(train_features, w, b),
                             train_labels).mean().asscalar())
        test_ls.append(loss(net(test_features, w, b),
                            test_labels).mean().asscalar())
        w_norm.append(w.norm().asscalar())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    d2l.semilogy(range(1, num_epochs + 1), w_norm, 'epochs', 'L2 norm of w')
    print('L2 norm of w:', w.norm().asscalar())
Without regularization (lambd=0) the model overfits: the training loss keeps dropping while the test loss stays high, and w grows large.

fit_and_plot(lambd=0)

L2 norm of w: 13.683592

With lambd=3 the penalty shrinks w by over three orders of magnitude and the gap between training and test loss narrows.

fit_and_plot(lambd=3)

L2 norm of w: 0.0048264284
def fit_and_plot_gluon(wd):
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=1))
    # Decay the weights. Weight parameter names generally end in "weight",
    # so this Trainer only updates w, with Gluon's built-in weight decay.
    trainer_w = gluon.Trainer(net.collect_params('.*weight'), 'sgd',
                              {'learning_rate': lr, 'wd': wd})
    # Do not decay the bias. Bias parameter names generally end in "bias".
    trainer_b = gluon.Trainer(net.collect_params('.*bias'), 'sgd',
                              {'learning_rate': lr})
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            # Step both trainers so every parameter gets updated.
            trainer_w.step(batch_size)
            trainer_b.step(batch_size)
        train_ls.append(loss(net(train_features),
                             train_labels).mean().asscalar())
        test_ls.append(loss(net(test_features),
                            test_labels).mean().asscalar())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', net[0].weight.data().norm().asscalar())
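Keeping two Trainer instances in sync works, but Gluon can also disable decay per parameter. A minimal sketch of that alternative, assuming Gluon's Parameter.wd_mult attribute (a per-parameter multiplier on wd):

# One Trainer for everything; the bias opts out of decay via wd_mult = 0.
net = nn.Sequential()
net.add(nn.Dense(1))
net.initialize(init.Normal(sigma=1))
net.collect_params('.*bias').setattr('wd_mult', 0)
trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': lr, 'wd': 3})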
The Gluon version behaves just like the from-scratch one: with no decay the model overfits and w grows large, while wd=3 collapses the norm.

fit_and_plot_gluon(0)

L2 norm of w: 13.395039

fit_and_plot_gluon(3)

L2 norm of w: 0.0030889509
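The decay strength is a hyperparameter like any other, so it pays to try several values. A quick sketch, reusing fit_and_plot_gluon from above:

# Sweep a few decay strengths and compare the resulting norms of w.
for wd in [0, 1, 3, 10]:
    print('wd =', wd)
    fit_and_plot_gluon(wd)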