Regularization with gluon

from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon
import mxnet as mx

import random

num_train = 20
num_test = 100
num_inputs = 200

true_w = nd.ones((num_inputs, 1)) * 0.01
true_b = 0.05
# Generate the training and test data sets
X = nd.random.normal(shape=(num_train + num_test, num_inputs))
y = nd.dot(X, true_w) + true_b
y += .01 * nd.random.normal(shape=y.shape)

X_train, X_test = X[:num_train, :], X[num_train:, :]
y_train, y_test = y[:num_train], y[num_train:]

# Iterator: define a function that yields batch_size random examples and their targets each time
batch_size = 1
def data_iter(num_examples):
    idx = list(range(num_examples))
    random.shuffle(idx)
    for i in range(0, num_examples, batch_size):
        j = nd.array(idx[i:min(i+batch_size,num_examples)])
        yield X.take(j), y.take(j)
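
As a quick sanity check (not part of the original code), you can peek at a single batch from the iterator:

for data, label in data_iter(num_train):
    print(data.shape, label.shape)  # with batch_size = 1: (1, 200) and (1, 1)
    break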

# Initialize the model parameters
def init_params():
    w = nd.random_normal(scale=1, shape=(num_inputs, 1))
    b = nd.zeros(shape=(1,))
    params = [w, b]
    for param in params:
        param.attach_grad()
    return params

# L2 norm regularization
def L2_penalty(w, b):
    return ((w**2).sum() + b**2) / 2
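
A quick check (illustrative, not in the original post): the penalty is a one-element NDArray, so it adds cleanly to the per-sample squared loss inside autograd.record().

w0, b0 = init_params()
print(L2_penalty(w0, b0))  # roughly num_inputs / 2 in expectation when sigma = 1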

# Define training and testing
%matplotlib inline
import matplotlib as mpl
mpl.rcParams['figure.dpi']= 120
import matplotlib.pyplot as plt
import numpy as np

def net(X, w, b):
    return nd.dot(X, w) + b

def square_loss(yhat, y):
    return (yhat - y.reshape(yhat.shape)) ** 2 / 2

def sgd(params, lr, batch_size):
    for param in params:
        param[:] = param - lr * param.grad / batch_size

def test(net, params, X, y):
    return square_loss(net(X, *params), y).mean().asscalar()
    #return np.mean(square_loss(net(X, *params), y).asnumpy())

def train(lambd):
    epochs = 10
    learning_rate = 0.005
    w, b = params = init_params()
    train_loss = []
    test_loss = []
    for e in range(epochs):
        for data, label in data_iter(num_train):
            with autograd.record():
                output = net(data, *params)
                loss = square_loss(
                    output, label) + lambd * L2_penalty(*params)
            loss.backward()
            sgd(params, learning_rate, batch_size)
        train_loss.append(test(net, params, X_train, y_train))
        test_loss.append(test(net, params, X_test, y_test))
    plt.plot(train_loss)
    plt.plot(test_loss)
    plt.legend(['train', 'test'])
    plt.show()
    return 'learned w[:10]:', w[:10].T, 'learned b:', b

train(0)
[Figure 1: training and test loss curves for train(0)]
('learned w[:10]:',
[[ 0.30372193 -0.08122482 0.6469477 -1.5169737 0.16486691 0.42008066
    0.4117703   0.8333592  -0.66504014  3.563324  ]]
 , 'learned b:', 
 [0.12570551]
 )

train(5)
[Figure 2: training and test loss curves for train(5)]
('learned w[:10]:',
 [[ 0.01271132  0.00323896 -0.01058104  0.00074889 -0.00690441  0.00848945
    0.01372818 -0.00713685  0.00431851 -0.00840096]]
 , 'learned b:', 
 [0.00537223]
 )

# gluon version
# High-dimensional linear regression data set
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon
import mxnet as mx

num_train = 20
num_test = 100
num_inputs = 200

true_w = nd.ones((num_inputs, 1)) * 0.01
true_b = 0.05

X = nd.random.normal(shape=(num_train + num_test, num_inputs))
y = nd.dot(X, true_w) + true_b
y += .01 * nd.random.normal(shape=y.shape)

X_train, X_test = X[:num_train, :], X[num_train:, :]
y_train, y_test = y[:num_train], y[num_train:]

# Define training and testing
%matplotlib inline
import matplotlib as mpl
mpl.rcParams['figure.dpi']= 120
import matplotlib.pyplot as plt
import numpy as np

batch_size = 1
dataset_train = gluon.data.ArrayDataset(X_train, y_train)
data_iter_train = gluon.data.DataLoader(dataset_train, batch_size, shuffle=True)

square_loss = gluon.loss.L2Loss()

def test(net, X, y):
    return square_loss(net(X), y).mean().asscalar()

def train(weight_decay):
    epochs = 10
    learning_rate = 0.005
    net = gluon.nn.Sequential()
    with net.name_scope():
        net.add(gluon.nn.Dense(1))
    net.collect_params().initialize(mx.init.Normal(sigma=1))

    # Note the 'wd' (weight decay) argument here
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': learning_rate, 'wd': weight_decay})

    train_loss = []
    test_loss = []
    for e in range(epochs):
        for data, label in data_iter_train:
            with autograd.record():
                output = net(data)
                loss = square_loss(output, label)
            loss.backward()
            trainer.step(batch_size)
        train_loss.append(test(net, X_train, y_train))
        test_loss.append(test(net, X_test, y_test))
    plt.plot(train_loss)
    plt.plot(test_loss)
    plt.legend(['train','test'])
    plt.show()

    return ('learned w[:10]:', net[0].weight.data()[:,:10],
            'learned b:', net[0].bias.data())

train(0)
[Figure 3: training and test loss curves for train(0), gluon version]
('learned w[:10]:',
 [[-1.2421037   0.08937309  0.48468193 -2.474812    1.6000378  -0.7002478
    1.003187   -1.7175496   0.46522993  0.9414017 ]]
 , 'learned b:', 
 [-0.13920185]
 )

train(5)
[Figure 4: training and test loss curves for train(5), gluon version]
('learned w[:10]:',
 [[ 0.0030201  -0.00451496  0.00090438 -0.001776   -0.00489771  0.00282109
   -0.00575881 -0.00393021  0.00229077  0.01102347]]
 , 'learned b:', 
 [0.0032311]
 )

High-dimensional linear regression

We use high-dimensional linear regression as an example to introduce the problem of overfitting.

Specifically, each data sample is generated from the following linear function:

y = 0.05 + \sum_{i=1}^{p} 0.01 x_i + \text{noise}

Here the noise follows a normal distribution with mean 0 and standard deviation 0.01.

Note that the same data-generating function is used for both the training set and the test set. To make overfitting easy to observe, we deliberately keep the number of training samples low, e.g. n = 20, while raising the dimensionality, e.g. p = 200.


L2 norm regularization

Here we introduce L2 norm regularization. Instead of minimizing only the loss function during training, we actually minimize

\text{loss} + \lambda \sum_{p \in \text{params}} \|p\|_2^2

Intuitively, L2 norm regularization penalizes parameters with large absolute values. Note that sometimes the bias term is penalized as well and sometimes it is not; in practice the difference is usually small.
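
To make the connection to the gluon implementation concrete, here is a minimal sketch (illustrative values, not from the original post) showing that folding the gradient of the L2 penalty, which is lambd * w for a penalty of lambd/2 * ||w||^2, into the SGD step is the same as shrinking the weights by a constant factor at each step. This "weight decay" is what the 'wd' argument of gluon.Trainer applies.

from mxnet import ndarray as nd

lr, lambd = 0.005, 5.0
w = nd.array([1.0, -2.0, 0.5])
grad = nd.array([0.3, -0.1, 0.2])           # stand-in for the gradient of the data loss

w_penalty = w - lr * (grad + lambd * w)     # penalty gradient folded into the SGD step
w_decay = (1 - lr * lambd) * w - lr * grad  # the same step written as weight decay
print(w_penalty)
print(w_decay)                              # identical results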

The first figure shows that even though the training error can reach essentially 0.000000, the error on the test set stays very high. This is a typical case of overfitting.

The second figure shows the result with regularization: the training error rises somewhat, but the error on the test set drops, so the overfitting is alleviated. The learned parameters are still not ideal, mainly because the number of training samples is far too small relative to the dimensionality.

Conclusion: regularization can be used to combat overfitting.

Reference:

http://zh.gluon.ai/chapter_supervised-learning/index.html


