Deep Belief Network

References:

http://deeplearning.net/tutorial/DBN.html

Part 1

class DBN(object):

    """Deep Belief Network

    A Deep Belief Network is obtained by stacking several RBMs on top of each other.

    The hidden layer of the RBM at layer `i` becomes the input of the RBM at layer `i+1`.

    The first-layer RBM gets the network input as its input, and the hidden layer of the last RBM represents the output.

    When used for classification, the DBN is treated as an MLP by adding a logistic regression layer on top.

    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,

                hidden_layers_sizes=[500, 500], n_outs=10):

        """这个类支持可变数量的层。

        :type numpy_rng: numpy.random.RandomState

        :param numpy_rng: numpy random number generator used to draw initial weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams

        :param theano_rng: Theano random generator; if None is given, one is generated from a seed drawn from `numpy_rng`

        :type n_ins: int

        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints

        :param hidden_layers_sizes: sizes of the intermediate layers; must contain at least one value

        :type n_outs: int

        :param n_outs: dimension of the output of the network

        """

        self.sigmoid_layers = []

        self.rbm_layers = []

        self.params = []

        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:

            theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data

        # the data is presented as rasterized images

        self.x = T.matrix('x')

        # the labels are presented as a 1D vector of [int] labels

        self.y = T.ivector('y')



Part 2

for i in range(self.n_layers):

            # construct the sigmoidal layer

            # the size of the input is either the number of hidden

            # units of the layer below or the input size if we are on

            # the first layer

            if i == 0:

                input_size = n_ins

            else:

                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the

            # hidden layer below or the input of the DBN if you are on

            # the first layer

            if i == 0:

                layer_input = self.x

            else:

                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,

                                        input=layer_input,

                                        n_in=input_size,

                                        n_out=hidden_layers_sizes[i],

                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers

            self.sigmoid_layers.append(sigmoid_layer)

            # only the parameters of the sigmoid layers are declared as

            # parameters of the DBN; the visible biases of the RBMs are

            # parameters of those RBMs, but not of the DBN

            self.params.extend(sigmoid_layer.params)

            # construct an RBM that shares weights with this layer

            rbm_layer = RBM(numpy_rng=numpy_rng,

                            theano_rng=theano_rng,

                            input=layer_input,

                            n_visible=input_size,

                            n_hidden=hidden_layers_sizes[i],

                            W=sigmoid_layer.W,

                            hbias=sigmoid_layer.b)

            self.rbm_layers.append(rbm_layer)
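
Because each RBM is built with `W=sigmoid_layer.W` and `hbias=sigmoid_layer.b`, the RBM and the corresponding sigmoid layer reference the same Theano shared variables, so the updates made during unsupervised pretraining directly initialize the weights of the supervised MLP. A minimal sketch of checking this (assuming `numpy` is imported and that the companion `HiddenLayer`/`RBM` classes expose `W`, `b` and `W`, `hbias` attributes as in the tutorial):

dbn = DBN(numpy_rng=numpy.random.RandomState(123), n_ins=784,
          hidden_layers_sizes=[500, 500], n_outs=10)
# both objects hold the very same shared variables, not copies
assert dbn.rbm_layers[0].W is dbn.sigmoid_layers[0].W
assert dbn.rbm_layers[0].hbias is dbn.sigmoid_layers[0].b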



Part 3

self.logLayer = LogisticRegression(

            input=self.sigmoid_layers[-1].output,

            n_in=hidden_layers_sizes[-1],

            n_out=n_outs)

        self.params.extend(self.logLayer.params)

        # compute the cost for the second phase of training, defined as the

        # negative log likelihood of the logistic regression (output) layer

        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # symbolic variable that points to the number of errors made on the

        # minibatch given by self.x and self.y

        self.errors = self.logLayer.errors(self.y)
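
For reference, the tutorial's companion `LogisticRegression` class computes `negative_log_likelihood` as the mean negative log-probability of the correct label under the softmax output. A sketch of that expression (assuming a `p_y_given_x` softmax matrix and an integer label vector `y`, as in the tutorial's logistic regression code):

# mean over the minibatch of -log P(Y = correct label | x)
nll = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])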



Part 4

def pretraining_functions(self, train_set_x, batch_size, k):

        '''Generates a list of functions, for performing one step of

        gradient descent at a given layer. The function will require

        as input the minibatch index, and to train an RBM you just

        need to iterate, calling the corresponding function on all

        minibatch indexes.

        :type train_set_x: theano.tensor.TensorType

        :param train_set_x: Shared var. that contains all datapoints used for training the RBM

        :type batch_size: int

        :param batch_size: size of a [mini]batch

        :param k: number of Gibbs steps to do in CD-k / PCD-k

        '''

        # index to a [mini]batch

        index = T.lscalar('index')  # index to a minibatch



Part 5

learning_rate = T.scalar('lr') # learning rate to use

        # beginning of a batch, given `index`

        batch_begin = index * batch_size

        # ending of a batch given `index`

        batch_end = batch_begin + batch_size

        pretrain_fns = []

        for rbm in self.rbm_layers:

            # get the cost and the updates list

            # using CD-k here (persistent=None) for training each RBM.

            # TODO: change cost function to reconstruction error

            cost, updates = rbm.get_cost_updates(learning_rate,

                                                persistent=None, k=k)

            # compile the theano function

            fn = theano.function(

                inputs=[index, theano.In(learning_rate, value=0.1)],

                outputs=cost,

                updates=updates,

                givens={

                    self.x: train_set_x[batch_begin:batch_end]

                }

            )

            # append `fn` to the list of functions

            pretrain_fns.append(fn)

        return pretrain_fns
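
Note that `theano.In(learning_rate, value=0.1)` makes `lr` an optional argument of each compiled function, with a default of 0.1, and the caller can override it per call. A minimal usage sketch (assuming `dbn` and the shared variable `train_set_x` already exist):

pretrain_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                         batch_size=10, k=1)
cost_a = pretrain_fns[0](index=0)           # uses the default lr of 0.1
cost_b = pretrain_fns[0](index=0, lr=0.01)  # overrides the learning rate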



Part 6 

def build_finetune_functions(self, datasets, batch_size, learning_rate):

        '''Generates a function `train` that implements one step of

        finetuning, a function `validate` that computes the error on a

        batch from the validation set, and a function `test` that

        computes the error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType

        :param datasets: It is a list that contains all the datasets;

                        it has to contain three pairs, `train`,

                        `valid`, `test` in this order, where each pair

                        is formed of two Theano variables, one for the

                        datapoints, the other for the labels

        :type batch_size: int

        :param batch_size: size of a minibatch

        :type learning_rate: float

        :param learning_rate: learning rate used during finetune stage

        '''

        (train_set_x, train_set_y) = datasets[0]

        (valid_set_x, valid_set_y) = datasets[1]

        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for training, validation and testing

        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]

        n_valid_batches //= batch_size

        n_test_batches = test_set_x.get_value(borrow=True).shape[0]

        n_test_batches //= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters

        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates

        updates = []

        for param, gparam in zip(self.params, gparams):

            updates.append((param, param - gparam * learning_rate))

        train_fn = theano.function(

            inputs=[index],

            outputs=self.finetune_cost,

            updates=updates,

            givens={

                self.x: train_set_x[

                    index * batch_size: (index + 1) * batch_size

                ],

                self.y: train_set_y[

                    index * batch_size: (index + 1) * batch_size

                ]

            }

        )

        test_score_i = theano.function(

            [index],

            self.errors,

            givens={

                self.x: test_set_x[

                    index * batch_size: (index + 1) * batch_size

                ],

                self.y: test_set_y[

                    index * batch_size: (index + 1) * batch_size

                ]

            }

        )

        valid_score_i = theano.function(

            [index],

            self.errors,

            givens={

                self.x: valid_set_x[

                    index * batch_size: (index + 1) * batch_size

                ],

                self.y: valid_set_y[

                    index * batch_size: (index + 1) * batch_size

                ]

            }

        )

        # Create a function that scans the entire validation set

        def valid_score():

            return [valid_score_i(i) for i in range(n_valid_batches)]

        # Create a function that scans the entire test set

        def test_score():

            return [test_score_i(i) for i in range(n_test_batches)]

        return train_fn, valid_score, test_score
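
A minimal sketch of how the three returned callables are typically driven during fine-tuning (a simplified early-stopping loop; `datasets`, `batch_size`, `finetune_lr`, `training_epochs` and `n_train_batches` are assumed to be defined as elsewhere in the tutorial, whose full driver additionally uses patience-based stopping):

train_fn, valid_model, test_model = dbn.build_finetune_functions(
    datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr)

best_validation_loss = numpy.inf
for epoch in range(training_epochs):
    for minibatch_index in range(n_train_batches):
        train_fn(minibatch_index)                     # one SGD step
    this_validation_loss = numpy.mean(valid_model())  # mean error over the validation set
    if this_validation_loss < best_validation_loss:
        best_validation_loss = this_validation_loss
        test_loss = numpy.mean(test_model())          # test error of the best model so far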



Part 7

numpy_rng = numpy.random.RandomState(123)

    print('... building the model')

    # construct the Deep Belief Network

    dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28,

              hidden_layers_sizes=[1000, 1000, 1000],

              n_outs=10)
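
Part 8 below uses `train_set_x` and `n_train_batches`, which are not shown in this excerpt. A sketch of how they are typically obtained (assuming the tutorial's `load_data` helper from `logistic_sgd`, which returns the three (data, label) pairs described in Part 6, and a `batch_size` variable):

from logistic_sgd import load_data

datasets = load_data('mnist.pkl.gz')
(train_set_x, train_set_y) = datasets[0]
# number of minibatches per training epoch
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size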



Part 8

#########################

    # PRETRAINING THE MODEL #

    #########################

    print('... getting the pretraining functions')

    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,

                                                batch_size=batch_size,

                                                k=k)

    print('... pre-training the model')

    start_time = timeit.default_timer()

    # Pre-train layer-wise

    for i in range(dbn.n_layers):

        # go through pretraining epochs

        for epoch in range(pretraining_epochs):

            # go through the training set

            c = []

            for batch_index in range(n_train_batches):

                c.append(pretraining_fns[i](index=batch_index,

                                            lr=pretrain_lr))

            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch), end=' ')

            print(numpy.mean(c, dtype='float64'))

    end_time = timeit.default_timer()


With the default parameters, the code runs for 100 pre-training epochs with mini-batches of size 10. This corresponds to performing 500,000 unsupervised parameter updates. We use an unsupervised learning rate of 0.01, with a supervised learning rate of 0.1. The DBN itself consists of three hidden layers with 1000 units per layer. With early-stopping, this configuration achieved a minimal validation error of 1.27 with corresponding test error of 1.34 after 46 supervised epochs.
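
(As a rough check of that figure, assuming the standard 50,000-example MNIST training split: 50,000 / 10 = 5,000 minibatch updates per epoch, times 100 epochs, gives 500,000 updates for each layer's RBM.)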

On an Intel(R) Xeon(R) CPU X5560 running at 2.80GHz, using a multi-threaded MKL library (running on 4 cores), pretraining took 615 minutes with an average of 2.05 mins/(layer * epoch). Fine-tuning took only 101 minutes or approximately 2.20 mins/epoch.

Hyper-parameters were selected by optimizing on the validation error. We tested unsupervised learning rates in {10^-1, ..., 10^-5} and supervised learning rates in {10^-1, ..., 10^-4}. We did not use any form of regularization besides early-stopping, nor did we optimize over the number of pretraining updates.
