This is the core part of the model. There is not much code; the basic idea is to train layer by layer, with the output of the previous layer serving as the input of the next. The hidden layers and the dA layers share the same network structure. The points that need attention have been annotated with comments in the code. In addition, a bug was found in the original implementation and has been corrected.
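The weight sharing between the hidden layers and the dA layers is set up in the SdA constructor, which is not shown in this post. As a rough sketch (the exact HiddenLayer and dA constructor signatures are assumed here, based only on how the two layer types are used further down), it looks something like this:

// sketch: each dA is built on top of the corresponding hidden layer and
// reuses its W and b, so pretraining the dA updates the very same weights
// that finetune() and predict() will later use
for(int i=0; i<n_layers; i++) {
    int input_size = (i == 0) ? n_ins : hidden_layer_sizes[i-1];

    // hidden layer used by finetune() and predict()
    sigmoid_layers[i] = new HiddenLayer(N, input_size, hidden_layer_sizes[i], NULL, NULL);

    // denoising auto-encoder used by pretrain(); shares W and b with the hidden layer
    dA_layers[i] = new dA(N, input_size, hidden_layer_sizes[i],
                          sigmoid_layers[i]->W, sigmoid_layers[i]->b, NULL);
}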
The code is as follows:
void SdA::pretrain(int *input, double lr, double corruption_level, int epochs) {
    int *layer_input;
    int prev_layer_input_size;
    int *prev_layer_input;

    int *train_X = new int[n_ins];

    for(int i=0; i<n_layers; i++) {                 // layer-wise: train layer i
        for(int epoch=0; epoch<epochs; epoch++) {   // training epochs
            for(int n=0; n<N; n++) {                // iterate over the samples x1...xN

                // initial input: copy the n-th training sample
                for(int m=0; m<n_ins; m++) train_X[m] = input[n * n_ins + m];

                // set up the input of layer i and train it
                // the code here is a little involved: it first computes the
                // node values layer by layer, from the bottom input layer up
                // to the layer currently being trained (i), and then trains
                // the denoising auto-encoder that maps layer i-1 to layer i
                for(int l=0; l<=i; l++) {
                    if(l == 0) {
                        // the first layer reads the raw sample
                        layer_input = new int[n_ins];
                        for(int j=0; j<n_ins; j++) layer_input[j] = train_X[j];
                    } else {
                        // take the value of the previous layer from the last
                        // 'layer_input' as the input of the current layer
                        if(l == 1) prev_layer_input_size = n_ins;
                        else prev_layer_input_size = hidden_layer_sizes[l-2];

                        prev_layer_input = new int[prev_layer_input_size];
                        for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];
                        delete[] layer_input;

                        // compute the value of the current layer from
                        // prev_layer_input and store it in layer_input
                        layer_input = new int[hidden_layer_sizes[l-1]];
                        sigmoid_layers[l-1]->sample_h_given_v(prev_layer_input, layer_input);
                        delete[] prev_layer_input;
                    }
                } // for l

                // train the current layer as a denoising auto-encoder
                dA_layers[i]->train(layer_input, lr, corruption_level);
                delete[] layer_input;   // free this sample's buffer before the next one is allocated
            } // for N
        } // for epochs
    } // for n_layers

    delete[] train_X;
}

void SdA::finetune(int *input, int *label, double lr, int epochs) {
    int *layer_input;
    int *prev_layer_input;

    int *train_X = new int[n_ins];
    int *train_Y = new int[n_outs];

    for(int epoch=0; epoch<epochs; epoch++) {
        for(int n=0; n<N; n++) {                    // iterate over the samples x1...xN

            // initial input: copy the n-th sample and its label
            for(int m=0; m<n_ins; m++)  train_X[m] = input[n * n_ins + m];
            for(int m=0; m<n_outs; m++) train_Y[m] = label[n * n_outs + m];

            // compute the value of the last hidden layer
            // the main difference from the similar block in pretrain is that
            // here the hidden-layer weights have already been trained, so they
            // can be used directly to compute the output layer by layer; in
            // pretrain, the weights of layer i are not trained yet and have to
            // be learned before the values of the next layer can be computed
            for(int i=0; i<n_layers; i++) {
                if(i == 0) {
                    prev_layer_input = new int[n_ins];
                    for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[j];
                } else {
                    prev_layer_input = new int[hidden_layer_sizes[i-1]];
                    for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
                    delete[] layer_input;
                }

                layer_input = new int[hidden_layer_sizes[i]];
                sigmoid_layers[i]->sample_h_given_v(prev_layer_input, layer_input);
                delete[] prev_layer_input;
            } // for n_layers

            // train the output (last) layer by logistic regression
            log_layer->train(layer_input, train_Y, lr);
            delete[] layer_input;   // free this sample's buffer before the next one is allocated
        } // for N
        // lr *= 0.95;
    } // for epoch

    delete[] train_X;
    delete[] train_Y;
}

void SdA::predict(int *x, double *y) {
    double *layer_input;
    double *prev_layer_input;
    double linear_output;

    prev_layer_input = new double[n_ins];
    for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];

    // layer activation: compute the output value layer by layer
    for(int i=0; i<n_layers; i++) {
        layer_input = new double[sigmoid_layers[i]->n_out];

        // linear_output = 0.0;      // bug in the original code: the reset belongs inside the k-loop below!!
        for(int k=0; k<sigmoid_layers[i]->n_out; k++) {
            linear_output = 0.0;     // here is the right place: reset for every output unit k!!

            for(int j=0; j<sigmoid_layers[i]->n_in; j++) {
                linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j];
            }
            linear_output += sigmoid_layers[i]->b[k];
            layer_input[k] = sigmoid(linear_output);
        }
        delete[] prev_layer_input;

        if(i < n_layers-1) {
            prev_layer_input = new double[sigmoid_layers[i]->n_out];
            for(int j=0; j<sigmoid_layers[i]->n_out; j++) prev_layer_input[j] = layer_input[j];
            delete[] layer_input;
        }
    } // for n_layers

    // the logistic regression layer
    for(int i=0; i<log_layer->n_out; i++) {
        y[i] = 0;
        for(int j=0; j<log_layer->n_in; j++) {
            y[i] += log_layer->W[i][j] * layer_input[j];
        }
        y[i] += log_layer->b[i];
    }

    log_layer->softmax(y);

    delete[] layer_input;
}
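predict() relies on a sigmoid() utility and on the softmax() member of the logistic-regression layer; both are defined elsewhere in this code base. For completeness, here is a minimal sketch of what they amount to, written as free functions (an assumption about their exact form):

#include <cmath>

// logistic activation applied to each hidden unit's pre-activation
double sigmoid(double x) {
    return 1.0 / (1.0 + std::exp(-x));
}

// in-place softmax over an n-element score vector, shifted by the maximum
// for numerical stability; log_layer->softmax(y) plays this role above
void softmax(double *x, int n) {
    double max = x[0], sum = 0.0;
    for(int i=1; i<n; i++) if(x[i] > max) max = x[i];
    for(int i=0; i<n; i++) { x[i] = std::exp(x[i] - max); sum += x[i]; }
    for(int i=0; i<n; i++) x[i] /= sum;
}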
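Finally, to show how the three functions fit together, here is a minimal driver in the spirit of the rest of the code. The SdA constructor arguments (sample count, input size, hidden layer sizes, output size, layer count) and the concrete numbers are assumptions for illustration only:

void test_sda() {
    // tiny placeholder data set: 10 binary input vectors of length 28,
    // with one-hot labels over 2 classes (fill in real data here)
    int train_X[10][28] = {0};
    int train_Y[10][2]  = {0};

    int hidden_layer_sizes[] = {15, 15};

    // assumed constructor: SdA(N, n_ins, hidden_layer_sizes, n_outs, n_layers)
    SdA sda(10, 28, hidden_layer_sizes, 2, 2);

    // greedy layer-wise pretraining of the stacked denoising auto-encoders
    sda.pretrain(*train_X, 0.1, 0.3, 1000);      // lr, corruption_level, epochs

    // supervised training of the logistic-regression output layer
    sda.finetune(*train_X, *train_Y, 0.1, 500);  // lr, epochs

    // predict one sample; test_Y receives the softmax class probabilities
    int test_X[28] = {0};
    double test_Y[2];
    sda.predict(test_X, test_Y);
}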