【deep learning学习笔记】注释yusugomori的SDA代码 -- Sda.cpp -- 模型训练与预测

模型的核心部分。代码并不多,基本思路就是逐层训练:前一层的输出作为下一层的输入。hidden layer和dA共享相同的网络结构。需要注意的地方已经在代码中加了注释。另外,还发现了原代码实现中的一个bug(predict函数中linear_output清零的位置不对),已经更正。

代码如下:

/**
 * Layer-wise pre-training of the stacked denoising auto-encoders.
 *
 * For every layer i (bottom to top), for every epoch and for every one of
 * the N training samples, the sample is pushed through
 * sigmoid_layers[0 .. i-1] with sample_h_given_v() and the resulting vector
 * is handed to dA_layers[i]->train().
 *
 * @param input            training data, N samples of n_ins values each,
 *                         stored back to back (sample n starts at
 *                         input[n * n_ins]).
 * @param lr               learning rate passed on to dA_layers[i]->train().
 * @param corruption_level corruption level passed on to dA_layers[i]->train().
 * @param epochs           number of passes over all samples for each layer.
 *
 * Every buffer allocated for a sample is released before the next sample is
 * processed. The previous version leaked one buffer per sample and called
 * delete[] on an uninitialized pointer when no sample was processed.
 */
void SdA::pretrain (
			int *input, 
			double lr, 
			double corruption_level, 
			int epochs
				) 
{
	for(int i=0; i<n_layers; i++) 	// layer-wise, i
	{
		for(int epoch=0; epoch<epochs; epoch++) 	// training epochs, epoch
		{
			for(int n=0; n<N; n++) 	// iterate over each sample, n
			{
				// Work on a private copy of the n-th sample so the caller's
				// data is never handed to the layers directly.
				int *layer_input = new int[n_ins];
				for(int m=0; m<n_ins; m++)
					layer_input[m] = input[n * n_ins + m];

				// Compute the node values layer by layer, from the input
				// layer up to the input of layer i. Layer l-1 turns the
				// current vector into one of size hidden_layer_sizes[l-1].
				for(int l=1; l<=i; l++)
				{
					// Hand the current buffer over as the lower layer's input
					// instead of allocating a second buffer and copying.
					int *prev_layer_input = layer_input;

					layer_input = new int[hidden_layer_sizes[l-1]];
					sigmoid_layers[l-1]->sample_h_given_v(prev_layer_input, layer_input);

					delete[] prev_layer_input;
				}

				// train the current layer as a denoising auto-encoder
				dA_layers[i]->train(layer_input, lr, corruption_level);

				// Release this sample's buffer; the next sample allocates its own.
				delete[] layer_input;
			}	// for N
		}	// for epochs
	} // for n_layers
}

/**
 * Supervised fine-tuning of the output (logistic regression) layer.
 *
 * For every epoch and every one of the N training samples, the sample is
 * pushed through all sigmoid_layers with sample_h_given_v() and the
 * top-layer result, together with the sample's label vector, is handed to
 * log_layer->train().
 *
 * Unlike pretrain(), the hidden layers are used as they are here: they only
 * compute the input of the output layer and are not trained by this function.
 *
 * @param input  training data, N samples of n_ins values each, stored back
 *               to back (sample n starts at input[n * n_ins]).
 * @param label  target data, N vectors of n_outs values each, stored back
 *               to back (vector n starts at label[n * n_outs]).
 * @param lr     learning rate passed on to log_layer->train().
 * @param epochs number of passes over all samples.
 *
 * Every per-sample buffer is released before the next sample is processed.
 * The previous version leaked the top-layer buffer of every sample but the
 * last and called delete[] on an uninitialized pointer when no sample was
 * processed.
 */
void SdA::finetune(
	int *input, 
	int *label, 
	double lr, 
	int epochs) 
{
	// Label buffer is the same size for every sample, so it is reused.
	int *train_Y = new int[n_outs];

	for(int epoch=0; epoch<epochs; epoch++) 
	{
		for(int n=0; n<N; n++) // input x1...xN
		{
			// Private copy of the n-th sample; it becomes the first layer's input.
			int *layer_input = new int[n_ins];
			for(int m=0; m<n_ins; m++)
				layer_input[m] = input[n * n_ins + m];
			for(int m=0; m<n_outs; m++)
				train_Y[m] = label[n * n_outs + m];

			// Compute the output of the last hidden layer, layer by layer.
			for(int i=0; i<n_layers; i++) 
			{
				// Hand the current buffer over as this layer's input instead
				// of allocating a second buffer and copying.
				int *prev_layer_input = layer_input;

				layer_input = new int[hidden_layer_sizes[i]];
				sigmoid_layers[i]->sample_h_given_v(prev_layer_input, layer_input);

				delete[] prev_layer_input;
			} // for n_layers

			// train the output (last) layer by logistic regression
			log_layer->train(layer_input, train_Y, lr);

			// Release this sample's buffer; the next sample allocates its own.
			delete[] layer_input;
		} // for N
		// lr *= 0.95;
	} // for epoch

	delete[] train_Y;
}

/**
 * Computes the network output for a single input vector.
 *
 * The input is converted to double and pushed through every sigmoid layer
 * using sigmoid(W * v + b). The last activation vector is then fed to the
 * logistic regression layer (W * v + b followed by log_layer->softmax()).
 *
 * @param x input vector; n_ins values are read.
 * @param y output buffer; log_layer->n_out values are written and then
 *          passed to log_layer->softmax().
 *
 * The activation buffer of a layer is handed directly to the next layer
 * rather than being copied into a freshly allocated buffer, and the buffer
 * read by the output layer is always initialized, even if the layer loop
 * does not run.
 */
void SdA::predict (
		int *x, 
		double *y
			) 
{
	// Current activations; start with the input converted to double.
	double *layer_input = new double[n_ins];
	for(int j=0; j<n_ins; j++) 
		layer_input[j] = x[j];

	// layer activation: calculate the output value layer by layer
	for(int i=0; i<n_layers; i++) 
	{
		// The previous activations become this layer's input.
		double *prev_layer_input = layer_input;
		layer_input = new double[sigmoid_layers[i]->n_out];

		for(int k=0; k<sigmoid_layers[i]->n_out; k++) 
		{
			// The accumulator must restart at zero for every output unit;
			// resetting it only once per layer would carry sums across units.
			double linear_output = 0.0;
			for(int j=0; j<sigmoid_layers[i]->n_in; j++) 
			{
				linear_output += 
					sigmoid_layers[i]->W[k][j] * prev_layer_input[j];
			}
			linear_output += sigmoid_layers[i]->b[k];
			layer_input[k] = sigmoid(linear_output);
		}

		delete[] prev_layer_input;
	} // for n_layers

	// the logistic regression layer
	for(int i=0; i<log_layer->n_out; i++) 
	{
		y[i] = 0;
		for(int j=0; j<log_layer->n_in; j++) 
		{
			y[i] += log_layer->W[i][j] * layer_input[j];
		}
		y[i] += log_layer->b[i];
	}

	log_layer->softmax(y);

	delete[] layer_input;
}

你可能感兴趣的:(【deep learning学习笔记】注释yusugomori的SDA代码 -- Sda.cpp -- 模型训练与预测)