Understanding the RBM C++ Code
Code link: https://github.com/yusugomori/DeepLearning
class RBM {
public:
  int N;
  int n_visible;
  int n_hidden;
  double **W;
  double *hbias;
  double *vbias;
  RBM(int, int, int, double**, double*, double*);
  ~RBM();
  void contrastive_divergence(int*, double, int);
  void sample_h_given_v(int*, double*, int*);
  void sample_v_given_h(int*, double*, int*);
  double propup(int*, double*, double);
  double propdown(int*, int, double);
  void gibbs_hvh(int*, double*, int*, double*, int*);
  void reconstruct(int*, double*);
};
This is the RBM class definition, and the structure is clear: N is the number of training samples, n_visible and n_hidden are the sizes of the visible and hidden layers, W is the n_hidden × n_visible weight matrix, and hbias and vbias are the hidden and visible bias vectors.
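For reference, this class implements a Bernoulli–Bernoulli RBM. In the standard formulation (using $b$ for hbias, $c$ for vbias, and indexing $W$ as hidden × visible, as the code does), the joint distribution over visible units $v$ and hidden units $h$ is defined by the energy

$$E(v, h) = -\sum_j c_j v_j - \sum_i b_i h_i - \sum_{i,j} h_i W_{ij} v_j, \qquad p(v, h) \propto e^{-E(v, h)}.$$

The member functions below (propup, propdown, the sampling routines and contrastive_divergence) all follow from this definition.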
#include <iostream>
#include <cstdio>   // printf
#include <cstdlib>  // rand, srand, RAND_MAX
#include <math.h>
#include "RBM.h"
using namespace std;

double uniform(double min, double max) { // draw a random number uniformly between min and max
  return rand() / (RAND_MAX + 1.0) * (max - min) + min;
}

int binomial(int n, double p) { // sample from a binomial distribution; with n = 1 this binarizes a probability p
  if(p < 0 || p > 1) return 0;
  int c = 0;
  double r;
  for(int i=0; i<n; i++) {
    r = rand() / (RAND_MAX + 1.0);
    if(r < p) c++;
  }
  return c;
}

double sigmoid(double x) {
  return 1.0 / (1.0 + exp(-x));
}
RBM::RBM(int size, int n_v, int n_h, double **w, double *hb, double *vb) { // initialize the RBM: W, hbias, vbias
  N = size;
  n_visible = n_v;
  n_hidden = n_h;
  if(w == NULL) {
    W = new double*[n_hidden];
    for(int i=0; i<n_hidden; i++) W[i] = new double[n_visible];
    double a = 1.0 / n_visible;
    for(int i=0; i<n_hidden; i++) {
      for(int j=0; j<n_visible; j++) {
        W[i][j] = uniform(-a, a); // random initialization in [-1/n_visible, 1/n_visible)
      }
    }
  } else {
    W = w;
  }
  if(hb == NULL) {
    hbias = new double[n_hidden];
    for(int i=0; i<n_hidden; i++) hbias[i] = 0;
  } else {
    hbias = hb;
  }
  if(vb == NULL) {
    vbias = new double[n_visible];
    for(int i=0; i<n_visible; i++) vbias[i] = 0;
  } else {
    vbias = vb;
  }
}
RBM::~RBM() { // destructor; note it frees W, hbias and vbias unconditionally, so it is safest to let the class allocate them (pass NULL to the constructor)
  for(int i=0; i<n_hidden; i++) delete[] W[i];
  delete[] W;
  delete[] hbias;
  delete[] vbias;
}
void RBM::contrastive_divergence(int *input, double lr, int k) { // CD-k: input is one training sample, lr is the learning rate, k is the number of Gibbs steps
  double *ph_mean = new double[n_hidden];   // activation probabilities of the hidden units h0, computed from the input
  int *ph_sample = new int[n_hidden];       // h0 after binarization
  double *nv_means = new double[n_visible]; // activation probabilities of the reconstructed visible units v1
  int *nv_samples = new int[n_visible];     // v1 after binarization
  double *nh_means = new double[n_hidden];  // activation probabilities of the reconstructed hidden units h1
  int *nh_samples = new int[n_hidden];      // h1 after binarization
  /* CD-k */
  sample_h_given_v(input, ph_mean, ph_sample); // first compute h0
  for(int step=0; step<k; step++) {
    if(step == 0) {
      gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples); // usually k = 1: reconstruct v1 and h1
    } else {
      gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples);
    }
  }
  for(int i=0; i<n_hidden; i++) { // update W, hbias, vbias
    for(int j=0; j<n_visible; j++) {
      // W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
      W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N; // ΔW[i][j] = lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N
    }
    hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N; // Δhbias[i] = lr * (ph_sample[i] - nh_means[i]) / N
  }
  for(int i=0; i<n_visible; i++) {
    vbias[i] += lr * (input[i] - nv_samples[i]) / N; // Δvbias[i] = lr * (input[i] - nv_samples[i]) / N; this differs slightly from Hinton's update
  }
  delete[] ph_mean;
  delete[] ph_sample;
  delete[] nv_means;
  delete[] nv_samples;
  delete[] nh_means;
  delete[] nh_samples;
}
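Written out, the parameter updates above are the usual CD-k approximation to the log-likelihood gradient. This is simply a transcription of the code, with $\eta$ the learning rate lr, $N$ the number of training samples, $v^{(0)}$ the input and $v^{(k)}$ the visible sample after $k$ Gibbs steps:

$$\Delta W_{ij} = \frac{\eta}{N}\Big(p(h_i{=}1 \mid v^{(0)})\, v^{(0)}_j - p(h_i{=}1 \mid v^{(k)})\, v^{(k)}_j\Big),$$
$$\Delta b_i = \frac{\eta}{N}\Big(h^{(0)}_i - p(h_i{=}1 \mid v^{(k)})\Big), \qquad \Delta c_j = \frac{\eta}{N}\Big(v^{(0)}_j - v^{(k)}_j\Big).$$

Note that the positive phase of the hidden-bias update uses the binarized sample ph_sample, while the weight update uses the probability ph_mean, exactly as in the code.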
void RBM::sample_h_given_v(int *v0_sample, double *mean, int *sample) { // sample h given v
  for(int i=0; i<n_hidden; i++) {
    mean[i] = propup(v0_sample, W[i], hbias[i]);
    sample[i] = binomial(1, mean[i]);
  }
}

void RBM::sample_v_given_h(int *h0_sample, double *mean, int *sample) { // sample v given h
  for(int i=0; i<n_visible; i++) {
    mean[i] = propdown(h0_sample, i, vbias[i]);
    sample[i] = binomial(1, mean[i]);
  }
}
double RBM::propup(int *v, double *w, double b) { // propup receives W[i], the row of weights belonging to the hidden unit being computed
  double pre_sigmoid_activation = 0.0;
  for(int j=0; j<n_visible; j++) {
    pre_sigmoid_activation += w[j] * v[j];
  }
  pre_sigmoid_activation += b;
  return sigmoid(pre_sigmoid_activation);
}

double RBM::propdown(int *h, int i, double b) { // propdown receives the index i of the visible unit being reconstructed
  double pre_sigmoid_activation = 0.0;
  for(int j=0; j<n_hidden; j++) {
    pre_sigmoid_activation += W[j][i] * h[j];
  }
  pre_sigmoid_activation += b;
  return sigmoid(pre_sigmoid_activation);
}
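propup and propdown thus compute the two conditional distributions of the RBM:

$$p(h_i{=}1 \mid v) = \sigma\Big(\sum_j W_{ij} v_j + b_i\Big), \qquad p(v_j{=}1 \mid h) = \sigma\Big(\sum_i W_{ij} h_i + c_j\Big),$$

with $\sigma(x) = 1/(1+e^{-x})$ the sigmoid defined earlier; sample_h_given_v and sample_v_given_h then binarize these probabilities with binomial(1, p).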
void RBM::gibbs_hvh(int *h0_sample, double *nv_means, int *nv_samples,
                    double *nh_means, int *nh_samples) { // one Gibbs step: h -> v -> h
  sample_v_given_h(h0_sample, nv_means, nv_samples);
  sample_h_given_v(nv_samples, nh_means, nh_samples);
}
void RBM::reconstruct(int *v, double *reconstructed_v) { // reconstruction: one propup pass followed by one propdown-style pass yields the reconstructed values
  double *h = new double[n_hidden];
  double pre_sigmoid_activation;
  for(int i=0; i<n_hidden; i++) {
    h[i] = propup(v, W[i], hbias[i]);
  }
  for(int i=0; i<n_visible; i++) {
    pre_sigmoid_activation = 0.0;
    for(int j=0; j<n_hidden; j++) {
      pre_sigmoid_activation += W[j][i] * h[j];
    }
    pre_sigmoid_activation += vbias[i];
    reconstructed_v[i] = sigmoid(pre_sigmoid_activation);
  }
  delete[] h;
}
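Note that reconstruct uses the hidden probabilities directly (no binarization), so the reconstructed value of visible unit $j$ is

$$\tilde{v}_j = \sigma\Big(\sum_i W_{ij}\, p(h_i{=}1 \mid v) + c_j\Big),$$

which is why the test output below consists of real numbers in $(0, 1)$ rather than binary values.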
void test_rbm() {
  srand(0);
  double learning_rate = 0.1;
  int training_epochs = 1000;
  int k = 1;
  int train_N = 6;
  int test_N = 2;
  int n_visible = 6;
  int n_hidden = 3;

  // training data
  int train_X[6][6] = {
    {1, 1, 1, 0, 0, 0},
    {1, 0, 1, 0, 0, 0},
    {1, 1, 1, 0, 0, 0},
    {0, 0, 1, 1, 1, 0},
    {0, 0, 1, 0, 1, 0},
    {0, 0, 1, 1, 1, 0}
  };

  // construct RBM
  RBM rbm(train_N, n_visible, n_hidden, NULL, NULL, NULL);

  // train
  for(int epoch=0; epoch<training_epochs; epoch++) {
    for(int i=0; i<train_N; i++) {
      rbm.contrastive_divergence(train_X[i], learning_rate, k);
    }
  }

  // test data
  int test_X[2][6] = {
    {1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 0}
  };
  double reconstructed_X[2][6];

  // test
  for(int i=0; i<test_N; i++) {
    rbm.reconstruct(test_X[i], reconstructed_X[i]);
    for(int j=0; j<n_visible; j++) {
      printf("%.5f ", reconstructed_X[i][j]);
    }
    cout << endl;
  }
}
int main() {
  test_rbm();
  return 0;
}
The program's output, i.e. the reconstruction of the two test samples, is: