Deep learning C++ code (on GitHub)
Companion tutorial to the deep learning C++ code (1. Overview)
Companion tutorial to the deep learning C++ code (2. Basic data operations)
Companion tutorial to the deep learning C++ code (3. Reading data files)
Companion tutorial to the deep learning C++ code (4. ANN: classical neural networks)
Companion tutorial to the deep learning C++ code (5. CNN: convolutional neural networks)
Here is the Java code for the CNN, which I translated into C++. The results are quite good: the recognition accuracy on the MNIST handwritten digits reaches 97.5%. Unlike the Java code, I use the original training set as the entire dataset, randomly selecting 80% of it for training and the remaining 20% for testing.
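For reference, here is a minimal sketch of such an 80/20 split using only the standard library (an illustration, not the repository's actual code):
#include <algorithm>
#include <numeric>
#include <random>
#include <vector>

//Randomly split the indices 0 .. n-1 into 80% for training and 20% for testing.
void splitIndices(int n, std::vector<int>& paraTrainIndices, std::vector<int>& paraTestIndices)
{
    std::vector<int> tempAll(n);
    std::iota(tempAll.begin(), tempAll.end(), 0);
    std::mt19937 tempGenerator(std::random_device{}());
    std::shuffle(tempAll.begin(), tempAll.end(), tempGenerator);
    int tempCut = n * 8 / 10;
    paraTrainIndices.assign(tempAll.begin(), tempAll.begin() + tempCut);
    paraTestIndices.assign(tempAll.begin() + tempCut, tempAll.end());
}//Of splitIndices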
A CNN is an extension of an ANN, and the overall network structure is similar; the difference lies in the layer types.
The layer class is rather complex, because it has to handle several cases.
Well, it has quite a few member variables, too.
//Layer type, 4 types.
//INPUT_LAYER, CONVOLUTION_LAYER, SAMPLING_LAYER, OUTPUT_LAYER
int layerType;
//The number of classes, not the index of the class (label) attribute.
//Only useful for the output layer.
int numClasses;
//The batch size.
int batchSize;
//For batch processing. The position of the current record within the current batch.
int recordInBatch = 0;
//The number of output maps.
//The number of output maps of this layer. The number of input maps equals the number of output maps of the last layer.
int numOutMaps;
//The size of output maps.
MfSize* mapSize;
//The kernel size.
MfSize* kernelSize;
//The scale size. Only valid for SAMPLING_LAYER.
MfSize* scaleSize;
//Kernel. Dimensions: [front map][out map][width][height].
Mf4DTensor* kernel;
//The current kernel.
MfDoubleMatrix* currentKernel;
//Delta kernel.
MfDoubleMatrix* deltaKernel;
//Single delta kernel.
MfDoubleMatrix* singleDeltaKernel;
//The current kernel rotated by 180 degrees. Kept as a member to avoid local variables.
MfDoubleMatrix* currentRot180Kernel;
//Bias. The length is outMapNum.
MfDoubleMatrix* bias;
//Out maps. Dimensions: [batchSize][numOutMaps][mapSize.width][mapSize.height].
//The first dimension is due to the parallel computing and parameter updating.
Mf4DTensor* outMaps;
//Current out map.
MfDoubleMatrix* currentOutMap;
//A single out map. Space is allocated once and reused many times, to avoid repeatedly allocating temporary variables.
MfDoubleMatrix* singleOutMap;
//Errors. Dimensions: [batchSize][numOutMaps][mapSize.width][mapSize.height].
Mf4DTensor* errors;
//Current errors, for one map, one instance in a batch.
MfDoubleMatrix* currentErrors;
//Current single errors, for accumulation.
MfDoubleMatrix* currentSingleErrors;
//layers[i - 1].
MfCnnLayer* lastLayer;
//layers[i + 1].
MfCnnLayer* nextLayer;
//The activator of this layer.
Activator* layerActivator;
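The 4D tensors above are indexed as [record in batch][out map][width][height]. On a flat array that convention would look like this (a sketch of the indexing scheme only; Mf4DTensor's actual layout is not shown in this post):
//Flat row-major index for a [batchSize][numMaps][width][height] tensor.
int index4D(int paraRecord, int paraMap, int paraX, int paraY,
        int paraNumMaps, int paraWidth, int paraHeight)
{
    return ((paraRecord * paraNumMaps + paraMap) * paraWidth + paraX) * paraHeight + paraY;
}//Of index4D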
The member functions are listed as follows:
//The default constructor. Not really useful.
MfCnnLayer();
//The second constructor.
MfCnnLayer(int, int, int, MfSize*);
//The destructor
virtual ~MfCnnLayer();
//Initialize the kernel.
void initKernel(int);
//Initialize the bias.
void initBias();
//Initialize the errors.
void initErrors();
//Initialize the output maps.
void initOutMaps();
//Prepare for new batch.
void prepareForNewBatch();
//Prepare for new record.
void prepareForNewRecord();
//Getter.
int getNumClasses();
//Getter.
int getLayerType();
//Getter.
int getNumOutMaps();
//Setter.
void setNumOutMaps(int);
//Getter.
MfSize* getMapSize();
//Setter.
void setMapSize(MfSize* paraSize);
//Getter.
MfSize* getKernelSize();
//Setter.
void setKernelSize(MfSize* paraSize);
//Getter.
MfSize* getScaleSize();
//Setter.
void setScaleSize(MfSize* paraSize);
//Getter.
MfDoubleMatrix* getKernelAt(int paraFrontMap, int paraOutMap);
//Setter.
void setKernelAt(int paraFrontMap, int paraOutMap, MfDoubleMatrix* paraKernel);
//Getter.
MfDoubleMatrix* getRot180KernelAt(int paraFrontMap, int paraOutMap);
//Getter.
double getBiasAt(int paraMapNo);
//Setter.
void setBiasAt(int paraMapNo, double paraValue);
//Getter.
Mf4DTensor* getOutMaps();
//Set the out map value.
void setOutMapValue(int paraMapNo, int paraX, int paraY, double paraValue);
//Set the map value.
void setOutMapValue(int paraMapNo, MfDoubleMatrix* paraMatrix);
//Getter.
MfDoubleMatrix* getOutMapAt(int paraIndex);
//Getter.
MfDoubleMatrix* getOutMapAt(int paraRecordId, int paraOutMapNo);
//Getter.
Mf4DTensor* getErrors();
//Getter.
MfDoubleMatrix* getErrorsAt(int paraMapNo);
//Setter.
void setErrorsAt(int, MfDoubleMatrix*);
//Getter.
MfDoubleMatrix* getErrorsAt(int paraRecordId, int paraMapNo);
//Setter.
void setErrorAt(int, int, int, double);
//Setup.
void setup();
//Setter.
void setLastLayer(MfCnnLayer* paraLayer);
//Setter.
void setNextLayer(MfCnnLayer* paraLayer);
//Handle the input layer.
void setInputLayerOutput(MfDoubleMatrix* paraData);
//Handle the convolution layer.
void setConvolutionOutput();
//Handle the sampling layer.
void setSamplingOutput();
//Get the prediction for the current instance.
int getCurrentPrediction();
//Forward an instance; the parameter may not be useful.
void forward(MfDoubleMatrix* paraData);
//Set the errors of the convolution layer.
void setConvolutionLayerErrors();
//Set the errors of the sampling layer.
void setSamplingLayerErrors();
//Set the error of the output layer.
void setOutputLayerErrors(int paraLabel);
//Back propagation; the parameter may not be useful.
void backPropagation(int paraLabel);
//Update kernels.
void updateKernels();
//Update bias.
void updateBias();
//Set the layer activator.
void setLayerActivator(char);
//Getter.
MfDoubleMatrix* getCurrentOutMap();
//Unit test.
void unitTest();
Most of the functions are straightforward. Below we analyze a few key ones.
MfCnnLayer::MfCnnLayer(int paraLayerType, int paraBatchSize, int paraNum, MfSize* paraSize)
{
//Accept parameter
layerType = paraLayerType;
batchSize = paraBatchSize;
switch (layerType)
{
case INPUT_LAYER:
numOutMaps = 1;
mapSize->cloneToMe(paraSize);
break;
case CONVOLUTION_LAYER:
numOutMaps = paraNum;
kernelSize->cloneToMe(paraSize);
break;
case SAMPLING_LAYER:
scaleSize->cloneToMe(paraSize);
break;
case OUTPUT_LAYER:
numClasses = paraNum;
mapSize->setValues(1, 1);
numOutMaps = numClasses;
break;
}// Of switch
}//Of the second constructor
This constructor sets the parameters according to the layer type.
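For illustration, a LeNet-style stack for 28 x 28 MNIST images might be assembled as follows (a hypothetical sketch: the batch size, the map counts, and the MfSize(width, height) arguments are assumptions for this example, not fixed settings of the repository):
//A hypothetical 6-layer stack; batch size 20 is an assumption.
MfCnnLayer* tempLayers[6];
tempLayers[0] = new MfCnnLayer(INPUT_LAYER, 20, 0, new MfSize(28, 28));
tempLayers[1] = new MfCnnLayer(CONVOLUTION_LAYER, 20, 6, new MfSize(5, 5));  //6 maps of 24 x 24 after setup().
tempLayers[2] = new MfCnnLayer(SAMPLING_LAYER, 20, 0, new MfSize(2, 2));     //6 maps of 12 x 12.
tempLayers[3] = new MfCnnLayer(CONVOLUTION_LAYER, 20, 12, new MfSize(5, 5)); //12 maps of 8 x 8.
tempLayers[4] = new MfCnnLayer(SAMPLING_LAYER, 20, 0, new MfSize(2, 2));     //12 maps of 4 x 4.
tempLayers[5] = new MfCnnLayer(OUTPUT_LAYER, 20, 10, nullptr);               //10 classes; the size is unused here.
The remaining sizes are filled in later: after linking the layers with setLastLayer/setNextLayer, each layer's setup() computes its map size from the previous layer.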
The constructor alone is not enough; the member variables still need to be initialized.
void MfCnnLayer::setup()
{
int tempNumFrontMaps = 0;
if (lastLayer != nullptr)
{
tempNumFrontMaps = lastLayer->getNumOutMaps();
}//Of if
switch (layerType)
{
case INPUT_LAYER:
initOutMaps();
break;
case CONVOLUTION_LAYER:
getMapSize()->subtractToMe(lastLayer->getMapSize(), kernelSize, 1);
initKernel(tempNumFrontMaps);
initBias();
initErrors();
initOutMaps();
break;
case SAMPLING_LAYER:
setNumOutMaps(tempNumFrontMaps);
getMapSize()->divideToMe(lastLayer->getMapSize(), getScaleSize());
initErrors();
initOutMaps();
break;
case OUTPUT_LAYER:
kernelSize->cloneToMe(lastLayer->getMapSize());
initKernel(tempNumFrontMaps);
initBias();
initErrors();
initOutMaps();
break;
}//Of switch
}//Of setup
Remarks: each layer's map size is computed from the previous layer before space is allocated. The statement
kernel->fill(-0.005, 0.095);
fills the kernel with random numbers drawn from the interval (-0.005, 0.095), and bias->fill(0); initializes the bias to zero.
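The map-size bookkeeping in setup follows the usual rules: a valid convolution shrinks each dimension by (kernel size - 1), and sampling divides it by the scale. A tiny standalone check of the arithmetic for the MNIST sizes (illustration only):
#include <cstdio>

int convOut(int paraIn, int paraKernel) { return paraIn - paraKernel + 1; } //Valid convolution.
int sampleOut(int paraIn, int paraScale) { return paraIn / paraScale; }     //Sampling.

int main()
{
    //With 5 x 5 kernels and 2 x 2 scaling: 28 -> 24 -> 12 -> 8 -> 4.
    printf("%d %d %d %d\r\n", convOut(28, 5), sampleOut(24, 2), convOut(12, 5), sampleOut(8, 2));
    return 0;
}//Of main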
As in the ANN, forward is responsible for the forward computation.
void MfCnnLayer::forward(MfDoubleMatrix* paraData)
{
switch (layerType)
{
case INPUT_LAYER:
setInputLayerOutput(paraData);
break;
case CONVOLUTION_LAYER:
setConvolutionOutput();
break;
case SAMPLING_LAYER:
setSamplingOutput();
break;
case OUTPUT_LAYER:
setConvolutionOutput();
break;
default:
printf("Unsupported layer type.\r\n");
throw "Unsupported layer type.\r\n";
break;
}//Of switch
}//Of forward
This function merely dispatches to the handlers for the different cases. Note that the output layer also uses the convolution operation: its kernel size is set to the previous layer's map size in setup, so each output map is 1 x 1.
Forward processing of the input layer:
void MfCnnLayer::setInputLayerOutput(MfDoubleMatrix* paraData)
{
if (paraData->getColumns() != mapSize->width * mapSize->height)
{
printf("input record does not match the map size.\r\n");
throw "input record does not match the map size.";
}//Of if
for (int i = 0; i < mapSize->width; i++)
{
for (int j = 0; j < mapSize->height; j++)
{
//The input layer has only 1 out map.
setOutMapValue(0, i, j, paraData->getValue(0, mapSize->height * i + j));
}//Of for j
}//Of for i
}//Of setInputLayerOutput
It converts the input vector into an n x n matrix. For example, each image in the MNIST dataset is stored as a vector of length 784, which is converted here into a 28 x 28 matrix.
As for how width/height are defined for an image, any convention works as long as it is used consistently.
void MfCnnLayer::setConvolutionOutput()
{
int tempLastNumMaps = lastLayer->getNumOutMaps();
MfDoubleMatrix* tempMap;
MfDoubleMatrix* tempKernel;
double tempBias;
bool tempEmpty = true;
for (int j = 0; j < numOutMaps; j++)
{
tempEmpty = true;
for (int i = 0; i < tempLastNumMaps; i++)
{
tempMap = lastLayer->getOutMapAt(i);
tempKernel = getKernelAt(i, j);
if (tempEmpty)
{
//Only convolution on one map.
currentOutMap->convolutionValidToMe(tempMap, tempKernel);
tempEmpty = false;
}
else
{
//Sum up convolution maps
singleOutMap->convolutionValidToMe(tempMap, tempKernel);
currentOutMap->addToMe(currentOutMap, singleOutMap);
}//Of if
}//Of for i
//Bias.
tempBias = getBiasAt(j);
currentOutMap->addValueToMe(tempBias);
//Activation.
currentOutMap->setActivator(layerActivator);
currentOutMap->activate();
setOutMapValue(j, currentOutMap);
}//Of for j
}//Of setConvolutionOutput
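The heavy lifting above is done by convolutionValidToMe. Its "valid" semantics (the kernel never leaves the input) can be sketched on raw row-major arrays as follows; this is an illustration of the operation, not MfDoubleMatrix's actual implementation, and any internal kernel flipping is ignored:
//out must have size (inRows - kRows + 1) x (inCols - kCols + 1).
void convolutionValid(const double* in, int inRows, int inCols,
        const double* k, int kRows, int kCols, double* out)
{
    int outRows = inRows - kRows + 1;
    int outCols = inCols - kCols + 1;
    for (int i = 0; i < outRows; i++)
    {
        for (int j = 0; j < outCols; j++)
        {
            double tempSum = 0;
            for (int r = 0; r < kRows; r++)
            {
                for (int c = 0; c < kCols; c++)
                {
                    tempSum += in[(i + r) * inCols + (j + c)] * k[r * kCols + c];
                }//Of for c
            }//Of for r
            out[i * outCols + j] = tempSum;
        }//Of for j
    }//Of for i
}//Of convolutionValid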
Remarks: at one point I accidentally lost the statement
tempEmpty = false;
and debugging that took two days. Tears...
void MfCnnLayer::setSamplingOutput()
{
int tempLastMapNum = lastLayer->getNumOutMaps();
for (int i = 0; i < tempLastMapNum; i++) {
currentOutMap->scaleToMe(lastLayer->getOutMapAt(i), scaleSize);
setOutMapValue(i, currentOutMap);
}//Of for i
}//Of setSamplingOutput
Everything relies on the scaleToMe function of MfDoubleMatrix. It currently takes the average over each block; it could later be extended to support max/min pooling and so on.
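A sketch of what the averaging version of scaleToMe amounts to on raw arrays (illustration only):
//Average over non-overlapping paraScale x paraScale blocks;
//out has size (inRows / paraScale) x (inCols / paraScale).
void scaleAverage(const double* in, int inRows, int inCols, int paraScale, double* out)
{
    int outRows = inRows / paraScale;
    int outCols = inCols / paraScale;
    for (int i = 0; i < outRows; i++)
    {
        for (int j = 0; j < outCols; j++)
        {
            double tempSum = 0;
            for (int r = 0; r < paraScale; r++)
            {
                for (int c = 0; c < paraScale; c++)
                {
                    tempSum += in[(i * paraScale + r) * inCols + (j * paraScale + c)];
                }//Of for c
            }//Of for r
            out[i * outCols + j] = tempSum / (paraScale * paraScale);
        }//Of for j
    }//Of for i
}//Of scaleAverage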
Like forward, the following function dispatches to a concrete handler according to the layer type.
void MfCnnLayer::backPropagation(int paraLabel)
{
switch (layerType)
{
case INPUT_LAYER:
printf("Input layer should not back propagation.\r\n");
throw "Input layer should not back propagation.";
break;
case CONVOLUTION_LAYER:
setConvolutionLayerErrors();
break;
case SAMPLING_LAYER:
setSamplingLayerErrors();
break;
case OUTPUT_LAYER:
setOutputLayerErrors(paraLabel);
break;
default:
printf("Unsupported layer type.\r\n");
throw "Unsupported layer type.\r\n";
break;
}//Of switch
}//Of backPropagation
void MfCnnLayer::setOutputLayerErrors(int paraLabel)
{
    //std::vector (requires #include <vector>) replaces the variable-length
    //arrays of the original code, which are a compiler extension rather than standard C++.
    std::vector<double> tempTarget(numOutMaps, 0.0);
    std::vector<double> tempOutmaps(numOutMaps);
    double tempValue;
    for (int i = 0; i < numOutMaps; i++)
    {
        tempOutmaps[i] = getOutMapAt(i)->getValue(0, 0);
    }//Of for i
    tempTarget[paraLabel] = 1;
    for (int i = 0; i < numOutMaps; i++)
    {
        tempValue = layerActivator->derive(tempOutmaps[i]) * (tempTarget[i] - tempOutmaps[i]);
        setErrorAt(i, 0, 0, tempValue);
    }//Of for i
}//Of setOutputLayerErrors
Remarks: the target vector is one-hot, with a 1 at the label position. The error of output node i is f'(o_i) * (t_i - o_i), i.e., the derivative of the activation times the difference between target and output, just as in the ANN output layer.
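As a tiny standalone check of this formula, assuming a sigmoid activator so that f'(o) = o(1 - o) (the sigmoid choice is an assumption for illustration):
#include <cstdio>

int main()
{
    double tempOutputs[4] = {0.1, 0.7, 0.2, 0.3}; //Hypothetical 1 x 1 out map values.
    int tempLabel = 1;                            //Position of the 1 in the one-hot target.
    for (int i = 0; i < 4; i++)
    {
        double tempTarget = (i == tempLabel) ? 1.0 : 0.0;
        double tempError = tempOutputs[i] * (1 - tempOutputs[i]) * (tempTarget - tempOutputs[i]);
        printf("Node %d error: %f\r\n", i, tempError);
    }//Of for i
    return 0;
}//Of main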
void MfCnnLayer::setSamplingLayerErrors()
{
int tempNextMapNum = nextLayer->getNumOutMaps();
bool tempFirst;
MfDoubleMatrix* tempNextErrors;
MfDoubleMatrix* tempRot180Kernel;
for (int i = 0; i < numOutMaps; i++)
{
tempFirst = true;
for (int j = 0; j < tempNextMapNum; j++) {
tempNextErrors = nextLayer->getErrorsAt(j);
tempRot180Kernel = nextLayer->getRot180KernelAt(i, j);
if (tempFirst)
{
currentErrors->convolutionFullToMe(tempNextErrors, tempRot180Kernel);
tempFirst = false;
}
else
{
currentSingleErrors->convolutionFullToMe(tempNextErrors, tempRot180Kernel);
currentErrors->addToMe(currentErrors, currentSingleErrors);
}//Of if
}//Of for j
setErrorsAt(i, currentErrors);
}//Of for i
}//Of setSamplingLayerErrors
Remarks: the errors come from the next (convolution) layer. For each of this layer's maps, a full convolution of the next layer's errors with the corresponding kernel rotated by 180 degrees is computed, and the results are summed over all maps of the next layer.
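convolutionFullToMe is the "full" counterpart of the valid convolution sketched earlier: the kernel may hang off the input edges, out-of-range input cells count as zero, and the output grows to (inRows + kRows - 1) x (inCols + kCols - 1). A sketch under the same assumptions:
void convolutionFull(const double* in, int inRows, int inCols,
        const double* k, int kRows, int kCols, double* out)
{
    int outRows = inRows + kRows - 1;
    int outCols = inCols + kCols - 1;
    for (int i = 0; i < outRows; i++)
    {
        for (int j = 0; j < outCols; j++)
        {
            double tempSum = 0;
            for (int r = 0; r < kRows; r++)
            {
                for (int c = 0; c < kCols; c++)
                {
                    int tempRow = i + r - (kRows - 1);
                    int tempCol = j + c - (kCols - 1);
                    if (tempRow >= 0 && tempRow < inRows && tempCol >= 0 && tempCol < inCols)
                    {
                        tempSum += in[tempRow * inCols + tempCol] * k[r * kCols + c];
                    }//Of if
                }//Of for c
            }//Of for r
            out[i * outCols + j] = tempSum;
        }//Of for j
    }//Of for i
}//Of convolutionFull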
void MfCnnLayer::setConvolutionLayerErrors()
{
MfDoubleMatrix* tempNextLayerErrors;
for (int i = 0; i < numOutMaps; i ++)
{
currentOutMap = getOutMapAt(i);
currentOutMap->setActivator(layerActivator);
currentOutMap->deriveToMe(currentOutMap);
//The space of singleOutMap is reused here, in fact here is the error.
tempNextLayerErrors = nextLayer->getErrorsAt(i);
singleOutMap->kroneckerToMe(nextLayer->getErrorsAt(i), nextLayer->getScaleSize());
currentOutMap->cwiseProductToMe(currentOutMap, singleOutMap);
setErrorsAt(i, currentOutMap);
}//Of for i
}//Of setConvolutionLayerErrors
Remarks: the next layer is a sampling layer, so its errors are first upsampled with the Kronecker product (each error value is spread over a scaleSize block) and then multiplied elementwise by the derivative of this layer's output. The space of singleOutMap is reused to hold the upsampled errors.
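A sketch of the Kronecker upsampling itself (each error value is replicated over a paraScale x paraScale block; interface assumed as in the earlier sketches):
//out has size (inRows * paraScale) x (inCols * paraScale).
void kronecker(const double* in, int inRows, int inCols, int paraScale, double* out)
{
    int outRows = inRows * paraScale;
    int outCols = inCols * paraScale;
    for (int i = 0; i < outRows; i++)
    {
        for (int j = 0; j < outCols; j++)
        {
            out[i * outCols + j] = in[(i / paraScale) * inCols + (j / paraScale)];
        }//Of for j
    }//Of for i
}//Of kronecker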
The errors are computed while a batch of data is forwarded and back-propagated; this function then uses them to update the convolution kernels.
void MfCnnLayer::updateKernels()
{
int tempNumLastMap = lastLayer->getNumOutMaps();
bool tempFirst = true;
for (int j = 0; j < numOutMaps; j++)
{
for (int i = 0; i < tempNumLastMap; i++)
{
tempFirst = true;
for (int r = 0; r < batchSize; r++)
{
currentErrors = getErrorsAt(r, j);
if (tempFirst)
{
tempFirst = false;
deltaKernel->convolutionValidToMe(lastLayer->getOutMapAt(r, i), currentErrors);
}
else
{
singleDeltaKernel->convolutionValidToMe(lastLayer->getOutMapAt(r, i), currentErrors);
deltaKernel->addToMe(deltaKernel, singleDeltaKernel);
}//Of if
}//Of for r
currentKernel = getKernelAt(i, j);
currentKernel->timesValueToMe(1 - lambda * alpha);
currentKernel->addToMe(currentKernel, deltaKernel);
setKernelAt(i, j, currentKernel);
}//Of for i
}//Of for j
}//Of updateKernels
Remarks: the statement
currentKernel->timesValueToMe(1 - lambda * alpha);
applies weight decay with coefficient lambda; with lambda = 0 it has no effect. The kernel increment itself is obtained simply with convolutionValidToMe.
void MfCnnLayer::updateBias() {
double tempBias;
double tempDeltaBias;
for (int j = 0; j < numOutMaps; j ++)
{
errors->sumToMatrix(j, currentErrors);
tempDeltaBias = currentErrors->sumUp() / batchSize;
tempBias = getBiasAt(j) + alpha * tempDeltaBias;
setBiasAt(j, tempBias);
}//Of for i
}//Of updateBias
Remarks: I worried that
errors->sumToMatrix(j, currentErrors);
would pile up too much into currentErrors, but after running the experiments I realized I had been overthinking it.
Since MfCnnLayer already takes on most of the concrete work, the burden on this class (MfFullCnn) is small. Its members:
//The number of layers.
int numLayers;
//The layers.
MfCnnLayer** layers;
//The batch size.
int batchSize;
//The activator.
Activator* layerActivator;
//The random array for training.
MfIntArray* randomArray;
Only one function is analyzed here.
double MfFullCnn::train(MfDoubleMatrix* paraX, MfIntArray* paraY)
{
int tempRows = paraX->getRows();
int tempColumns = paraX->getColumns();
int tempEpochs = tempRows / batchSize; //Actually the number of batches in one pass over the data.
int tempInstance;
int tempLabel;
int tempPrediction;
double tempCorrect = 0.0;
MfDoubleMatrix* tempData = new MfDoubleMatrix(1, tempColumns);
randomize();
for(int e = 0; e < tempEpochs; e ++)
{
//A new batch
prepareForNewBatch();
for(int i = 0; i < batchSize; i ++)
{
tempInstance = randomArray->getValue(e * batchSize + i);
for(int j = 0; j < tempColumns; j ++)
{
tempData->setValue(0, j, paraX->getValue(tempInstance, j));
}//Of for j
tempLabel = paraY->getValue(tempInstance);
tempPrediction = forward(tempData);
if (tempPrediction == tempLabel)
{
tempCorrect ++;
}//Of if
backPropagation(tempLabel);
//A new record
prepareForNewRecord();
}//Of for i
//Update for each batch
//printf("\r\n updateParameters\r\n");
updateParameters();
}//Of for e
delete tempData; //Release the temporary record buffer to avoid a leak.
return tempCorrect / tempRows;
}//Of train
Remarks: randomize() shuffles the instance order via randomArray before each pass; within a pass, the parameters are updated once per batch, and the return value is the training accuracy.
Translating these two classes took quite a bit of time.