The complete project code can be downloaded from https://github.com/hfq0219/mnist.
This program trains a fully connected neural network for handwritten digit recognition and uses it for prediction. By changing the number of input and output nodes and adjusting the number of layers, it can also be applied to other multi-class classification or regression problems.
The code structure borrows the framework of darknet, the source code of the YOLO (You Only Look Once) project.
Directory and file overview:
--mnist/ holds the raw MNIST binary files;
--obj/ holds the compiled .obj files produced by the build;
--backup/ holds the weight files written after each training epoch;
--testData/ holds the test images (0.jpg, 1.jpg, ...) generated by running ./data on the MNIST test set, plus the matching label file testLabel.txt;
--trainData/ holds the training images (0.jpg, 1.jpg, ...) generated by running ./data on the MNIST training set, plus the matching label file trainLabel.txt;
--layer.cpp/.h define and implement the fully connected layer class: they allocate the layer's computation buffers and provide the forward pass, backward pass, and weight-update functions;
--network.cpp/.h define and implement the network class: adding fully connected layers and running the network's forward and backward passes;
--mnist.cpp reads the binary files under mnist/ and writes them out as images, which makes the data easy to visualize and to read and write;
--main.cpp is the program entry point; it implements training, validation, and prediction, and saves each layer's weights to a file once training finishes,
so the weights can be reloaded later for further training or prediction without retraining the network.
Usage:
--The project is built with a Makefile: simply run make in a terminal to produce the two executables data and run;
--Note that the program uses OpenCV to generate and read/write images, so make sure an OpenCV development environment is installed and configured on your machine.
1. Run ./data to read the data files under mnist/ and generate the training and test images;
2. Run ./run train to train the network; when training finishes, the weight file mnist.weight is written;
3. Run ./run test mnist.weight to predict on images: just type an image file name, and press ctrl-c to stop. A full command sequence is summarized below.
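For reference, assuming OpenCV is installed and the repository has been cloned, the whole workflow above boils down to the following commands (nothing here goes beyond the steps already listed):
make
./data
./run train
./run test mnist.weight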
Main parameters:
--The parameters you will most likely want to change are the number of training epochs (epoches) and the learning rate, both passed in when the network object is constructed;
--The number of fully connected layers, the number of neurons in each layer, and the activation type can be adjusted through network->addLayer(int node, ACTIVATION activate);
--Learning rate schedule: by default the rate is reduced by 0.01 after every epoch, and once it would fall below 0.01 it is fixed at 0.01. A configuration sketch follows below.
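To make the parameters above concrete, here is a minimal, hypothetical sketch of how the same classes could be configured for a different problem. Only the Network constructor and network->addLayer come from this project; the input size, class count, layer sizes, epoch count, and learning rate below are made-up illustrative values, not recommendations.
#include "network.h"
int main()
{
int epoches = 20; // number of training epochs (hypothetical value)
float learningRate = 0.05; // initial learning rate (hypothetical value)
int numInputs = 100; // e.g. a 100-dimensional feature vector
int numOutputs = 3; // e.g. a 3-class problem
Network *net = new Network(epoches, learningRate, numInputs, numOutputs);
net->addLayer(64, SIGMOID); // hidden layer: 64 nodes, sigmoid activation
net->addLayer(32, RELU); // hidden layer: 32 nodes, ReLU activation
net->addLayer(net->mNumOutputs, SIGMOID); // output layer must have numOutputs nodes
// feed normalized inputs to net->compute(...) exactly as main.cpp does below
delete net;
return 0;
}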
Main entry file:
/**
* @Author: fengqi
* @Email: [email protected]
*/
#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>
#include <cstring>
#include <cstdlib>
#include <ctime>
#include <opencv2/opencv.hpp>
#include "network.h"
using namespace std;
using namespace cv;
void saveWeight(string file,Network *network){ // save every layer's weights to a file
ofstream outfile(file);
for(int i=0;i<network->mNumLayers;i++){
Layer *layer=network->mLayers[i];
for(int m=0;m<layer->mNumNodes;m++){
for(int n=0;n<layer->mNumInputNodes+1;n++){
outfile<<layer->mWeights[m][n]<<" ";
}
}
}
outfile.close();
cout<<"save weight file to <"<<file<<"> done."<<endl;
}
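// Weight file layout (written by saveWeight above and read back by loadWeight below):
// plain text, space-separated floats, layer by layer, node by node,
// mNumInputNodes+1 values per node, with the bias stored last.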
void loadWeight(string file,Network *network){ // load layer weights from a file
ifstream infile(file);
if(!infile.is_open()){
cout<<"open weight file failed!"<<endl;
exit(-1);
}
for(int i=0;i<network->mNumLayers;i++){
Layer *layer=network->mLayers[i];
for(int m=0;m<layer->mNumNodes;m++){
for(int n=0;n<layer->mNumInputNodes+1;n++){
infile>>layer->mWeights[m][n];
}
}
}
infile.close();
cout<<"load weight from <"<<file<<"> done."<<endl;
}
float train(Network *network,string path,int imageSize, int numImages) // train the network on the training set
{
srand(time(0));
float *temp = new float[imageSize];
string la=path;
ifstream labelFile(path.append("trainLabel.txt")); //标签文件
int label;
for (int i = 0; i < numImages; i++)
{
if(i%(numImages/10)==0){ // every numImages/10 (here 6,000) images, reset the error sum and show training progress
network->mErrorSum=0;
cout << setfill('=') << setw(2) << ">"<<(i/(numImages/10))*10<<"%"<<flush;
}
if(i==numImages-1)
cout<<"====>100%"<< endl;
int k=rand()%numImages; // pick a random image for this training step
string l=la;
Mat x=imread(l.append(to_string(k)).append(".jpg"),0); // read the image in grayscale with OpenCV
if(!x.data){cout<<"read image error."<<endl;return -1;}
for(int m=0;m<x.rows;m++){
for(int n=0;n<x.cols;n++){
float a=(x.at<uchar>(m,n))/255.0; //归一化
temp[m*x.cols+n]=a;
}
}
labelFile.seekg(2*k); // seek to the label matching image k (each label occupies 2 characters in the label file)
labelFile>>label;
network->compute(temp,label); //每次训练一张图片
}
cout << "the error is:" << network->mErrorSum/(numImages/10);
labelFile.close();
delete [] temp;
return network->mErrorSum;
}
int validate(Network *network,string path,int imageSize, int numImages) // measure network accuracy on the test set
{
int ok_cnt = 0;
float* temp = new float[imageSize];
string la=path;
ifstream labelFile(path.append("testLabel.txt")); //标签文件
int label,idx=0;
for (int i = 0; i < numImages; i++)
{
if(i%(numImages/10)==0) // show progress
cout << setfill('=') << setw(2) << ">"<<(i/(numImages/10))*10<<"%"<<flush;
if(i==numImages-1)
cout<<"====>100%"<< endl;
string l=la;
Mat x=imread(l.append(to_string(i)).append(".jpg"),0); // read the test images in order
if(!x.data){cout<<"read image error."<<endl;return -1;}
for(int m=0;m<x.rows;m++){
for(int n=0;n<x.cols;n++){
float a=(x.at<uchar>(m,n))/255.0; //归一化
temp[m*x.cols+n]=a;
}
}
labelFile>>label;
network->compute(temp,label); //验证
float *out=network->mOutputs; //获得计算输出
float max_value = -9999;
for (int i = 0; i < network->mNumOutputs; i++)
{
if (out[i] > max_value)
{
max_value = out[i]; //最大输出位置即图片所属类别
idx = i;
}
}
if (idx == label) // compare with the label to decide whether the prediction is correct
{
ok_cnt++;
}
}
labelFile.close();
delete [] temp;
return ok_cnt;
}
int main(int argc, char* argv[]) // program entry point
{
if(argc<2||(strcmp(argv[1],"train")!=0&&strcmp(argv[1],"test")!=0)){ //判断调用参数是否合法
cout<<"usage: ./run [train/test] [weight_file]\nwrong parameter!!!"<<endl;
return -1;
}
bool load_weight=false; // whether a weight file has been loaded
int imageRow=28,imageCol=28; // input image size
int imageSize=imageRow*imageCol;
int trainNumImages = 60000; // training set size
int testNumImages = 10000; // test set size
int networkInputs=imageSize; // network configuration
int networkOutputs=10;
int epoches=10;
float learningRate=0.1;
Network *network = new Network(epoches,learningRate,networkInputs,networkOutputs);
network->addLayer(256,SIGMOID); //加入全连接层,参数有神经元个数和激活函数类型
network->addLayer(128,SIGMOID);
network->addLayer(network->mNumOutputs,SIGMOID);
cout <<"\nnetwork framework: (input)"<< network->mNumInputs;
for(int i=0;i<network->mNumLayers;i++){
cout<<"=>"<<network->mLayers[i]->mNumNodes;
}
cout<<"(output)"<<endl<<endl;
if(argc>2){ //加载预训练权重文件
loadWeight(argv[2],network);
load_weight=true;
}
if(strcmp(argv[1],"train")==0) //训练网络
{
time_t time0=time(0);
cout<<"start training..."<<endl<<endl;
cout<<"total epoches: "<<network->mEpoches<<", NO.1 epoches. begin learning rate: "<<network->mLearningRate<< endl;
for(int i = 0; i < network->mEpoches; i++) //共训练 epoches 轮次
{
string weightFile="backup/mnist.weight_";
time_t time1=time(0);
network->mTrain=true; //训练标志
cout<<"\nep: "<<i+1<<", lr: "<<network->mLearningRate<<" ";
float err = train(network,"trainData/",imageSize,trainNumImages); //开始训练
cout<<", cost time: "<<time(0)-time1<<" seconds."<<endl;
network->mTrain=false; //验证测试标志
cout<<"\nvalidate...";
int ok = validate(network,"testData/",imageSize,testNumImages); //开始验证
cout<<"validate accuracy: "<<(float)ok/testNumImages*100<< "%, true: "<<ok<<", total: "<<testNumImages<< endl;
if(network->mLearningRate>0.01) network->mLearningRate-= 0.01; //学习率变化调整
else network->mLearningRate=0.01;
if(i<network->mEpoches-1)
saveWeight(weightFile.append(to_string(i+1)).append("_").append(to_string(ok)),network); // save the weights at the end of each epoch
}
saveWeight("mnist.weight",network); //网络训练结束,保存权重文件
cout<<"\ntraining network success...cost time: "<<(time(0)-time0)<<" seconds.\n"<<endl;
}
else if(strcmp(argv[1],"test")==0) //测试预测图片
{
if(!load_weight){ // network weights must be loaded before prediction
cout<<"no weight file loaded, can't start prediction.\n"<<endl;
return -1;
}
string name;
while(1){ // prediction loop
cout<<"\nplease enter the image path...(ctrl-c to exit.)"<<endl;
getline(cin,name); // read the image file name
Mat m=imread(name,0); // read the image in grayscale with OpenCV
if(!m.data){
cout<<"failed to read image. please check the image file name..."<<endl;
continue;
}
network->mTrain=false;
if(m.cols!=imageCol||m.rows!=imageRow) resize(m,m,Size(imageCol,imageRow)); // resize the image to the network's input size
float *d=new float[imageSize];
for(int i=0;i<imageRow;i++){
for(int j=0;j<imageCol;j++){
float x=(m.at<uchar>(i,j))/255.0; //将二维像素值转成一维向量,并归一化
d[i*imageCol+j]=x;
}
}
float max=-9999;
int idx=10;
network->compute(d); //开始预测
float *out=network->mOutputs; //获得网络输出
for(int i=0;i<network->mNumOutputs;i++){
if(out[i]>max){ //取最大输出为预测值
max=out[i];
idx=i;
}
}
cout<<"the prediction is: "<<idx<<endl;
delete [] d;
}
}
delete network;
return 0;
}
Network class definition:
//network.h
#ifndef __NETWORK_H__
#define __NETWORK_H__
#include "layer.h"
#include <vector>
using namespace std;
class Network
{
public:
Network(int epoches,float learningRate,int numInputs,int numOutputs);
~Network();
void compute(float *inputs,int label=10);
void addLayer(int numNodes,ACTIVATION activate=SIGMOID);
private:
void init();
void forwardNetwork(float *inputs,int label);
void backwardNetwork();
public:
bool mTrain;
int mEpoches;
int mNumInputs;
int mNumOutputs;
int mNumLayers;
float mLearningRate;
float mErrorSum;
float *mInputs;
float *mOutputs;
vector<Layer *> mLayers;
};
#endif
//network.cpp
#include "network.h"
#include <cstring>
using namespace std;
Network::Network(int epoches,float learningRate, int numInputs, int numOutputs)
:mEpoches(epoches),
mNumInputs(numInputs),
mNumOutputs(numOutputs),
mLearningRate(learningRate)
{
mNumLayers=0;
mErrorSum=0;
mInputs=NULL;
mOutputs=NULL;
mTrain=false; // default to inference mode; main.cpp sets it to true before training
}
Network::~Network()
{
for (int i = 0; i < mNumLayers; i++)
{
if (mLayers[i])
{
delete mLayers[i];
}
}
}
void Network::init() // initialization
{
for (int i = 0; i < mNumLayers; ++i)
{
mLayers[i]->init();
}
mErrorSum = 0;
}
void Network::addLayer(int numNodes,ACTIVATION activate) // append a fully connected layer
{
int numInputNodes = (mNumLayers > 0) ? mLayers[mNumLayers-1]->mNumNodes : mNumInputs;
mLayers.push_back(new Layer(numNodes,numInputNodes,activate));
mNumLayers++;
}
void Network::forwardNetwork(float *inputs,int label) // network forward pass
{
for (int i = 0; i < mNumLayers; i++)
{
mLayers[i]->forwardLayer(inputs); //对每个层计算
inputs = mLayers[i]->mOutputs;
}
mOutputs=inputs; //注意是指向了最后一层的输出
if(!mTrain) return;
float *outputs = mOutputs;
float *delta = mLayers[mNumLayers-1]->mDelta;
for (int i = 0; i < mNumOutputs; i++)
{
float err;
if(i==label){
err=1-outputs[i];
}else{
err=0-outputs[i];
}
delta[i] = err; // store the output-layer delta and accumulate the error
mErrorSum += err * err;
}
}
void Network::backwardNetwork() // backward pass: backpropagate and update the weights
{
float *prevOutputs = NULL;
float *prevDelta = NULL;
for (int i = mNumLayers-1; i >= 0; i--)
{
if (i > 0)
{
Layer &prev = *mLayers[i-1];
prevOutputs = prev.mOutputs;
prevDelta = prev.mDelta;
memset(prevDelta, 0, prev.mNumNodes * sizeof(float));
}
else
{
prevOutputs = mInputs;
prevDelta = NULL; // the first layer is fed by the network inputs, so no delta is needed there
}
mLayers[i]->backwardLayer(prevOutputs, prevDelta,mLearningRate); //反向计算更新权重
}
}
void Network::compute(float *inputs,int label) // entry point for one forward/backward step
{
mInputs=inputs;
forwardNetwork(inputs,label);
if(!mTrain){
return;
}
backwardNetwork();
}
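For reference, the error signal that forwardNetwork stores for the output layer corresponds to a plain squared-error objective. With the one-hot target $t_i$ (1 for the labelled class, 0 for the others) and network outputs $y_i$, the code above sets $\mathrm{delta}_i = t_i - y_i$ and accumulates $\mathrm{mErrorSum} \mathrel{+}= \sum_i (t_i - y_i)^2$, so mErrorSum is simply the squared error summed over the images in the current reporting window; train() prints it but does not use it for the updates themselves.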
Fully connected layer definition:
//layer.h
#ifndef __LAYER_H__
#define __LAYER_H__
typedef enum{
SIGMOID,RELU,LEAKY
} ACTIVATION;
class Layer
{
public:
Layer(int numNodes, int numInputNodes,ACTIVATION activate=SIGMOID);
Layer(Layer &layer);
~Layer();
void forwardLayer(float *inputs);
void backwardLayer(float *prevOutputs,float *prevDelta,float learningRate);
void init();
private:
inline float active(float x,ACTIVATION activate);
inline float gradient(float x,ACTIVATION activate);
public:
ACTIVATION mActivate;
int mNumInputNodes;
int mNumNodes;
float **mWeights;
float *mOutputs;
float *mDelta;
};
#endif
//layer.cpp
#include "layer.h"
#include <cstring>
#include <cstdlib>
#include <ctime>
#include <cmath>
#include <iostream>
using namespace std;
Layer::Layer(int numNodes, int numInputNodes,ACTIVATION activa)
:mNumNodes(numNodes),
mNumInputNodes(numInputNodes),
mActivate(activa)
{
mWeights = new float*[mNumNodes];
mOutputs = new float[mNumNodes];
mDelta = new float[mNumNodes];
init();
}
Layer::Layer(Layer &layer)
:mNumNodes(layer.mNumNodes),
mNumInputNodes(layer.mNumInputNodes),
mActivate(layer.mActivate)
{
int size = mNumNodes * sizeof(float);
memcpy(mOutputs, layer.mOutputs, size);
memcpy(mDelta, layer.mDelta, size);
for (int i = 0; i < mNumNodes; i++)
{
memcpy(mWeights[i], layer.mWeights[i], layer.mNumInputNodes+1);
}
}
Layer::~Layer()
{
for (int i = 0; i < mNumNodes; i++)
{
delete [] mWeights[i];
}
delete [] mWeights;
delete [] mOutputs;
delete [] mDelta;
}
void Layer::init()
{
memset(mOutputs, 0, mNumNodes * sizeof(float));
memset(mDelta, 0, mNumNodes * sizeof(float));
srand(time(0));
for (int i = 0; i < mNumNodes; ++i)
{
float *curWeights = new float[mNumInputNodes + 1];
mWeights[i] = curWeights;
for (int w = 0; w < mNumInputNodes + 1; w++) //还有一个 bias 值,所以加 1
{
curWeights[w] = rand() % 1000 * 0.001 - 0.5;
}
}
}
float Layer::active(float x,ACTIVATION activate) // activation function
{
switch(activate){
case SIGMOID:
return (1.0/(1.0+exp(-x)));
case RELU:
return x*(x>0);
case LEAKY:
return (x>0)?x:0.1*x;
default:
cout<<"no activation."<<endl;
return x;
}
}
float Layer::gradient(float x,ACTIVATION activate) // derivative of the activation; x is the already-activated output, so for sigmoid this is y*(1-y)
{
switch(activate){
case SIGMOID:
return x*(1.0-x);
case RELU:
return (x>0);
case LEAKY:
return (x>0)?1:0.1;
default:
cout<<"no activation."<<endl;
return 1.0;
}
}
void Layer::forwardLayer(float *inputs) // layer forward pass
{
for (int n = 0; n < mNumNodes; ++n)
{
float *curWeights = mWeights[n];
float x = 0;
int k;
for (k = 0; k < mNumInputNodes; ++k)
{
x += curWeights[k] * inputs[k];
}
x += curWeights[k]; // add the bias (the last weight)
mOutputs[n] = active(x,mActivate);
}
}
void Layer::backwardLayer(float *prevOutputs,float *prevDelta,float learningRate) // layer backward pass
{
for (int i = 0; i < mNumNodes; i++)
{
float* curWeights = mWeights[i];
float delta = mDelta[i] * gradient(mOutputs[i],mActivate);
int w;
for (w = 0; w < mNumInputNodes; w++)
{
if (prevDelta)
{
prevDelta[w] += curWeights[w] * delta;
}
curWeights[w] += delta * learningRate * prevOutputs[w]; // update the weight
}
curWeights[w] += delta * learningRate; // update the bias
}
}
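Putting forwardLayer and backwardLayer together, each layer implements the standard backpropagation update for the squared-error objective above. With learning rate $\eta$, layer inputs $x_k$ (the previous layer's outputs), weights $w_{ik}$, bias $b_i$, and sigmoid outputs $y_i$, the code computes
$y_i = \sigma\!\left(\sum_k w_{ik} x_k + b_i\right)$,
$\delta_i = \mathrm{mDelta}_i \cdot y_i (1 - y_i)$,
$\mathrm{prevDelta}_k \mathrel{+}= \sum_i w_{ik}\,\delta_i$,
$w_{ik} \leftarrow w_{ik} + \eta\,\delta_i\,x_k, \quad b_i \leftarrow b_i + \eta\,\delta_i$.
Since mDelta at the output layer is $t_i - y_i$, the update amounts to gradient descent on $\tfrac{1}{2}\sum_i (t_i - y_i)^2$; note that gradient() receives the already-activated output, which is why the sigmoid derivative appears as $y(1-y)$ rather than being evaluated at the pre-activation.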
MNIST data processing code (adapted from code I found online and modified; unfortunately I no longer remember the original source):
//mnist.cpp
#include <iostream>
#include <fstream>
#include <string>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
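// MNIST stores its 32-bit header fields in big-endian byte order; ReverseInt swaps
// the bytes so the values read correctly on little-endian machines (e.g. the
// image-file magic number 2051 = 0x00000803 would otherwise come out as 0x03080000).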
int ReverseInt(int i)
{
unsigned char ch1, ch2, ch3, ch4;
ch1 = i & 255;
ch2 = (i >> 8) & 255;
ch3 = (i >> 16) & 255;
ch4 = (i >> 24) & 255;
return((int)ch1 << 24) + ((int)ch2 << 16) + ((int)ch3 << 8) + ch4;
}
void read_Mnist_Label(string filename,string save)
{
ofstream saveLabel;
saveLabel.open(save);
fstream file(filename);
if (file.is_open())
{
int magic_number = 0;
int number_of_images = 0;
file.read((char*)&magic_number, sizeof(magic_number));
file.read((char*)&number_of_images, sizeof(number_of_images));
magic_number = ReverseInt(magic_number);
number_of_images = ReverseInt(number_of_images);
cout << "magic number = " << magic_number << endl;
cout << "number of images = " << number_of_images << endl;
for (int i = 0; i < number_of_images; i++)
{
unsigned char label = 0;
file.read((char*)&label, sizeof(label));
saveLabel<<(int)label<<" "; //输出标签文件
}
}
else{
cout<<"open file failed."<<endl;
}
saveLabel.close();
file.close();
}
void read_Mnist_Images(string filename,string path)
{
fstream file(filename);
if (file.is_open())
{
int magic_number = 0;
int number_of_images = 0;
int n_rows = 0;
int n_cols = 0;
unsigned char label;
file.read((char*)&magic_number, sizeof(magic_number));
file.read((char*)&number_of_images, sizeof(number_of_images));
file.read((char*)&n_rows, sizeof(n_rows));
file.read((char*)&n_cols, sizeof(n_cols));
magic_number = ReverseInt(magic_number);
number_of_images = ReverseInt(number_of_images);
n_rows = ReverseInt(n_rows);
n_cols = ReverseInt(n_cols);
cout << "magic number = " << magic_number << endl;
cout << "number of images = " << number_of_images << endl;
cout << "rows = " << n_rows << endl;
cout << "cols = " << n_cols << endl;
Mat temp(n_rows,n_cols,CV_8UC1,Scalar::all(0));
for (int i = 0; i < number_of_images; i++)
{
string tm=path;
for (int r = 0; r < n_rows; r++)
{
for (int c = 0; c < n_cols; c++)
{
unsigned char image = 0;
file.read((char*)&image, sizeof(image));
temp.at<uchar>(r,c)=image;
}
}
imwrite(tm.append(to_string(i)).append(".jpg"),temp); // save the image as <index>.jpg
}
}
else{
cout<<"open file failed."<<endl;
}
file.close();
}
int main()
{
read_Mnist_Label("./mnist/t10k-labels.idx1-ubyte","./testData/testLabel.txt");
read_Mnist_Images("./mnist/t10k-images.idx3-ubyte","./testData/");
read_Mnist_Label("./mnist/train-labels.idx1-ubyte","./trainData/trainLabel.txt");
read_Mnist_Images("./mnist/train-images.idx3-ubyte","./trainData/");
cout<<"end."<<endl;
return 0;
}