MNIST手写字体数据库下载地址http://yann.lecun.com/exdb/mnist/ 。
从上面的数据库说明可以看出,MNIST手写字体数据库包含了四个文件,每个文件都是单纯的普通文件格式,因此可以采用C++的文件流将其打开。每一个文件除了开头几个字节的文件头之外,剩下的就是数据部分。因此,可以先将文件的文件头读进来,然后利用magic number进行验证,判断所读的文件是否为MNIST文件。
/**
 * @brief Raw header found at the start of an MNIST image file.
 *
 * The header is read byte-for-byte from the file, so every field is kept as
 * four unsigned bytes; use ConvertCharArrayToInt to turn a field into an int.
 */
struct MNISTImageFileHeader
{
    unsigned char MagicNumber[4];     // converts to 2051 for an image file
    unsigned char NumberOfImages[4];  // number of images stored in the file
    unsigned char NumberOfRows[4];    // rows per image
    unsigned char NumberOfColums[4];  // columns per image (spelling kept for existing users)
};
/**
 * @brief Raw header found at the start of an MNIST label file.
 *
 * The header is read byte-for-byte from the file, so every field is kept as
 * four unsigned bytes; use ConvertCharArrayToInt to turn a field into an int.
 */
struct MNISTLabelFileHeader
{
    unsigned char MagicNumber[4];     // converts to 2049 for a label file
    unsigned char NumberOfLabels[4];  // number of labels stored in the file
};
/**
 * @brief ConvertCharArrayToInt Combine a byte array into a single integer,
 *        treating array[0] as the most significant byte.
 * @param array The bytes to combine.
 * @param LengthOfArray The number of bytes in the array.
 * @return The combined value, or -1 if array is null or LengthOfArray is not
 *         positive.
 */
int ConvertCharArrayToInt(unsigned char* array, int LengthOfArray)
{
    // An empty or missing array has no first byte to read.
    if (!array || LengthOfArray <= 0)
    {
        return -1;
    }

    // Accumulate in an unsigned type: left-shifting a signed int past its top
    // bit is undefined behaviour, while unsigned shifts simply discard bits.
    unsigned int result = 0;
    for (int i = 0; i < LengthOfArray; i++)
    {
        result = (result << 8) | array[i];
    }
    return static_cast<int>(result);
}
#ifndef MNIST_H
#define MNIST_H
#include <iostream>
#include <fstream>
#include <opencv2/opencv.hpp>
/**
 * @brief Raw header found at the start of an MNIST image file.
 *
 * The header is read byte-for-byte from the file, so every field is kept as
 * four unsigned bytes; use ConvertCharArrayToInt to turn a field into an int.
 */
struct MNISTImageFileHeader
{
    unsigned char MagicNumber[4];     // converts to MAGICNUMBEROFIMAGE for an image file
    unsigned char NumberOfImages[4];  // number of images stored in the file
    unsigned char NumberOfRows[4];    // rows per image
    unsigned char NumberOfColums[4];  // columns per image (spelling kept for existing users)
};
/**
 * @brief Raw header found at the start of an MNIST label file.
 *
 * The header is read byte-for-byte from the file, so every field is kept as
 * four unsigned bytes; use ConvertCharArrayToInt to turn a field into an int.
 */
struct MNISTLabelFileHeader
{
    unsigned char MagicNumber[4];     // converts to MAGICNUMBEROFLABEL for a label file
    unsigned char NumberOfLabels[4];  // number of labels stored in the file
};
// Values the 4-byte MagicNumber header field must convert to for the file to
// be accepted as an image file or a label file respectively.
const int MAGICNUMBEROFIMAGE = 0x00000803;  // 2051
const int MAGICNUMBEROFLABEL = 0x00000801;  // 2049
// Combines LengthOfArray bytes into one int, with array[0] as the most
// significant byte. Returns -1 when LengthOfArray is negative.
int ConvertCharArrayToInt(unsigned char* array, int LengthOfArray);
// True when the converted MagicNumber bytes equal MAGICNUMBEROFIMAGE.
bool IsImageDataFile(unsigned char* MagicNumber, int LengthOfArray);
// True when the converted MagicNumber bytes equal MAGICNUMBEROFLABEL.
bool IsLabelDataFile(unsigned char* MagicNumber, int LengthOfArray);
// Reads NumberOfData items of DataSizeInBytes bytes each from an opened file
// into a CV_8UC1 matrix with one item per row. Returns an empty Mat when the
// file is not open.
cv::Mat ReadData(std::fstream& DataFile, int NumberOfData, int DataSizeInBytes);
// Reads NumberOfImages images of 28 * 28 bytes each via ReadData.
cv::Mat ReadImageData(std::fstream& ImageDataFile, int NumberOfImages);
// Reads NumberOfLabel one-byte labels via ReadData.
cv::Mat ReadLabelData(std::fstream& LabelDataFile, int NumberOfLabel);
// Opens FileName in binary mode, checks the image magic number and returns the
// images, one per row. Returns an empty Mat if the file cannot be opened or
// the magic number does not match.
cv::Mat ReadImages(std::string& FileName);
// Opens FileName in binary mode, checks the label magic number and returns the
// labels, one per row. Returns an empty Mat if the file cannot be opened or
// the magic number does not match.
cv::Mat ReadLabels(std::string& FileName);
#endif // MNIST_H
/**
 * @file ReadData.cpp The file contains the functions used to read image data
 *       and label data from the original MNIST files.
 * @author sheng
 * @version 1.0.0
 * @date 2014-04-09
 * @function
 * @history <author> <date> <version> <description>
 *          sheng 2014-04-09 1.0.0 build the module
 */
#include <MNIST.h>
* @brief IsImageDataFile Check the input MagicNumber is equal to
* @param MagicNumber The array of the magicnumber to be checked
* @param LengthOfArray The length of the array
* @return true, if the magcinumber is mathed;
* false, otherwise.
* @author sheng
* @version 1.0.0
* @date 2014-04-08
* @histroy <author> <date> <version> <description>
* sheng 2014-04-08 1.0.0 build the function
bool IsImageDataFile(unsigned char* MagicNumber, int LengthOfArray)
int MagicNumberOfImage = ConvertCharArrayToInt(MagicNumber, LengthOfArray);
if (MagicNumberOfImage == MAGICNUMBEROFIMAGE)
return true;
return false;
* @brief IsImageDataFile Check the input MagicNumber is equal to
* @param MagicNumber The array of the magicnumber to be checked
* @param LengthOfArray The length of the array
* @return true, if the magcinumber is mathed;
* false, otherwise.
* @author sheng
* @version 1.0.0
* @date 2014-04-08
* @histroy <author> <date> <version> <description>
* sheng 2014-04-08 1.0.0 build the function
bool IsLabelDataFile(unsigned char *MagicNumber, int LengthOfArray)
int MagicNumberOfLabel = ConvertCharArrayToInt(MagicNumber, LengthOfArray);
if (MagicNumberOfLabel == MAGICNUMBEROFLABEL)
return true;
return false;
* @brief ReadData Read the data in a opened file
* @param DataFile The file which the data is read from.
* @param NumberOfData The number of the data
* @param DataSizeInBytes The size fo the every data
* @return The Mat which rows is a data,
* Return a empty Mat if the file is not opened or the some flag was
* seted when reading the data.
* @author sheng
* @version 1.0.0
* @date 2014-04-08
* @histroy <author> <date> <version> <description>
* sheng 2014-04-08 1.0.0 build the function
cv::Mat ReadData(std::fstream& DataFile, int NumberOfData, int DataSizeInBytes)
cv::Mat DataMat;
// read the data if the file is opened.
if (DataFile.is_open())
int AllDataSizeInBytes = DataSizeInBytes * NumberOfData;
unsigned char* TmpData = new unsigned char[AllDataSizeInBytes];
DataFile.read((char *)TmpData, AllDataSizeInBytes);
// // If the state is good, convert the array to a mat.
// if (!DataFile.fail())
// {
// DataMat = cv::Mat(NumberOfData, DataSizeInBytes, CV_8UC1,
// TmpData).clone();
// }
DataMat = cv::Mat(NumberOfData, DataSizeInBytes, CV_8UC1,
delete [] TmpData;
return DataMat;
* @brief ReadImageData Read the Image data from the MNIST file.
* @param ImageDataFile The file which contains the Images.
* @param NumberOfImages The number of the images.
* @return The mat contains the image and each row of the mat is a image.
* Return empty mat is the file is closed or the data is not matching
* the number.
* @author sheng
* @version 1.0.0
* @date 2014-04-08
* @histroy <author> <date> <version> <description>
* sheng 2014-04-08 1.0.0 build the function
cv::Mat ReadImageData(std::fstream& ImageDataFile, int NumberOfImages)
int ImageSizeInBytes = 28 * 28;
return ReadData(ImageDataFile, NumberOfImages, ImageSizeInBytes);
* @brief ReadLabelData Read the label data from the MNIST file.
* @param LabelDataFile The file contained the labels.
* @param NumberOfLabel The number of the labels.
* @return The mat contains the labels and each row of the mat is a label.
* Return empty mat is the file is closed or the data is not matching
* the number.
* @author sheng
* @version 1.0.0
* @date 2014-04-08
* @histroy <author> <date> <version> <description>
* sheng 2014-04-08 1.0.0 build the function
cv::Mat ReadLabelData(std::fstream& LabelDataFile, int NumberOfLabel)
int LabelSizeInBytes = 1;
return ReadData(LabelDataFile, NumberOfLabel, LabelSizeInBytes);
* @brief ReadImages Read the Training images.
* @param FileName The name of the file.
* @return The mat contains the image and each row of the mat is a image.
* Return empty mat is the file is closed or the data is not matched.
* @author sheng
* @version 1.0.0
* @date 2014-04-08
* @histroy <author> <date> <version> <description>
* sheng 2014-04-08 1.0.0 build the function
cv::Mat ReadImages(std::string& FileName)
std::fstream File(FileName.c_str(), std::ios_base::in | std::ios_base::binary);
if (!File.is_open())
return cv::Mat();
MNISTImageFileHeader FileHeader;
File.read((char *)(&FileHeader), sizeof(FileHeader));
if (!IsImageDataFile(FileHeader.MagicNumber, 4))
return cv::Mat();
int NumberOfImage = ConvertCharArrayToInt(FileHeader.NumberOfImages, 4);
return ReadImageData(File, NumberOfImage);
* @brief ReadLabels Read the label from the MNIST file.
* @param FileName The name of the file.
* @return The mat contains the image and each row of the mat is a image.
* Return empty mat is the file is closed or the data is not matched.
* @author sheng
* @version 1.0.0
* @date 2014-04-08
* @histroy <author> <date> <version> <description>
* sheng 2014-04-08 1.0.0 build the function
cv::Mat ReadLabels(std::string& FileName)
std::fstream File(FileName.c_str(), std::ios_base::in | std::ios_base::binary);
if (!File.is_open())
return cv::Mat();
MNISTLabelFileHeader FileHeader;
File.read((char *)(&FileHeader), sizeof(FileHeader));
if (!IsLabelDataFile(FileHeader.MagicNumber, 4))
return cv::Mat();
int NumberOfImage = ConvertCharArrayToInt(FileHeader.NumberOfLabels, 4);
return ReadLabelData(File, NumberOfImage);