C语言解析MNIST数据集

小编用C语言解析MNIST数据集并把每张图片导出为BMP文件。需要注意:当biHeight为正数时,BMP按自下而上的顺序存储像素行,而MNIST按自上而下的顺序存储;如果直接按原顺序写入像素数据,产生的图片是倒立着的(上下颠倒)。

数据集下载地址:http://yann.lecun.com/exdb/mnist/
小编解析的是测试集:t10k-images.idx3-ubyte
代码如下:
bmp.h

#ifndef BMP_H
#define BMP_H

/*
 * On-disk layout of an uncompressed Windows BMP file: file header,
 * info header and one palette entry.
 *
 * The structs are written to disk verbatim with fwrite(), so every field
 * must have an exact width. `long` / `unsigned long` are 8 bytes on LP64
 * platforms (Linux, macOS), which would corrupt the headers; fixed-width
 * types from <stdint.h> keep the layout at 14 / 40 / 4 bytes everywhere.
 *
 * NOTE: fields are stored in host byte order. The BMP format is
 * little-endian, so raw struct writes are only valid on little-endian hosts.
 */
#include <stdint.h>

/* push/pop restores whatever packing the including file had in effect,
 * instead of resetting it to the compiler default. */
#pragma pack(push, 1)

/* BMP file header (14 bytes). */
typedef struct tagBITMAPFILEHEADER
{
    unsigned char bfType[2]; /* file type signature */
    uint32_t bfSize;         /* total file size in bytes */
    uint16_t bfReserved1;    /* reserved */
    uint16_t bfReserved2;    /* reserved */
    uint32_t bfOffBits;      /* offset of the DIB pixel data from the start of the file */
}fileHeader;

/* Bitmap info header (40 bytes). */
typedef struct tagBITMAPINFOHEADER
{
    uint32_t biSize;         /* size of this structure in bytes */
    int32_t biWidth;         /* image width */
    int32_t biHeight;        /* image height */
    uint16_t biPlanes;       /* number of planes */
    uint16_t biBitCount;     /* bits per pixel */
    uint32_t biCompression;  /* compression type */
    uint32_t biSizeImage;    /* size of the DIB pixel data in bytes */
    int32_t biXPixPerMeter;  /* horizontal resolution */
    int32_t biYPixPerMeter;  /* vertical resolution */
    uint32_t biClrUsed;      /* number of colour-table entries used */
    uint32_t biClrImporant;  /* number of important colours (spelling kept: callers use this name) */
}fileInfo;

/* Palette (colour table) entry (4 bytes). */
typedef struct tagRGBQUAD
{
    unsigned char rgbBlue;     /* blue intensity */
    unsigned char rgbGreen;    /* green intensity */
    unsigned char rgbRed;      /* red intensity */
    unsigned char rgbReserved; /* reserved */
}rgbq;

#pragma pack(pop)

#endif

main

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "bmp.h"

using namespace std;

#define SUM 10000

int main()
{
    int a, num, size1, size2;
    //用于存储一张手写体图片的位图信息
    char image[28 * 28];
    //用于存储产生的图片的名字
    char name[100];
    //10000张测试集
    FILE *fp1 = fopen("C:\\Users\\Administrator\\Desktop\\data\\t10k-images.idx3-ubyte", "rb+");
    if (fp1 == NULL)
    {
        //cout << "unable open file1" << endl;
        //exit(1);
        printf("unable open file2");
    }

    /*
    //测试集的label
    FILE *fp11 = fopen("C:\\Users\\Administrator\\Desktop\\data\\t10k-labels.idx1-ubyte", "rb+");
    if (fp1 == NULL)
    {
        printf("unable open t10k-labels.idx1-ubyte");
    }
    else
    {
        //读取t10k-labels.idx1-ubyte的幻数、数量
        fread(&a, sizeof(int), 1, fp11);
        fread(&num, sizeof(int), 1, fp11);
        //读取t10k-labels.idx1-ubyte的前28×28个label
        fread(image, sizeof(char), 28 * 28, fp11);
        printf("------------------label--------------------------------");
        printf("a=%0x\n", a);
        printf("num=%0x\n", num);
        for (int i = 0; i < 28 * 28; i++)
        {
            printf("%d\t", image[i]);
        }
        printf("------------------label--------------------------------");
    }
    */

    //读取字符集的幻数、数量、宽、高
    fread(&a, sizeof(int), 1, fp1);
    fread(&num, sizeof(int), 1, fp1);
    fread(&size1, sizeof(int), 1, fp1);
    fread(&size2, sizeof(int), 1, fp1);

    /*
    //打印字符集的幻数、数量、宽、高
    printf("a=%0x\n", a);
    printf("num=%0x\n", num);
    printf("size1=%0x\n", size1);
    printf("size2=%0x\n", size2);

    for (int i = 0; i < 28 * 28; i++)
    {
        printf("%d\t", image[i]);
    }
    */

    fileHeader fh;
    fileInfo fi;
    //编写信息头fi
    fi.biSize = 40;
    fi.biWidth = 28;
    fi.biHeight = 28;
    fi.biPlanes = 1;
    fi.biBitCount = 8;
    fi.biCompression = 0;
    fi.biSizeImage = 28 * 28;
    fi.biXPixPerMeter = 0;
    fi.biYPixPerMeter = 0;
    fi.biClrUsed = 0;
    fi.biClrImporant = 0;

    //编写文件头fh
    fh.bfType[0] = 0x42;
    fh.bfType[1] = 0x4D;
    fh.bfOffBits = sizeof(fileHeader) + sizeof(fileInfo) + 256 * sizeof(rgbq);
    fh.bfSize = fh.bfOffBits + fi.biSizeImage;
    fh.bfReserved1 = 0;
    fh.bfReserved2 = 0;

    //创建调色板
    rgbq *fq = (rgbq *)malloc(256 * sizeof(rgbq));
    for (int i = 0; i<256; i++)
    {
        fq[i].rgbBlue = fq[i].rgbGreen = fq[i].rgbRed = i;
    }

    //创建要生成的数字图片
    FILE *fp2 = fopen("C:\\Users\\Administrator\\Desktop\\data\\numbers.bmp", "wb");
    if (fp2 == NULL)
    {
        printf("unable open file2");
    }

    for (int i = 1; i <= SUM; i++)
    {
        //创建一个BMP图片
        sprintf(name, "C:\\Users\\Administrator\\Desktop\\data\\image\\%05d.bmp", i);
        FILE *ftmp = fopen(name, "wb");
        if (ftmp == NULL)
            printf("Create %05d.bmp failure\n", i);
        else
        {
            //读取一张图片的位图信息
            fread(image, sizeof(char), 28 * 28, fp1);
            //写文件头
            fwrite(&fh, sizeof(fileHeader), 1, ftmp);
            //写信息头
            fwrite(&fi, sizeof(fileInfo), 1, ftmp);
            //写调色板
            fwrite(fq, sizeof(rgbq), 256, ftmp);
            //写位图信息
            fwrite(image, fi.biSizeImage, 1, ftmp);
            fclose(ftmp);
        }
    }
    fclose(fp1);
    printf("------------------------------\n");

    return 0;
}

结果如下:
C语言解析MNIST数据集_第1张图片

你可能感兴趣的:(C++)