C语言二进制文件读写以及大小端转换

因为工作需要,要用二进制方式读写文件。网上资料比较少,所以我在此把自己的实践过程mark一下。
并且由于代码可能需要在不同的机器上运行,所以还需要考虑一下大小端转换问题。
先看下面的代码,然后我再简短地解释一下。

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

/* Number of struct matrix records in the global s[] table; also the element
   count passed to fwrite when that table is written as the file header. */
#define nmemb 7

/****************************************************
  Data types: short aliases for fixed-width types.
  The integer aliases are built on <stdint.h>, so their
  widths no longer depend on how a particular compiler
  sizes char/short/int.
*****************************************************/
typedef uint8_t  uint8;           /* Unsigned 8 bit quantity  */
typedef int8_t   int8;            /* Signed 8 bit quantity    */
typedef uint16_t uint16;          /* Unsigned 16 bit quantity */
typedef int16_t  int16;           /* Signed 16 bit quantity   */
typedef uint32_t uint32;          /* Unsigned 32 bit quantity */
typedef int32_t  int32;           /* Signed 32 bit quantity   */
typedef float    fp32;            /* Single precision floating point; the C  */
                                  /* standard does not fix its width         */
typedef double   fp64;            /* Double precision floating point; the C  */
                                  /* standard does not fix its width         */

/* Reverse the byte order of a 32-bit value (works in both directions:
   big endian -> little endian and back). The argument is converted to
   uint32 first and is evaluated several times, so it must not have
   side effects. */
#define BigtoLittle32(A)   ( (((uint32)(A) >> 24) & 0x000000ffu) | \
                             (((uint32)(A) >>  8) & 0x0000ff00u) | \
                             (((uint32)(A) <<  8) & 0x00ff0000u) | \
                             (((uint32)(A) << 24) & 0xff000000u) )

/* Reverse the byte order of a 16-bit value. The argument is converted to
   uint16 first and is evaluated twice, so it must not have side effects. */
#define BigtoLittle16(A)   ( (((uint16)(A) >> 8) & 0x00ff) | \
                             (((uint16)(A) << 8) & 0xff00) )


/************************************************************
*     Reverse the byte order of a float.
*
*     Returns a float whose four bytes are the bytes of inFloat in
*     the opposite order, i.e. little endian <-> big endian.
*     Applying the function twice gives back the original bytes.
*     The code handles exactly four bytes, so it relies on
*     sizeof(float) being 4.
*************************************************************/

float ReverseFloat(const float inFloat)
{
    union {
        float value;
        unsigned char bytes[4];
    } source, swapped;

    source.value = inFloat;

    /* byte i of the result is byte (3 - i) of the input */
    for (int i = 0; i < 4; ++i) {
        swapped.bytes[i] = source.bytes[3 - i];
    }

    return swapped.value;
}

/* Dimensions of one matrix. */
struct matrix {
    int row;     /* number of rows    */
    int column;  /* number of columns */
};

/* Global table holding the dimensions of nmemb matrices. */
struct matrix s[nmemb];

void set_s(int j, int x, int y)
{
    s[j].row = x;
    s[j].column = y;
}

/**
 * Detect the byte order of the machine the program is running on.
 *
 * The int 0x1234 is stored and the byte at its lowest address is inspected:
 * if that byte is the least significant one (0x34) the machine is little
 * endian.
 *
 * @return false when the lowest-addressed byte is 0x34 (little endian),
 *         true otherwise.
 */
bool is_bigendian(void)
{
    const int probe = 0x1234;
    /* unsigned char may alias any object and avoids sign issues of plain char */
    const unsigned char lowest = *(const unsigned char *)&probe;

    return lowest != 0x34;
}

/**
 * Demo: report the host byte order, fill the global dimension table s and
 * write a binary file named "test" that contains
 *   1. the nmemb struct matrix records (the header), followed by
 *   2. for every record, row * column floats in row-major order, where each
 *      element holds its own column index.
 *
 * Everything is written in the host's native byte order.
 *
 * @return 0 on success, 1 if the file cannot be opened, memory cannot be
 *         allocated, or a write/close fails.
 */
int main(void)
{
    if (is_bigendian()) {
        printf("BigEndian\n");
    } else {
        printf("LittleEndian\n");
    }

    set_s(0, 1, 50);
    set_s(1, 1, 80);
    set_s(2, 4, 20);
    set_s(3, 50, 1);
    set_s(4, 80, 2);
    set_s(5, 100, 3);
    set_s(6, 100, 4);

    /* sizeof yields size_t, which must be printed with %zu */
    printf("size: %zu\n", sizeof(struct matrix));
    printf("size: %zu\n", sizeof(s));

    FILE *fp = fopen("test", "wb");
    if (fp == NULL) {
        printf("EROOR\n");
        return 1;
    }

    int status = 1;

    for (int j = 0; j < nmemb; ++j) {
        printf("row: %d   column: %d\n", s[j].row, s[j].column);
    }

    /* header: all dimension records in one call */
    if (fwrite(s, sizeof s[0], (size_t)nmemb, fp) != (size_t)nmemb) {
        fprintf(stderr, "failed to write the matrix header\n");
        goto done;
    }

    for (int i = 0; i < nmemb; ++i) {
        if (s[i].row <= 0 || s[i].column <= 0) {
            continue;   /* empty matrix: nothing to write */
        }
        const size_t rows = (size_t)s[i].row;
        const size_t cols = (size_t)s[i].column;
        const size_t count = rows * cols;

        /* calloc zero-fills and checks the count * size multiplication */
        float *m = calloc(count, sizeof *m);
        if (m == NULL) {
            fprintf(stderr, "out of memory for matrix %d\n", i);
            goto done;
        }
        for (size_t r = 0; r < rows; ++r) {
            for (size_t c = 0; c < cols; ++c) {
                m[r * cols + c] = (float)c;
            }
        }

        const size_t written = fwrite(m, sizeof *m, count, fp);
        free(m);
        if (written != count) {
            fprintf(stderr, "failed to write matrix %d\n", i);
            goto done;
        }
    }

    status = 0;

done:
    /* fclose flushes buffered data, so its result matters for a written file */
    if (fclose(fp) != 0) {
        fprintf(stderr, "failed to close the output file\n");
        status = 1;
    }
    if (status == 0) {
        printf("11\n");
    }

    /*
    Reading counterpart, kept as a reference example: replace the writing
    code above with the following to read the file back and print it.

    printf("%d\n", sizeof(float));
    FILE *fp;
    if ((fp = fopen("test", "rb")) == NULL) {
        printf("EROOR\n");
        return 1;
    }
    fread(s, sizeof(struct matrix), nmemb, fp);
    for (int i = 0; i < nmemb; ++i) {
        printf("row: %d   column: %d\n", s[i].row, s[i].column);
    }

    for (int i = 0; i < nmemb; ++i) {
        float *m = (float*) malloc(sizeof(float) * s[i].row * s[i].column);
        bzero(m, sizeof(float) * s[i].row * s[i].column);
        fread(m, sizeof(float), s[i].row * s[i].column, fp);
        for (int j = 0; j < s[i].row; ++j) {
            for (int k = 0; k < s[i].column; ++k) {
                printf("%lf ", m[k + j*s[i].column]);
            }
            printf("\n");
        }
        printf("\n\n");
        free(m);
    }
    fclose(fp);
    */
    return status;
}

fopen和fclose是很常见的,在这里就不做解释了。我们来看看fwrite和fread,本来以为这个很麻烦,但是用过之后发现这个二进制文件读写才是最简单的。

size_t fwrite(const void * ptr,size_t size,size_t nmemb,FILE * stream);
fwrite()用来将数据写入文件流中。
stream为已打开的文件指针
ptr 指向欲写入的数据地址
写入的字节数由参数size*nmemb决定。
size表示每个数据元素的大小(以字节为单位),nmemb表示要写入的元素个数。
fwrite()会返回实际写入的元素个数(不是字节数);如果返回值小于nmemb,说明写入时发生了错误。

size_t fread(void * ptr,size_t size,size_t nmemb,FILE * stream);
fread()用来从文件流中读取数据。
stream为已打开的文件指针
ptr 指向欲存放读取进来的数据空间
读取的字节数由参数size*nmemb决定。
size表示每个数据元素的大小(以字节为单位),nmemb表示要读取的元素个数。
fread()会返回实际读取到的nmemb数目,如果此值比参数nmemb 小,则代表可能读到了文件尾或有错误发生,这时必须用feof()或ferror()来决定发生什么情况。
返回实际读取到的nmemb数目。

详情参见上面的代码。

另外就是大小端的问题了。关于大小端的具体解释网上有很多,在此不作解释。参考上面写的代码,我判断了自己机器是大端还是小端,并且实现了int16、int32以及float数据类型的大小端转换:大端转小端,再使用相同的代码转换一次,小端又变回了大端。

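PS:float的大小端转化我之前一直以为写的是错的,因为好多数据转化之后输出都是0。后来发现这与float类型在内存中的存放方式(符号位、指数、尾数)有关:字节反转之后,原来尾数的低位字节跑到了指数的位置上。例如1.0f的位模式是0x3f800000,反转后变成0x0000803f,这是一个约为4.6e-41的极小数,用%f打印出来就是0.000000。所以我们的程序是对的,再转换一次就能得到原来的值。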

下面是写的比较详细的二进制读和写的函数。包括数值类型,struct,char*等的读写。

/**
 * Write the vocabulary and the weight matrices into one binary file.
 *
 * File layout, in write order:
 *   1. vocab_total_size, written as one int;
 *   2. for every whitespace-separated token read from "../out/vocab":
 *      the token length as an int, followed by that many bytes
 *      (no terminating '\0');
 *   3. NMEMB struct matrix records holding row/column of each matrix;
 *   4. the float data of the 14 matrices, in the order of the records.
 *
 * Everything is written in the host's native byte order.
 *
 * NOTE(review): the number of tokens actually written is not compared with
 * vocab_total_size; a reader that loops vocab_total_size times presumably
 * relies on them being equal - confirm against the vocab file.
 * NOTE(review): indices 0..13 of s are used, so NMEMB is assumed to be at
 * least 14 - TODO confirm.
 *
 * @param file_path  path of the output file, opened with mode "wb"
 * @return true if every write and the final close succeeded, false otherwise
 */
bool save_binary(char* file_path)
{
    /* binary head: row and column of every matrix */
    struct matrix
    {
        int row;
        int column;
    }s[NMEMB];

    /* Matrix payloads, in the same order as the records in s. */
    const void *const weights[] = {
        final_We, final_W, final_Wip, final_Wfp, final_Wop, final_Wr, final_Wy,
        final_We_hg, final_W_hg, final_Wip_hg, final_Wfp_hg, final_Wop_hg,
        final_Wr_hg, final_Wy_hg
    };
    const size_t weight_count = sizeof weights / sizeof weights[0];

    bool ok = false;
    FILE *vocab = NULL;
    FILE *fp = fopen(file_path, "wb");
    if (fp == NULL) {
        printf("EROOR\n");
        return false;
    }

    if (fwrite(&vocab_total_size, sizeof(int), 1, fp) != 1) {
        fprintf(stderr, "save_binary: failed to write the vocabulary size\n");
        goto out;
    }

    vocab = fopen("../out/vocab", "r");
    if (vocab == NULL) {
        printf("No vocab file!\n");
        goto out;   /* the output file is closed below instead of leaking */
    }

    char str[100];
    int coun = 0;
    /* "%99s" keeps a token inside str (99 characters + '\0'); a longer token
       is split instead of overflowing the buffer. Comparing with 1 stops the
       loop on end of file as well as on a read error. */
    while (fscanf(vocab, "%99s", str) == 1) {
        const int len = (int)strlen(str);
        coun++;
        printf("%d %d %s\n", coun, len, str);
        if (fwrite(&len, sizeof(int), 1, fp) != 1 ||
            fwrite(str, sizeof(char), (size_t)len, fp) != (size_t)len) {
            fprintf(stderr, "save_binary: failed to write token %d\n", coun);
            goto out;
        }
    }

    s[0].row = vocab_total_size; s[0].column = projection_size;
    s[1].row = projection_size; s[1].column = hidden_size*4;
    s[2].row = 1; s[2].column = hidden_size;
    s[3].row = 1; s[3].column = hidden_size;
    s[4].row = 1; s[4].column = hidden_size;
    s[5].row = hidden_size; s[5].column = hidden_size*4;
    s[6].row = hidden_size; s[6].column = punc_total_size;

    s[7].row = vocab_total_size; s[7].column = projection_size;
    s[8].row = projection_size; s[8].column = hidden_size*4;
    s[9].row = 1; s[9].column = hidden_size;
    s[10].row = 1; s[10].column = hidden_size;
    s[11].row = 1; s[11].column = hidden_size;
    s[12].row = hidden_size; s[12].column = hidden_size*4;
    s[13].row = hidden_size; s[13].column = punc_total_size;

    if (fwrite(s, sizeof(struct matrix), (size_t)NMEMB, fp) != (size_t)NMEMB) {
        fprintf(stderr, "save_binary: failed to write the matrix header\n");
        goto out;
    }

    for (size_t i = 0; i < weight_count; ++i) {
        if (s[i].row < 0 || s[i].column < 0) {
            fprintf(stderr, "save_binary: matrix %zu has a negative dimension\n", i);
            goto out;
        }
        /* multiply as size_t so a large matrix cannot overflow int */
        const size_t count = (size_t)s[i].row * (size_t)s[i].column;
        if (fwrite(weights[i], sizeof(float), count, fp) != count) {
            fprintf(stderr, "save_binary: failed to write matrix %zu\n", i);
            goto out;
        }
    }

    ok = true;

out:
    if (vocab != NULL) {
        fclose(vocab);
    }
    /* fclose flushes buffered data; a failure here means the file is incomplete */
    if (fclose(fp) != 0) {
        fprintf(stderr, "save_binary: failed to close the output file\n");
        ok = false;
    }

    return ok;
}
bool load_binary(char *file_path)
{
    FILE *fp;
    if ((fp = fopen(file_path, "rb")) == NULL) {
        printf("EROOR\n");
        return false;
    }

    int vocab_size;
    fread(&vocab_size, sizeof(int), 1, fp);

    printf("%d\n", vocab_size);
    int coun = 0;
    for (int j = 0; j < vocab_size; ++j) {
        int len;
        coun++;
        fread(&len, sizeof(int), 1, fp);
        char str[100];
        fread(str, sizeof(char), len, fp);
        str[len] = '\0';    //一定要加,不加出错
    }
    printf("%d\n", coun);


    struct matrix
    {
        int row;
        int column;
    }s[NMEMB];

    fread(s, sizeof(struct matrix), NMEMB, fp);

    for (int i = 0; i < NMEMB; ++i) {
        printf("row: %d   column: %d\n", s[i].row, s[i].column);
    }

    fread(We, sizeof(float), s[0].row * s[0].column, fp);
    fread(W, sizeof(float), s[1].row * s[1].column, fp);
    fread(Wip, sizeof(float), s[2].row * s[2].column, fp);
    fread(Wfp, sizeof(float), s[3].row * s[3].column, fp);
    fread(Wop, sizeof(float), s[4].row * s[4].column, fp);
    fread(Wr, sizeof(float), s[5].row * s[5].column, fp);
    fread(Wy, sizeof(float), s[6].row * s[6].column, fp);

    fread(We_hg, sizeof(float), s[7].row * s[7].column, fp);
    fread(W_hg, sizeof(float), s[8].row * s[8].column, fp);
    fread(Wip_hg, sizeof(float), s[9].row * s[9].column, fp);
    fread(Wfp_hg, sizeof(float), s[10].row * s[10].column, fp);
    fread(Wop_hg, sizeof(float), s[11].row * s[11].column, fp);
    fread(Wr_hg, sizeof(float), s[12].row * s[12].column, fp);
    fread(Wy_hg, sizeof(float), s[13].row * s[13].column, fp);

    fclose(fp);
    for (int j = 0; j < s[3].row; ++j) {
        for (int k = 0; k < s[3].column; ++k) {
            printf("%f ",Wfp[k + j*s[3].column]);
        }
    printf("\n");
    }

    return true;
}

你可能感兴趣的:(★C/C++基础)