// 枚举GB2312中的汉字 (Enumerate the Chinese characters in GB2312)

//Build: gcc -g gb.c -o gb.exe

//

//Characters in GB2312 are arranged in 94 rows, with each row comprising 94 cells (one character per cell).

//

//The rows can be divided and categorised as:

//    * 01-09, comprising punctuation and other special characters.

//    * 16-55, the level-1 Chinese characters, arranged according to Pinyin.

//    * 56-87, the level-2 Chinese characters, arranged according to radical and stroke count.

//

//Rows 10-15 and 88-94 are unassigned.

//

//

#include <stdio.h>

/*
 * Write every cell of one GB2312 row to fp as two-byte pairs.
 *
 * fp: stream open for binary writing.
 * n:  row number; the lead byte written is 0xA0 + n.
 *
 * Each character is emitted as the lead byte followed by a trail byte.
 * Trail bytes cover cells 1..94, i.e. 0xA1..0xFE. Cell 0 (trail byte 0xA0)
 * is deliberately excluded: it is not a valid second byte, and emitting it
 * would add one bogus pair per row.
 *
 * On a write error the function reports it with perror and stops writing
 * the row; the stream's error indicator stays set for the caller.
 */
static void DumpRange(FILE* fp, unsigned short n)
{
    enum { GB_BYTE_BASE = 0xA0, GB_CELLS_PER_ROW = 94 };

    unsigned char pair[2];
    unsigned short cell = 0;

    /* The lead byte is the same for every cell of the row. */
    pair[0] = (unsigned char)(GB_BYTE_BASE + n);

    for (cell = 1; cell <= GB_CELLS_PER_ROW; cell++)
    {
        pair[1] = (unsigned char)(GB_BYTE_BASE + cell);

        if (fwrite(pair, sizeof pair, 1, fp) != 1)
        {
            perror("fwrite");
            break;
        }
    }

    fflush(fp);
}

 

/*
 * Create (or truncate) "gb.txt" in the current directory and fill it by
 * calling DumpRange for rows 1-9, 16-55 and 56-87, in that order.
 *
 * Failures to open or close the file are reported with perror; the
 * function returns nothing either way.
 *
 * NOTE(review): not every cell in these rows is assigned in GB2312
 * (e.g. row 55 is believed to end at cell 89 - confirm against the
 * standard), so the output may contain some unassigned byte pairs.
 */
static void DumpGB2312(void)
{
    unsigned short i = 0;
    FILE* fp = fopen("gb.txt", "wb+");

    if (fp == NULL)
    {
        perror("fopen");
        return;
    }

    /* Punctuation and other special characters. */
    for (i = 1; i <= 9; i++)
    {
        DumpRange(fp, i);
    }

    /* First group of Chinese characters. */
    for (i = 16; i <= 55; i++)
    {
        DumpRange(fp, i);
    }

    /* Second group of Chinese characters. */
    for (i = 56; i <= 87; i++)
    {
        DumpRange(fp, i);
    }

    /* fclose flushes buffered data, so a failure here means the file may be incomplete. */
    if (fclose(fp) != 0)
    {
        perror("fclose");
    }
}

 

/*
 * Program entry point: runs DumpGB2312 once and exits with status 0.
 * Command-line arguments are accepted but not used.
 */
int main(int argc, char* argv[])
{
    /* Explicitly mark the parameters as intentionally unused. */
    (void)argc;
    (void)argv;

    DumpGB2312();
    return 0;
}

 

// 你可能感兴趣的:(枚举GB2312中的汉字) (Related: Enumerating the Chinese characters in GB2312)