使用iconv进行内码转换(Big5->GB2312)

阅读更多

iconv 是一个通过 Unicode 作为中间码实现各种内码间相互转换的库,它基本上囊括了世界上所有编码方式,例如 ASCII、GB2312、GBK、GB18030、BIG5、UTF-8、UCS-2、UCS-2BE、UCS-2LE、UCS-4、UCS-4BE、UCS-4LE、UTF-16、UTF-16BE、UTF-16LE、UTF-32、UTF-32BE、UTF-32LE、UTF-7 等等,除此之外,还包括泰语、日语、韩语、西欧等国家语言的编码。下面我们演示如何使用 iconv 实现 Big5 -> GB2312 的转换,当然只要简单修改一下,便可实现 iconv 所支持的任何编码间的转换。


下载 
libiconv
linux 版本的iconv ,可在 http://www.gnu.org/software/libiconv/  下载 
iconv
win32 版本可以在 http://gnuwin32.sourceforge.net/packages/libiconv.htm  下载 

SVN
源码 
另外,还有一些演示代码,需要的可以到我的SVN 下载 
http://xcyber.googlecode.com/svn/trunk/Convert/

标签: libiconv , iconv , Big5 , GB2312 ,  大五码  内码

代码片段(1)

[ 代码] [C/C++/Objective-C] 代码

001

/****************************************************************************

002

 *  Big5ToGB2312 - Convert Big5 encoding file to GB2312 encoding file

003

 *  File:

004

 *    Big5ToGb2312.c

005

 *  Description:

006

 *    Convert Big5 encoding file to GB2312 encoding file using iconv library

007

 *  Author:

008

 *    XCyber   email:[email protected]

009

 *  Date:

010

 *    August 7, 2008

011

 *  Other:

012

 *    visit http://www.gnu.org/software/libiconv/ for more help of iconv

013

 ***************************************************************************/

014

 

015

 

016

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../iconv-1.9.2.win32/include/iconv.h"

021

 

022

//#pragma comment(lib, "../iconv-1.9.2.win32/lib/iconv.lib")  // using iconv dynamic-link lib, iconv.dll

023

#pragma comment(lib, "../iconv-1.9.2.win32/lib/iconv_a.lib")  // using iconv static lib 

024

 

025

// Size in bytes of each of the two conversion buffers (source and destination)
// declared in main(). Must be >= 2 -- presumably so that a buffer can hold at
// least one complete double-byte character; TODO confirm.
#define BUFFER_SIZE 1024

026

 

027

 

028

// Print the program banner and the expected command-line syntax to stdout.
void usage()
{
    // One write of the concatenated literals; the emitted text is unchanged.
    fputs("\nBig5ToGB2312 - Convert Big5 encoding file to GB2312 encoding file\n"
          "[email protected] on August 7, 2008\n"
          "  Usage:\n"
          "      Big5ToGB2312 [Big5 file(in)]  [GB2312 file(out)]\n\n",
          stdout);
}

035

 

036

 

037

int main(int argc, char* argv[])

038

{

039

    FILE * pSrcFile = NULL;

040

    FILE * pDstFile = NULL;

041

 

042

    char szSrcBuf[BUFFER_SIZE];

043

    char szDstBuf[BUFFER_SIZE];

044

 

045

    size_t nSrc  = 0;

046

    size_t nDst  = 0;

047

    size_t nRead = 0;

048

    size_t nRet  = 0;

049

 

050

    char *pSrcBuf = szSrcBuf;

051

    char *pDstBuf = szDstBuf;

052

 

053

    iconv_t icv;

054

    int argument = 1;

055

 

056

    //check input arguments

057

    if(argc != 3)

058

    {

059

        usage();

060

        return -1;

061

    }

062

 

063

 

064

    pSrcFile = fopen(argv[1],"r");

065

    if(pSrcFile == NULL)

066

    {

067

        printf("can't open source file!\n");

068

        return -1;

069

    }

070

 

071

    pDstFile = fopen(argv[2],"w");

072

   &nbs

073

p;if(pSrcFile == NULL)

074

    {

075

        printf("can't open destination file!\n");

076

        return -1;

077

    }

078

 

079

    //initialize iconv routine, perform conversion from BIG5 to GB2312

080

    //TODO: if you want to perfom other type of coversion, e.g. GB2312->BIG5, GB2312->UTF-8 ...

081

    //just change following two paremeters of iconv_open()

082

    icv = iconv_open("GB2312","BIG5");

083

    if(icv == 0)

084

    {

085

        printf("can't initalize iconv routine!\n");

086

        return -1;

087

    }

088

 

089

    //enable "illegal sequence discard and continue" feature, so that if met illeagal sequence, 

090

    //conversion will continue instead of being terminated

091

    if(iconvctl (icv ,ICONV_SET_DISCARD_ILSEQ,&argument) != 0)

092

    {

093

        printf("can't enable \"illegal sequence discard and continue\" feature!\n");

094

        return -1;

095

    }

096

 

097

    while(!feof(pSrcFile))

098

    {

099

        pSrcBuf = szSrcBuf;

100

        pDstBuf = szDstBuf;

101

        nDst = BUFFER_SIZE;

102

 

103

        // read data from source file

104

        nRead = fread(szSrcBuf + nSrc,sizeof(char),BUFFER_SIZE - nSrc,pSrcFile);

105

        if(nRead == 0)

106

            break;

107

 

108

        // the amount of data to be converted should include previous left data and current read data

109

        nSrc = nSrc + nRead; 

110

 

111

        //perform conversion

112

        nRet = iconv(icv,(const char**)&pSrcBuf,&nSrc,&pDstBuf,&nDst);

113

 

114

        if(nRet == -1)

115

        {

116

            // include all case of errno: E2BIG, EILSEQ, EINVAL

117

            //     E2BIG: There is not sufficient room at *outbuf.

118

            //     EILSEQ: An invalid multibyte sequence has been encountered in the input.

119

            //     EINVAL: An incomplete multibyte sequence has been encountered in the input

120

            // move the left data to the head of szSrcBuf in other to link it with the next data block

121

            memmove(szSrcBuf,pSrcBuf,nSrc);

122

        }

123

 

124

        //wirte data to destination file

125

        fwrite(szDstBuf,sizeof(char),BUFFER_SIZE - nDst,pDstFile);

126

 

127

    }

128

    iconv_close(icv);

129

    fclose(pSrcFile);

130

    fclose(pDstFile);

131

 

132

    printf("conversion complete.\n");

133

 

134

    return;

135

}

你可能感兴趣的:(使用iconv进行内码转换(Big5->GB2312))