///////////////////////////////////////////////////////////////////////////////////// /// 此内容摘自 linux 上 iconv 命令程序代码,目的在于处理转码出现无效字符的情况 //////////////////////////////////////////////////////////////////////////////////// struct iconv_hooks {}; struct iconv_fallbacks {}; typedef unsigned int ucs4_t; typedef struct conv_struct * conv_t; struct loop_funcs { size_t (*loop_convert) (iconv_t icd, const char* * inbuf, size_t *inbytesleft, char* * outbuf, size_t *outbytesleft); size_t (*loop_reset) (iconv_t icd, char* * outbuf, size_t *outbytesleft); }; struct mbtowc_funcs { int (*xxx_mbtowc) (conv_t conv, ucs4_t *pwc, unsigned char const *s, int n); /* * int xxx_mbtowc (conv_t conv, ucs4_t *pwc, unsigned char const *s, int n) * converts the byte sequence starting at s to a wide character. Up to n bytes * are available at s. n is >= 1. * Result is number of bytes consumed (if a wide character was read), * or -1 if invalid, or -2 if n too small, or -2-(number of bytes consumed) * if only a shift sequence was read. */ int (*xxx_flushwc) (conv_t conv, ucs4_t *pwc); /* * int xxx_flushwc (conv_t conv, ucs4_t *pwc) * returns to the initial state and stores the pending wide character, if any. * Result is 1 (if a wide character was read) or 0 if none was pending. */ }; struct wctomb_funcs { int (*xxx_wctomb) (conv_t conv, unsigned char *r, ucs4_t wc, int n); /* * int xxx_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) * converts the wide character wc to the character set xxx, and stores the * result beginning at r. Up to n bytes may be written at r. n is >= 1. * Result is number of bytes written, or -1 if invalid, or -2 if n too small. */ int (*xxx_reset) (conv_t conv, unsigned char *r, int n); /* * int xxx_reset (conv_t conv, unsigned char *r, int n) * stores a shift sequences returning to the initial state beginning at r. * Up to n bytes may be written at r. n is >= 0. * Result is number of bytes written, or -2 if n too small. */ }; typedef unsigned int state_t; struct conv_struct { struct loop_funcs lfuncs; /* Input (conversion multibyte -> unicode) */ int iindex; struct mbtowc_funcs ifuncs; state_t istate; /* Output (conversion unicode -> multibyte) */ int oindex; struct wctomb_funcs ofuncs; int oflags; state_t ostate; /* Operation flags */ int transliterate; int discard_ilseq; #ifndef LIBICONV_PLUG struct iconv_fallbacks fallbacks; struct iconv_hooks hooks; #endif }; //////////////////////////////////////////////////////////// /// 转载结束 //////////////////////////////////////////////////////////// int __charcode_convert__(LPCSTR from, LPCSTR to, LPSTR save, int savelen, LPSTR src, int srclen, bool ignore_invalid_sequence) { iconv_t cd; char *inbuf = src; char *outbuf = save; size_t outbufsize = savelen; int status = 0; size_t savesize = 0; size_t inbufsize = srclen; const char* inptr = inbuf; size_t insize = inbufsize; char* outptr = outbuf; size_t outsize = outbufsize; if (!ignore_invalid_sequence) cd = iconv_open(to, from); else { char tochartset[64]={0}; sprintf(tochartset, "%s//IGNORE", to); cd = iconv_open(tochartset, from); } if (cd == (iconv_t)(-1)) { printf("iconv_open oper error!\n"); status = -1; goto done; } iconv(cd, NULL, NULL, NULL, NULL); if (inbufsize == 0) { status = -1; goto done; } int invaild_do; invaild_do = 0; while (insize > 0) { size_t res = iconv(cd, (char**)&inptr, &insize, &outptr, &outsize); if (res == (size_t)(-1)) { if (errno == EILSEQ) { if (invaild_do == 0) { ((conv_t)cd)->discard_ilseq = 1; invaild_do = 1; continue; } status = -3; goto done; } else if (errno == EINVAL) { if (inbufsize == 0) { status = -4; goto done; } else { break; } } else if (errno == E2BIG) { status = -5; goto done; } else { status = -6; goto done; } } invaild_do = 0; if (outptr != outbuf) { int saved_errno = errno; int outsize = outptr - outbuf; strncpy(save+savesize, outbuf, outsize); errno = saved_errno; } lj_sleep(0, 1); } status = strlen(save); status = status > 0 ? 0 : -1; done: iconv_close(cd); return status; } char *charcode_convert(LPCSTR from, LPCSTR to, LPSTR src, int srclen, bool ignore_invalid_sequence) { char *outbuf = (char*)malloc(4*srclen+sizeof(char)); memset(outbuf, 0, 4*srclen+sizeof(char)); if (__charcode_convert__(from, to, outbuf, 4*srclen, src, srclen, ignore_invalid_sequence) != 0) { LJFREE(outbuf); outbuf = (char*)malloc(sizeof(char)); memset(outbuf, 0, sizeof(char)); } return outbuf; } char* utf8_to_chna(char *utf8buf, bool ignore_invalid_sequence, LPCSTR to_chna_charset) { return charcode_convert("UTF-8", to_chna_charset, utf8buf, strlen(utf8buf), ignore_invalid_sequence); } char* chna_to_utf8(char *chnabuf, bool ignore_invalid_sequence, LPCSTR frm_chna_charset) { return charcode_convert(frm_chna_charset, "UTF-8", chnabuf, strlen(chnabuf), ignore_invalid_sequence); }