在linux下,如果需要将编码格式转换,可以使用iconv系列函数。
iconv.h
/* Identifier for conversion method from one codeset to another.  */
typedef void *iconv_t;

/* Allocate descriptor for code conversion from codeset FROMCODE to
   codeset TOCODE.

   This function is a possible cancellation point and therefore not
   marked with __THROW.  */
extern iconv_t iconv_open (__const char *__tocode, __const char *__fromcode);

/* Convert at most *INBYTESLEFT bytes from *INBUF according to the
   code conversion algorithm specified by CD and place up to
   *OUTBYTESLEFT bytes in buffer at *OUTBUF.  */
extern size_t iconv (iconv_t __cd, char **__restrict __inbuf,
                     size_t *__restrict __inbytesleft,
                     char **__restrict __outbuf,
                     size_t *__restrict __outbytesleft);

/* Free resources allocated for descriptor CD for code conversion.

   This function is a possible cancellation point and therefore not
   marked with __THROW.  */
extern int iconv_close (iconv_t __cd);
4)当GBK或UTF16BE转到ASCII时,非ASCII字符会被丢弃。
1)iconv的调用常常失败,errno为84(即EILSEQ),表示输入中有某些字符无法转换。此时取出outbuf来看,其实内容都已经转换了,但inlen和outlen的值都变成了负数,无法据此判断转换到了哪个字符。
【解决】inleft和outleft一定要用类型size_t,不能用int再强制转换成size_t *传入:在64位系统上size_t是8字节而int只有4字节,iconv会按8字节读写这两个计数器,否则会报错84,并且会将buf后面的内存段也写乱了。切记。
/* Fall back to `long unsigned int` when the compiler does not predefine
   the underlying type of size_t.  */
#ifndef __SIZE_TYPE__
#define __SIZE_TYPE__ long unsigned int
#endif
/* size_t is an unsigned type as wide as __SIZE_TYPE__, not an int.  */
#if !(defined (__GNUG__) && defined (size_t))
typedef __SIZE_TYPE__ size_t;
#ifdef __BEOS__
typedef long ssize_t;
#endif /* __BEOS__ */
#endif /* !(defined (__GNUG__) && defined (size_t)) */
示例代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <iconv.h>
#include <errno.h>

#define MIN(a,b) ((a)>(b)?(b):(a))

/**
 * Print a buffer as space-separated two-digit upper-case hex bytes,
 * followed by a newline.
 *
 * @param p    first byte to print
 * @param len  number of bytes to print; the caller must make sure that
 *             many bytes are readable at p
 */
void dumprawmsg(const char *p, int len)
{
    for (int i = 0; i < len; i++) {
        /* Go through unsigned char so bytes >= 0x80 are not sign-extended. */
        unsigned char c = (unsigned char)p[i];
        printf("%.2X ", c);
    }
    printf("\n");
}

/**
 * Convert srclen bytes at src from encoding srctype to encoding destype,
 * writing at most deslen bytes to des.
 *
 * When both encoding names are identical the data is copied unchanged
 * (truncated to the smaller of srclen and deslen).
 *
 * @param src      input bytes
 * @param des      output buffer
 * @param srclen   number of input bytes, must be >= 0
 * @param deslen   capacity of des in bytes, must be >= 0
 * @param srctype  iconv name of the input encoding
 * @param destype  iconv name of the output encoding
 * @return 0 when the whole input was converted, -1 on invalid arguments,
 *         when the conversion descriptor cannot be opened, or when iconv
 *         reports an error (output produced before the error stays in des)
 */
int convmsg(char * src, char * des, int srclen, int deslen,
            const char *srctype, const char *destype)
{
    if (src == NULL || des == NULL || srctype == NULL || destype == NULL
        || srclen < 0 || deslen < 0) {
        printf("convmsg: invalid argument\n");
        return -1;
    }

    if (strcmp(srctype, destype) == 0) {
        memcpy(des, src, (size_t)MIN(srclen, deslen));
        return 0;
    }

    iconv_t conv = iconv_open(destype, srctype);
    if (conv == (iconv_t)-1) {
        printf("iconvopen err\n");
        return -1;
    }

    /* iconv() updates its byte counters through size_t pointers.  Handing
       it the int parameters through a (size_t *) cast makes it read and
       write past them where size_t is wider than int, so copy the lengths
       into correctly typed locals instead. */
    size_t insize = (size_t)srclen;
    size_t avail = (size_t)deslen;
    char *inptr = src;
    char *wrptr = des;
    int rc = 0;

    printf("avail:%zu\n", avail);

    /* The whole input is available up front, so one call either converts
       everything or stops with an error; calling again without new input
       or more output space cannot make progress. */
    if (iconv(conv, &inptr, &insize, &wrptr, &avail) == (size_t)-1) {
        int err = errno;  /* save before printf can overwrite it */

        rc = -1;
        if (err == EINVAL) {
            /* Incomplete multibyte sequence at the end of the input. */
            printf("EINVAL\n");
        } else {
            /* E2BIG: output buffer full; EILSEQ: invalid input sequence. */
            printf("error\n");
        }
        printf("iconv err %d (%s): %zu input bytes left, %zu output bytes left\n",
               err, strerror(err), insize, avail);
    } else if (iconv(conv, NULL, NULL, &wrptr, &avail) == (size_t)-1) {
        /* Stateful encodings may need a final sequence that returns the
           output to the initial shift state. */
        int err = errno;

        rc = -1;
        printf("iconv flush err %d (%s)\n", err, strerror(err));
    }

    if (iconv_close(conv) != 0) {
        printf("iconv_close err %d\n", errno);
        rc = -1;
    }

    return rc;
}

/**
 * Demo driver: converts a fixed sample string from the encoding named by
 * argv[1] to the encoding named by argv[2] and dumps the output buffer in
 * hex before and after the conversion.
 *
 * @return 0 on success, -1 on bad usage or failed conversion
 */
int main(int argc, char * argv[])
{
    if (argc < 3) {
        printf("need two type para\n");
        return -1;
    }

    printf("type in %s\n, type out %s\n", argv[1], argv[2]);

    char src[100] = "abcd 1234 其他";
    char des[100] = {0};

    /* Convert only the text itself, not the zero padding of the array. */
    int srclen = (int)strlen(src);
    /* Keep the last byte of des untouched so it stays NUL-terminated. */
    int deslen = (int)sizeof des - 1;

    const char * srctype = argv[1];
    const char * destype = argv[2];

    /* Dump exactly the buffer; a larger length would read out of bounds. */
    dumprawmsg(des, (int)sizeof des);

    int ret = convmsg(src, des, srclen, deslen, srctype, destype);

    dumprawmsg(des, (int)sizeof des);
    printf("des is : %s\n", des);

    return ret == 0 ? 0 : -1;
}