linux下C转码函数:iconv使用

在linux下,如果需要将编码格式转换,可以使用iconv系列函数。


头文件:

iconv.h


常用函数:
/* Identifier for conversion method from one codeset to another.  */
typedef void *iconv_t;
/* Allocate descriptor for code conversion from codeset FROMCODE to
   codeset TOCODE.
   This function is a possible cancellation points and therefore not
   marked with __THROW.  */
extern iconv_t iconv_open (__const char *__tocode, __const char *__fromcode);
/* Convert at most *INBYTESLEFT bytes from *INBUF according to the
   code conversion algorithm specified by CD and place up to
   *OUTBYTESLEFT bytes in buffer at *OUTBUF.  */
extern size_t iconv (iconv_t __cd, char **__restrict __inbuf,
                     size_t *__restrict __inbytesleft,
                     char **__restrict __outbuf,
                     size_t *__restrict __outbytesleft);
/* Free resources allocated for descriptor CD for code conversion.
   This function is a possible cancellation points and therefore not
   marked with __THROW.  */
extern int iconv_close (iconv_t __cd);


注意事项
1)outlen和inlen的长度最好相等,iconv会转换所有outlen长度的内容,如果inlen长度不够,可能会造成访问越界的问题。
2)当ASCII转到GBK时,ASCII字符占一个字节,中文等特殊字符占两个字节;
3)当ASCII转到UTF16BE时,ASCII字符占两个字节,高位字节补零,所以UTF16BE格式的结果中含有0字节,不能用strlen取长度,也不能用printf的%s输出。

4)当GBK或UTF16BE转到ASCII时,非ASCII字符会被丢弃。


遗留问题:

1)iconv的结果常常是失败,errno:84(即EILSEQ,表示输入中存在无法转换的非法字节序列)。取出outbuf来看,其实内容都已经转换了,但此时inlen和outlen的值都变成了负数,无法表示转换到了哪个字符。

【解决】inleft和outleft一定要用size_t类型,不能用int,否则会报错84,并且会把buf后面的内存也写乱。切记。

【原因】size_t在64位系统上是8字节,在32位系统上是4字节,而int为4字节。把int变量的地址强制转换成size_t *传给iconv后,iconv在64位系统上会按8字节读写这个4字节的变量,导致访问越界。
size_t定义如下:

#ifndef __SIZE_TYPE__
#define __SIZE_TYPE__ long unsigned int
#endif
#if !(defined (__GNUG__) && defined (size_t))
typedef __SIZE_TYPE__ size_t;
#ifdef __BEOS__
typedef long ssize_t;




示例代码:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <iconv.h>
#include <errno.h>
/* Smaller of two values.  Function-like macro: each argument may be
   evaluated twice, so do not pass expressions with side effects.  */
#define MIN(a, b) (((b) < (a)) ? (b) : (a))
/*
 * Print the first len bytes at p to stdout as two-digit uppercase hex
 * values, each followed by a space, and finish the line with a newline.
 * Nothing but the newline is printed when len is zero or negative.
 */
void dumprawmsg(char *p, int len)
{
    /* Read the buffer as unsigned octets so bytes >= 0x80 print as 80..FF
       instead of being sign-extended.  */
    const unsigned char *octets = (const unsigned char *)p;
    int pos = 0;

    while (pos < len)
    {
        printf("%.2X ", octets[pos]);
        ++pos;
    }
    printf("\n");
}
/*
 * Convert srclen bytes at src from encoding srctype to encoding destype,
 * writing at most deslen bytes to des.
 *
 * When both encoding names are identical, the input is copied verbatim,
 * truncated to the smaller of srclen and deslen.  This function never
 * appends a NUL terminator to des.
 *
 * Returns 0 on success.  Returns -1 when an argument is invalid (NULL
 * pointer or negative length), when the conversion cannot be opened, when
 * the input contains an invalid or truncated multibyte sequence, or when
 * des is too small for the converted text.  On a conversion failure des
 * may already hold a partially converted prefix.
 */
int convmsg(char * src, char * des, int srclen, int deslen, const char *srctype, const char *destype)
{
    if (src == NULL || des == NULL || srctype == NULL || destype == NULL
        || srclen < 0 || deslen < 0)
    {
        return -1;
    }

    if (strcmp(srctype, destype) == 0)
    {
        /* Same encoding on both sides: a bounded copy is all that is needed.
           memmove tolerates overlapping buffers.  */
        size_t ncopy = (size_t)(srclen < deslen ? srclen : deslen);
        memmove(des, src, ncopy);
        return 0;
    }

    iconv_t conv = iconv_open (destype, srctype);
    if (conv == (iconv_t)-1)
    {
        printf("iconvopen err\n");
        return -1;
    }

    /* iconv() updates both counters through size_t pointers, so they must
       be genuine size_t objects.  Passing the address of an int cast to
       size_t * makes iconv read and write 8 bytes on LP64 systems.  */
    size_t insize = (size_t)srclen;
    size_t avail = (size_t)deslen;
    char *inptr = src;
    char *wrptr = des;
    int rc = 0;

    printf("avail:%zu\n", avail);

    /* A single call either consumes the whole input or stops at the first
       problem; retrying with the same buffers cannot make further progress,
       so there is deliberately no loop here.  */
    if (insize > 0 && iconv (conv, &inptr, &insize, &wrptr, &avail) == (size_t)-1)
    {
        if (errno == EINVAL)
        {
            /* The input ends in the middle of a multibyte sequence and
               there is no more input to complete it.  */
            printf("EINVAL\n");
        }
        else
        {
            /* E2BIG (des is full) or EILSEQ (invalid input sequence).  */
            printf("error\n");
        }
        rc = -1;
    }
    else if (iconv (conv, NULL, NULL, &wrptr, &avail) == (size_t)-1)
    {
        /* Flushing the shift state of a stateful target encoding failed,
           typically because des has no room left for the reset sequence.  */
        printf("error\n");
        rc = -1;
    }

    iconv_close (conv);
    return rc;
}
/*
 * Demo driver.  argv[1] names the source encoding and argv[2] the target
 * encoding.  A fixed sample string is converted with convmsg(), and the
 * destination buffer is hex-dumped before and after the conversion.
 *
 * Returns 0 when convmsg() reports success, -1 when fewer than two
 * encoding names were given or convmsg() returned non-zero.
 */
int main(int argc, char * argv[])
{
    if (argc < 3)
    {
        printf("need two type para\n");
        return -1;
    }
    printf("type in %s\n, type out %s\n", argv[1], argv[2]);
    char src[100] = "abcd 1234 其他";
    char des[100] = {0};
    /* Convert only the text itself, not the zero padding behind it, and
       keep the last byte of des out of reach so the %s below always finds
       a terminator.  */
    int srclen = (int)strlen(src);
    int deslen = (int)sizeof des - 1;
    const char * srctype = argv[1];
    const char * destype = argv[2];
    /* Dump exactly the bytes that belong to des; a larger length would
       read past the end of the array.  */
    dumprawmsg(des, (int)sizeof des);
    int ret = convmsg(src, des, srclen, deslen, srctype, destype);
    dumprawmsg(des, (int)sizeof des);
    /* For targets such as UTF-16 the output contains zero bytes, so %s
       shows only the part before the first one.  */
    printf("des is : %s\n", des);
    return ret == 0 ? 0 : -1;
}


你可能感兴趣的:(c,linux,编码格式)