linux下C转码函数:iconv使用

http://anfernee.blog.51cto.com/4828774/1240270

在linux下,如果需要将编码格式转换,可以使用iconv系列函数。


头文件:

iconv.h


常用函数:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/* Identifier for conversion method from one codeset to another.  */
typedef void *iconv_t;

/* Allocate descriptor for code conversion from codeset FROMCODE to
   codeset TOCODE.
   This function is a possible cancellation point and therefore not
   marked with __THROW.  */
extern iconv_t iconv_open (const char *__tocode, const char *__fromcode);

/* Convert at most *INBYTESLEFT bytes from *INBUF according to the
   code conversion algorithm specified by CD and place up to
   *OUTBYTESLEFT bytes in buffer at *OUTBUF.
   Note that both byte counters are size_t objects; passing the address
   of an int cast to size_t * is not valid.  */
extern size_t iconv (iconv_t __cd, char **__restrict __inbuf,
                     size_t *__restrict __inbytesleft,
                     char **__restrict __outbuf,
                     size_t *__restrict __outbytesleft);

/* Free resources allocated for descriptor CD for code conversion.
   This function is a possible cancellation point and therefore not
   marked with __THROW.  */
extern int iconv_close (iconv_t __cd);


注意事项
1)outlen和inlen的长度最好相等,iconv会转换所有outlen长度的内容,如果inlen长度不够,可能会造成访问越界的问题。
2)当ASCII转到GBK时,ASCII字符占一个字节,中文等字符占两个字节;
3)当ASCII转到UTF16BE时,ASCII字符占两个字节,高位字节补零,所以UTF16BE格式的结果不能用strlen取长度,也不能用printf的%s输出。

4)当GBK或UTF16BE转到ASCII时,非ASCII字符会被丢弃。


遗留问题:

1)iconv的结果常常是失败,errno为84(EILSEQ),表示输入中存在无法转换的字符。取出outbuf来看,其实内容都已经转换了,但此时inlen和outlen的值都变成了负数,无法判断转换到了哪个字符。

【解决】inleft和outleft一定要用类型size_t,不能用int,否则会报错84,并且会把buf后面的内存也写乱。切记。

【原因】size_t在64位系统上是8字节,而在32位系统上是4字节,int为4字节。在64位系统上把int变量的地址强转为size_t *传给iconv时,iconv会按8字节读写这个变量,从而越界访问它后面的内存。
size_t定义如下:

1
2
3
4
5
6
7
/* Definition of size_t (this looks like an excerpt from GCC's <stddef.h>;
   confirm against your toolchain).  size_t is whatever unsigned type the
   compiler names in __SIZE_TYPE__, which is why its width follows the
   target (for example `long unsigned int`) rather than being fixed like int.  */
#ifndef __SIZE_TYPE__
#define __SIZE_TYPE__ long unsigned int
#endif
#if !(defined (__GNUG__) && defined (size_t))
typedef __SIZE_TYPE__ size_t;
#ifdef __BEOS__
typedef long ssize_t;
#endif /* __BEOS__ */
#endif /* !(defined (__GNUG__) && defined (size_t)) */




示例代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#include <errno.h>
#include <iconv.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Smaller of a and b.  This is a function-like macro: the argument that is
   selected gets evaluated twice, so do not pass expressions with side
   effects (e.g. MIN(i++, j)).  */
#define MIN(a,b) ((a)>(b)?(b):(a))
/*
 * Hex-dump a byte buffer to stdout.
 *
 * Prints each of the first `len` bytes at `p` as two upper-case hex digits
 * followed by a space, then ends the line.  Nothing but the newline is
 * printed when `len` is zero or negative.
 */
void dumprawmsg(char *p, int len)
{
    const char *cursor = p;
    int remaining = len;

    while (remaining > 0)
    {
        /* Go through unsigned char so bytes >= 0x80 are not sign-extended. */
        unsigned char byte = *cursor;

        printf("%.2X ", byte);
        cursor++;
        remaining--;
    }
    printf("\n");
}
/*
 * Convert a byte buffer from one character encoding to another with iconv.
 *
 * src      input bytes (not required to be NUL-terminated)
 * des      output buffer
 * srclen   number of input bytes to convert
 * deslen   capacity of the output buffer in bytes
 * srctype  iconv name of the input encoding
 * destype  iconv name of the output encoding
 *
 * When both encoding names compare equal the data is copied verbatim,
 * truncated to the smaller of the two lengths, and no converter is opened.
 *
 * Returns 0 when the whole input was converted (or copied).  Returns -1 when
 * an argument is NULL or a length is negative, when the converter cannot be
 * opened, or when iconv() stops before consuming all input; in the last case
 * `des` still holds whatever was converted before the failure.  The output is
 * never NUL-terminated by this function.
 */
int convmsg(char *src, char *des, int srclen, int deslen, const char *srctype, const char *destype)
{
    if (src == NULL || des == NULL || srctype == NULL || destype == NULL
        || srclen < 0 || deslen < 0)
    {
        /* A negative length would turn into a huge size_t below. */
        return -1;
    }

    if (strcmp(srctype, destype) == 0)
    {
        size_t ncopy = (size_t)(srclen < deslen ? srclen : deslen);

        memcpy(des, src, ncopy);
        return 0;
    }

    iconv_t conv = iconv_open(destype, srctype);
    if (conv == (iconv_t)-1)
    {
        printf("iconvopen err\n");
        return -1;
    }

    /* iconv() updates these through size_t pointers, so they must be real
       size_t objects; handing it the address of an int overruns the int on
       platforms where size_t is wider.  */
    size_t avail = (size_t)deslen;
    size_t insize = (size_t)srclen;
    char *wrptr = des;
    char *inptr = src;
    int rc = 0;

    printf("avail:%zu\n", avail);

    /* All input is available up front, so a single call either consumes
       everything or fails; retrying with the same arguments could never make
       progress (looping on remaining output space would spin forever).  */
    if (iconv(conv, &inptr, &insize, &wrptr, &avail) == (size_t)-1)
    {
        if (errno == EINVAL)
        {
            /* Input ends in the middle of a multibyte sequence. */
            printf("EINVAL\n");
        }
        else
        {
            /* E2BIG (output buffer full) or EILSEQ (invalid input). */
            printf("error\n");
        }
        rc = -1;
    }
    else if (iconv(conv, NULL, NULL, &wrptr, &avail) == (size_t)-1)
    {
        /* Flushing the shift state of a stateful target encoding failed,
           typically because the output buffer has no room left.  */
        printf("error\n");
        rc = -1;
    }

    iconv_close(conv);
    return rc;
}
/*
 * Demo driver: converts a fixed sample string from the encoding named in
 * argv[1] to the encoding named in argv[2], hex-dumping the output buffer
 * before and after the conversion.
 *
 * Returns -1 when fewer than two encoding names are given, 1 when the
 * conversion fails, 0 otherwise.
 */
int main(int argc, char *argv[])
{
    if (argc < 3)
    {
        printf("need two type para\n");
        return -1;
    }
    printf("type in %s\n, type out %s\n", argv[1], argv[2]);

    /* NOTE(review): the bytes of this literal are in whatever encoding the
       source file was saved in; argv[1] has to name that encoding.  */
    char src[100] = "abcd 1234 其他";
    char des[100] = {0};

    /* Convert only the text itself, not the zero padding after it, and keep
       the last byte of des untouched so the %s print below stays bounded.  */
    int srclen = (int)strlen(src);
    int deslen = (int)sizeof des - 1;
    const char *srctype = argv[1];
    const char *destype = argv[2];

    /* Dump exactly the buffer; a larger count would read past the array. */
    dumprawmsg(des, (int)sizeof des);
    int ret = convmsg(src, des, srclen, deslen, srctype, destype);
    dumprawmsg(des, (int)sizeof des);

    /* For encodings with embedded zero bytes (e.g. UTF-16) %s stops at the
       first zero byte; the hex dump above shows the full result.  */
    printf("des is : %s\n", des);

    return ret == 0 ? 0 : 1;
}

你可能感兴趣的:(c,linux)