跨平台的UTF8<->GBK转换以及GBK<->wchar_t转换代码,支持std::string

关于unicode,各种编码等国际化的技术原理可以参看我blog上的文章。最近的项目里要用到GBK->wchar_t,wchar_t->UTF8.所以对这部分功能做了些简单封装。其实对于国际化技术的封装,无非就是

DBCS <=> wchar_t .

wchar_t <=> 各种unicode编码 比如说UTF8,UTF16等。

这样的转换都是绝对可以成功的。像GBK<->BIG5这种dbcs<=>dbcs的转换就不一定能成功了。

wchar_t作为C++的字符串内部处理用类型,主要原因是各种字符串函数都有以wchar_t作为接口的版本,方便使用。wchar_t的长度是由编译器和平台实现决定的,所以请记住,处理wchar_t的时候,千万不要关心它的长度。如果你的代码对wchar_t的长度特别关心,这个时候你需要的应该是一种标准的unicode编码。

(Linux下的wchar_t长度为4byte,好心痛)

啰嗦这么多,各位看官久等了。上代码。

  class string_util
 {
 public:

#ifndef _UNIX
    //我的程序只需要支持GBK,各位可以在这里加上Linux下的编码名字与windows下的codepage的对应关系进行扩展。
    static inline int codepage(const char* code_page)
    {
        return 936;//"GBK"
    }
 #endif

     static inline int dbcs2wchar(const char* code_page,/*in*/const char* in,int in_len,
                                                /*out*/wchar_t* out,int out_max)
     {
#ifdef _UNIX
         size_t result;
         iconv_t env;
         env = iconv_open("WCHAR_T",code_page);
         result = iconv(env,(char**)&in,(size_t*)&in_len,(char**)&out,(size_t*)&out_max);
         iconv_close(env);
         return (int) result;
#else
         return ::MultiByteToWideChar(codepage(code_page),0,in,in_len,out,out_max);
#endif
     }

     static inline int dbcs2wchar(const char* code_page,/*in*/const string& in,/*out*/wstring& out)
     {
         int len = in.length() + 1;
         int result;
         wchar_t* pBuffer = new wchar_t[len];
         memset(pBuffer,0,len*sizeof(wchar_t));
         result = dbcs2wchar(code_page,in.c_str(),in.length(),pBuffer,len*sizeof(wchar_t));
         if(pBuffer >= 0)
         {
            out = pBuffer;
         }
         else
         {
             out.clear();
         }
         delete[] pBuffer;
         return result;
     }

     static inline int wchar2dbcs(const char* code_page,/*in*/const wchar_t* in,int in_len,
                                                 /*out*/char* out,int out_max)
     {
#ifdef _UNIX
         size_t result;
         iconv_t env;
         env = iconv_open(code_page,"WCHAR_T");
         result = iconv(env,(char**)&in,(size_t*)&in_len,(char**)&out,(size_t*)&out_max);
         iconv_close(env);
         return (int) result;
#else
         BOOL use_def_char;
         use_def_char = FALSE;
         return ::WideCharToMultiByte(codepage(code_page),0,in,in_len/sizeof(wchar_t),out,out_max,"?",&use_def_char);
#endif  
     }

     static inline int wchar2dbcs(const char* code_page,/*in*/const wstring& in,/*out*/string& out)
     {
         int len = in.length() + 1;
         int result;
         char* pBuffer = new char[len*3];
         memset(pBuffer,0,len*3);
         result = wchar2dbcs(code_page,in.c_str(),in.length() * sizeof(wchar_t),pBuffer,len*3);
         if(result >= 0)
         { 
             out = pBuffer;
         }
         else
         {
             out = "";
         }
         delete[] pBuffer;
         return result;
     }

     static inline int wchar2utf8(/*in*/const wchar_t* in,int in_len,
                                               /*out*/char* out,int out_max)
     {
#ifdef _UNIX
         size_t result;
         iconv_t env;
    
         env = iconv_open("UTF8","WCHAR_T");
         result = iconv(env,(char**)&in,(size_t*)&in_len,(char**)&out,(size_t*)&out_max);    
         iconv_close(env);
         return (int) result;
#else
         BOOL use_def_char;
         use_def_char = FALSE;
         return ::WideCharToMultiByte(CP_UTF8,0,in,in_len/sizeof(wchar_t),out,out_max,NULL,NULL);
#endif
     }
   
     static inline int wchar2utf8(/*in*/const wstring& in,/*out*/string& out)
     {
         int len = in.length() + 1;
         int result;
         char* pBuffer = new char[len*3];
         memset(pBuffer,0,len*3);  
    
     
         result = wchar2utf8(in.c_str(),in.length() * sizeof(wchar_t),pBuffer,len*3); 
    
   
         if(result >= 0)
         {
             out = pBuffer;
         }
         else
         {
             out = "";
         }
         delete[] pBuffer;
         return result;
     }
   
     static inline int utf82wchar(/*in*/const char* in,int in_len,
                                                /*out*/wchar_t* out,int out_max)
     {
#ifdef _UNIX
         size_t result;
         iconv_t env;
         env = iconv_open("WCHAR_T","UTF8");
         result = iconv(env,(char**)&in,(size_t*)&in_len,(char**)&out,(size_t*)&out_max);
         iconv_close(env);
         return (int) result;
#else
         return ::MultiByteToWideChar(CP_UTF8,0,in,in_len,out,out_max);
#endif
     }

     static inline int utf82wchar(/*in*/const string& in,/*out*/wstring& out)
     {
         int len = in.length() + 1;
         int result;
         //wstring temp;
         wchar_t* pBuffer = new wchar_t[len];
         memset(pBuffer,0,len*sizeof(wchar_t));
         result = utf82wchar(in.c_str(),in.length(),pBuffer,len*sizeof(wchar_t));
         //printf("utf82wchar result is %d,errno is %s/n",result,strerror(errno));
         if(result >= 0)
         {
            out = pBuffer;
         }
         else
         {
            out.clear();     
         }
         delete[] pBuffer;
         return result;
     }
};

你可能感兴趣的:(敝帚自珍,string,跨平台,unix,delete,linux,编译器)