关于C++中字符的转换

VS的工程属性中我们常会设置Character Set:

为了支持Unicode编码,需要多字节与宽字节之间的相互转换。这里涉及两个函数:

// Win32 API prototype (provided by <windows.h>), quoted here for reference.
// Converts a wide-character string into a multibyte string encoded in the
// code page selected by CodePage. The examples in this article call it twice:
// once with a NULL/0 destination to obtain the required size, then again with
// a buffer of that size.
int WideCharToMultiByte(
  _In_      UINT    CodePage,          // code page of the output, e.g. CP_ACP or CP_UTF8
  _In_      DWORD   dwFlags,           // conversion flags; the examples below pass 0
  _In_      LPCWSTR lpWideCharStr,     // source wide-character string
  _In_      int     cchWideChar,       // source length in wide characters; the examples pass -1 (null-terminated input)
  _Out_opt_ LPSTR   lpMultiByteStr,    // destination buffer; NULL in the size-query call
  _In_      int     cbMultiByte,       // destination size in bytes; 0 in the size-query call
  _In_opt_  LPCSTR  lpDefaultChar,     // optional substitute for unrepresentable characters; the examples pass NULL
  _Out_opt_ LPBOOL  lpUsedDefaultChar  // optional flag reporting that the substitute was used; the examples pass NULL
);

// Win32 API prototype (provided by <windows.h>), quoted here for reference.
// Converts a multibyte string encoded in the code page selected by CodePage
// into a wide-character string. The examples in this article call it twice:
// once with a NULL/0 destination to obtain the required length, then again
// with a buffer of that length.
int MultiByteToWideChar(
  _In_      UINT   CodePage,        // code page of the input, e.g. CP_ACP or CP_UTF8
  _In_      DWORD  dwFlags,         // conversion flags; the examples below pass 0
  _In_      LPCSTR lpMultiByteStr,  // source multibyte string
  _In_      int    cbMultiByte,     // source size in bytes; the examples pass -1 (null-terminated input)
  _Out_opt_ LPWSTR lpWideCharStr,   // destination buffer; NULL in the size-query call
  _In_      int    cchWideChar      // destination size in wide characters; 0 in the size-query call
);

详细介绍参看: https://msdn.microsoft.com/en-us/library/dd374130%28VS.85%29.aspx 和 https://msdn.microsoft.com/en-us/library/windows/desktop/dd319072%28v=vs.85%29.aspx

这两个函数在使用时需要指定代码页,常用的代码页有CP_ACP和CP_UTF8两个。使用CP_ACP代码页可以实现ANSI(系统默认的多字节编码)与Unicode(宽字符)之间的转换,使用CP_UTF8代码页可以实现UTF-8与Unicode(宽字符)之间的转换。

举例:

#include <iostream>
#include <string>
#include <windows.h>

// Forward declarations of the conversion helpers defined after main().
// "ANSI" is the multibyte encoding selected by CP_ACP, "UTF8" the one selected
// by CP_UTF8, and "Unicode" the wide-character (wchar_t) representation.
std::wstring ANSIToUnicode(const std::string& str);   // CP_ACP multibyte -> wide
std::string UnicodeToANSI(const std::wstring& str);   // wide -> CP_ACP multibyte
std::wstring UTF8ToUnicode(const std::string& str);   // UTF-8 -> wide
std::string UnicodeToUTF8(const std::wstring& str);   // wide -> UTF-8

// Demo entry point: converts one narrow literal to wide and one wide literal
// to narrow through the CP_ACP helpers, then waits for a key press so the
// console window stays open. The converted values are not used further.
int main()
{
    // Narrow (ANSI) -> wide.
    const std::string narrowInput("Hello World!");
    const std::wstring widened(ANSIToUnicode(narrowInput));

    // Wide -> narrow (ANSI).
    const std::wstring wideInput(L"Hello World");
    const std::string narrowed(UnicodeToANSI(wideInput));

    system("pause");
    return 0;
}

// Converts a string encoded in the system ANSI code page (CP_ACP) to a
// wide-character string.
//
// str:     multibyte input. It is handed to the API as a null-terminated
//          string, so conversion stops at the first embedded '\0'.
// returns: the converted wide string, or an empty string if the API reports
//          a failure.
std::wstring ANSIToUnicode(const std::string& str)
{
    // First call only queries the required length in wide characters. Because
    // the source length is -1, that length includes the terminating null.
    const int unicodeLen = ::MultiByteToWideChar(CP_ACP,
        0,
        str.c_str(),
        -1,
        NULL,
        0);
    if (unicodeLen <= 0)
        return std::wstring();

    // Convert straight into a std::wstring so the buffer is released
    // automatically (the previous new[] buffer was freed with scalar delete).
    std::wstring buffer(static_cast<std::wstring::size_type>(unicodeLen), L'\0');
    const int written = ::MultiByteToWideChar(CP_ACP,
        0,
        str.c_str(),
        -1,
        &buffer[0],
        unicodeLen);
    if (written <= 0)
        return std::wstring();

    // Drop the terminating null that the API wrote as the last element.
    buffer.resize(static_cast<std::wstring::size_type>(written - 1));
    return buffer;
}


// Converts a wide-character string to the system ANSI code page (CP_ACP).
//
// str:     wide input. It is handed to the API as a null-terminated string,
//          so conversion stops at the first embedded L'\0'.
// returns: the converted multibyte string, or an empty string if the API
//          reports a failure.
std::string UnicodeToANSI(const std::wstring& str)
{
    // First call only queries the required size in bytes. Because the source
    // length is -1, that size includes the terminating null.
    const int byteCount = ::WideCharToMultiByte(CP_ACP,
        0,
        str.c_str(),
        -1,
        NULL,
        0,
        NULL,
        NULL);
    if (byteCount <= 0)
        return std::string();

    // Convert straight into a std::string so nothing leaks if an allocation
    // throws and no intermediate copy is needed.
    std::string buffer(static_cast<std::string::size_type>(byteCount), '\0');
    const int written = ::WideCharToMultiByte(CP_ACP,
        0,
        str.c_str(),
        -1,
        &buffer[0],
        byteCount,
        NULL,
        NULL);
    if (written <= 0)
        return std::string();

    // Drop the terminating null that the API wrote as the last byte.
    buffer.resize(static_cast<std::string::size_type>(written - 1));
    return buffer;
}

// Converts a UTF-8 encoded string (CP_UTF8) to a wide-character string.
//
// str:     UTF-8 input. It is handed to the API as a null-terminated string,
//          so conversion stops at the first embedded '\0'.
// returns: the converted wide string, or an empty string if the API reports
//          a failure.
std::wstring UTF8ToUnicode(const std::string& str)
{
    // First call only queries the required length in wide characters. Because
    // the source length is -1, that length includes the terminating null.
    const int unicodeLen = ::MultiByteToWideChar(CP_UTF8,
        0,
        str.c_str(),
        -1,
        NULL,
        0);
    if (unicodeLen <= 0)
        return std::wstring();

    // Convert straight into a std::wstring so the buffer is released
    // automatically (the previous new[] buffer was freed with scalar delete).
    std::wstring buffer(static_cast<std::wstring::size_type>(unicodeLen), L'\0');
    const int written = ::MultiByteToWideChar(CP_UTF8,
        0,
        str.c_str(),
        -1,
        &buffer[0],
        unicodeLen);
    if (written <= 0)
        return std::wstring();

    // Drop the terminating null that the API wrote as the last element.
    buffer.resize(static_cast<std::wstring::size_type>(written - 1));
    return buffer;
}

// Converts a wide-character string to UTF-8 (CP_UTF8).
//
// str:     wide input. It is handed to the API as a null-terminated string,
//          so conversion stops at the first embedded L'\0'.
// returns: the UTF-8 encoded string, or an empty string if the API reports
//          a failure.
std::string UnicodeToUTF8(const std::wstring& str)
{
    // First call only queries the required size in bytes. Because the source
    // length is -1, that size includes the terminating null.
    const int byteCount = ::WideCharToMultiByte(CP_UTF8,
        0,
        str.c_str(),
        -1,
        NULL,
        0,
        NULL,
        NULL);
    if (byteCount <= 0)
        return std::string();

    // Convert straight into a std::string so nothing leaks if an allocation
    // throws and no intermediate copy is needed.
    std::string buffer(static_cast<std::string::size_type>(byteCount), '\0');
    const int written = ::WideCharToMultiByte(CP_UTF8,
        0,
        str.c_str(),
        -1,
        &buffer[0],
        byteCount,
        NULL,
        NULL);
    if (written <= 0)
        return std::string();

    // Drop the terminating null that the API wrote as the last byte.
    buffer.resize(static_cast<std::string::size_type>(written - 1));
    return buffer;
}

结果:

还有一个使用C函数库的方法:

#include <string>
#include <iostream>
#include <cstdlib>

// Converts a wide string to a multibyte string using the C runtime, with the
// locale temporarily switched to "chs" (Chinese) for the conversion.
//
// ws:      wide input; conversion stops at the first embedded L'\0'.
// returns: the multibyte string produced by wcstombs_s (empty if the
//          conversion fails).
// Side effects: changes the global C locale for the duration of the call and
//          prints the byte count reported by wcstombs_s to std::cout.
std::string ws2s(const std::wstring& ws)
{
    // Remember the active locale so it can be restored afterwards. The query
    // result is copied because later setlocale calls may invalidate it.
    const char* const active = setlocale(LC_ALL, NULL);
    const bool haveSaved = (active != NULL);
    const std::string curLocale(haveSaved ? active : "");
    setlocale(LC_ALL, "chs");                        // Chinese locale

    // A wide character may expand to up to MB_CUR_MAX bytes in the active
    // locale, so size the buffer for the worst case plus the terminating
    // null. Sizing it as ws.size() + 1 truncated non-ASCII text.
    const size_t capacity = ws.size() * static_cast<size_t>(MB_CUR_MAX) + 1;
    std::string buffer(capacity, '\0');
    size_t i = 0;
    wcstombs_s(&i, &buffer[0], capacity, ws.c_str(), capacity - 1);
    std::cout << "i:" << i << std::endl;
    const std::string result(buffer.c_str());

    if (haveSaved)
        setlocale(LC_ALL, curLocale.c_str());        // restore the previous locale
    return result;
}


// Converts a multibyte string to a wide string using the C runtime, with the
// locale temporarily switched to "chs" (Chinese) for the conversion.
//
// s:       multibyte input; conversion stops at the first embedded '\0'.
// returns: the wide string produced by mbstowcs_s (empty if the conversion
//          fails).
// Side effects: changes the global C locale for the duration of the call.
std::wstring s2ws(const std::string& s)
{
    // Remember the active locale so it can be restored afterwards. The query
    // result is copied because later setlocale calls may invalidate it.
    const char* const active = setlocale(LC_ALL, NULL);
    const bool haveSaved = (active != NULL);
    const std::string savedLocale(haveSaved ? active : "");
    setlocale(LC_ALL, "chs");

    // The wide result never has more elements than the input has bytes, so
    // s.size() wide characters plus the terminating null always suffice.
    const size_t capacity = s.size() + 1;
    std::wstring buffer(capacity, L'\0');
    size_t converted = 0;
    mbstowcs_s(&converted, &buffer[0], capacity, s.c_str(), s.size());
    const std::wstring result(buffer.c_str());

    if (haveSaved)
        setlocale(LC_ALL, savedLocale.c_str());
    return result;
}

// Demo entry point: runs one wide -> narrow and one narrow -> wide conversion
// through the C-runtime helpers, then waits for a key press so the console
// window stays open.
int main()
{
    std::wstring wstr = L"Hello World!";
    std::string str = ws2s(wstr);
    std::string str1 = "Hello World!";
    // Convert the narrow literal declared for this purpose; it was previously
    // left unused while the output of ws2s was converted back instead.
    std::wstring wstr1 = s2ws(str1);
    system("pause");
    return 0;
}

结果:

第三种方法:

#include <string>
#include <iostream>

// Widens a narrow string one element at a time, without any code-page
// conversion: each byte becomes the wide character with the same numeric
// value (0..255). Correct for ASCII and Latin-1 text only.
//
// str:     narrow input; embedded '\0' bytes are copied like any other byte.
// returns: a wide string with the same number of elements as str.
std::wstring StringToWString(const std::string &str)
{
    std::wstring wstr;
    wstr.reserve(str.length());
    for (std::string::size_type i = 0; i < str.length(); ++i)
    {
        // Go through unsigned char first: where char is signed, a byte
        // >= 0x80 would otherwise be sign-extended (0xE9 -> 0xFFE9).
        wstr.push_back(static_cast<wchar_t>(static_cast<unsigned char>(str[i])));
    }
    return wstr;
}

// Narrows a wide string one element at a time, without any code-page
// conversion: each wide character is truncated to char, so in practice only
// its low-order byte survives. Lossy for anything outside the char range.
// Also prints the narrowed string to std::cout before returning it.
std::string WStringToString(const std::wstring &wstr)
{
    std::string narrowed;
    narrowed.reserve(wstr.length());
    for (std::wstring::const_iterator it = wstr.begin(); it != wstr.end(); ++it)
    {
        narrowed.push_back(static_cast<char>(*it));
    }
    std::cout << narrowed << std::endl;
    return narrowed;
}


// Demo entry point: runs one wide -> narrow and one narrow -> wide conversion
// through the element-copy helpers, then waits for a key press so the console
// window stays open.
int main()
{
    std::wstring wstr = L"Hello World!";
    std::string str = WStringToString(wstr);
    std::string str1 = "Hello World!";
    // Convert the narrow literal declared for this purpose; it was previously
    // left unused while the output of WStringToString was converted back.
    std::wstring wstr1 = StringToWString(str1);
    system("pause");
    return 0;
}

结果:

 

你可能感兴趣的:(关于C++中字符的转换)