C/CPP 将 \uXXXX 形式的 Unicode 转义序列转成汉字(例如 \u7528\u6237\u4e0d)

参考文章:http://www.qingfengju.com/article.asp?id=245


现在的网站,经常返回下面这样的字符串:
{"error":"\u7528\u6237\u4e0d\u5b58\u5728\u6216\u5bc6\u7801\u9519\u8bef"}
其中的 \u7528 等是汉字的 Unicode 转义序列(\u 后面跟 4 位十六进制的 UTF-16 码元,并不是 UTF-8 编码),如何将其还原成相应的字符呢?

代码如下:

#include <windows.h>

#include <cstdio>
#include <cstring>
#include <string>

using std::string;

/**
 * Decodes one 4-digit hexadecimal code unit (the "XXXX" part of a "\uXXXX"
 * escape) into a multibyte string using the system ANSI code page (CP_ACP).
 *
 * @param szCode  Text whose first four characters are expected to be
 *                hexadecimal digits; anything after them is ignored.
 *                May be null.
 * @return The converted character, or an empty string when szCode is null,
 *         does not start with four hex digits, or the conversion fails.
 */
string Utf8Code2String(const char* szCode)
{
	if (szCode == nullptr)
	{
		return string();
	}

	// Validate and accumulate in a single pass. An input shorter than four
	// characters stops at its terminating NUL (which is not a hex digit), so
	// nothing past the end of the string is read.
	unsigned int uCode = 0;
	for (int i = 0; i < 4; ++i)
	{
		const char ch = szCode[i];
		int nDigit = 0;
		if (ch >= '0' && ch <= '9')
		{
			nDigit = ch - '0';
		}
		else if (ch >= 'A' && ch <= 'F')
		{
			nDigit = ch - 'A' + 10;
		}
		else if (ch >= 'a' && ch <= 'f')
		{
			nDigit = ch - 'a' + 10;
		}
		else
		{
			return string();
		}
		uCode = (uCode << 4) | static_cast<unsigned int>(nDigit);
	}

	// Four hex digits never exceed 0xFFFF, so the value fits one wide character.
	const wchar_t wchChar = static_cast<wchar_t>(uCode);

	// Leave the last byte untouched so the buffer is always NUL-terminated
	// when it is read back as a C string below.
	char szAnsi[8] = {0};
	const int nBytes = WideCharToMultiByte(CP_ACP, 0, &wchChar, 1,
		szAnsi, static_cast<int>(sizeof(szAnsi)) - 1, nullptr, nullptr);
	if (nBytes <= 0)
	{
		return string();
	}

	return string(szAnsi);
}


/**
 * Extracts and decodes every "\uXXXX" escape found in szUtf8Code.
 *
 * Only the decoded characters are returned; text outside the escapes is
 * dropped, so passing a whole response such as
 *   {"error":"\u7528\u6237\u4e0d\u5b58\u5728\u6216\u5bc6\u7801\u9519\u8bef"}
 * yields just the decoded message. Each escape is decoded on its own by
 * Utf8Code2String, so surrogate pairs are not combined.
 *
 * The input is scanned read-only. An escape is recognised only as a backslash
 * immediately followed by 'u' and four more characters; a lone 'u', a lone
 * backslash, or bare hex digits in ordinary text are not treated as escapes.
 *
 * @param szUtf8Code  NUL-terminated text containing "\uXXXX" escapes.
 *                    May be null.
 * @return Concatenation of the decoded escapes, in input order; empty when
 *         szUtf8Code is null or contains no valid escape.
 */
string MyUnEscape(const char* szUtf8Code)
{
	// {"error":"\u7528\u6237\u4e0d\u5b58\u5728\u6216\u5bc6\u7801\u9519\u8bef"}
	string strRet;
	if (szUtf8Code == nullptr)
	{
		return strRet;
	}

	const char* pCur = szUtf8Code;
	while (*pCur != '\0')
	{
		// pCur[1] is only read when pCur[0] is a backslash, i.e. not the
		// terminator, so this never reads past the end of the string.
		if (pCur[0] != '\\' || pCur[1] != 'u')
		{
			++pCur;
			continue;
		}

		// Copy the (up to) four characters that follow "\u" into a private
		// buffer; stop early if the input ends in the middle of an escape.
		char szHex[5] = {0};
		int nCopied = 0;
		while (nCopied < 4 && pCur[2 + nCopied] != '\0')
		{
			szHex[nCopied] = pCur[2 + nCopied];
			++nCopied;
		}
		if (nCopied < 4)
		{
			break;
		}

		// Utf8Code2String returns an empty string for non-hex input, so a
		// malformed escape simply contributes nothing.
		strRet += Utf8Code2String(szHex);

		// Step over "\u" only; the digits that follow are plain characters
		// to the scanner and are skipped one by one.
		pCur += 2;
	}
	return strRet;
}




// 调用例子
MyUnEscape("test \u7528\u6237\u4e0d\u5b58\u5728\u6216\u5bc6\u7801\u9519\u8bef hahah ok");

-----------------------------------------------------------------------------------------

贴下参考文章的代码:

#include 
 
#include 
#include 
using namespace std;
 
#include 
 
// Modes passed as the `flag` argument of unicode_bytes(). Per the usage
// example in that function, it is called once with each mode.
enum
{
    UNICODE_CALC_SIZE = 1,  // first call: find out how much space the result needs
    UNICODE_GET_BYTES = 2   // second call: fetch the converted sequence into the buffer
};
 
//将unicode转义字符序列转换为内存中的unicode字符串
int unicode_bytes(char* p_unicode_escape_chars,wchar_t *bytes,int flag)
{
    /*
    char* p_unicode_escape_chars="pp\\u4fddp\\u5b58\\u6210pp\\u529f0a12";
 
    //通过此函数获知转换后需要的字节数
    int n_length=unicode_bytes(p_unicode_escape_chars,NULL,UNICODE_CALC_SIZE);
 
    //再次调用此函数,取得字节序列
    wchar_t *bytes=new wchar_t[n_length+sizeof(wchar_t)];
    unicode_bytes(p_unicode_escape_chars,bytes,UNICODE_GET_BYTES);
    bytes[n_length]=0;
 
    //此时的bytes中是转换后的字节序列
    delete[] bytes;
    */
 
    int unicode_count=0;
    int length=strlen(p_unicode_escape_chars);
    for (int char_index=0;char_index










你可能感兴趣的:(C++,VC,C)