C++之——linux中字符格式转化:&#、&#x类型转中文

前提:

  1. 案例为C++语言
  2. 适用于将形如:&#、&#x等开头的字符串,转换为中文显示
  3. 如有问题欢迎评论沟通。

说明

  • 经查阅资料,上述字符序列是 HTML、XML 等 SGML 类语言的转义序列(escape sequence),而不是一种编码。&# 后面跟的是十进制数,&#x 后面跟的是十六进制数,数值即字符的 Unicode 码点。它们的专业名称是 NCR(numeric character reference,数字字符引用)。
  • 转码思路说明:取出 &#x 后面的十六进制数 —> 转为数值并拆成 UTF-16 的两个字节 —> 通过 ICU 将 UTF-16 转为 GB18030 —> 得到中文

代码

#include <cassert>
#include <cctype>
#include <cstring>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <unicode/ucnv.h>

using namespace std;

// Defined further down in this file; declared here because strTrans calls it
// before its definition appears.
void CSConvert(std::string strSourceCS, const char* pSourceBuffer, int iSourceLen,
               std::string strTargetCS, std::string& strTarget);

// Returns the value (0-15) of the hexadecimal digit c, or -1 when c is not a
// hexadecimal digit.
static int hexDigitValue(char c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
}

// Tries to read a numeric character reference ("&#DDDD;" or "&#xHHHH;") whose
// "&#" starts at text[start] (the caller guarantees those two characters).
// On success stores the code point in codePoint, the index just past the
// reference in next, and returns true. The closing ';' is consumed when it is
// present but is not required. Returns false when there are no digits or the
// value is not a usable Unicode scalar value (0, a surrogate, or > U+10FFFF).
static bool parseNcr(const std::string& text, std::string::size_type start,
                     std::string::size_type& next, unsigned long& codePoint)
{
    const unsigned long kMaxCodePoint = 0x10FFFFUL;

    std::string::size_type i = start + 2;   // skip "&#"
    unsigned long base = 10;
    if (i < text.size() && (text[i] == 'x' || text[i] == 'X'))
    {
        base = 16;
        ++i;
    }

    const std::string::size_type firstDigit = i;
    unsigned long value = 0;
    bool tooLarge = false;
    for (; i < text.size(); ++i)
    {
        const int digit = hexDigitValue(text[i]);
        if (digit < 0 || static_cast<unsigned long>(digit) >= base)
        {
            break;
        }
        // Once the value is out of range stop accumulating (keeps the
        // arithmetic from overflowing) but keep consuming the digit run.
        if (!tooLarge)
        {
            value = value * base + static_cast<unsigned long>(digit);
            tooLarge = value > kMaxCodePoint;
        }
    }

    if (i == firstDigit || tooLarge)
    {
        return false;
    }
    if (value == 0 || (value >= 0xD800UL && value <= 0xDFFFUL))
    {
        return false;
    }
    if (i < text.size() && text[i] == ';')
    {
        ++i;
    }
    next = i;
    codePoint = value;
    return true;
}

// Writes codePoint as big-endian UTF-16 into out (which must hold at least
// 4 bytes) and returns the number of bytes written: 2 for a BMP character,
// 4 for a surrogate pair.
static int encodeUtf16BE(unsigned long codePoint, char* out)
{
    if (codePoint < 0x10000UL)
    {
        out[0] = static_cast<char>((codePoint >> 8) & 0xFFUL);
        out[1] = static_cast<char>(codePoint & 0xFFUL);
        return 2;
    }
    const unsigned long offset = codePoint - 0x10000UL;
    const unsigned long high = 0xD800UL + (offset >> 10);
    const unsigned long low = 0xDC00UL + (offset & 0x3FFUL);
    out[0] = static_cast<char>((high >> 8) & 0xFFUL);
    out[1] = static_cast<char>(high & 0xFFUL);
    out[2] = static_cast<char>((low >> 8) & 0xFFUL);
    out[3] = static_cast<char>(low & 0xFFUL);
    return 4;
}

// Replaces every numeric character reference in strSource -- hexadecimal
// ("&#x7528;") or decimal ("&#29992;") -- with the referenced character
// encoded as GB18030 (via CSConvert) and returns the resulting string.
// All other text, including literal ';' characters and malformed references,
// is copied through unchanged. The input and the result are also echoed to
// std::cout.
std::string strTrans(std::string strSource)
{
    std::cout << "转化前的字符串为 : " << strSource << std::endl;

    std::string result;
    std::string::size_type cursor = 0;
    while (cursor < strSource.size())
    {
        const std::string::size_type amp = strSource.find("&#", cursor);
        if (amp == std::string::npos)
        {
            result.append(strSource, cursor, std::string::npos);
            break;
        }
        result.append(strSource, cursor, amp - cursor);

        std::string::size_type next = amp;
        unsigned long codePoint = 0;
        if (!parseNcr(strSource, amp, next, codePoint))
        {
            // Not a valid reference: keep the '&' literally and rescan after it.
            result += '&';
            cursor = amp + 1;
            continue;
        }

        // Up to two UTF-16 code units followed by a U+0000 terminator. The
        // terminator is included in the length handed to CSConvert so the
        // decoded text is NUL-terminated.
        char utf16[6] = {0, 0, 0, 0, 0, 0};
        const int byteCount = encodeUtf16BE(codePoint, utf16);
        std::string converted;
        CSConvert("UTF-16BE", utf16, byteCount + 2, "GB18030", converted);

        // c_str() drops anything from a converted terminator onwards.
        result += converted.c_str();
        cursor = next;
    }

    std::cout << "转化后的字符串为 : " << result << std::endl;
    return result;
}

// Converts a hexadecimal string to its integer value.
// An optional "0x"/"0X" prefix is skipped; parsing stops at the first
// character that is not a hexadecimal digit (0-9, a-f, A-F). Returns 0 for a
// null pointer or when no digits are present.
int htoi(const char s[])
{
    if (!s)
    {
        return 0;
    }

    int i = 0;
    if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
    {
        i = 2;
    }

    // Accumulate unsigned so an over-long digit run wraps instead of being
    // undefined behaviour.
    unsigned int n = 0;
    for (int digit = hexDigitValue(s[i]); digit >= 0; digit = hexDigitValue(s[++i]))
    {
        n = 16u * n + static_cast<unsigned int>(digit);
    }
    return static_cast<int>(n);
}

// Splits val on every occurrence of token and returns the non-empty pieces in
// their original order. Empty pieces (leading, trailing or between adjacent
// delimiters) are skipped. An empty token cannot delimit anything, so val is
// returned as a single piece (or no piece when val itself is empty).
std::vector<std::string> getTokenList(const std::string& val, const std::string& token)
{
	std::vector<std::string> pieces;

	if (token.empty())
	{
		// find("") matches at every position; splitting on it would never advance.
		if (!val.empty())
		{
			pieces.push_back(val);
		}
		return pieces;
	}

	std::string::size_type begin = 0;
	for (;;)
	{
		const std::string::size_type hit = val.find(token, begin);
		if (hit == std::string::npos)
		{
			break;
		}
		if (hit != begin)
		{
			pieces.push_back(val.substr(begin, hit - begin));
		}
		begin = hit + token.length();
	}

	if (begin < val.length())
	{
		pieces.push_back(val.substr(begin)); // the text after the last delimiter
	}

	return pieces;
}

//编码转换方法
void CSConvert(string strSourceCS /*"UTF-8"*/,const char* pSourceBuffer,int iSourceLen,string strTargetCS/*"GB2312"*/,string& strTarget)
{
  UErrorCode status = U_ZERO_ERROR;

  UChar target[iSourceLen*2];
	UConverter *conv;
	int32_t     len;

	//1 convert strSourceCS string to Unicode
	// set up the converter
	conv = ucnv_open(strSourceCS.c_str(), &status);
	assert(U_SUCCESS(status));

	// convert to Unicode
	len = ucnv_toUChars(conv, target, iSourceLen*2, pSourceBuffer, iSourceLen, &status);
	assert(U_SUCCESS(status));

	// close the converter
	ucnv_close(conv);

	//2 convert Unicode string to strTargetCS
	// set up the converter
	conv = ucnv_open(strTargetCS.c_str(), &status);
	assert(U_SUCCESS(status));

	// convert to strTargetCS
	char gbTarget[iSourceLen*2];
	len = ucnv_fromUChars(conv, gbTarget, iSourceLen*2, target, -1, &status);
	assert(U_SUCCESS(status));

	// close the converter
	ucnv_close(conv);
	strTarget = gbTarget;

	return ;
}

这样就可以调用了,如下:

int main()
{
	string strSource = "用户";
	string strTar = strTrans(strSource);
	cout<<"转换后的字符串为:"<

代码就是这样了,供学习交流

你可能感兴趣的:(C++)