C# unicode 编码 和 解码

首先是字符串与字节数组之间的编码转换:
 // Snippet showing several ways to move a string between encodings.
 // `msgStr` is the input string and is declared outside this snippet.
 // Step 1: encode msgStr as UTF-8 bytes.
 byte[] 
bytes = System.Text.Encoding.UTF8.GetBytes(msgStr);
// Step 2: re-encode the UTF-8 bytes as Encoding.Unicode (UTF-16) bytes.
bytes = Encoding.Convert(Encoding.UTF8, Encoding.Unicode, bytes);
// Alternative to steps 1-2: encode msgStr directly as UTF-16 bytes; this overwrites the previous value of `bytes`.
bytes = System.Text.Encoding.Unicode.GetBytes(msgStr);
// Convert the UTF-16 bytes to gb2312 bytes, then decode them as gb2312 back into msgStr.
// NOTE(review): characters gb2312 cannot represent are presumably lost in this round trip, and on
// .NET Core/.NET 5+ "gb2312" likely requires registering CodePagesEncodingProvider first - confirm.
msgStr = System.Text.Encoding.GetEncoding("gb2312").GetString(Encoding.Convert(Encoding.Unicode, Encoding.GetEncoding("gb2312"), bytes));


编码:把转义文本 "\\uxxxx"(反斜杠、字母 u 加 4 位十六进制数字)转换为实际字符 "\uxxxx"
解码:把实际字符 "\uxxxx" 转换为转义文本 "\\uxxxx"

编码算法

//UNICODE字符转为中文
对这个方法做一点改进,使它支持中英文混排:
/// <summary>
/// Replaces every <c>\uXXXX</c> escape sequence (backslash, the letter u in either case,
/// then four hexadecimal digits) in the input with the UTF-16 code unit it denotes.
/// All other text, including plain ASCII, is copied through unchanged, so mixed
/// Chinese/English input is supported.
/// </summary>
/// <param name="unicodeString">Text that may contain <c>\uXXXX</c> escape sequences.</param>
/// <returns>
/// The decoded text, or <see cref="string.Empty"/> when <paramref name="unicodeString"/>
/// is null or empty.
/// </returns>
public static string ConvertUnicodeStringToChinese(string unicodeString)
{
    if (string.IsNullOrEmpty(unicodeString))
    {
        return string.Empty;
    }

    // Decode in a single left-to-right pass. Replacing match-by-match with string.Replace
    // would re-scan text that has already been decoded, so a decoded backslash followed by
    // "uXXXX" could be mistaken for a new escape and decoded a second time.
    // The static Regex.Replace overload uses the framework's internal regex cache, so the
    // pattern is not re-parsed on every call.
    return Regex.Replace(
        unicodeString,
        @"\\u([0-9a-f]{4})",
        match => ((char)int.Parse(
            match.Groups[1].Value,
            System.Globalization.NumberStyles.HexNumber,
            System.Globalization.CultureInfo.InvariantCulture)).ToString(),
        RegexOptions.IgnoreCase);
}

/// <summary>
/// Converts a single escape sequence of the form <c>\uXXXX</c> into the character
/// whose UTF-16 code unit is the hexadecimal value <c>XXXX</c>.
/// </summary>
/// <param name="str">The escape sequence; its first two characters are discarded.</param>
/// <returns>The character for the parsed hexadecimal value.</returns>
private static char ConverUnicodeStringToChar(string str)
{
    // Drop the two-character prefix so that only the hexadecimal digits remain.
    string hexDigits = str.Remove(0, 2);
    int codeUnit = int.Parse(hexDigits, System.Globalization.NumberStyles.HexNumber);
    return (char)codeUnit;
}


解码:

// Pattern for a \uXXXX escape sequence. The encoder below does not need it (it inspects
// characters directly); the field is kept in case other code references it.
static Regex reUnicode = new Regex(@"\\u([0-9a-fA-F]{4})", RegexOptions.Compiled);

/// <summary>
/// Escapes every non-ASCII UTF-16 code unit in the input as <c>\uxxxx</c> (four lower-case
/// hexadecimal digits). ASCII characters are copied through unchanged, so mixed
/// Chinese/English text stays readable.
/// </summary>
/// <remarks>
/// Characters outside the Basic Multilingual Plane are emitted as two escapes, one per
/// surrogate code unit. ASCII backslashes are not escaped, so input that already contains
/// literal <c>\uXXXX</c> text is passed through as-is.
/// </remarks>
/// <param name="chinese">The text to escape.</param>
/// <returns>
/// The escaped text, or <see cref="string.Empty"/> when <paramref name="chinese"/> is
/// null or empty.
/// </returns>
public static string ConvertChineseToUnicodeString(string chinese)
{
    if (string.IsNullOrEmpty(chinese))
    {
        return string.Empty;
    }

    // StringBuilder avoids the quadratic cost of repeated string concatenation in the loop.
    System.Text.StringBuilder escaped = new System.Text.StringBuilder(chinese.Length * 2);
    foreach (char c in chinese)
    {
        if (c > 0x7F)
        {
            escaped.Append("\\u");
            escaped.Append(((int)c).ToString("x4", System.Globalization.CultureInfo.InvariantCulture));
        }
        else
        {
            escaped.Append(c);
        }
    }
    return escaped.ToString();
}

你可能感兴趣的:(C#)