之前做了一个基于 Sybase 的 MIS 系统。由于 Sybase 的后台使用 cp850 编码,而 .NET 平台(看起来)不支持 cp850 编码,所以在程序中所有从数据库读出的中文都显示为“?”。
于是考虑在.net 平台中转换字符编码。于是查看了.net中字符编码的类System.Text.Encoding
里面支持的字符集编码有ibm850,没有cp850,后来查看资料才知道原来这两个名字指的是同一种编码规范。
于是开始进行编码转换,首先找到一个java的程序:
public String CP850ToGB2312(String str)
...{
try
...{
byte[] temp = str.getBytes("cp850");
String result = new String(temp, "gb2312");
return result;
}
catch (UnsupportedEncodingException ex)
...{ return null; }
}
public String GB2312ToCP850(String str)
...{
try
...{
byte[] temp = str.getBytes("gb2312");
String result = new String(temp, "cp850");
return result;
}
catch (UnsupportedEncodingException ex)
...{
return null;
}
}
然后再根据查到的 System.Text.Encoding 类的属性和方法,写了如下的转换程序:
/// <summary>
/// Encodes <paramref name="str"/> as UTF-8, transcodes those bytes to GB2312 and
/// decodes the GB2312 bytes back into a string.
/// </summary>
/// <param name="str">The text to pass through the UTF-8 to GB2312 conversion.</param>
/// <returns>
/// The string decoded from the GB2312 bytes, or <c>null</c> if any step throws
/// (the exception text is shown in a message box first).
/// </returns>
public string UTF8ToGB2312(string str)
{
    try
    {
        Encoding utf8 = Encoding.GetEncoding(65001);       // looked up by code page
        Encoding gb2312 = Encoding.GetEncoding("gb2312");  // looked up by name; author's noted alternatives: Encoding.Default, 936

        byte[] utf8Bytes = utf8.GetBytes(str);
        byte[] gb2312Bytes = Encoding.Convert(utf8, gb2312, utf8Bytes);

        return gb2312.GetString(gb2312Bytes);
    }
    catch (Exception ex)
    {
        // Best-effort behaviour kept from the original: report the failure and return null.
        MessageBox.Show(ex.ToString());
        return null;
    }
}
/// <summary>
/// Encodes <paramref name="str"/> as GB2312, transcodes those bytes to UTF-8 and
/// decodes the UTF-8 bytes back into a string, showing the byte count and every
/// byte value of both encodings in message boxes along the way.
/// </summary>
/// <param name="str">The text to pass through the GB2312 to UTF-8 conversion.</param>
/// <returns>
/// The string decoded from the UTF-8 bytes, or <c>null</c> if any step throws
/// (the exception text is shown in a message box first).
/// </returns>
public string GB2312ToUTF8(string str)
{
    try
    {
        Encoding utf8 = Encoding.GetEncoding(65001);
        Encoding gb2312 = Encoding.GetEncoding("gb2312");

        // Diagnostic output: one dialog for the count, then one per GB2312 byte.
        byte[] gb2312Bytes = gb2312.GetBytes(str);
        MessageBox.Show("gb2312的编码的字节个数:" + gb2312Bytes.Length);
        foreach (byte b in gb2312Bytes)
        {
            MessageBox.Show(Convert.ToUInt16(b).ToString());
        }

        // Same diagnostic output for the transcoded UTF-8 bytes.
        byte[] utf8Bytes = Encoding.Convert(gb2312, utf8, gb2312Bytes);
        MessageBox.Show("uft8的编码的字节个数:" + utf8Bytes.Length);
        foreach (byte b in utf8Bytes)
        {
            MessageBox.Show(Convert.ToUInt16(b).ToString());
        }

        return utf8.GetString(utf8Bytes);
    }
    catch (Exception ex)
    {
        // Best-effort behaviour kept from the original: report the failure and return null.
        MessageBox.Show(ex.ToString());
        return null;
    }
}
主要使用的就是获取编码方式的类对象,
Encoding utf8 = Encoding.GetEncoding(65001);//使用code page
Encoding gb2312 = Encoding.GetEncoding("gb2312");//通过bodyname
获取字符编码字节序列:byte[] temp=utf8.GetBytes(str);
编码方式转换:byte[] temp1=Encoding.Convert(utf8, gb2312, temp);
获取编码的字符串:string str1=gb2312.GetString(temp1);
这样即完成了字符编码的转换。
在简体中文操作系统中,Encoding.Default 一般是 gb2312(代码页 936)。
/// <summary>
/// Appends a one-line XML document containing a Chinese message to
/// <c>C:\test.xml</c> (creating the file if needed), then waits for console input.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    string fileName = @"C:\test.xml";
    string message = "呵呵";

    // using guarantees the writer is flushed and the file handle released even if a write throws.
    using (FileStream fs = new FileStream(fileName, FileMode.OpenOrCreate))
    using (StreamWriter sw = new StreamWriter(fs))
    {
        // Position at the end so existing content is kept and the new line is appended.
        fs.Seek(0, SeekOrigin.End);
        sw.WriteLine("<?xml version=\"1.0\" encoding=\"UTF-8\"?><menu>" + message + "</menu>");
    }

    // Keep the console window open until a key is pressed.
    Console.Read();
}
/// <summary>
/// Encodes <paramref name="utfInfo"/> as UTF-8, transcodes the bytes to GB2312
/// and decodes the GB2312 bytes back into a string.
/// </summary>
/// <param name="utfInfo">The text to pass through the UTF-8 to GB2312 conversion.</param>
/// <returns>The string decoded from the GB2312 bytes.</returns>
private static string ToGB2312(string utfInfo)
{
    Encoding source = Encoding.UTF8;
    Encoding target = Encoding.GetEncoding("gb2312");

    byte[] utf8Bytes = source.GetBytes(utfInfo);
    byte[] gb2312Bytes = Encoding.Convert(source, target, utf8Bytes);

    // Decode the whole GB2312 buffer in a single call.
    return target.GetString(gb2312Bytes, 0, gb2312Bytes.Length);
}
/// <summary>
/// Encodes <paramref name="gb2312Info"/> as GB2312, transcodes the bytes to UTF-8
/// and decodes the UTF-8 bytes back into a string.
/// </summary>
/// <param name="gb2312Info">The text to pass through the GB2312 to UTF-8 conversion.</param>
/// <returns>The string decoded from the UTF-8 bytes.</returns>
private static string ToUTF8(string gb2312Info)
{
    Encoding target = Encoding.UTF8;
    Encoding source = Encoding.GetEncoding("gb2312");

    byte[] gb2312Bytes = source.GetBytes(gb2312Info);
    byte[] utf8Bytes = Encoding.Convert(source, target, gb2312Bytes);

    // Decode the whole UTF-8 buffer in a single call.
    return target.GetString(utf8Bytes, 0, utf8Bytes.Length);
}