关于这个话题, 我以前曾经长期使用过一个简便的算法, 代码如下:
/// <summary>
/// Boundary characters for the pinyin initials. Entry 0 is the lower bound of
/// the "a" group; each later entry is the first character that no longer
/// belongs to the previous letter's group. The last entry closes the "z" group.
/// </summary>
private static readonly string[] PinyinBoundaries =
{
    "吖", "八", "嚓", "咑", "妸", "发", "旮", "铪", "讥", "咔", "垃", "嘸",
    "拏", "噢", "妑", "七", "亽", "仨", "他", "哇", "夕", "丫", "帀", "咗"
};

/// <summary>
/// Initials matching the gaps between <see cref="PinyinBoundaries"/> entries
/// (there are no groups for i, u and v).
/// </summary>
private const string PinyinInitials = "abcdefghjklmnopqrstwxyz";

/// <summary>
/// Maps a single Chinese character to the lowercase initial of its pinyin by
/// comparing it against a list of boundary characters.
/// </summary>
/// <param name="str">A one-character string. Null or empty input is returned unchanged.</param>
/// <returns>
/// The lowercase initial, or <paramref name="str"/> itself when it sorts
/// before the first boundary or at/after the last one.
/// </returns>
/// <remarks>
/// The comparison is culture-sensitive and uses the current culture, so the
/// result depends on that culture's collation rules and on the collation
/// tables shipped with the runtime. The boundaries only make sense under a
/// pinyin-ordered collation.
/// </remarks>
private string ToPinyinSingle(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return str;
    }

    for (int i = 0; i < PinyinBoundaries.Length; i++)
    {
        if (string.Compare(str, PinyinBoundaries[i], System.StringComparison.CurrentCulture) < 0)
        {
            // Sorting before the very first boundary means "not a Chinese character".
            return i == 0 ? str : PinyinInitials[i - 1].ToString();
        }
    }

    // At or beyond the last boundary: outside the covered range.
    return str;
}
这个函数只处理单个汉字, 简单地加个循环就可以让它处理文字串了.
在.net 3.5下, 它一直工作得很好, 虽然偶尔也有出错的时候, 但是概率极低, 基本上可以忽略不计.
然而后来我把项目升级到 .NET 4.0 以后, 发现出错的几率直线上升, 已经高到了无法容忍的程度 (例如, "梅" 会返回 "L"). 简单查了一下, 没找到微软关于 String.CompareTo 函数有什么变化的说明, 束手无策, 于是换用另一个也很简单的算法 (http://topic.csdn.net/u/20090219/12/61745e3a-a39e-4f4d-8985-67d124236694.html):
/// <summary>
/// Returns the uppercase pinyin initial of a GB2312 level-1 Chinese character
/// by looking up its two-byte code in a table of range start values.
/// </summary>
/// <param name="cn">
/// The text to examine. Only the first two encoded bytes are inspected, so
/// this is meant to be called with a single character.
/// </param>
/// <returns>
/// "A".."Z" for a level-1 character, "?" for any other double-byte character
/// (including all level-2 characters), or <paramref name="cn"/> unchanged
/// when it encodes to fewer than two bytes (e.g. ASCII or an empty string).
/// </returns>
/// <remarks>
/// The GB2312 encoding is requested explicitly instead of relying on
/// Encoding.Default, which depends on the machine's ANSI code page.
/// NOTE(review): on .NET Core / .NET 5+ this encoding is presumably only
/// available after registering CodePagesEncodingProvider - confirm for the
/// target runtime.
/// </remarks>
public static string getSpell(string cn)
{
    byte[] arrCN = System.Text.Encoding.GetEncoding("GB2312").GetBytes(cn);
    if (arrCN.Length <= 1)
    {
        return cn;
    }

    // The "GB2312" encoding may also produce GBK-extension codes whose trail
    // byte is below 0xA1; those are not GB2312 characters and must not be
    // matched against the range table.
    if (arrCN[1] < 0xA1)
    {
        return "?";
    }

    int code = (arrCN[0] << 8) + arrCN[1];

    // First code of each letter A..Z. Letters with no characters (I, U, V)
    // repeat the next letter's start value, giving them an empty range.
    int[] areacode =
    {
        45217, 45253, 45761, 46318, 46826, 47010, 47297, 47614, 48119, 48119,
        49062, 49324, 49896, 50371, 50614, 50622, 50906, 51387, 51446, 52218,
        52698, 52698, 52698, 52980, 53689, 54481
    };
    const int levelOneEnd = 55290; // exclusive upper bound of the "Z" range

    for (int i = 0; i < areacode.Length; i++)
    {
        int max = i + 1 < areacode.Length ? areacode[i + 1] : levelOneEnd;
        if (areacode[i] <= code && code < max)
        {
            return ((char)('A' + i)).ToString();
        }
    }

    return "?";
}
但是这个函数出错的概率也很高, 例如"闫""窦""圳" 等都无法识别, 追查了一下原因, 发现原来对GB2312编码来说, 存放规定是这样的:
01-09区为特殊符号。
16-55区为一级汉字,按拼音排序。
56-87区为二级汉字,按部首/笔画排序。
每个汉字及符号以两个字节来表示。第一个字节称为“高位字节”,第二个字节称为“低位字节”。
“高位字节”使用了0xA1-0xF7(把01-87区的区号加上0xA0),“低位字节”使用了0xA1-0xFE(把01-94加上0xA0)。
例如“啊”字在大多数程序中,会以0xB0A1储存。(与区位码对比:0xB0=0xA0+16,0xA1=0xA0+1)
上述几个字位置码都大于55290, 显然是二级汉字, 这个算法就处理不了了, 换言之, 这种写法只能用于处理一级汉字. 这当然是不可接受的.
后来翻查良久, 终于找到一个用C++写的算法, 可以同时处理一级汉字和二级汉字(http://download.csdn.net/detail/ronjay/1955072), 我把它改写成了C#, 代码如下:
/// <summary>
/// Converts a Chinese character to the uppercase initial of its pinyin using
/// its two-byte GB2312 code. Level-1 characters are resolved through a table
/// of code ranges, level-2 characters through a per-row lookup table.
/// </summary>
public class ChineseToPinYin
{
    #region Lookup tables

    /// <summary>
    /// Initials for the level-2 characters: one string per row for lead bytes
    /// 0xD8..0xF7, 94 letters per string (one per trail byte 0xA1..0xFE).
    /// </summary>
    private static readonly string[] _regionChar = new string[32]
    {
        "CJWGNSPGCGNESYPBTYYZDXYKYGTDJNNJQMBSGZSCYJSYYQPGKBZGYCYWJKGKLJSWKPJQHYTWDDZLSGMRYPYWWCCKZNKYDG",
        "TTNGJEYKKZYTCJNMCYLQLYPYQFQRPZSLWBTGKJFYXJWZLTBNCXJJJJZXDTTSQZYCDXXHGCKBPHFFSSWYBGMXLPBYLLLHLX",
        "SPZMYJHSOJNGHDZQYKLGJHSGQZHXQGKEZZWYSCSCJXYEYXADZPMDSSMZJZQJYZCDJZWQJBDZBXGZNZCPWHKXHQKMWFBPBY",
        "DTJZZKQHYLYGXFPTYJYYZPSZLFCHMQSHGMXXSXJJSDCSBBQBEFSJYHWWGZKPYLQBGLDLCCTNMAYDDKSSNGYCSGXLYZAYBN",
        "PTSDKDYLHGYMYLCXPYCJNDQJWXQXFYYFJLEJBZRXCCQWQQSBNKYMGPLBMJRQCFLNYMYQMSQTRBCJTHZTQFRXQHXMJJCJLX",
        "QGJMSHZKBSWYEMYLTXFSYDSGLYCJQXSJNQBSCTYHBFTDCYZDJWYGHQFRXWCKQKXEBPTLPXJZSRMEBWHJLBJSLYYSMDXLCL",
        "QKXLHXJRZJMFQHXHWYWSBHTRXXGLHQHFNMNYKLDYXZPWLGGTMTCFPAJJZYLJTYANJGBJPLQGDZYQYAXBKYSECJSZNSLYZH",
        "ZXLZCGHPXZHZNYTDSBCJKDLZAYFMYDLEBBGQYZKXGLDNDNYSKJSHDLYXBCGHXYPKDJMMZNGMMCLGWZSZXZJFZNMLZZTHCS",
        "YDBDLLSCDDNLKJYKJSYCJLKOHQASDKNHCSGANHDAASHTCPLCPQYBSDMPJLPCJOQLCDHJJYSPRCHNWJNLHLYYQYYWZPTCZG",
        "WWMZFFJQQQQYXACLBHKDJXDGMMYDJXZLLSYGXGKJRYWZWYCLZMSSJZLDBYDCFCXYHLXCHYZJQSFQAGMNYXPFRKSSBJLYXY",
        "SYGLNSCMHCWWMNZJJLXXHCHSYDSTTXRYCYXBYHCSMXJSZNPWGPXXTAYBGAJCXLYSDCCWZOCWKCCSBNHCPDYZNFCYYTYCKX",
        "KYBSQKKYTQQXFCWCHCYKELZQBSQYJQCCLMTHSYWHMKTLKJLYCXWHEQQHTQHZPQSQSCFYMMDMGBWHWLGSSLYSDLMLXPTHMJ",
        "HWLJZYHZJXHTXJLHXRSWLWZJCBXMHZQXSDZPMGFCSGLSXYMJSHXPJXWMYQKSMYPLRTHBXFTPMHYXLCHLHLZYLXGSSSSTCL",
        "SLDCLRPBHZHXYYFHBBGDMYCNQQWLQHJJZYWJZYEJJDHPBLQXTQKWHLCHQXAGTLXLJXMSLXHTZKZJECXJCJNMFBYCSFYWYB",
        "JZGNYSDZSQYRSLJPCLPWXSDWEJBJCBCNAYTWGMPAPCLYQPCLZXSBNMSGGFNZJJBZSFZYNDXHPLQKZCZWALSBCCJXJYZGWK",
        "YPSGXFZFCDKHJGXDLQFSGDSLQWZKXTMHSBGZMJZRGLYJBPMLMSXLZJQQHZYJCZYDJWBMJKLDDPMJEGXYHYLXHLQYQHKYCW",
        "CJMYYXNATJHYCCXZPCQLBZWWYTWBQCMLPMYRJCCCXFPZNZZLJPLXXYZTZLGDLDCKLYRZZGQTGJHHHJLJAXFGFJZSLCFDQZ",
        "LCLGJDJCSNCLLJPJQDCCLCJXMYZFTSXGCGSBRZXJQQCTZHGYQTJQQLZXJYLYLBCYAMCSTYLPDJBYREGKLZYZHLYSZQLZNW",
        "CZCLLWJQJJJKDGJZOLBBZPPGLGHTGZXYGHZMYCNQSYCYHBHGXKAMTXYXNBSKYZZGJZLQJDFCJXDYGJQJJPMGWGJJJPKQSB",
        "GBMMCJSSCLPQPDXCDYYKYFCJDDYYGYWRHJRTGZNYQLDKLJSZZGZQZJGDYKSHPZMTLCPWNJAFYZDJCNMWESCYGLBTZCGMSS",
        "LLYXQSXSBSJSBBSGGHFJLWPMZJNLYYWDQSHZXTYYWHMCYHYWDBXBTLMSYYYFSXJCSDXXLHJHFSSXZQHFZMZCZTQCXZXRTT",
        "DJHNNYZQQMNQDMMGYYDXMJGDHCDYZBFFALLZTDLTFXMXQZDNGWQDBDCZJDXBZGSQQDDJCMBKZFFXMKDMDSYYSZCMLJDSYN",
        "SPRSKMKMPCKLGDBQTFZSWTFGGLYPLLJZHGJJGYPZLTCSMCNBTJBQFKTHBYZGKPBBYMTTSSXTBNPDKLEYCJNYCDYKZDDHQH",
        "SDZSCTARLLTKZLGECLLKJLQJAQNBDKKGHPJTZQKSECSHALQFMMGJNLYJBBTMLYZXDCJPLDLPCQDHZYCBZSCZBZMSLJFLKR",
        "ZJSNFRGJHXPDHYJYBZGDLQCSEZGXLBLGYXTWMABCHECMWYJYZLLJJYHLGBDJLSLYGKDZPZXJYYZLWCXSZFGWYYDLYHCLJS",
        "CMBJHBLYZLYCBLYDPDQYSXQZBYTDKYXJYYCNRJMPDJGKLCLJBCTBJDDBBLBLCZQRPPXJCGLZCSHLTOLJNMDDDLNGKAQHQH",
        "JGYKHEZNMSHRPHQQJCHGMFPRXHJGDYCHGHLYRZQLCYQJNZSQTKQJYMSZSWLCFQQQXYFGGYPTQWLMCRNFKKFSYYLQBMQAMM",
        "MYXCTPSHCPTXXZZSMPHPSHMCLMLDQFYQXSZYJDJJZZHQPDSZGLSTJBCKBXYQZJSGPSXQZQZRQTBDKYXZKHHGFLBCSMDLDG",
        "DZDBLZYYCXNNCSYBZBFGLZZXSWMSCCMQNJQSBDQSJTXXMBLTXZCLZSHZCXRQJGJYLXZFJPHYMZQQYDFQJJLZZNZJCDGZYG",
        "CTXMZYSCTLKPHTXHTLBJXJLXSCDQXCBBTJFQZFSLTJBTKQBXXJJLJCHCZDBZJDCZJDCPRNPQCJPFCZLCLZXZDMXMPHJSGZ",
        "GSZZQJYLWTJPFSYASMCJBTZKYCWMYTCSJJLJCQLWZMALBXYFBPNLSFHTGJWEJJXXGLLJSTGSHJQLZFKCGNNDSZFDEQFHBS",
        "AQTGLLBXMMYGSZLDYDQMJJRGBJTKGDHGKBLQKBDMBYLXWCXYTTYBKMRTJZXQJBHLMHMJJZMQASLDCYXYQDLQCAFYWYXQHZ"
    };

    /// <summary>
    /// First code of each level-1 letter group, in ascending order. Each group
    /// runs up to the code just before the next group's start.
    /// </summary>
    private static readonly int[] _levelOneStarts =
    {
        0xB0A1, 0xB0C5, 0xB2C1, 0xB4EE, 0xB6EA, 0xB7A2, 0xB8C1, 0xB9FE,
        0xBBF7, 0xBFA6, 0xC0AC, 0xC2E8, 0xC4C3, 0xC5B6, 0xC5BE, 0xC6DA,
        0xC8BB, 0xC8F6, 0xCBFA, 0xCDDA, 0xCEF4, 0xD1B9, 0xD4D1
    };

    /// <summary>Letters matching <see cref="_levelOneStarts"/> (no I, U or V groups).</summary>
    private const string LevelOneInitials = "ABCDEFGHJKLMNOPQRSTWXYZ";

    /// <summary>Last code of the level-1 area (end of the "Z" group).</summary>
    private const int LevelOneEnd = 0xD7F9;

    private static readonly System.Text.Encoding _encoding = System.Text.Encoding.GetEncoding("GB2312");

    #endregion

    /// <summary>
    /// Returns the uppercase pinyin initial of a single Chinese character.
    /// </summary>
    /// <param name="chineseChar">
    /// A string holding one character. If it does not encode to exactly two
    /// bytes (ASCII, several characters, unmappable characters), its first
    /// character is returned unchanged.
    /// </param>
    /// <returns>'A'..'Z' for a recognised character, otherwise the first character of the input.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="chineseChar"/> is null.</exception>
    /// <exception cref="System.ArgumentException"><paramref name="chineseChar"/> is empty.</exception>
    public static char GetFirstChar(string chineseChar)
    {
        if (chineseChar == null)
        {
            throw new System.ArgumentNullException("chineseChar");
        }
        if (chineseChar.Length == 0)
        {
            throw new System.ArgumentException("Input must contain at least one character.", "chineseChar");
        }

        byte[] bytes = _encoding.GetBytes(chineseChar);
        if (bytes.Length != 2)
        {
            return chineseChar[0];
        }
        return GetChar(bytes[0], bytes[1], chineseChar);
    }

    /// <summary>
    /// Resolves the initial for a two-byte code.
    /// </summary>
    /// <param name="c1">Lead byte of the encoded character.</param>
    /// <param name="c2">Trail byte of the encoded character.</param>
    /// <param name="originChar">Original text; its first character is the fallback result.</param>
    /// <returns>The initial, or <c>originChar[0]</c> when the code is not a GB2312 Chinese character.</returns>
    private static char GetChar(byte c1, byte c2, string originChar)
    {
        // Only lead bytes 0xB0..0xF7 with trail bytes 0xA1..0xFE are GB2312
        // Chinese characters. Rejecting other bytes keeps symbols and
        // extension codes (e.g. trail bytes 0x40..0xA0) out of the tables.
        if (c1 < 0xB0 || c1 > 0xF7 || c2 < 0xA1 || c2 > 0xFE)
        {
            return originChar[0];
        }

        int code = (c1 << 8) | c2;

        if (code <= LevelOneEnd)
        {
            // Level 1: find the last group whose start is not above the code.
            for (int i = _levelOneStarts.Length - 1; i >= 0; i--)
            {
                if (code >= _levelOneStarts[i])
                {
                    return LevelOneInitials[i];
                }
            }
            return originChar[0];
        }

        // Level 2: one table row per lead byte starting at 0xD8.
        int row = c1 - 0xD8;
        int column = c2 - 0xA1;
        if (row < 0)
        {
            // Codes 0xD7FA..0xD7FE lie past the level-1 end but before the level-2 table.
            return originChar[0];
        }
        return _regionChar[row][column];
    }
}
这个算法目前还没有发现哪个汉字会出错.