一个比较全的汉字转拼音工具类

 前两天无聊，把 J2ME 的代码（GB2312 转拼音）移植到了 C#。其实特别简单，基本上只是把 Java 的类型和类库调用换成 C# 中对应的写法。

此外,跟大家介绍一个很强大的工具:Java Language Conversion Assistant。使用它可以轻松的将java类库转换为c#工程。特别是算法型的代码,转换效果非常好。

我曾经用它转换过一个 QR Code、DM 混合编码的算法，喝杯茶的功夫就全部转换完成了。当然，它会提示一些需要手动修改的地方；我又用了 10 分钟改完这些代码，就可以正常执行了。有兴趣的朋友可以自己找找，它就集成在 VS.NET 中。

/// <summary>
/// Converts Chinese characters to pinyin by looking up their GB2312 byte codes.
/// <para>
/// <c>getAllPY</c> returns the full (toneless) pinyin syllable of each character and
/// <c>getFirstPY</c> returns the upper-case initial letter. Characters that cannot be
/// resolved are passed through unchanged.
/// </para>
/// <para>
/// Non-ASCII input requires the "gb2312" encoding to be available to
/// <c>System.Text.Encoding.GetEncoding</c>. Framework types are fully qualified so the
/// class does not depend on any <c>using</c> directive of the surrounding file.
/// </para>
/// </summary>
class GB2PY
{
    /// <summary>Number of Latin letters covered by <see cref="FirstLetterStarts"/>.</summary>
    private const int LetterCount = 26;

    /// <summary>
    /// Lowest two-byte code (high byte * 256 + low byte) for each initial letter A..Z,
    /// followed by one extra entry: the highest code still attributed to Z.
    /// Equal neighbouring entries mark letters that own no characters (I, U and V).
    /// </summary>
    private static readonly int[] FirstLetterStarts =
    {
        45217, 45253, 45761, 46318, 46826,
        47010, 47297, 47614, 47614, 48119, 49062, 49324, 49896, 50371,
        50614, 50622, 50906, 51387, 51446, 52218, 52218, 52218, 52698,
        52980, 53689, 54481, 55289
    };

    /// <summary>
    /// Pinyin syllables, index-aligned with <see cref="PinyinCodes"/>
    /// (reverse alphabetical order, "zuo" first and "a" last).
    /// </summary>
    private static readonly string[] PinyinValues =
    {
        "zuo", "zun", "zui", "zuan", "zu",
        "zou", "zong", "zi", "zhuo", "zhun", "zhui", "zhuang", "zhuan",
        "zhuai", "zhua", "zhu", "zhou", "zhong", "zhi", "zheng",
        "zhen", "zhe", "zhao", "zhang", "zhan", "zhai", "zha", "zeng",
        "zen", "zei", "ze", "zao", "zang", "zan", "zai", "za", "yun",
        "yue", "yuan", "yu", "you", "yong", "yo", "ying", "yin", "yi",
        "ye", "yao", "yang", "yan", "ya", "xun", "xue", "xuan", "xu",
        "xiu", "xiong", "xing", "xin", "xie", "xiao", "xiang", "xian",
        "xia", "xi", "wu", "wo", "weng", "wen", "wei", "wang", "wan",
        "wai", "wa", "tuo", "tun", "tui", "tuan", "tu", "tou", "tong",
        "ting", "tie", "tiao", "tian", "ti", "teng", "te", "tao",
        "tang", "tan", "tai", "ta", "suo", "sun", "sui", "suan", "su",
        "sou", "song", "si", "shuo", "shun", "shui", "shuang", "shuan",
        "shuai", "shua", "shu", "shou", "shi", "sheng", "shen", "she",
        "shao", "shang", "shan", "shai", "sha", "seng", "sen", "se",
        "sao", "sang", "san", "sai", "sa", "ruo", "run", "rui", "ruan",
        "ru", "rou", "rong", "ri", "reng", "ren", "re", "rao", "rang",
        "ran", "qun", "que", "quan", "qu", "qiu", "qiong", "qing",
        "qin", "qie", "qiao", "qiang", "qian", "qia", "qi", "pu", "po",
        "ping", "pin", "pie", "piao", "pian", "pi", "peng", "pen",
        "pei", "pao", "pang", "pan", "pai", "pa", "ou", "o", "nuo",
        "nue", "nuan", "nv", "nu", "nong", "niu", "ning", "nin", "nie",
        "niao", "niang", "nian", "ni", "neng", "nen", "nei", "ne",
        "nao", "nang", "nan", "nai", "na", "mu", "mou", "mo", "miu",
        "ming", "min", "mie", "miao", "mian", "mi", "meng", "men",
        "mei", "me", "mao", "mang", "man", "mai", "ma", "luo", "lun",
        "lue", "luan", "lv", "lu", "lou", "long", "liu", "ling", "lin",
        "lie", "liao", "liang", "lian", "lia", "li", "leng", "lei",
        "le", "lao", "lang", "lan", "lai", "la", "kuo", "kun", "kui",
        "kuang", "kuan", "kuai", "kua", "ku", "kou", "kong", "keng",
        "ken", "ke", "kao", "kang", "kan", "kai", "ka", "jun", "jue",
        "juan", "ju", "jiu", "jiong", "jing", "jin", "jie", "jiao",
        "jiang", "jian", "jia", "ji", "huo", "hun", "hui", "huang",
        "huan", "huai", "hua", "hu", "hou", "hong", "heng", "hen",
        "hei", "he", "hao", "hang", "han", "hai", "ha", "guo", "gun",
        "gui", "guang", "guan", "guai", "gua", "gu", "gou", "gong",
        "geng", "gen", "gei", "ge", "gao", "gang", "gan", "gai", "ga",
        "fu", "fou", "fo", "feng", "fen", "fei", "fang", "fan", "fa",
        "er", "en", "e", "duo", "dun", "dui", "duan", "du", "dou",
        "dong", "diu", "ding", "die", "diao", "dian", "di", "deng",
        "de", "dao", "dang", "dan", "dai", "da", "cuo", "cun", "cui",
        "cuan", "cu", "cou", "cong", "ci", "chuo", "chun", "chui",
        "chuang", "chuan", "chuai", "chu", "chou", "chong", "chi",
        "cheng", "chen", "che", "chao", "chang", "chan", "chai", "cha",
        "ceng", "ce", "cao", "cang", "can", "cai", "ca", "bu", "bo",
        "bing", "bin", "bie", "biao", "bian", "bi", "beng", "ben",
        "bei", "bao", "bang", "ban", "bai", "ba", "ao", "ang", "an",
        "ai", "a"
    };

    /// <summary>
    /// Lowest code of each syllable in <see cref="PinyinValues"/>, expressed as
    /// (high byte * 256 + low byte) - 65536 and stored in descending order.
    /// A code belongs to the first entry that is less than or equal to it.
    /// </summary>
    private static readonly int[] PinyinCodes =
    {
        -10254, -10256, -10260, -10262,
        -10270, -10274, -10281, -10296, -10307, -10309, -10315, -10322,
        -10328, -10329, -10331, -10519, -10533, -10544, -10587, -10764,
        -10780, -10790, -10800, -10815, -10832, -10838, -11014, -11018,
        -11019, -11020, -11024, -11038, -11041, -11045, -11052, -11055,
        -11067, -11077, -11097, -11303, -11324, -11339, -11340, -11358,
        -11536, -11589, -11604, -11781, -11798, -11831, -11847, -11861,
        -11867, -12039, -12058, -12067, -12074, -12089, -12099, -12120,
        -12300, -12320, -12346, -12359, -12556, -12585, -12594, -12597,
        -12607, -12802, -12812, -12829, -12831, -12838, -12849, -12852,
        -12858, -12860, -12871, -12875, -12888, -13060, -13063, -13068,
        -13076, -13091, -13095, -13096, -13107, -13120, -13138, -13147,
        -13318, -13326, -13329, -13340, -13343, -13356, -13359, -13367,
        -13383, -13387, -13391, -13395, -13398, -13400, -13404, -13406,
        -13601, -13611, -13658, -13831, -13847, -13859, -13870, -13878,
        -13894, -13896, -13905, -13906, -13907, -13910, -13914, -13917,
        -14083, -14087, -14090, -14092, -14094, -14097, -14099, -14109,
        -14112, -14122, -14123, -14125, -14135, -14137, -14140, -14145,
        -14149, -14151, -14159, -14170, -14345, -14353, -14355, -14368,
        -14379, -14384, -14399, -14407, -14429, -14594, -14630, -14645,
        -14654, -14663, -14668, -14670, -14674, -14678, -14857, -14871,
        -14873, -14882, -14889, -14894, -14902, -14908, -14914, -14921,
        -14922, -14926, -14928, -14929, -14930, -14933, -14937, -14941,
        -15109, -15110, -15117, -15119, -15121, -15128, -15139, -15140,
        -15141, -15143, -15144, -15149, -15150, -15153, -15158, -15165,
        -15180, -15183, -15362, -15363, -15369, -15375, -15377, -15385,
        -15394, -15408, -15416, -15419, -15435, -15436, -15448, -15454,
        -15625, -15631, -15640, -15652, -15659, -15661, -15667, -15681,
        -15701, -15707, -15878, -15889, -15903, -15915, -15920, -15933,
        -15944, -15958, -15959, -16155, -16158, -16169, -16171, -16180,
        -16187, -16202, -16205, -16212, -16216, -16220, -16393, -16401,
        -16403, -16407, -16412, -16419, -16423, -16427, -16429, -16433,
        -16448, -16452, -16459, -16465, -16470, -16474, -16647, -16657,
        -16664, -16689, -16706, -16708, -16733, -16915, -16942, -16970,
        -16983, -17185, -17202, -17417, -17427, -17433, -17454, -17468,
        -17482, -17487, -17496, -17676, -17683, -17692, -17697, -17701,
        -17703, -17721, -17730, -17733, -17752, -17759, -17922, -17928,
        -17931, -17947, -17950, -17961, -17964, -17970, -17988, -17997,
        -18012, -18181, -18183, -18184, -18201, -18211, -18220, -18231,
        -18237, -18239, -18446, -18447, -18448, -18463, -18478, -18490,
        -18501, -18518, -18526, -18696, -18697, -18710, -18722, -18731,
        -18735, -18741, -18756, -18763, -18773, -18774, -18783, -18952,
        -18961, -18977, -18996, -19003, -19006, -19018, -19023, -19038,
        -19212, -19218, -19224, -19227, -19235, -19238, -19242, -19243,
        -19249, -19261, -19263, -19270, -19275, -19281, -19288, -19289,
        -19467, -19479, -19484, -19500, -19515, -19525, -19531, -19540,
        -19715, -19725, -19728, -19739, -19741, -19746, -19751, -19756,
        -19763, -19774, -19775, -19784, -19805, -19976, -19982, -19986,
        -19990, -20002, -20026, -20032, -20036, -20051, -20230, -20242,
        -20257, -20265, -20283, -20292, -20295, -20304, -20317, -20319
    };

    /// <summary>
    /// Characters treated as token separators by <see cref="isSpliter"/> and <see cref="split"/>.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the published listing shows four entries, two of which render as a
    /// blank. They are treated as the space character here; confirm they were not
    /// full-width punctuation lost in transcription.
    /// </remarks>
    private static readonly char[] Separators = { ',', ' ', ';' };

    /// <summary>Cached encoding; resolved on first use so ASCII-only callers never need it.</summary>
    private static System.Text.Encoding gbEncoding;

    /// <summary>Gets the "gb2312" encoding, looking it up once and reusing it afterwards.</summary>
    private static System.Text.Encoding Gb2312
    {
        get
        {
            if (gbEncoding == null)
            {
                gbEncoding = System.Text.Encoding.GetEncoding("gb2312");
            }
            return gbEncoding;
        }
    }

    /// <summary>
    /// Replaces every character of <paramref name="gb2312"/> with its full pinyin syllable.
    /// </summary>
    /// <param name="gb2312">Text to convert; may be null.</param>
    /// <returns>
    /// The converted text, with unresolvable characters copied through unchanged.
    /// Null, empty or whitespace-only input is returned as is.
    /// </returns>
    public static string getAllPY(string gb2312)
    {
        if (gb2312 == null || gb2312.Trim().Length == 0)
        {
            return gb2312;
        }

        System.Text.StringBuilder result = new System.Text.StringBuilder(gb2312.Length * 2);
        foreach (char c in gb2312)
        {
            result.Append(getAllPY(c));
        }
        return result.ToString();
    }

    /// <summary>
    /// Returns the full pinyin syllable of a single character.
    /// </summary>
    /// <param name="gb2312">Character to convert.</param>
    /// <returns>
    /// The syllable from the lookup table, or the character itself when it is ASCII or
    /// has no table entry.
    /// </returns>
    public static string getAllPY(char gb2312)
    {
        // ASCII maps to itself; answering here avoids the encoding lookup entirely.
        if (gb2312 <= 0x7F)
        {
            return gb2312.ToString();
        }

        // A non-negative code means the character did not encode to two bytes (for
        // example the encoder substituted '?'); getSpellByAscii yields null for those,
        // so the original character is preserved instead of the substitute byte.
        string spell = getSpellByAscii(getCnAscii(gb2312));
        return spell == null ? gb2312.ToString() : spell;
    }

    /// <summary>
    /// Returns the upper-case initial pinyin letter of a single character.
    /// </summary>
    /// <param name="ch">Character to convert.</param>
    /// <returns>
    /// A letter 'A'..'Z', or <paramref name="ch"/> itself when it is ASCII, does not
    /// encode to exactly two bytes, or lies outside the ranges of the letter table.
    /// </returns>
    public static char getFirstPY(char ch)
    {
        if (ch <= 0x7F)
        {
            return ch;
        }

        byte[] bytes = Gb2312.GetBytes(new char[] { ch });
        if (bytes.Length != 2)
        {
            // No two-byte code available, so there is nothing to look up.
            return ch;
        }

        int gb = (bytes[0] << 8) | bytes[1];
        if (gb < FirstLetterStarts[0])
        {
            return ch;
        }

        for (int i = 0; i < LetterCount; ++i)
        {
            if (match(i, gb))
            {
                return (char)('A' + i);
            }
        }
        return ch;
    }

    /// <summary>
    /// Replaces every character of <paramref name="src"/> with its initial pinyin letter.
    /// </summary>
    /// <param name="src">Text to convert; may be null.</param>
    /// <returns>The converted text, or null when <paramref name="src"/> is null.</returns>
    public static string getFirstPY(string src)
    {
        if (src == null)
        {
            return null;
        }

        System.Text.StringBuilder result = new System.Text.StringBuilder(src.Length);
        foreach (char c in src)
        {
            result.Append(getFirstPY(c));
        }
        return result.ToString();
    }

    /// <summary>
    /// Computes the lookup code of a character from its encoded bytes.
    /// </summary>
    /// <param name="cn">Character to encode.</param>
    /// <returns>
    /// The single byte value when the character encodes to one byte;
    /// (high * 256 + low) - 65536, always negative, when it encodes to two bytes;
    /// 0 for any other byte count.
    /// </returns>
    private static int getCnAscii(char cn)
    {
        byte[] bytes = Gb2312.GetBytes(new char[] { cn });
        if (bytes.Length == 1)
        {
            return bytes[0];
        }
        if (bytes.Length == 2)
        {
            return ((bytes[0] << 8) | bytes[1]) - 0x10000;
        }
        return 0;
    }

    /// <summary>
    /// Looks up the pinyin syllable for a code produced by <see cref="getCnAscii"/>.
    /// </summary>
    /// <param name="ascii">Lookup code.</param>
    /// <returns>
    /// The syllable, or null when the code is outside the range -20319..-10247 covered
    /// by the table (which includes every non-negative code).
    /// </returns>
    private static string getSpellByAscii(int ascii)
    {
        if (ascii < -20319 || ascii > -10247)
        {
            return null;
        }

        // PinyinCodes is descending: find the first entry that is <= ascii. The range
        // check above guarantees the last entry (-20319) always qualifies.
        int low = 0;
        int high = PinyinCodes.Length - 1;
        while (low < high)
        {
            int mid = low + ((high - low) >> 1);
            if (PinyinCodes[mid] <= ascii)
            {
                high = mid;
            }
            else
            {
                low = mid + 1;
            }
        }
        return PinyinValues[low];
    }

    /// <summary>
    /// Tests whether a two-byte code falls into the range of the letter at index
    /// <paramref name="i"/> of <see cref="FirstLetterStarts"/>.
    /// </summary>
    /// <param name="i">Letter index, 0 for 'A' through 25 for 'Z'.</param>
    /// <param name="gb">Two-byte code (high byte * 256 + low byte).</param>
    private static bool match(int i, int gb)
    {
        if (gb < FirstLetterStarts[i])
        {
            return false;
        }

        // Skip following letters that share this start code (they own no characters),
        // so the upper bound is the start of the next letter that actually has a range.
        int next = i + 1;
        while (next < LetterCount && FirstLetterStarts[next] == FirstLetterStarts[i])
        {
            ++next;
        }

        // The final table entry closes the range of Z and is itself included.
        return next == LetterCount
            ? gb <= FirstLetterStarts[next]
            : gb < FirstLetterStarts[next];
    }

    /// <summary>
    /// Tells whether <paramref name="c"/> is one of the token separators
    /// (comma, space or semicolon).
    /// </summary>
    public static bool isSpliter(char c)
    {
        foreach (char separator in Separators)
        {
            if (c == separator)
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Splits <paramref name="src"/> into the non-empty tokens found between separators.
    /// </summary>
    /// <param name="src">Text to split; may be null.</param>
    /// <returns>
    /// The tokens in order of appearance, or null when <paramref name="src"/> is null
    /// or contains no token.
    /// </returns>
    public static string[] split(string src)
    {
        if (src == null)
        {
            return null;
        }

        // Trim first so surrounding whitespace other than the space separator
        // (tabs, line breaks) never ends up inside the first or last token.
        string[] tokens = src.Trim().Split(Separators, System.StringSplitOptions.RemoveEmptyEntries);
        return tokens.Length > 0 ? tokens : null;
    }
}

你可能感兴趣的:(工具类)