汉字转拼音android实现(HanziToPinyin类)

     

转自:http://blog.csdn.net/spare_h/article/details/6733547

 

   了解 Contacts 的读者会知道(不了解的可以先了解一下):每个联系人都有一个 sort_key 字段,如果查询时没有设置 sortOrder,默认就以 sort_key 字段作为排序依据。名字的检索其实也是根据 sort_key 来做的(比如拨号盘的模糊匹配:数字转成字母,再到拼音,最后得到汉字)。sort_key 是根据名字生成的:如果联系人名字是字母,sort_key 与 name 保持一致;如果名字是汉字,生成的 sort_key 形如"拼音 汉字 拼音 汉字……",其中拼音全部大写,各部分之间以空格分隔,例如:"张三"的 sort_key 为 "ZHANG 张 SAN 三"。

       当名字发生变化时,sort_key 是在 packages/providers/ContactsProvider/src/com/android/providers/contacts/ContactsProvider2.java 中生成的。具体调用如下:

[java] view plain copy print ?
  // Excerpt from ContactsProvider2: for Chinese and CJK display names, both the
  // primary and the alternative sort key are set to the key that ContactLocaleUtils
  // produces for the primary display name and its name style.
  if (displayNameStyle == FullNameStyle.CHINESE
          || displayNameStyle == FullNameStyle.CJK) {
      sortKeyPrimary = sortKeyAlternative =
              ContactLocaleUtils.getIntance().getSortKey(
                      displayNamePrimary, displayNameStyle);
  }
       这里调用了 packages/providers/ContactsProvider/src/com/android/providers/contacts/ContactLocaleUtils.java 中的 ChineseContactUtils.getSortKey() 方法,具体实现如下:
[java] view plain copy print ?
  1. ContactLocaleUtils.getIntance();  
ContactLocaleUtils.getIntance();

取得实例,然后调用

[java] view plain copy print ?
  1. 1.  public String getSortKey(String displayName, int nameStyle) {  
  2.         return getForSort(Integer.valueOf(nameStyle)).getSortKey(displayName);  
  3.     }  
  4.   
  5. 2.     @Override  
  6.         public String getSortKey(String displayName) {  
  7.             ArrayList<Token> tokens = HanziToPinyin.getInstance().get(displayName);  
  8.             if (tokens != null && tokens.size() > 0) {  
  9.                 StringBuilder sb = new StringBuilder();  
  10.                 for (Token token : tokens) {  
  11.                     // Put Chinese character's pinyin, then proceed with the   
  12.                     // character itself.   
  13.                     if (Token.PINYIN == token.type) {  
  14.                         if (sb.length() > 0) {  
  15.                             sb.append(' ');  
  16.                         }  
  17.                         sb.append(token.target);  
  18.                         sb.append(' ');  
  19.                         sb.append(token.source);  
  20.                     } else {  
  21.                         if (sb.length() > 0) {  
  22.                             sb.append(' ');  
  23.                         }  
  24.                         sb.append(token.source);  
  25.                     }  
  26.                 }  
  27.                 return sb.toString();  
  28.             }  
  29.             return super.getSortKey(displayName);  
  30.         }  
1. public String getSortKey(String displayName, int nameStyle) { return getForSort(Integer.valueOf(nameStyle)).getSortKey(displayName); } 2. @Override public String getSortKey(String displayName) { ArrayList<Token> tokens = HanziToPinyin.getInstance().get(displayName); if (tokens != null && tokens.size() > 0) { StringBuilder sb = new StringBuilder(); for (Token token : tokens) { // Put Chinese character's pinyin, then proceed with the // character itself. if (Token.PINYIN == token.type) { if (sb.length() > 0) { sb.append(' '); } sb.append(token.target); sb.append(' '); sb.append(token.source); } else { if (sb.length() > 0) { sb.append(' '); } sb.append(token.source); } } return sb.toString(); } return super.getSortKey(displayName); }
      以上代码调用了 frameworks/base/core/java/com/android/internal/util/HanziToPinyin.java(下文附源码)中的方法来得到拼音。当然,转换的实现依赖系统自带的中文(zh_CN)排序规则数据,但在 Android 上只要取得这个文件,即可完成从汉字到拼音的转换。从路径可以看出,HanziToPinyin 位于框架内部包 com.android.internal 中,不属于公开 SDK,应用不能直接调用;不过可以把这个文件取出来放到自己的项目中直接使用。调用也很简单,上面基本已经给出,看一下 HanziToPinyin 的源码就比较清楚了。以下是我写的一个调用:输入汉字返回拼音,字母原样返回,结果统一转换为小写(默认取得的拼音全部大写)。

[java] view plain copy print ?
  package com.spare.pinyin;

  import java.util.ArrayList;
  import java.util.Locale;

  import com.spare.pinyin.HanziToPinyin.Token;

  /**
   * Convenience wrapper around {@link HanziToPinyin} that returns the lowercase
   * Pinyin spelling of a string.
   */
  public class PinYin {
      /**
       * Converts the input to lowercase Pinyin.
       *
       * <p>Tokens of type {@code Token.PINYIN} contribute their Pinyin
       * ({@code target}); every other token contributes its original text
       * ({@code source}). The concatenated result is lowercased.
       *
       * @param input the text to convert
       * @return the lowercase result, or an empty string when no tokens are produced
       */
      public static String getPinYin(String input) {
          ArrayList<Token> tokens = HanziToPinyin.getInstance().get(input);
          StringBuilder sb = new StringBuilder();
          if (tokens != null) {
              for (Token token : tokens) {
                  sb.append(Token.PINYIN == token.type ? token.target : token.source);
              }
          }
          // Use a fixed locale: with the default locale, Turkish/Azerbaijani devices
          // would lowercase "I" to a dotless i and corrupt spellings such as "XIN".
          return sb.toString().toLowerCase(Locale.US);
      }
  }


以下附上 frameworks/base/core/java/com/android/internal/util/HanziToPinyin.java 源码

[java] view plain copy print ?
  /*
   * Copyright (C) 2009 The Android Open Source Project
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   *      http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */

  package com.android.internal.util;

  import android.text.TextUtils;
  import android.util.Log;

  import java.text.Collator;
  import java.util.ArrayList;
  import java.util.Locale;

  /**
   * An object to convert Chinese character to its corresponding pinyin string.
   * For characters with multiple possible pinyin string, only one is selected
   * according to collator. Polyphone is not supported in this implementation.
   * This class is implemented to achieve the best runtime performance and minimum
   * runtime resources with tolerable sacrifice of accuracy. This implementation
   * highly depends on zh_CN ICU collation data and must be always synchronized with
   * ICU.
   */
  public class HanziToPinyin {
      private static final String TAG = "HanziToPinyin";

      /**
       * Search keys for {@link #getToken(char)}: the entry at index {@code i} pairs
       * with the spelling in {@code PINYINS[i]}. The table is binary-searched with
       * {@link #COLLATOR}, so its order must stay consistent with that collator.
       */
      private static final char[] UNIHANS = {
              '\u5416', '\u54ce', '\u5b89', '\u80ae', '\u51f9', '\u516b', '\u63b0', '\u6273',
              '\u90a6', '\u52f9', '\u9642', '\u5954', '\u4f3b', '\u7680', '\u782d', '\u706c',
              '\u618b', '\u6c43', '\u51ab', '\u7676', '\u5cec', '\u5693', '\u5072', '\u53c2',
              '\u4ed3', '\u64a1', '\u518a', '\u5d7e', '\u564c', '\u6260', '\u62c6', '\u8fbf',
              '\u4f25', '\u6284', '\u8f66', '\u62bb', '\u9637', '\u5403', '\u5145', '\u62bd',
              '\u51fa', '\u640b', '\u5ddb', '\u5205', '\u5439', '\u65fe', '\u8e14', '\u5472',
              '\u4ece', '\u51d1', '\u7c97', '\u6c46', '\u5d14', '\u90a8', '\u6413', '\u5491',
              '\u5446', '\u4e39', '\u5f53', '\u5200', '\u6074', '\u6265', '\u706f', '\u4efe',
              '\u55f2', '\u6541', '\u5201', '\u7239', '\u4e01', '\u4e1f', '\u4e1c', '\u543a',
              '\u5262', '\u8011', '\u5796', '\u5428', '\u591a', '\u59b8', '\u5940', '\u97a5',
              '\u800c', '\u53d1', '\u5e06', '\u531a', '\u98de', '\u5206', '\u4e30', '\u8985',
              '\u4ecf', '\u57ba', '\u7d11', '\u592b', '\u7324', '\u65ee', '\u4f85', '\u5e72',
              '\u5188', '\u768b', '\u6208', '\u7ed9', '\u6839', '\u63ef', '\u55bc', '\u55f0',
              '\u5de5', '\u52fe', '\u4f30', '\u9e39', '\u4e56', '\u5173', '\u5149', '\u5f52',
              '\u4e28', '\u8b34', '\u5459', '\u598e', '\u548d', '\u4f44', '\u592f', '\u8320',
              '\u8bc3', '\u9ed2', '\u62eb', '\u4ea8', '\u53ff', '\u9f41', '\u4e4e', '\u82b1',
              '\u6000', '\u6b22', '\u5ddf', '\u7070', '\u660f', '\u5419', '\u4e0c', '\u52a0',
              '\u620b', '\u6c5f', '\u827d', '\u9636', '\u5dfe', '\u5755', '\u5182', '\u4e29',
              '\u51e5', '\u59e2', '\u5658', '\u519b', '\u5494', '\u5f00', '\u938e', '\u5ffc',
              '\u5c3b', '\u533c', '\u808e', '\u52a5', '\u7a7a', '\u62a0', '\u625d', '\u5938',
              '\u84af', '\u5bbd', '\u5321', '\u4e8f', '\u5764', '\u6269', '\u62c9', '\u4f86',
              '\u5170', '\u5577', '\u635e', '\u4ec2', '\u96f7', '\u8137', '\u68f1', '\u695e',
              '\u550e', '\u4fe9', '\u5afe', '\u826f', '\u8e7d', '\u57d3', '\u53b8', '\u62ce',
              '\u6e9c', '\u9f99', '\u5a04', '\u565c', '\u5b6a', '\u62a1', '\u9831', '\u5988',
              '\u57cb', '\u989f', '\u7264', '\u732b', '\u5445', '\u95e8', '\u6c13', '\u54aa',
              '\u5b80', '\u55b5', '\u4e5c', '\u6c11', '\u540d', '\u8c2c', '\u6478', '\u725f',
              '\u6bcd', '\u62cf', '\u8149', '\u56e1', '\u56d4', '\u5b6c', '\u8bb7', '\u5a1e',
              '\u5ae9', '\u80fd', '\u92b0', '\u62c8', '\u5a18', '\u9e1f', '\u634f', '\u56dc',
              '\u5b81', '\u599e', '\u519c', '\u7fba', '\u5974', '\u597b', '\u9ec1', '\u90cd',
              '\u5662', '\u8bb4', '\u5991', '\u62cd', '\u7705', '\u6c78', '\u629b', '\u5478',
              '\u55b7', '\u5309', '\u4e76', '\u7247', '\u527d', '\u6c15', '\u59d8', '\u4e52',
              '\u948b', '\u5256', '\u4ec6', '\u4e03', '\u6390', '\u5343', '\u545b', '\u6084',
              '\u5207', '\u4eb2', '\u9751', '\u5b86', '\u74d7', '\u533a', '\u5cd1', '\u7094',
              '\u590b', '\u5465', '\u7a63', '\u835b', '\u60f9', '\u4eba', '\u6254', '\u65e5',
              '\u620e', '\u53b9', '\u909a', '\u5827', '\u6875', '\u95f0', '\u633c', '\u4ee8',
              '\u6be2', '\u4e09', '\u6852', '\u63bb', '\u8272', '\u68ee', '\u50e7', '\u6740',
              '\u7b5b', '\u5c71', '\u4f24', '\u5f30', '\u5962', '\u7533', '\u5347', '\u5c38',
              '\u53ce', '\u4e66', '\u5237', '\u8870', '\u95e9', '\u53cc', '\u8c01', '\u542e',
              '\u8bf4', '\u53b6', '\u5fea', '\u51c1', '\u82cf', '\u72fb', '\u590a', '\u5b59',
              '\u5506', '\u4ed6', '\u5b61', '\u574d', '\u6c64', '\u5932', '\u5fd1', '\u81af',
              '\u5254', '\u5929', '\u65eb', '\u6017', '\u5385', '\u70b5', '\u5077', '\u51f8',
              '\u6e4d', '\u63a8', '\u541e', '\u8bac', '\u52b8', '\u6b6a', '\u5f2f', '\u5c23',
              '\u5371', '\u6637', '\u7fc1', '\u631d', '\u4e4c', '\u5915', '\u5477', '\u4ed9',
              '\u4e61', '\u7071', '\u4e9b', '\u5fc3', '\u5174', '\u51f6', '\u4f11', '\u620c',
              '\u5405', '\u75b6', '\u7025', '\u4e2b', '\u54bd', '\u592e', '\u5e7a', '\u503b',
              '\u4e00', '\u4e5a', '\u5e94', '\u5537', '\u4f63', '\u4f18', '\u7ea1', '\u56e6',
              '\u66f0', '\u8480', '\u5e00', '\u707d', '\u5142', '\u7242', '\u50ae', '\u556b',
              '\u9c61', '\u600e', '\u66fd', '\u5412', '\u635a', '\u6cbe', '\u5f20', '\u4f4b',
              '\u8707', '\u8d1e', '\u9eee', '\u4e4b', '\u4e2d', '\u5dde', '\u6731', '\u6293',
              '\u62fd', '\u4e13', '\u5986', '\u96b9', '\u5b92', '\u5353', '\u4ed4', '\u5b97',
              '\u90b9', '\u79df', '\u5297', '\u55fa', '\u5c0a', '\u6628',
          };

      /**
       * Pinyin spellings, one row per {@link #UNIHANS} entry. Each row holds up to six
       * ASCII codes of uppercase letters and is padded with zeros; {@link #getToken(char)}
       * reads a row up to the first zero. Row order (including the repeated rows) is
       * reproduced from the listing unchanged.
       */
      private final static byte[][] PINYINS = {
              {65, 0, 0, 0, 0, 0, }, {65, 73, 0, 0, 0, 0, },
              {65, 78, 0, 0, 0, 0, }, {65, 78, 71, 0, 0, 0, },
              {65, 79, 0, 0, 0, 0, }, {66, 65, 0, 0, 0, 0, },
              {66, 65, 73, 0, 0, 0, }, {66, 65, 78, 0, 0, 0, },
              {66, 65, 78, 71, 0, 0, }, {66, 65, 79, 0, 0, 0, },
              {66, 69, 73, 0, 0, 0, }, {66, 69, 78, 0, 0, 0, },
              {66, 69, 78, 71, 0, 0, }, {66, 73, 0, 0, 0, 0, },
              {66, 73, 65, 78, 0, 0, }, {66, 73, 65, 79, 0, 0, },
              {66, 73, 69, 0, 0, 0, }, {66, 73, 78, 0, 0, 0, },
              {66, 73, 78, 71, 0, 0, }, {66, 79, 0, 0, 0, 0, },
              {66, 85, 0, 0, 0, 0, }, {67, 65, 0, 0, 0, 0, },
              {67, 65, 73, 0, 0, 0, }, {67, 65, 78, 0, 0, 0, },
              {67, 65, 78, 71, 0, 0, }, {67, 65, 79, 0, 0, 0, },
              {67, 69, 0, 0, 0, 0, }, {67, 69, 78, 0, 0, 0, },
              {67, 69, 78, 71, 0, 0, }, {67, 72, 65, 0, 0, 0, },
              {67, 72, 65, 73, 0, 0, }, {67, 72, 65, 78, 0, 0, },
              {67, 72, 65, 78, 71, 0, }, {67, 72, 65, 79, 0, 0, },
              {67, 72, 69, 0, 0, 0, }, {67, 72, 69, 78, 0, 0, },
              {67, 72, 69, 78, 71, 0, }, {67, 72, 73, 0, 0, 0, },
              {67, 72, 79, 78, 71, 0, }, {67, 72, 79, 85, 0, 0, },
              {67, 72, 85, 0, 0, 0, }, {67, 72, 85, 65, 73, 0, },
              {67, 72, 85, 65, 78, 0, }, {67, 72, 85, 65, 78, 71, },
              {67, 72, 85, 73, 0, 0, }, {67, 72, 85, 78, 0, 0, },
              {67, 72, 85, 79, 0, 0, }, {67, 73, 0, 0, 0, 0, },
              {67, 79, 78, 71, 0, 0, }, {67, 79, 85, 0, 0, 0, },
              {67, 85, 0, 0, 0, 0, }, {67, 85, 65, 78, 0, 0, },
              {67, 85, 73, 0, 0, 0, }, {67, 85, 78, 0, 0, 0, },
              {67, 85, 79, 0, 0, 0, }, {68, 65, 0, 0, 0, 0, },
              {68, 65, 73, 0, 0, 0, }, {68, 65, 78, 0, 0, 0, },
              {68, 65, 78, 71, 0, 0, }, {68, 65, 79, 0, 0, 0, },
              {68, 69, 0, 0, 0, 0, }, {68, 69, 78, 0, 0, 0, },
              {68, 69, 78, 71, 0, 0, }, {68, 73, 0, 0, 0, 0, },
              {68, 73, 65, 0, 0, 0, }, {68, 73, 65, 78, 0, 0, },
              {68, 73, 65, 79, 0, 0, }, {68, 73, 69, 0, 0, 0, },
              {68, 73, 78, 71, 0, 0, }, {68, 73, 85, 0, 0, 0, },
              {68, 79, 78, 71, 0, 0, }, {68, 79, 85, 0, 0, 0, },
              {68, 85, 0, 0, 0, 0, }, {68, 85, 65, 78, 0, 0, },
              {68, 85, 73, 0, 0, 0, }, {68, 85, 78, 0, 0, 0, },
              {68, 85, 79, 0, 0, 0, }, {69, 0, 0, 0, 0, 0, },
              {69, 78, 0, 0, 0, 0, }, {69, 78, 71, 0, 0, 0, },
              {69, 82, 0, 0, 0, 0, }, {70, 65, 0, 0, 0, 0, },
              {70, 65, 78, 0, 0, 0, }, {70, 65, 78, 71, 0, 0, },
              {70, 69, 73, 0, 0, 0, }, {70, 69, 78, 0, 0, 0, },
              {70, 69, 78, 71, 0, 0, }, {70, 73, 65, 79, 0, 0, },
              {70, 79, 0, 0, 0, 0, }, {70, 85, 0, 0, 0, 0, },
              {70, 79, 85, 0, 0, 0, }, {70, 85, 0, 0, 0, 0, },
              {71, 85, 73, 0, 0, 0, }, {71, 65, 0, 0, 0, 0, },
              {71, 65, 73, 0, 0, 0, }, {71, 65, 78, 0, 0, 0, },
              {71, 65, 78, 71, 0, 0, }, {71, 65, 79, 0, 0, 0, },
              {71, 69, 0, 0, 0, 0, }, {71, 69, 73, 0, 0, 0, },
              {71, 69, 78, 0, 0, 0, }, {71, 69, 78, 71, 0, 0, },
              {74, 73, 69, 0, 0, 0, }, {71, 69, 0, 0, 0, 0, },
              {71, 79, 78, 71, 0, 0, }, {71, 79, 85, 0, 0, 0, },
              {71, 85, 0, 0, 0, 0, }, {71, 85, 65, 0, 0, 0, },
              {71, 85, 65, 73, 0, 0, }, {71, 85, 65, 78, 0, 0, },
              {71, 85, 65, 78, 71, 0, }, {71, 85, 73, 0, 0, 0, },
              {71, 85, 78, 0, 0, 0, }, {71, 85, 65, 78, 0, 0, },
              {71, 85, 79, 0, 0, 0, }, {72, 65, 0, 0, 0, 0, },
              {72, 65, 73, 0, 0, 0, }, {72, 65, 78, 0, 0, 0, },
              {72, 65, 78, 71, 0, 0, }, {72, 65, 79, 0, 0, 0, },
              {72, 69, 0, 0, 0, 0, }, {72, 69, 73, 0, 0, 0, },
              {72, 69, 78, 0, 0, 0, }, {72, 69, 78, 71, 0, 0, },
              {72, 79, 78, 71, 0, 0, }, {72, 79, 85, 0, 0, 0, },
              {72, 85, 0, 0, 0, 0, }, {72, 85, 65, 0, 0, 0, },
              {72, 85, 65, 73, 0, 0, }, {72, 85, 65, 78, 0, 0, },
              {72, 85, 65, 78, 71, 0, }, {72, 85, 73, 0, 0, 0, },
              {72, 85, 78, 0, 0, 0, }, {72, 85, 79, 0, 0, 0, },
              {74, 73, 0, 0, 0, 0, }, {74, 73, 65, 0, 0, 0, },
              {74, 73, 65, 78, 0, 0, }, {74, 73, 65, 78, 71, 0, },
              {74, 73, 65, 79, 0, 0, }, {74, 73, 69, 0, 0, 0, },
              {74, 73, 78, 0, 0, 0, }, {74, 73, 78, 71, 0, 0, },
              {74, 73, 79, 78, 71, 0, }, {74, 73, 85, 0, 0, 0, },
              {74, 85, 0, 0, 0, 0, }, {74, 85, 65, 78, 0, 0, },
              {74, 85, 69, 0, 0, 0, }, {74, 85, 78, 0, 0, 0, },
              {75, 65, 0, 0, 0, 0, }, {75, 65, 73, 0, 0, 0, },
              {75, 65, 78, 0, 0, 0, }, {75, 65, 78, 71, 0, 0, },
              {75, 65, 79, 0, 0, 0, }, {75, 69, 0, 0, 0, 0, },
              {75, 69, 78, 0, 0, 0, }, {75, 69, 78, 71, 0, 0, },
              {75, 79, 78, 71, 0, 0, }, {75, 79, 85, 0, 0, 0, },
              {75, 85, 0, 0, 0, 0, }, {75, 85, 65, 0, 0, 0, },
              {75, 85, 65, 73, 0, 0, }, {75, 85, 65, 78, 0, 0, },
              {75, 85, 65, 78, 71, 0, }, {75, 85, 73, 0, 0, 0, },
              {75, 85, 78, 0, 0, 0, }, {75, 85, 79, 0, 0, 0, },
              {76, 65, 0, 0, 0, 0, }, {76, 65, 73, 0, 0, 0, },
              {76, 65, 78, 0, 0, 0, }, {76, 65, 78, 71, 0, 0, },
              {76, 65, 79, 0, 0, 0, }, {76, 69, 0, 0, 0, 0, },
              {76, 69, 73, 0, 0, 0, }, {76, 73, 0, 0, 0, 0, },
              {76, 73, 78, 71, 0, 0, }, {76, 69, 78, 71, 0, 0, },
              {76, 73, 0, 0, 0, 0, }, {76, 73, 65, 0, 0, 0, },
              {76, 73, 65, 78, 0, 0, }, {76, 73, 65, 78, 71, 0, },
              {76, 73, 65, 79, 0, 0, }, {76, 73, 69, 0, 0, 0, },
              {76, 73, 78, 0, 0, 0, }, {76, 73, 78, 71, 0, 0, },
              {76, 73, 85, 0, 0, 0, }, {76, 79, 78, 71, 0, 0, },
              {76, 79, 85, 0, 0, 0, }, {76, 85, 0, 0, 0, 0, },
              {76, 85, 65, 78, 0, 0, }, {76, 85, 78, 0, 0, 0, },
              {76, 85, 79, 0, 0, 0, }, {77, 65, 0, 0, 0, 0, },
              {77, 65, 73, 0, 0, 0, }, {77, 65, 78, 0, 0, 0, },
              {77, 65, 78, 71, 0, 0, }, {77, 65, 79, 0, 0, 0, },
              {77, 69, 73, 0, 0, 0, }, {77, 69, 78, 0, 0, 0, },
              {77, 69, 78, 71, 0, 0, }, {77, 73, 0, 0, 0, 0, },
              {77, 73, 65, 78, 0, 0, }, {77, 73, 65, 79, 0, 0, },
              {77, 73, 69, 0, 0, 0, }, {77, 73, 78, 0, 0, 0, },
              {77, 73, 78, 71, 0, 0, }, {77, 73, 85, 0, 0, 0, },
              {77, 79, 0, 0, 0, 0, }, {77, 79, 85, 0, 0, 0, },
              {77, 85, 0, 0, 0, 0, }, {78, 65, 0, 0, 0, 0, },
              {78, 65, 73, 0, 0, 0, }, {78, 65, 78, 0, 0, 0, },
              {78, 65, 78, 71, 0, 0, }, {78, 65, 79, 0, 0, 0, },
              {78, 69, 0, 0, 0, 0, }, {78, 69, 73, 0, 0, 0, },
              {78, 69, 78, 0, 0, 0, }, {78, 69, 78, 71, 0, 0, },
              {78, 73, 0, 0, 0, 0, }, {78, 73, 65, 78, 0, 0, },
              {78, 73, 65, 78, 71, 0, }, {78, 73, 65, 79, 0, 0, },
              {78, 73, 69, 0, 0, 0, }, {78, 73, 78, 0, 0, 0, },
              {78, 73, 78, 71, 0, 0, }, {78, 73, 85, 0, 0, 0, },
              {78, 79, 78, 71, 0, 0, }, {78, 79, 85, 0, 0, 0, },
              {78, 85, 0, 0, 0, 0, }, {78, 85, 65, 78, 0, 0, },
              {78, 85, 78, 0, 0, 0, }, {78, 85, 79, 0, 0, 0, },
              {79, 0, 0, 0, 0, 0, }, {79, 85, 0, 0, 0, 0, },
              {80, 65, 0, 0, 0, 0, }, {80, 65, 73, 0, 0, 0, },
              {80, 65, 78, 0, 0, 0, }, {80, 65, 78, 71, 0, 0, },
              {80, 65, 79, 0, 0, 0, }, {80, 69, 73, 0, 0, 0, },
              {80, 69, 78, 0, 0, 0, }, {80, 69, 78, 71, 0, 0, },
              {80, 73, 0, 0, 0, 0, }, {80, 73, 65, 78, 0, 0, },
              {80, 73, 65, 79, 0, 0, }, {80, 73, 69, 0, 0, 0, },
              {80, 73, 78, 0, 0, 0, }, {80, 73, 78, 71, 0, 0, },
              {80, 79, 0, 0, 0, 0, }, {80, 79, 85, 0, 0, 0, },
              {80, 85, 0, 0, 0, 0, }, {81, 73, 0, 0, 0, 0, },
              {81, 73, 65, 0, 0, 0, }, {81, 73, 65, 78, 0, 0, },
              {81, 73, 65, 78, 71, 0, }, {81, 73, 65, 79, 0, 0, },
              {81, 73, 69, 0, 0, 0, }, {81, 73, 78, 0, 0, 0, },
              {81, 73, 78, 71, 0, 0, }, {81, 73, 79, 78, 71, 0, },
              {81, 73, 85, 0, 0, 0, }, {81, 85, 0, 0, 0, 0, },
              {81, 85, 65, 78, 0, 0, }, {81, 85, 69, 0, 0, 0, },
              {81, 85, 78, 0, 0, 0, }, {82, 65, 78, 0, 0, 0, },
              {82, 65, 78, 71, 0, 0, }, {82, 65, 79, 0, 0, 0, },
              {82, 69, 0, 0, 0, 0, }, {82, 69, 78, 0, 0, 0, },
              {82, 69, 78, 71, 0, 0, }, {82, 73, 0, 0, 0, 0, },
              {82, 79, 78, 71, 0, 0, }, {82, 79, 85, 0, 0, 0, },
              {82, 85, 0, 0, 0, 0, }, {82, 85, 65, 78, 0, 0, },
              {82, 85, 73, 0, 0, 0, }, {82, 85, 78, 0, 0, 0, },
              {82, 85, 79, 0, 0, 0, }, {83, 65, 0, 0, 0, 0, },
              {83, 65, 73, 0, 0, 0, }, {83, 65, 78, 0, 0, 0, },
              {83, 65, 78, 71, 0, 0, }, {83, 65, 79, 0, 0, 0, },
              {83, 69, 0, 0, 0, 0, }, {83, 69, 78, 0, 0, 0, },
              {83, 69, 78, 71, 0, 0, }, {83, 72, 65, 0, 0, 0, },
              {83, 72, 65, 73, 0, 0, }, {83, 72, 65, 78, 0, 0, },
              {83, 72, 65, 78, 71, 0, }, {83, 72, 65, 79, 0, 0, },
              {83, 72, 69, 0, 0, 0, }, {83, 72, 69, 78, 0, 0, },
              {83, 72, 69, 78, 71, 0, }, {83, 72, 73, 0, 0, 0, },
              {83, 72, 79, 85, 0, 0, }, {83, 72, 85, 0, 0, 0, },
              {83, 72, 85, 65, 0, 0, }, {83, 72, 85, 65, 73, 0, },
              {83, 72, 85, 65, 78, 0, }, {83, 72, 85, 65, 78, 71, },
              {83, 72, 85, 73, 0, 0, }, {83, 72, 85, 78, 0, 0, },
              {83, 72, 85, 79, 0, 0, }, {83, 73, 0, 0, 0, 0, },
              {83, 79, 78, 71, 0, 0, }, {83, 79, 85, 0, 0, 0, },
              {83, 85, 0, 0, 0, 0, }, {83, 85, 65, 78, 0, 0, },
              {83, 85, 73, 0, 0, 0, }, {83, 85, 78, 0, 0, 0, },
              {83, 85, 79, 0, 0, 0, }, {84, 65, 0, 0, 0, 0, },
              {84, 65, 73, 0, 0, 0, }, {84, 65, 78, 0, 0, 0, },
              {84, 65, 78, 71, 0, 0, }, {84, 65, 79, 0, 0, 0, },
              {84, 69, 0, 0, 0, 0, }, {84, 69, 78, 71, 0, 0, },
              {84, 73, 0, 0, 0, 0, }, {84, 73, 65, 78, 0, 0, },
              {84, 73, 65, 79, 0, 0, }, {84, 73, 69, 0, 0, 0, },
              {84, 73, 78, 71, 0, 0, }, {84, 79, 78, 71, 0, 0, },
              {84, 79, 85, 0, 0, 0, }, {84, 85, 0, 0, 0, 0, },
              {84, 85, 65, 78, 0, 0, }, {84, 85, 73, 0, 0, 0, },
              {84, 85, 78, 0, 0, 0, }, {84, 85, 79, 0, 0, 0, },
              {87, 65, 0, 0, 0, 0, }, {87, 65, 73, 0, 0, 0, },
              {87, 65, 78, 0, 0, 0, }, {87, 65, 78, 71, 0, 0, },
              {87, 69, 73, 0, 0, 0, }, {87, 69, 78, 0, 0, 0, },
              {87, 69, 78, 71, 0, 0, }, {87, 79, 0, 0, 0, 0, },
              {87, 85, 0, 0, 0, 0, }, {88, 73, 0, 0, 0, 0, },
              {88, 73, 65, 0, 0, 0, }, {88, 73, 65, 78, 0, 0, },
              {88, 73, 65, 78, 71, 0, }, {88, 73, 65, 79, 0, 0, },
              {88, 73, 69, 0, 0, 0, }, {88, 73, 78, 0, 0, 0, },
              {88, 73, 78, 71, 0, 0, }, {88, 73, 79, 78, 71, 0, },
              {88, 73, 85, 0, 0, 0, }, {88, 85, 0, 0, 0, 0, },
              {88, 85, 65, 78, 0, 0, }, {88, 85, 69, 0, 0, 0, },
              {88, 85, 78, 0, 0, 0, }, {89, 65, 0, 0, 0, 0, },
              {89, 65, 78, 0, 0, 0, }, {89, 65, 78, 71, 0, 0, },
              {89, 65, 79, 0, 0, 0, }, {89, 69, 0, 0, 0, 0, },
              {89, 73, 0, 0, 0, 0, }, {89, 73, 78, 0, 0, 0, },
              {89, 73, 78, 71, 0, 0, }, {89, 79, 0, 0, 0, 0, },
              {89, 79, 78, 71, 0, 0, }, {89, 79, 85, 0, 0, 0, },
              {89, 85, 0, 0, 0, 0, }, {89, 85, 65, 78, 0, 0, },
              {89, 85, 69, 0, 0, 0, }, {89, 85, 78, 0, 0, 0, },
              {90, 65, 0, 0, 0, 0, }, {90, 65, 73, 0, 0, 0, },
              {90, 65, 78, 0, 0, 0, }, {90, 65, 78, 71, 0, 0, },
              {90, 65, 79, 0, 0, 0, }, {90, 69, 0, 0, 0, 0, },
              {90, 69, 73, 0, 0, 0, }, {90, 69, 78, 0, 0, 0, },
              {90, 69, 78, 71, 0, 0, }, {90, 72, 65, 0, 0, 0, },
              {90, 72, 65, 73, 0, 0, }, {90, 72, 65, 78, 0, 0, },
              {90, 72, 65, 78, 71, 0, }, {90, 72, 65, 79, 0, 0, },
              {90, 72, 69, 0, 0, 0, }, {90, 72, 69, 78, 0, 0, },
              {90, 72, 69, 78, 71, 0, }, {90, 72, 73, 0, 0, 0, },
              {90, 72, 79, 78, 71, 0, }, {90, 72, 79, 85, 0, 0, },
              {90, 72, 85, 0, 0, 0, }, {90, 72, 85, 65, 0, 0, },
              {90, 72, 85, 65, 73, 0, }, {90, 72, 85, 65, 78, 0, },
              {90, 72, 85, 65, 78, 71, }, {90, 72, 85, 73, 0, 0, },
              {90, 72, 85, 78, 0, 0, }, {90, 72, 85, 79, 0, 0, },
              {90, 73, 0, 0, 0, 0, }, {90, 79, 78, 71, 0, 0, },
              {90, 79, 85, 0, 0, 0, }, {90, 85, 0, 0, 0, 0, },
              {90, 85, 65, 78, 0, 0, }, {90, 85, 73, 0, 0, 0, },
              {90, 85, 78, 0, 0, 0, }, {90, 85, 79, 0, 0, 0, },

          };

      /** First and last Chinese character with known Pinyin according to zh collation */
      private static final String FIRST_PINYIN_UNIHAN =  "\u5416";
      private static final String LAST_PINYIN_UNIHAN =  "\u5497";
      /** The first Chinese character in Unicode block */
      private static final char FIRST_UNIHAN = '\u3400';
      private static final Collator COLLATOR = Collator.getInstance(Locale.CHINA);

      private static HanziToPinyin sInstance;
      private final boolean mHasChinaCollator;

      /** One unit of converted output: a source string, its type and its translation. */
      public static class Token {
          /**
           * Separator between target string for each source char
           */
          public static final String SEPARATOR = " ";

          public static final int LATIN = 1;
          public static final int PINYIN = 2;
          public static final int UNKNOWN = 3;

          public Token() {
          }

          public Token(int type, String source, String target) {
              this.type = type;
              this.source = source;
              this.target = target;
          }
          /**
           * Type of this token, LATIN, PINYIN or UNKNOWN.
           */
          public int type;
          /**
           * Original string before translation.
           */
          public String source;
          /**
           * Translated string of source. For Han, target is corresponding Pinyin.
           * Otherwise target is original string in source.
           */
          public String target;
      }

      /**
       * @param hasChinaCollator whether a zh_CN collator is available; when false,
       *        {@link #get(String)} always returns an empty list
       */
      protected HanziToPinyin(boolean hasChinaCollator) {
          mHasChinaCollator = hasChinaCollator;
      }

      /**
       * Returns the shared instance, creating it on first use. The instance is
       * enabled only when {@link Locale#CHINA} is among the locales reported by
       * {@link Collator#getAvailableLocales()}; otherwise a warning is logged and a
       * disabled instance is created.
       */
      public static HanziToPinyin getInstance() {
          synchronized (HanziToPinyin.class) {
              if (sInstance != null) {
                  return sInstance;
              }
              // Check if zh_CN collation data is available
              boolean hasChinaCollator = false;
              for (Locale locale : Collator.getAvailableLocales()) {
                  if (locale.equals(Locale.CHINA)) {
                      hasChinaCollator = true;
                      break;
                  }
              }
              if (!hasChinaCollator) {
                  Log.w(TAG, "There is no Chinese collator, HanziToPinyin is disabled");
              }
              sInstance = new HanziToPinyin(hasChinaCollator);
              return sInstance;
          }
      }

      /**
       * Classifies a single character and, for a Han character inside the known
       * Pinyin range, looks up its spelling.
       *
       * @param character the character to convert
       * @return a LATIN token for code points below 256; an UNKNOWN token for code
       *         points below {@link #FIRST_UNIHAN} or for characters that collate
       *         outside [FIRST_PINYIN_UNIHAN, LAST_PINYIN_UNIHAN]; otherwise a
       *         PINYIN token whose target is the spelling found in {@link #PINYINS}
       */
      private Token getToken(char character) {
          Token token = new Token();
          final String letter = Character.toString(character);
          token.source = letter;
          int offset = -1;
          int cmp;
          if (character < 256) {
              token.type = Token.LATIN;
              token.target = letter;
              return token;
          } else if (character < FIRST_UNIHAN) {
              token.type = Token.UNKNOWN;
              token.target = letter;
              return token;
          } else {
              cmp = COLLATOR.compare(letter, FIRST_PINYIN_UNIHAN);
              if (cmp < 0) {
                  // Collates before the first character with a known Pinyin.
                  token.type = Token.UNKNOWN;
                  token.target = letter;
                  return token;
              } else if (cmp == 0) {
                  offset = 0;
              } else {
                  cmp = COLLATOR.compare(letter, LAST_PINYIN_UNIHAN);
                  if (cmp > 0) {
                      // Collates after the last character with a known Pinyin.
                      token.type = Token.UNKNOWN;
                      token.target = letter;
                      return token;
                  } else if (cmp == 0) {
                      offset = UNIHANS.length - 1;
                  }
              }
          }

          token.type = Token.PINYIN;
          if (offset < 0) {
              // Binary search over UNIHANS in collation order.
              int begin = 0;
              int end = UNIHANS.length - 1;
              while (begin <= end) {
                  offset = (begin + end) / 2;
                  final String unihan = Character.toString(UNIHANS[offset]);
                  cmp = COLLATOR.compare(letter, unihan);
                  if (cmp == 0) {
                      break;
                  } else if (cmp > 0) {
                      begin = offset + 1;
                  } else {
                      end = offset - 1;
                  }
              }
          }
          // If the last probe collated after the character, step back to the
          // preceding entry, whose range contains the character.
          if (cmp < 0) {
              offset--;
          }
          StringBuilder pinyin = new StringBuilder();
          for (int j = 0; j < PINYINS[offset].length && PINYINS[offset][j] != 0; j++) {
              pinyin.append((char) PINYINS[offset][j]);
          }
          token.target = pinyin.toString();
          return token;
      }

      /**
       * Convert the input to an array of tokens. The sequence of ASCII or Unknown
       * characters without space will be put into a Token, One Hanzi character
       * which has pinyin will be treated as a Token.
       * If there is no China collator, the empty token array is returned.
       *
       * @param input the text to tokenize; null or empty input yields an empty list
       * @return the tokens in input order; space characters end the pending token
       *         and are not themselves included
       */
      public ArrayList<Token> get(final String input) {
          ArrayList<Token> tokens = new ArrayList<Token>();
          if (!mHasChinaCollator || TextUtils.isEmpty(input)) {
              // return empty tokens.
              return tokens;
          }
          final int inputLength = input.length();
          final StringBuilder sb = new StringBuilder();
          int tokenType = Token.LATIN;
          // Go through the input, create a new token when
          // a. Token type changed
          // b. Get the Pinyin of current character.
          // c. current character is space.
          for (int i = 0; i < inputLength; i++) {
              final char character = input.charAt(i);
              if (character == ' ') {
                  if (sb.length() > 0) {
                      addToken(sb, tokens, tokenType);
                  }
              } else if (character < 256) {
                  if (tokenType != Token.LATIN && sb.length() > 0) {
                      addToken(sb, tokens, tokenType);
                  }
                  tokenType = Token.LATIN;
                  sb.append(character);
              } else if (character < FIRST_UNIHAN) {
                  if (tokenType != Token.UNKNOWN && sb.length() > 0) {
                      addToken(sb, tokens, tokenType);
                  }
                  tokenType = Token.UNKNOWN;
                  sb.append(character);
              } else {
                  Token t = getToken(character);
                  if (t.type == Token.PINYIN) {
                      if (sb.length() > 0) {
                          addToken(sb, tokens, tokenType);
                      }
                      tokens.add(t);
                      tokenType = Token.PINYIN;
                  } else {
                      if (tokenType != t.type && sb.length() > 0) {
                          addToken(sb, tokens, tokenType);
                      }
                      tokenType = t.type;
                      sb.append(character);
                  }
              }
          }
          if (sb.length() > 0) {
              addToken(sb, tokens, tokenType);
          }
          return tokens;
      }

      /**
       * Appends the buffered text as one token whose source and target are the same
       * string, then clears the buffer.
       */
      private void addToken(final StringBuilder sb, final ArrayList<Token> tokens,
              final int tokenType) {
          String str = sb.toString();
          tokens.add(new Token(tokenType, str, str));
          sb.setLength(0);
      }

  }
/* * Copyright (C) 2009 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.android.internal.util; import android.text.TextUtils; import android.util.Log; import java.text.Collator; import java.util.ArrayList; import java.util.Locale; /** * An object to convert Chinese character to its corresponding pinyin string. * For characters with multiple possible pinyin string, only one is selected * according to collator. Polyphone is not supported in this implementation. * This class is implemented to achieve the best runtime performance and minimum * runtime resources with tolerable sacrifice of accuracy. This implementation * highly depends on zh_CN ICU collation data and must be always synchronized with * ICU. 
*/ public class HanziToPinyin { private static final String TAG = "HanziToPinyin"; private static final char[] UNIHANS = { '\u5416', '\u54ce', '\u5b89', '\u80ae', '\u51f9', '\u516b', '\u63b0', '\u6273', '\u90a6', '\u52f9', '\u9642', '\u5954', '\u4f3b', '\u7680', '\u782d', '\u706c', '\u618b', '\u6c43', '\u51ab', '\u7676', '\u5cec', '\u5693', '\u5072', '\u53c2', '\u4ed3', '\u64a1', '\u518a', '\u5d7e', '\u564c', '\u6260', '\u62c6', '\u8fbf', '\u4f25', '\u6284', '\u8f66', '\u62bb', '\u9637', '\u5403', '\u5145', '\u62bd', '\u51fa', '\u640b', '\u5ddb', '\u5205', '\u5439', '\u65fe', '\u8e14', '\u5472', '\u4ece', '\u51d1', '\u7c97', '\u6c46', '\u5d14', '\u90a8', '\u6413', '\u5491', '\u5446', '\u4e39', '\u5f53', '\u5200', '\u6074', '\u6265', '\u706f', '\u4efe', '\u55f2', '\u6541', '\u5201', '\u7239', '\u4e01', '\u4e1f', '\u4e1c', '\u543a', '\u5262', '\u8011', '\u5796', '\u5428', '\u591a', '\u59b8', '\u5940', '\u97a5', '\u800c', '\u53d1', '\u5e06', '\u531a', '\u98de', '\u5206', '\u4e30', '\u8985', '\u4ecf', '\u57ba', '\u7d11', '\u592b', '\u7324', '\u65ee', '\u4f85', '\u5e72', '\u5188', '\u768b', '\u6208', '\u7ed9', '\u6839', '\u63ef', '\u55bc', '\u55f0', '\u5de5', '\u52fe', '\u4f30', '\u9e39', '\u4e56', '\u5173', '\u5149', '\u5f52', '\u4e28', '\u8b34', '\u5459', '\u598e', '\u548d', '\u4f44', '\u592f', '\u8320', '\u8bc3', '\u9ed2', '\u62eb', '\u4ea8', '\u53ff', '\u9f41', '\u4e4e', '\u82b1', '\u6000', '\u6b22', '\u5ddf', '\u7070', '\u660f', '\u5419', '\u4e0c', '\u52a0', '\u620b', '\u6c5f', '\u827d', '\u9636', '\u5dfe', '\u5755', '\u5182', '\u4e29', '\u51e5', '\u59e2', '\u5658', '\u519b', '\u5494', '\u5f00', '\u938e', '\u5ffc', '\u5c3b', '\u533c', '\u808e', '\u52a5', '\u7a7a', '\u62a0', '\u625d', '\u5938', '\u84af', '\u5bbd', '\u5321', '\u4e8f', '\u5764', '\u6269', '\u62c9', '\u4f86', '\u5170', '\u5577', '\u635e', '\u4ec2', '\u96f7', '\u8137', '\u68f1', '\u695e', '\u550e', '\u4fe9', '\u5afe', '\u826f', '\u8e7d', '\u57d3', '\u53b8', '\u62ce', '\u6e9c', '\u9f99', '\u5a04', 
'\u565c', '\u5b6a', '\u62a1', '\u9831', '\u5988', '\u57cb', '\u989f', '\u7264', '\u732b', '\u5445', '\u95e8', '\u6c13', '\u54aa', '\u5b80', '\u55b5', '\u4e5c', '\u6c11', '\u540d', '\u8c2c', '\u6478', '\u725f', '\u6bcd', '\u62cf', '\u8149', '\u56e1', '\u56d4', '\u5b6c', '\u8bb7', '\u5a1e', '\u5ae9', '\u80fd', '\u92b0', '\u62c8', '\u5a18', '\u9e1f', '\u634f', '\u56dc', '\u5b81', '\u599e', '\u519c', '\u7fba', '\u5974', '\u597b', '\u9ec1', '\u90cd', '\u5662', '\u8bb4', '\u5991', '\u62cd', '\u7705', '\u6c78', '\u629b', '\u5478', '\u55b7', '\u5309', '\u4e76', '\u7247', '\u527d', '\u6c15', '\u59d8', '\u4e52', '\u948b', '\u5256', '\u4ec6', '\u4e03', '\u6390', '\u5343', '\u545b', '\u6084', '\u5207', '\u4eb2', '\u9751', '\u5b86', '\u74d7', '\u533a', '\u5cd1', '\u7094', '\u590b', '\u5465', '\u7a63', '\u835b', '\u60f9', '\u4eba', '\u6254', '\u65e5', '\u620e', '\u53b9', '\u909a', '\u5827', '\u6875', '\u95f0', '\u633c', '\u4ee8', '\u6be2', '\u4e09', '\u6852', '\u63bb', '\u8272', '\u68ee', '\u50e7', '\u6740', '\u7b5b', '\u5c71', '\u4f24', '\u5f30', '\u5962', '\u7533', '\u5347', '\u5c38', '\u53ce', '\u4e66', '\u5237', '\u8870', '\u95e9', '\u53cc', '\u8c01', '\u542e', '\u8bf4', '\u53b6', '\u5fea', '\u51c1', '\u82cf', '\u72fb', '\u590a', '\u5b59', '\u5506', '\u4ed6', '\u5b61', '\u574d', '\u6c64', '\u5932', '\u5fd1', '\u81af', '\u5254', '\u5929', '\u65eb', '\u6017', '\u5385', '\u70b5', '\u5077', '\u51f8', '\u6e4d', '\u63a8', '\u541e', '\u8bac', '\u52b8', '\u6b6a', '\u5f2f', '\u5c23', '\u5371', '\u6637', '\u7fc1', '\u631d', '\u4e4c', '\u5915', '\u5477', '\u4ed9', '\u4e61', '\u7071', '\u4e9b', '\u5fc3', '\u5174', '\u51f6', '\u4f11', '\u620c', '\u5405', '\u75b6', '\u7025', '\u4e2b', '\u54bd', '\u592e', '\u5e7a', '\u503b', '\u4e00', '\u4e5a', '\u5e94', '\u5537', '\u4f63', '\u4f18', '\u7ea1', '\u56e6', '\u66f0', '\u8480', '\u5e00', '\u707d', '\u5142', '\u7242', '\u50ae', '\u556b', '\u9c61', '\u600e', '\u66fd', '\u5412', '\u635a', '\u6cbe', '\u5f20', '\u4f4b', '\u8707', '\u8d1e', '\u9eee', 
'\u4e4b', '\u4e2d', '\u5dde', '\u6731', '\u6293', '\u62fd', '\u4e13', '\u5986', '\u96b9', '\u5b92', '\u5353', '\u4ed4', '\u5b97', '\u90b9', '\u79df', '\u5297', '\u55fa', '\u5c0a', '\u6628', }; private final static byte[][] PINYINS = { {65, 00, 00, 00, 00, 00, }, {65, 73, 00, 00, 00, 00, }, {65, 78, 00, 00, 00, 00, }, {65, 78, 71, 00, 00, 00, }, {65, 79, 00, 00, 00, 00, }, {66, 65, 00, 00, 00, 00, }, {66, 65, 73, 00, 00, 00, }, {66, 65, 78, 00, 00, 00, }, {66, 65, 78, 71, 00, 00, }, {66, 65, 79, 00, 00, 00, }, {66, 69, 73, 00, 00, 00, }, {66, 69, 78, 00, 00, 00, }, {66, 69, 78, 71, 00, 00, }, {66, 73, 00, 00, 00, 00, }, {66, 73, 65, 78, 00, 00, }, {66, 73, 65, 79, 00, 00, }, {66, 73, 69, 00, 00, 00, }, {66, 73, 78, 00, 00, 00, }, {66, 73, 78, 71, 00, 00, }, {66, 79, 00, 00, 00, 00, }, {66, 85, 00, 00, 00, 00, }, {67, 65, 00, 00, 00, 00, }, {67, 65, 73, 00, 00, 00, }, {67, 65, 78, 00, 00, 00, }, {67, 65, 78, 71, 00, 00, }, {67, 65, 79, 00, 00, 00, }, {67, 69, 00, 00, 00, 00, }, {67, 69, 78, 00, 00, 00, }, {67, 69, 78, 71, 00, 00, }, {67, 72, 65, 00, 00, 00, }, {67, 72, 65, 73, 00, 00, }, {67, 72, 65, 78, 00, 00, }, {67, 72, 65, 78, 71, 00, }, {67, 72, 65, 79, 00, 00, }, {67, 72, 69, 00, 00, 00, }, {67, 72, 69, 78, 00, 00, }, {67, 72, 69, 78, 71, 00, }, {67, 72, 73, 00, 00, 00, }, {67, 72, 79, 78, 71, 00, }, {67, 72, 79, 85, 00, 00, }, {67, 72, 85, 00, 00, 00, }, {67, 72, 85, 65, 73, 00, }, {67, 72, 85, 65, 78, 00, }, {67, 72, 85, 65, 78, 71, }, {67, 72, 85, 73, 00, 00, }, {67, 72, 85, 78, 00, 00, }, {67, 72, 85, 79, 00, 00, }, {67, 73, 00, 00, 00, 00, }, {67, 79, 78, 71, 00, 00, }, {67, 79, 85, 00, 00, 00, }, {67, 85, 00, 00, 00, 00, }, {67, 85, 65, 78, 00, 00, }, {67, 85, 73, 00, 00, 00, }, {67, 85, 78, 00, 00, 00, }, {67, 85, 79, 00, 00, 00, }, {68, 65, 00, 00, 00, 00, }, {68, 65, 73, 00, 00, 00, }, {68, 65, 78, 00, 00, 00, }, {68, 65, 78, 71, 00, 00, }, {68, 65, 79, 00, 00, 00, }, {68, 69, 00, 00, 00, 00, }, {68, 69, 78, 00, 00, 00, }, {68, 69, 78, 71, 00, 00, }, 
{68, 73, 00, 00, 00, 00, }, {68, 73, 65, 00, 00, 00, }, {68, 73, 65, 78, 00, 00, }, {68, 73, 65, 79, 00, 00, }, {68, 73, 69, 00, 00, 00, }, {68, 73, 78, 71, 00, 00, }, {68, 73, 85, 00, 00, 00, }, {68, 79, 78, 71, 00, 00, }, {68, 79, 85, 00, 00, 00, }, {68, 85, 00, 00, 00, 00, }, {68, 85, 65, 78, 00, 00, }, {68, 85, 73, 00, 00, 00, }, {68, 85, 78, 00, 00, 00, }, {68, 85, 79, 00, 00, 00, }, {69, 00, 00, 00, 00, 00, }, {69, 78, 00, 00, 00, 00, }, {69, 78, 71, 00, 00, 00, }, {69, 82, 00, 00, 00, 00, }, {70, 65, 00, 00, 00, 00, }, {70, 65, 78, 00, 00, 00, }, {70, 65, 78, 71, 00, 00, }, {70, 69, 73, 00, 00, 00, }, {70, 69, 78, 00, 00, 00, }, {70, 69, 78, 71, 00, 00, }, {70, 73, 65, 79, 00, 00, }, {70, 79, 00, 00, 00, 00, }, {70, 85, 00, 00, 00, 00, }, {70, 79, 85, 00, 00, 00, }, {70, 85, 00, 00, 00, 00, }, {71, 85, 73, 00, 00, 00, }, {71, 65, 00, 00, 00, 00, }, {71, 65, 73, 00, 00, 00, }, {71, 65, 78, 00, 00, 00, }, {71, 65, 78, 71, 00, 00, }, {71, 65, 79, 00, 00, 00, }, {71, 69, 00, 00, 00, 00, }, {71, 69, 73, 00, 00, 00, }, {71, 69, 78, 00, 00, 00, }, {71, 69, 78, 71, 00, 00, }, {74, 73, 69, 00, 00, 00, }, {71, 69, 00, 00, 00, 00, }, {71, 79, 78, 71, 00, 00, }, {71, 79, 85, 00, 00, 00, }, {71, 85, 00, 00, 00, 00, }, {71, 85, 65, 00, 00, 00, }, {71, 85, 65, 73, 00, 00, }, {71, 85, 65, 78, 00, 00, }, {71, 85, 65, 78, 71, 00, }, {71, 85, 73, 00, 00, 00, }, {71, 85, 78, 00, 00, 00, }, {71, 85, 65, 78, 00, 00, }, {71, 85, 79, 00, 00, 00, }, {72, 65, 00, 00, 00, 00, }, {72, 65, 73, 00, 00, 00, }, {72, 65, 78, 00, 00, 00, }, {72, 65, 78, 71, 00, 00, }, {72, 65, 79, 00, 00, 00, }, {72, 69, 00, 00, 00, 00, }, {72, 69, 73, 00, 00, 00, }, {72, 69, 78, 00, 00, 00, }, {72, 69, 78, 71, 00, 00, }, {72, 79, 78, 71, 00, 00, }, {72, 79, 85, 00, 00, 00, }, {72, 85, 00, 00, 00, 00, }, {72, 85, 65, 00, 00, 00, }, {72, 85, 65, 73, 00, 00, }, {72, 85, 65, 78, 00, 00, }, {72, 85, 65, 78, 71, 00, }, {72, 85, 73, 00, 00, 00, }, {72, 85, 78, 00, 00, 00, }, {72, 85, 79, 00, 00, 00, }, {74, 73, 
00, 00, 00, 00, }, {74, 73, 65, 00, 00, 00, }, {74, 73, 65, 78, 00, 00, }, {74, 73, 65, 78, 71, 00, }, {74, 73, 65, 79, 00, 00, }, {74, 73, 69, 00, 00, 00, }, {74, 73, 78, 00, 00, 00, }, {74, 73, 78, 71, 00, 00, }, {74, 73, 79, 78, 71, 00, }, {74, 73, 85, 00, 00, 00, }, {74, 85, 00, 00, 00, 00, }, {74, 85, 65, 78, 00, 00, }, {74, 85, 69, 00, 00, 00, }, {74, 85, 78, 00, 00, 00, }, {75, 65, 00, 00, 00, 00, }, {75, 65, 73, 00, 00, 00, }, {75, 65, 78, 00, 00, 00, }, {75, 65, 78, 71, 00, 00, }, {75, 65, 79, 00, 00, 00, }, {75, 69, 00, 00, 00, 00, }, {75, 69, 78, 00, 00, 00, }, {75, 69, 78, 71, 00, 00, }, {75, 79, 78, 71, 00, 00, }, {75, 79, 85, 00, 00, 00, }, {75, 85, 00, 00, 00, 00, }, {75, 85, 65, 00, 00, 00, }, {75, 85, 65, 73, 00, 00, }, {75, 85, 65, 78, 00, 00, }, {75, 85, 65, 78, 71, 00, }, {75, 85, 73, 00, 00, 00, }, {75, 85, 78, 00, 00, 00, }, {75, 85, 79, 00, 00, 00, }, {76, 65, 00, 00, 00, 00, }, {76, 65, 73, 00, 00, 00, }, {76, 65, 78, 00, 00, 00, }, {76, 65, 78, 71, 00, 00, }, {76, 65, 79, 00, 00, 00, }, {76, 69, 00, 00, 00, 00, }, {76, 69, 73, 00, 00, 00, }, {76, 73, 00, 00, 00, 00, }, {76, 73, 78, 71, 00, 00, }, {76, 69, 78, 71, 00, 00, }, {76, 73, 00, 00, 00, 00, }, {76, 73, 65, 00, 00, 00, }, {76, 73, 65, 78, 00, 00, }, {76, 73, 65, 78, 71, 00, }, {76, 73, 65, 79, 00, 00, }, {76, 73, 69, 00, 00, 00, }, {76, 73, 78, 00, 00, 00, }, {76, 73, 78, 71, 00, 00, }, {76, 73, 85, 00, 00, 00, }, {76, 79, 78, 71, 00, 00, }, {76, 79, 85, 00, 00, 00, }, {76, 85, 00, 00, 00, 00, }, {76, 85, 65, 78, 00, 00, }, {76, 85, 78, 00, 00, 00, }, {76, 85, 79, 00, 00, 00, }, {77, 65, 00, 00, 00, 00, }, {77, 65, 73, 00, 00, 00, }, {77, 65, 78, 00, 00, 00, }, {77, 65, 78, 71, 00, 00, }, {77, 65, 79, 00, 00, 00, }, {77, 69, 73, 00, 00, 00, }, {77, 69, 78, 00, 00, 00, }, {77, 69, 78, 71, 00, 00, }, {77, 73, 00, 00, 00, 00, }, {77, 73, 65, 78, 00, 00, }, {77, 73, 65, 79, 00, 00, }, {77, 73, 69, 00, 00, 00, }, {77, 73, 78, 00, 00, 00, }, {77, 73, 78, 71, 00, 00, }, {77, 73, 85, 00, 00, 
00, }, {77, 79, 00, 00, 00, 00, }, {77, 79, 85, 00, 00, 00, }, {77, 85, 00, 00, 00, 00, }, {78, 65, 00, 00, 00, 00, }, {78, 65, 73, 00, 00, 00, }, {78, 65, 78, 00, 00, 00, }, {78, 65, 78, 71, 00, 00, }, {78, 65, 79, 00, 00, 00, }, {78, 69, 00, 00, 00, 00, }, {78, 69, 73, 00, 00, 00, }, {78, 69, 78, 00, 00, 00, }, {78, 69, 78, 71, 00, 00, }, {78, 73, 00, 00, 00, 00, }, {78, 73, 65, 78, 00, 00, }, {78, 73, 65, 78, 71, 00, }, {78, 73, 65, 79, 00, 00, }, {78, 73, 69, 00, 00, 00, }, {78, 73, 78, 00, 00, 00, }, {78, 73, 78, 71, 00, 00, }, {78, 73, 85, 00, 00, 00, }, {78, 79, 78, 71, 00, 00, }, {78, 79, 85, 00, 00, 00, }, {78, 85, 00, 00, 00, 00, }, {78, 85, 65, 78, 00, 00, }, {78, 85, 78, 00, 00, 00, }, {78, 85, 79, 00, 00, 00, }, {79, 00, 00, 00, 00, 00, }, {79, 85, 00, 00, 00, 00, }, {80, 65, 00, 00, 00, 00, }, {80, 65, 73, 00, 00, 00, }, {80, 65, 78, 00, 00, 00, }, {80, 65, 78, 71, 00, 00, }, {80, 65, 79, 00, 00, 00, }, {80, 69, 73, 00, 00, 00, }, {80, 69, 78, 00, 00, 00, }, {80, 69, 78, 71, 00, 00, }, {80, 73, 00, 00, 00, 00, }, {80, 73, 65, 78, 00, 00, }, {80, 73, 65, 79, 00, 00, }, {80, 73, 69, 00, 00, 00, }, {80, 73, 78, 00, 00, 00, }, {80, 73, 78, 71, 00, 00, }, {80, 79, 00, 00, 00, 00, }, {80, 79, 85, 00, 00, 00, }, {80, 85, 00, 00, 00, 00, }, {81, 73, 00, 00, 00, 00, }, {81, 73, 65, 00, 00, 00, }, {81, 73, 65, 78, 00, 00, }, {81, 73, 65, 78, 71, 00, }, {81, 73, 65, 79, 00, 00, }, {81, 73, 69, 00, 00, 00, }, {81, 73, 78, 00, 00, 00, }, {81, 73, 78, 71, 00, 00, }, {81, 73, 79, 78, 71, 00, }, {81, 73, 85, 00, 00, 00, }, {81, 85, 00, 00, 00, 00, }, {81, 85, 65, 78, 00, 00, }, {81, 85, 69, 00, 00, 00, }, {81, 85, 78, 00, 00, 00, }, {82, 65, 78, 00, 00, 00, }, {82, 65, 78, 71, 00, 00, }, {82, 65, 79, 00, 00, 00, }, {82, 69, 00, 00, 00, 00, }, {82, 69, 78, 00, 00, 00, }, {82, 69, 78, 71, 00, 00, }, {82, 73, 00, 00, 00, 00, }, {82, 79, 78, 71, 00, 00, }, {82, 79, 85, 00, 00, 00, }, {82, 85, 00, 00, 00, 00, }, {82, 85, 65, 78, 00, 00, }, {82, 85, 73, 00, 00, 00, }, {82, 
85, 78, 00, 00, 00, }, {82, 85, 79, 00, 00, 00, }, {83, 65, 00, 00, 00, 00, }, {83, 65, 73, 00, 00, 00, }, {83, 65, 78, 00, 00, 00, }, {83, 65, 78, 71, 00, 00, }, {83, 65, 79, 00, 00, 00, }, {83, 69, 00, 00, 00, 00, }, {83, 69, 78, 00, 00, 00, }, {83, 69, 78, 71, 00, 00, }, {83, 72, 65, 00, 00, 00, }, {83, 72, 65, 73, 00, 00, }, {83, 72, 65, 78, 00, 00, }, {83, 72, 65, 78, 71, 00, }, {83, 72, 65, 79, 00, 00, }, {83, 72, 69, 00, 00, 00, }, {83, 72, 69, 78, 00, 00, }, {83, 72, 69, 78, 71, 00, }, {83, 72, 73, 00, 00, 00, }, {83, 72, 79, 85, 00, 00, }, {83, 72, 85, 00, 00, 00, }, {83, 72, 85, 65, 00, 00, }, {83, 72, 85, 65, 73, 00, }, {83, 72, 85, 65, 78, 00, }, {83, 72, 85, 65, 78, 71, }, {83, 72, 85, 73, 00, 00, }, {83, 72, 85, 78, 00, 00, }, {83, 72, 85, 79, 00, 00, }, {83, 73, 00, 00, 00, 00, }, {83, 79, 78, 71, 00, 00, }, {83, 79, 85, 00, 00, 00, }, {83, 85, 00, 00, 00, 00, }, {83, 85, 65, 78, 00, 00, }, {83, 85, 73, 00, 00, 00, }, {83, 85, 78, 00, 00, 00, }, {83, 85, 79, 00, 00, 00, }, {84, 65, 00, 00, 00, 00, }, {84, 65, 73, 00, 00, 00, }, {84, 65, 78, 00, 00, 00, }, {84, 65, 78, 71, 00, 00, }, {84, 65, 79, 00, 00, 00, }, {84, 69, 00, 00, 00, 00, }, {84, 69, 78, 71, 00, 00, }, {84, 73, 00, 00, 00, 00, }, {84, 73, 65, 78, 00, 00, }, {84, 73, 65, 79, 00, 00, }, {84, 73, 69, 00, 00, 00, }, {84, 73, 78, 71, 00, 00, }, {84, 79, 78, 71, 00, 00, }, {84, 79, 85, 00, 00, 00, }, {84, 85, 00, 00, 00, 00, }, {84, 85, 65, 78, 00, 00, }, {84, 85, 73, 00, 00, 00, }, {84, 85, 78, 00, 00, 00, }, {84, 85, 79, 00, 00, 00, }, {87, 65, 00, 00, 00, 00, }, {87, 65, 73, 00, 00, 00, }, {87, 65, 78, 00, 00, 00, }, {87, 65, 78, 71, 00, 00, }, {87, 69, 73, 00, 00, 00, }, {87, 69, 78, 00, 00, 00, }, {87, 69, 78, 71, 00, 00, }, {87, 79, 00, 00, 00, 00, }, {87, 85, 00, 00, 00, 00, }, {88, 73, 00, 00, 00, 00, }, {88, 73, 65, 00, 00, 00, }, {88, 73, 65, 78, 00, 00, }, {88, 73, 65, 78, 71, 00, }, {88, 73, 65, 79, 00, 00, }, {88, 73, 69, 00, 00, 00, }, {88, 73, 78, 00, 00, 00, }, {88, 73, 78, 71, 
00, 00, }, {88, 73, 79, 78, 71, 00, }, {88, 73, 85, 00, 00, 00, }, {88, 85, 00, 00, 00, 00, }, {88, 85, 65, 78, 00, 00, }, {88, 85, 69, 00, 00, 00, }, {88, 85, 78, 00, 00, 00, }, {89, 65, 00, 00, 00, 00, }, {89, 65, 78, 00, 00, 00, }, {89, 65, 78, 71, 00, 00, }, {89, 65, 79, 00, 00, 00, }, {89, 69, 00, 00, 00, 00, }, {89, 73, 00, 00, 00, 00, }, {89, 73, 78, 00, 00, 00, }, {89, 73, 78, 71, 00, 00, }, {89, 79, 00, 00, 00, 00, }, {89, 79, 78, 71, 00, 00, }, {89, 79, 85, 00, 00, 00, }, {89, 85, 00, 00, 00, 00, }, {89, 85, 65, 78, 00, 00, }, {89, 85, 69, 00, 00, 00, }, {89, 85, 78, 00, 00, 00, }, {90, 65, 00, 00, 00, 00, }, {90, 65, 73, 00, 00, 00, }, {90, 65, 78, 00, 00, 00, }, {90, 65, 78, 71, 00, 00, }, {90, 65, 79, 00, 00, 00, }, {90, 69, 00, 00, 00, 00, }, {90, 69, 73, 00, 00, 00, }, {90, 69, 78, 00, 00, 00, }, {90, 69, 78, 71, 00, 00, }, {90, 72, 65, 00, 00, 00, }, {90, 72, 65, 73, 00, 00, }, {90, 72, 65, 78, 00, 00, }, {90, 72, 65, 78, 71, 00, }, {90, 72, 65, 79, 00, 00, }, {90, 72, 69, 00, 00, 00, }, {90, 72, 69, 78, 00, 00, }, {90, 72, 69, 78, 71, 00, }, {90, 72, 73, 00, 00, 00, }, {90, 72, 79, 78, 71, 00, }, {90, 72, 79, 85, 00, 00, }, {90, 72, 85, 00, 00, 00, }, {90, 72, 85, 65, 00, 00, }, {90, 72, 85, 65, 73, 00, }, {90, 72, 85, 65, 78, 00, }, {90, 72, 85, 65, 78, 71, }, {90, 72, 85, 73, 00, 00, }, {90, 72, 85, 78, 00, 00, }, {90, 72, 85, 79, 00, 00, }, {90, 73, 00, 00, 00, 00, }, {90, 79, 78, 71, 00, 00, }, {90, 79, 85, 00, 00, 00, }, {90, 85, 00, 00, 00, 00, }, {90, 85, 65, 78, 00, 00, }, {90, 85, 73, 00, 00, 00, }, {90, 85, 78, 00, 00, 00, }, {90, 85, 79, 00, 00, 00, }, }; /** First and last Chinese character with known Pinyin according to zh collation */ private static final String FIRST_PINYIN_UNIHAN = "\u5416"; private static final String LAST_PINYIN_UNIHAN = "\u5497"; /** The first Chinese character in Unicode block */ private static final char FIRST_UNIHAN = '\u3400'; private static final Collator COLLATOR = Collator.getInstance(Locale.CHINA); 
/** Lazily created shared instance; only read or written while holding the class lock. */
private static HanziToPinyin sInstance;

/** Whether a zh_CN collator was found when the shared instance was created. */
private final boolean mHasChinaCollator;

/**
 * One unit of converted output: either a single Han character together with
 * its pinyin, or a run of characters that is passed through unchanged.
 */
public static class Token {
    /**
     * Separator between target string for each source char
     */
    public static final String SEPARATOR = " ";

    public static final int LATIN = 1;
    public static final int PINYIN = 2;
    public static final int UNKNOWN = 3;

    public Token() {
    }

    public Token(int type, String source, String target) {
        this.type = type;
        this.source = source;
        this.target = target;
    }

    /**
     * Type of this token: LATIN, PINYIN or UNKNOWN.
     */
    public int type;

    /**
     * Original string before translation.
     */
    public String source;

    /**
     * Translated string of source. For Han, target is corresponding Pinyin.
     * Otherwise target is original string in source.
     */
    public String target;
}

/**
 * @param hasChinaCollator whether zh_CN collation data is available; when
 *        false, {@link #get(String)} always returns an empty list.
 */
protected HanziToPinyin(boolean hasChinaCollator) {
    mHasChinaCollator = hasChinaCollator;
}

/**
 * Returns the shared converter, creating it on first use.
 *
 * The instance is enabled only if {@link Locale#CHINA} is among the locales
 * reported by {@link Collator#getAvailableLocales()}; otherwise a warning is
 * logged and a disabled instance is created.
 */
public static HanziToPinyin getInstance() {
    synchronized (HanziToPinyin.class) {
        if (sInstance != null) {
            return sInstance;
        }
        // Check if zh_CN collation data is available
        final Locale[] locales = Collator.getAvailableLocales();
        for (Locale candidate : locales) {
            if (candidate.equals(Locale.CHINA)) {
                sInstance = new HanziToPinyin(true);
                return sInstance;
            }
        }
        Log.w(TAG, "There is no Chinese collator, HanziToPinyin is disabled");
        sInstance = new HanziToPinyin(false);
        return sInstance;
    }
}

/** Marks the token as not translated: its target is simply its source text. */
private static Token untranslated(Token token, int type) {
    token.type = type;
    token.target = token.source;
    return token;
}

/**
 * Classifies a single character and, for a Han character inside the known
 * pinyin range, looks up its pinyin.
 *
 * @param character the character to convert
 * @return a LATIN token for characters below 256, an UNKNOWN token for
 *         characters below FIRST_UNIHAN or outside the collation range
 *         [FIRST_PINYIN_UNIHAN, LAST_PINYIN_UNIHAN], and otherwise a PINYIN
 *         token whose target is read from PINYINS
 */
private Token getToken(char character) {
    final Token token = new Token();
    final String letter = Character.toString(character);
    token.source = letter;

    if (character < 256) {
        return untranslated(token, Token.LATIN);
    }
    if (character < FIRST_UNIHAN) {
        return untranslated(token, Token.UNKNOWN);
    }

    // offset stays negative until the row in UNIHANS/PINYINS is known.
    int offset = -1;
    int cmp = COLLATOR.compare(letter, FIRST_PINYIN_UNIHAN);
    if (cmp < 0) {
        return untranslated(token, Token.UNKNOWN);
    }
    if (cmp == 0) {
        offset = 0;
    } else {
        cmp = COLLATOR.compare(letter, LAST_PINYIN_UNIHAN);
        if (cmp > 0) {
            return untranslated(token, Token.UNKNOWN);
        }
        if (cmp == 0) {
            offset = UNIHANS.length - 1;
        }
    }

    token.type = Token.PINYIN;
    if (offset < 0) {
        // Binary search over UNIHANS using the collator's ordering.
        int begin = 0;
        int end = UNIHANS.length - 1;
        while (begin <= end) {
            offset = (begin + end) >>> 1;
            final String unihan = Character.toString(UNIHANS[offset]);
            cmp = COLLATOR.compare(letter, unihan);
            if (cmp == 0) {
                break;
            } else if (cmp > 0) {
                begin = offset + 1;
            } else {
                end = offset - 1;
            }
        }
    }
    // If the last probe sorted after the letter, the letter belongs to the
    // preceding entry.
    if (cmp < 0) {
        offset--;
    }

    // Each PINYINS row holds character codes padded with zeros; stop at the
    // first zero.
    final byte[] row = PINYINS[offset];
    final StringBuilder pinyin = new StringBuilder();
    for (int j = 0; j < row.length && row[j] != 0; j++) {
        pinyin.append((char) row[j]);
    }
    token.target = pinyin.toString();
    return token;
}

/**
 * Convert the input to an array of tokens. A sequence of LATIN or UNKNOWN
 * characters without a space is put into one Token; each Hanzi character
 * which has pinyin is treated as a Token of its own.
 * If there is no China collator, or the input is empty, an empty token
 * array is returned.
 */
public ArrayList<Token> get(final String input) {
    ArrayList<Token> tokens = new ArrayList<Token>();
    if (!mHasChinaCollator || TextUtils.isEmpty(input)) {
        // return empty tokens.
        return tokens;
    }
    final int inputLength = input.length();
    final StringBuilder sb = new StringBuilder();
    int tokenType = Token.LATIN;
    // Go through the input, create a new token when
    // a. Token type changed
    // b. Get the Pinyin of current character.
    // c. current character is space.
    for (int i = 0; i < inputLength; i++) {
        final char character = input.charAt(i);
        if (character == ' ') {
            if (sb.length() > 0) {
                addToken(sb, tokens, tokenType);
            }
        } else if (character < 256) {
            if (tokenType != Token.LATIN && sb.length() > 0) {
                addToken(sb, tokens, tokenType);
            }
            tokenType = Token.LATIN;
            sb.append(character);
        } else if (character < FIRST_UNIHAN) {
            if (tokenType != Token.UNKNOWN && sb.length() > 0) {
                addToken(sb, tokens, tokenType);
            }
            tokenType = Token.UNKNOWN;
            sb.append(character);
        } else {
            Token t = getToken(character);
            if (t.type == Token.PINYIN) {
                // Flush whatever was buffered, then emit the Hanzi as its own token.
                if (sb.length() > 0) {
                    addToken(sb, tokens, tokenType);
                }
                tokens.add(t);
                tokenType = Token.PINYIN;
            } else {
                if (tokenType != t.type && sb.length() > 0) {
                    addToken(sb, tokens, tokenType);
                }
                tokenType = t.type;
                sb.append(character);
            }
        }
    }
    if (sb.length() > 0) {
        addToken(sb, tokens, tokenType);
    }
    return tokens;
}

/**
 * Appends the buffered text as a single token of the given type (source and
 * target are the same string) and clears the buffer.
 */
private void addToken(final StringBuilder sb, final ArrayList<Token> tokens,
        final int tokenType) {
    String str = sb.toString();
    tokens.add(new Token(tokenType, str, str));
    sb.setLength(0);
}
} // end of class HanziToPinyin

你可能感兴趣的:(c,android,String,token,character,collation)