<?php /** * 汉字转拼音类 * 输入的汉字字符串统一先转换为GBK,然后进行拼音转换,英文和标点符号原样输出。 * UTF-8 -> GBK -> PINYIN -> UTF-8 * 注:多音字无法识别,转换汉字为一级汉字3千多个,生僻字不行, * 字符串中个别特殊符号的存在可能造成转换输出不正确 * @author muyi * @version v 0.1.0 2012/10/17 */ class CnToPinYin { // 拼音音节表 private $pinyin=array( 'zuo' => -10254, 'zun' => -10256, 'zui' => -10260, 'zuan' => -10262, 'zu' => -10270, 'zou' => -10274, 'zong' => -10281, 'zi' => -10296, 'zhuo' => -10307, 'zhun' => -10309, 'zhui' => -10315, 'zhuang' => -10322, 'zhuan' => -10328, 'zhuai' => -10329, 'zhua' => -10331, 'zhu' => -10519, 'zhou' => -10533, 'zhong' => -10544, 'zhi' => -10587, 'zheng' => -10764, 'zhen' => -10780, 'zhe' => -10790, 'zhao' => -10800, 'zhang' => -10815, 'zhan' => -10832, 'zhai' => -10838, 'zha' => -11014, 'zeng' => -11018, 'zen' => -11019, 'zei' => -11020, 'ze' => -11024, 'zao' => -11038, 'zang' => -11041, 'zan' => -11045, 'zai' => -11052, 'za' => -11055, 'yun' => -11067, 'yue' => -11077, 'yuan' => -11097, 'yu' => -11303, 'you' => -11324, 'yong' => -11339, 'yo' => -11340, 'ying' => -11358, 'yin' => -11536, 'yi' => -11589, 'ye' => -11604, 'yao' => -11781, 'yang' => -11798, 'yan' => -11831, 'ya' => -11847, 'xun' => -11861, 'xue' => -11867, 'xuan' => -12039, 'xu' => -12058, 'xiu' => -12067, 'xiong' => -12074, 'xing' => -12089, 'xin' => -12099, 'xie' => -12120, 'xiao' => -12300, 'xiang' => -12320, 'xian' => -12346, 'xia' => -12359, 'xi' => -12556, 'wu' => -12585, 'wo' => -12594, 'weng' => -12597, 'wen' => -12607, 'wei' => -12802, 'wang' => -12812, 'wan' => -12829, 'wai' => -12831, 'wa' => -12838, 'tuo' => -12849, 'tun' => -12852, 'tui' => -12858, 'tuan' => -12860, 'tu' => -12871, 'tou' => -12875, 'tong' => -12888, 'ting' => -13060, 'tie' => -13063, 'tiao' => -13068, 'tian' => -13076, 'ti' => -13091, 'teng' => -13095, 'te' => -13096, 'tao' => -13107, 'tang' => -13120, 'tan' => -13138, 'tai' => -13147, 'ta' => -13318, 'suo' => -13326, 'sun' => -13329, 'sui' => -13340, 'suan' => -13343, 'su' => -13356, 'sou' => -13359, 'song' => -13367, 'si' => -13383, 'shuo' => -13387, 'shun' => -13391, 'shui' => -13395, 'shuang' => -13398, 'shuan' => -13400, 'shuai' => -13404, 'shua' => -13406, 'shu' => -13601, 'shou' => -13611, 'shi' => -13658, 'sheng' => -13831, 'shen' => -13847, 'she' => -13859, 'shao' => -13870, 'shang' => -13878, 'shan' => -13894, 'shai' => -13896, 'sha' => -13905, 'seng' => -13906, 'sen' => -13907, 'se' => -13910, 'sao' => -13914, 'sang' => -13917, 'san' => -14083, 'sai' => -14087, 'sa' => -14090, 'ruo' => -14092, 'run' => -14094, 'rui' => -14097, 'ruan' => -14099, 'ru' => -14109, 'rou' => -14112, 'rong' => -14122, 'ri' => -14123, 'reng' => -14125, 'ren' => -14135, 're' => -14137, 'rao' => -14140, 'rang' => -14145, 'ran' => -14149, 'qun' => -14151, 'que' => -14159, 'quan' => -14170, 'qu' => -14345, 'qiu' => -14353, 'qiong' => -14355, 'qing' => -14368, 'qin' => -14379, 'qie' => -14384, 'qiao' => -14399, 'qiang' => -14407, 'qian' => -14429, 'qia' => -14594, 'qi' => -14630, 'pu' => -14645, 'po' => -14654, 'ping' => -14663, 'pin' => -14668, 'pie' => -14670, 'piao' => -14674, 'pian' => -14678, 'pi' => -14857, 'peng' => -14871, 'pen' => -14873, 'pei' => -14882, 'pao' => -14889, 'pang' => -14894, 'pan' => -14902, 'pai' => -14908, 'pa' => -14914, 'ou' => -14921, 'o' => -14922, 'nuo' => -14926, 'nue' => -14928, 'nuan' => -14929, 'nv' => -14930, 'nu' => -14933, 'nong' => -14937, 'niu' => -14941, 'ning' => -15109, 'nin' => -15110, 'nie' => -15117, 'niao' => -15119, 'niang' => -15121, 'nian' => -15128, 'ni' => -15139, 'neng' => -15140, 'nen' => -15141, 'nei' => -15143, 'ne' => -15144, 'nao' => -15149, 'nang' => -15150, 'nan' => -15153, 'nai' => -15158, 'na' => -15165, 'mu' => -15180, 'mou' => -15183, 'mo' => -15362, 'miu' => -15363, 'ming' => -15369, 'min' => -15375, 'mie' => -15377, 'miao' => -15385, 'mian' => -15394, 'mi' => -15408, 'meng' => -15416, 'men' => -15419, 'mei' => -15435, 'me' => -15436, 'mao' => -15448, 'mang' => -15454, 'man' => -15625, 'mai' => -15631, 'ma' => -15640, 'luo' => -15652, 'lun' => -15659, 'lue' => -15661, 'luan' => -15667, 'lv' => -15681, 'lu' => -15701, 'lou' => -15707, 'long' => -15878, 'liu' => -15889, 'ling' => -15903, 'lin' => -15915, 'lie' => -15920, 'liao' => -15933, 'liang' => -15944, 'lian' => -15958, 'lia' => -15959, 'li' => -16155, 'leng' => -16158, 'lei' => -16169, 'le' => -16171, 'lao' => -16180, 'lang' => -16187, 'lan' => -16202, 'lai' => -16205, 'la' => -16212, 'kuo' => -16216, 'kun' => -16220, 'kui' => -16393, 'kuang' => -16401, 'kuan' => -16403, 'kuai' => -16407, 'kua' => -16412, 'ku' => -16419, 'kou' => -16423, 'kong' => -16427, 'keng' => -16429, 'ken' => -16433, 'ke' => -16448, 'kao' => -16452, 'kang' => -16459, 'kan' => -16465, 'kai' => -16470, 'ka' => -16474, 'jun' => -16647, 'jue' => -16657, 'juan' => -16664, 'ju' => -16689, 'jiu' => -16706, 'jiong' => -16708, 'jing' => -16733, 'jin' => -16915, 'jie' => -16942, 'jiao' => -16970, 'jiang' => -16983, 'jian' => -17185, 'jia' => -17202, 'ji' => -17417, 'huo' => -17427, 'hun' => -17433, 'hui' => -17454, 'huang' => -17468, 'huan' => -17482, 'huai' => -17487, 'hua' => -17496, 'hu' => -17676, 'hou' => -17683, 'hong' => -17692, 'heng' => -17697, 'hen' => -17701, 'hei' => -17703, 'he' => -17721, 'hao' => -17730, 'hang' => -17733, 'han' => -17752, 'hai' => -17759, 'ha' => -17922, 'guo' => -17928, 'gun' => -17931, 'gui' => -17947, 'guang' => -17950, 'guan' => -17961, 'guai' => -17964, 'gua' => -17970, 'gu' => -17988, 'gou' => -17997, 'gong' => -18012, 'geng' => -18181, 'gen' => -18183, 'gei' => -18184, 'ge' => -18201, 'gao' => -18211, 'gang' => -18220, 'gan' => -18231, 'gai' => -18237, 'ga' => -18239, 'fu' => -18446, 'fou' => -18447, 'fo' => -18448, 'feng' => -18463, 'fen' => -18478, 'fei' => -18490, 'fang' => -18501, 'fan' => -18518, 'fa' => -18526, 'er' => -18696, 'en' => -18697, 'e' => -18710, 'duo' => -18722, 'dun' => -18731, 'dui' => -18735, 'duan' => -18741, 'du' => -18756, 'dou' => -18763, 'dong' => -18773, 'diu' => -18774, 'ding' => -18783, 'die' => -18952, 'diao' => -18961, 'dian' => -18977, 'di' => -18996, 'deng' => -19003, 'de' => -19006, 'dao' => -19018, 'dang' => -19023, 'dan' => -19038, 'dai' => -19212, 'da' => -19218, 'cuo' => -19224, 'cun' => -19227, 'cui' => -19235, 'cuan' => -19238, 'cu' => -19242, 'cou' => -19243, 'cong' => -19249, 'ci' => -19261, 'chuo' => -19263, 'chun' => -19270, 'chui' => -19275, 'chuang' => -19281, 'chuan' => -19288, 'chuai' => -19289, 'chu' => -19467, 'chou' => -19479, 'chong' => -19484, 'chi' => -19500, 'cheng' => -19515, 'chen' => -19525, 'che' => -19531, 'chao' => -19540, 'chang' => -19715, 'chan' => -19725, 'chai' => -19728, 'cha' => -19739, 'ceng' => -19741, 'ce' => -19746, 'cao' => -19751, 'cang' => -19756, 'can' => -19763, 'cai' => -19774, 'ca' => -19775, 'bu' => -19784, 'bo' => -19805, 'bing' => -19976, 'bin' => -19982, 'bie' => -19986, 'biao' => -19990, 'bian' => -20002, 'bi' => -20026, 'beng' => -20032, 'ben' => -20036, 'bei' => -20051, 'bao' => -20230, 'bang' => -20242, 'ban' => -20257, 'bai' => -20265, 'ba' => -20283, 'ao' => -20292, 'ang' => -20295, 'an' => -20304, 'ai' => -20317, 'a' => -20319 ); // 转换后汉字字符对应的拼音字符串数组 private $arrCNtoPY=array(); public function __construct() { } // 返回首字母 private function _substr($s) { return substr($s,0,1); } // 返回大写首字母 private function _substrtoupper($s) { return strtoupper(substr($s,0,1)); } // 返回首字母大写的字符串 private function _ucfirst($s) { return ucfirst($s); } // 分析字符串,将一级汉字字符转成编码,再从拼音音节表中查询拼音,存入数组中。 private function strToCode($str) { for($i=0;$i<strlen($str);$i++) { $p=ord(substr($str,$i,1)); if($p>160) { $q=ord(substr($str,++$i,1)); $p=$p*256+$q-65536; } $char=$this->codeToPinYin($p); if($char==null) { $c=chr(($p+65536-$q)/256).chr($q); array_push($this->arrCNtoPY,$c); } else { array_push($this->arrCNtoPY,$char); } } } // 将编码从拼音音节表中查询拼音, // [0,160]返回英文字符 // >-10247 or <-20319返回空 // 其它返回拼音字符串 private function codeToPinYin($num) { if($num>0&&$num<160) { return chr($num); } elseif ($num<-20319||$num>-10247) { return null; } else { foreach($this->pinyin as $key=>$value) { if($value<=$num) return $key; } } } /* printPinYin:输出转换后的拼音字符串,汉字转成拼音,其它字符保持原样 $strCn : 输入的中文字符串 $delimiter : 输出每个汉字拼音之间分隔符 $mode : 0为全拼输出(字母小写) 1为简拼输出(每个汉字第一个字母,字母小写) 2为全拼输出(字母全大写)3为简拼输出(字母大写) 4为全拼输出(首字母大写) $code : 输入的中文字符串编码方式,UTF-8,GBK,GB2312 返回值:带有分隔符的拼音字符串 $a=new CnToPinYin(); $cn='字符串'; $str=$a->printPinYin($cn,' ',0,'UTF-8'); */ function printPinYin ($strCn, $delimiter='', $mode=0, $code='GBK') { if(!empty($arrCNtoPY)) unset($arrCNtoPY); if($code!='GBK') $strCn=iconv($code,'GBK',$strCn); $this->strToCode($strCn); if($mode>4 || $mode<0) $mode=0; switch($mode) { case 0: $str=implode($delimiter,$this->arrCNtoPY); break; case 1: $c=array_map(array($this, '_substr'),$this->arrCNtoPY); $str=implode($delimiter,$c); break; case 2: $str=strtoupper(implode($delimiter,$this->arrCNtoPY)); break; case 3: $c=array_map(array($this, '_substrtoupper'),$this->arrCNtoPY); $str=implode($delimiter,$c); break; case 4: $c=array_map(array($this, '_ucfirst'),$this->arrCNtoPY); $str=implode($delimiter,$c); break; default: break; } if($code!='GBK') $str=iconv('GBK',$code,$str); return $str; } } ?>
注:原文地址