最近客户提出了一个新需求:希望可以对数据的某一字段进行拼音检索。为此我查了一些资料并研究了一下。一般来说,这种作为查询条件的拼音检索都通过数据库实现,而非在程序中实现,因为程序实现无法将条件嵌入到SQL语句中进行匹配。保存一个汉字,使用ZHS16GBK字符集需要两个字节,而使用UTF8字符集需要三个字节。目前拼音只与ZHS16GBK编码有对应关系,编码值的计算方式为:(第一个字节)*256+(第二个字节)-256*256。因此,如果数据库使用的是UTF8字符集,必须先将字符转换成ZHS16GBK字符集。具体实现如下:
1. 创建一个Oracle对象,用于保存拼音和编码的对应关系:
-- One row of the pinyin lookup table: a syllable and the numeric code
-- at which that syllable's range begins.
-- The attribute names are quoted, so they are case-sensitive and must be
-- referenced as "name" and "code".
CREATE OR REPLACE TYPE spcode AS OBJECT
(
    "name" VARCHAR2(10),  -- pinyin syllable, e.g. 'zhuang'
    "code" NUMBER         -- numeric code paired with the syllable
);
2. 创建一个嵌套表类型(TABLE OF spcode),为下面实现管道表函数做准备,定义如下:
-- Collection of spcode rows; used as the return type of the pipelined
-- table function getTblSpcode.
CREATE OR REPLACE TYPE tbl_spcode
    AS TABLE OF spcode;
3.创建一个管道表函数,用于返回拼音字母和编码对应关系的二维表,具体实现如下:
CREATE OR REPLACE FUNCTION getTblSpcode
    RETURN tbl_spcode PIPELINED
IS
    -- Pipes the pinyin lookup table: one spcode row per syllable, paired with
    -- the numeric code at which that syllable starts. Rows are listed (and
    -- piped) in ascending code order, grouped below by initial letter.
    l_syllables CONSTANT tbl_spcode := tbl_spcode(
        -- a
        spcode('a', -20319), spcode('ai', -20317), spcode('an', -20304),
        spcode('ang', -20295), spcode('ao', -20292),
        -- b
        spcode('ba', -20283), spcode('bai', -20265), spcode('ban', -20257),
        spcode('bang', -20242), spcode('bao', -20230), spcode('bei', -20051),
        spcode('ben', -20036), spcode('beng', -20032), spcode('bi', -20026),
        spcode('bian', -20002), spcode('biao', -19990), spcode('bie', -19986),
        spcode('bin', -19982), spcode('bing', -19976), spcode('bo', -19805),
        spcode('bu', -19784),
        -- c
        spcode('ca', -19775), spcode('cai', -19774), spcode('can', -19763),
        spcode('cang', -19756), spcode('cao', -19751), spcode('ce', -19746),
        spcode('ceng', -19741), spcode('cha', -19739), spcode('chai', -19728),
        spcode('chan', -19725), spcode('chang', -19715), spcode('chao', -19540),
        spcode('che', -19531), spcode('chen', -19525), spcode('cheng', -19515),
        spcode('chi', -19500), spcode('chong', -19484), spcode('chou', -19479),
        spcode('chu', -19467), spcode('chuai', -19289), spcode('chuan', -19288),
        spcode('chuang', -19281), spcode('chui', -19275), spcode('chun', -19270),
        spcode('chuo', -19263), spcode('ci', -19261), spcode('cong', -19249),
        spcode('cou', -19243), spcode('cu', -19242), spcode('cuan', -19238),
        spcode('cui', -19235), spcode('cun', -19227), spcode('cuo', -19224),
        -- d
        spcode('da', -19218), spcode('dai', -19212), spcode('dan', -19038),
        spcode('dang', -19023), spcode('dao', -19018), spcode('de', -19006),
        spcode('deng', -19003), spcode('di', -18996), spcode('dian', -18977),
        spcode('diao', -18961), spcode('die', -18952), spcode('ding', -18783),
        spcode('diu', -18774), spcode('dong', -18773), spcode('dou', -18763),
        spcode('du', -18756), spcode('duan', -18741), spcode('dui', -18735),
        spcode('dun', -18731), spcode('duo', -18722),
        -- e
        spcode('e', -18710), spcode('en', -18697), spcode('er', -18696),
        -- f
        spcode('fa', -18526), spcode('fan', -18518), spcode('fang', -18501),
        spcode('fei', -18490), spcode('fen', -18478), spcode('feng', -18463),
        spcode('fo', -18448), spcode('fou', -18447), spcode('fu', -18446),
        -- g
        spcode('ga', -18239), spcode('gai', -18237), spcode('gan', -18231),
        spcode('gang', -18220), spcode('gao', -18211), spcode('ge', -18201),
        spcode('gei', -18184), spcode('gen', -18183), spcode('geng', -18181),
        spcode('gong', -18012), spcode('gou', -17997), spcode('gu', -17988),
        spcode('gua', -17970), spcode('guai', -17964), spcode('guan', -17961),
        spcode('guang', -17950), spcode('gui', -17947), spcode('gun', -17931),
        spcode('guo', -17928),
        -- h
        spcode('ha', -17922), spcode('hai', -17759), spcode('han', -17752),
        spcode('hang', -17733), spcode('hao', -17730), spcode('he', -17721),
        spcode('hei', -17703), spcode('hen', -17701), spcode('heng', -17697),
        spcode('hong', -17692), spcode('hou', -17683), spcode('hu', -17676),
        spcode('hua', -17496), spcode('huai', -17487), spcode('huan', -17482),
        spcode('huang', -17468), spcode('hui', -17454), spcode('hun', -17433),
        spcode('huo', -17427),
        -- j
        spcode('ji', -17417), spcode('jia', -17202), spcode('jian', -17185),
        spcode('jiang', -16983), spcode('jiao', -16970), spcode('jie', -16942),
        spcode('jin', -16915), spcode('jing', -16733), spcode('jiong', -16708),
        spcode('jiu', -16706), spcode('ju', -16689), spcode('juan', -16664),
        spcode('jue', -16657), spcode('jun', -16647),
        -- k
        spcode('ka', -16474), spcode('kai', -16470), spcode('kan', -16465),
        spcode('kang', -16459), spcode('kao', -16452), spcode('ke', -16448),
        spcode('ken', -16433), spcode('keng', -16429), spcode('kong', -16427),
        spcode('kou', -16423), spcode('ku', -16419), spcode('kua', -16412),
        spcode('kuai', -16407), spcode('kuan', -16403), spcode('kuang', -16401),
        spcode('kui', -16393), spcode('kun', -16220), spcode('kuo', -16216),
        -- l
        spcode('la', -16212), spcode('lai', -16205), spcode('lan', -16202),
        spcode('lang', -16187), spcode('lao', -16180), spcode('le', -16171),
        spcode('lei', -16169), spcode('leng', -16158), spcode('li', -16155),
        spcode('lia', -15959), spcode('lian', -15958), spcode('liang', -15944),
        spcode('liao', -15933), spcode('lie', -15920), spcode('lin', -15915),
        spcode('ling', -15903), spcode('liu', -15889), spcode('long', -15878),
        spcode('lou', -15707), spcode('lu', -15701), spcode('lv', -15681),
        spcode('luan', -15667), spcode('lue', -15661), spcode('lun', -15659),
        spcode('luo', -15652),
        -- m
        spcode('ma', -15640), spcode('mai', -15631), spcode('man', -15625),
        spcode('mang', -15454), spcode('mao', -15448), spcode('me', -15436),
        spcode('mei', -15435), spcode('men', -15419), spcode('meng', -15416),
        spcode('mi', -15408), spcode('mian', -15394), spcode('miao', -15385),
        spcode('mie', -15377), spcode('min', -15375), spcode('ming', -15369),
        spcode('miu', -15363), spcode('mo', -15362), spcode('mou', -15183),
        spcode('mu', -15180),
        -- n
        spcode('na', -15165), spcode('nai', -15158), spcode('nan', -15153),
        spcode('nang', -15150), spcode('nao', -15149), spcode('ne', -15144),
        spcode('nei', -15143), spcode('nen', -15141), spcode('neng', -15140),
        spcode('ni', -15139), spcode('nian', -15128), spcode('niang', -15121),
        spcode('niao', -15119), spcode('nie', -15117), spcode('nin', -15110),
        spcode('ning', -15109), spcode('niu', -14941), spcode('nong', -14937),
        spcode('nu', -14933), spcode('nv', -14930), spcode('nuan', -14929),
        spcode('nue', -14928), spcode('nuo', -14926),
        -- o
        spcode('o', -14922), spcode('ou', -14921),
        -- p
        spcode('pa', -14914), spcode('pai', -14908), spcode('pan', -14902),
        spcode('pang', -14894), spcode('pao', -14889), spcode('pei', -14882),
        spcode('pen', -14873), spcode('peng', -14871), spcode('pi', -14857),
        spcode('pian', -14678), spcode('piao', -14674), spcode('pie', -14670),
        spcode('pin', -14668), spcode('ping', -14663), spcode('po', -14654),
        spcode('pu', -14645),
        -- q
        spcode('qi', -14630), spcode('qia', -14594), spcode('qian', -14429),
        spcode('qiang', -14407), spcode('qiao', -14399), spcode('qie', -14384),
        spcode('qin', -14379), spcode('qing', -14368), spcode('qiong', -14355),
        spcode('qiu', -14353), spcode('qu', -14345), spcode('quan', -14170),
        spcode('que', -14159), spcode('qun', -14151),
        -- r
        spcode('ran', -14149), spcode('rang', -14145), spcode('rao', -14140),
        spcode('re', -14137), spcode('ren', -14135), spcode('reng', -14125),
        spcode('ri', -14123), spcode('rong', -14122), spcode('rou', -14112),
        spcode('ru', -14109), spcode('ruan', -14099), spcode('rui', -14097),
        spcode('run', -14094), spcode('ruo', -14092),
        -- s
        spcode('sa', -14090), spcode('sai', -14087), spcode('san', -14083),
        spcode('sang', -13917), spcode('sao', -13914), spcode('se', -13910),
        spcode('sen', -13907), spcode('seng', -13906), spcode('sha', -13905),
        spcode('shai', -13896), spcode('shan', -13894), spcode('shang', -13878),
        spcode('shao', -13870), spcode('she', -13859), spcode('shen', -13847),
        spcode('sheng', -13831), spcode('shi', -13658), spcode('shou', -13611),
        spcode('shu', -13601), spcode('shua', -13406), spcode('shuai', -13404),
        spcode('shuan', -13400), spcode('shuang', -13398), spcode('shui', -13395),
        spcode('shun', -13391), spcode('shuo', -13387), spcode('si', -13383),
        spcode('song', -13367), spcode('sou', -13359), spcode('su', -13356),
        spcode('suan', -13343), spcode('sui', -13340), spcode('sun', -13329),
        spcode('suo', -13326),
        -- t
        spcode('ta', -13318), spcode('tai', -13147), spcode('tan', -13138),
        spcode('tang', -13120), spcode('tao', -13107), spcode('te', -13096),
        spcode('teng', -13095), spcode('ti', -13091), spcode('tian', -13076),
        spcode('tiao', -13068), spcode('tie', -13063), spcode('ting', -13060),
        spcode('tong', -12888), spcode('tou', -12875), spcode('tu', -12871),
        spcode('tuan', -12860), spcode('tui', -12858), spcode('tun', -12852),
        spcode('tuo', -12849),
        -- w
        spcode('wa', -12838), spcode('wai', -12831), spcode('wan', -12829),
        spcode('wang', -12812), spcode('wei', -12802), spcode('wen', -12607),
        spcode('weng', -12597), spcode('wo', -12594), spcode('wu', -12585),
        -- x
        spcode('xi', -12556), spcode('xia', -12359), spcode('xian', -12346),
        spcode('xiang', -12320), spcode('xiao', -12300), spcode('xie', -12120),
        spcode('xin', -12099), spcode('xing', -12089), spcode('xiong', -12074),
        spcode('xiu', -12067), spcode('xu', -12058), spcode('xuan', -12039),
        spcode('xue', -11867), spcode('xun', -11861),
        -- y
        spcode('ya', -11847), spcode('yan', -11831), spcode('yang', -11798),
        spcode('yao', -11781), spcode('ye', -11604), spcode('yi', -11589),
        spcode('yin', -11536), spcode('ying', -11358), spcode('yo', -11340),
        spcode('yong', -11339), spcode('you', -11324), spcode('yu', -11303),
        spcode('yuan', -11097), spcode('yue', -11077), spcode('yun', -11067),
        -- z
        spcode('za', -11055), spcode('zai', -11052), spcode('zan', -11045),
        spcode('zang', -11041), spcode('zao', -11038), spcode('ze', -11024),
        spcode('zei', -11020), spcode('zen', -11019), spcode('zeng', -11018),
        spcode('zha', -11014), spcode('zhai', -10838), spcode('zhan', -10832),
        spcode('zhang', -10815), spcode('zhao', -10800), spcode('zhe', -10790),
        spcode('zhen', -10780), spcode('zheng', -10764), spcode('zhi', -10587),
        spcode('zhong', -10544), spcode('zhou', -10533), spcode('zhu', -10519),
        spcode('zhua', -10331), spcode('zhuai', -10329), spcode('zhuan', -10328),
        spcode('zhuang', -10322), spcode('zhui', -10315), spcode('zhun', -10309),
        spcode('zhuo', -10307), spcode('zi', -10296), spcode('zong', -10281),
        spcode('zou', -10274), spcode('zu', -10270), spcode('zuan', -10262),
        spcode('zui', -10260), spcode('zun', -10256), spcode('zuo', -10254)
    );
BEGIN
    -- Emit the rows one at a time, in the order they are declared above.
    FOR idx IN 1 .. l_syllables.COUNT LOOP
        PIPE ROW(l_syllables(idx));
    END LOOP;

    RETURN;
END;
4.创建汉字转拼音的函数
CREATE OR REPLACE FUNCTION getSpell (
    cnString IN VARCHAR2
) RETURN VARCHAR2
IS
    /*
     * Converts a string to unaccented pinyin, one syllable per Chinese
     * character, using the syllable/code table piped by getTblSpcode.
     *
     * Parameters:
     *   cnString - text to convert, in the database character set.
     *
     * Returns:
     *   cnString with every single-byte character copied through unchanged,
     *   every character covered by the lookup table replaced by its syllable,
     *   and every other multi-byte character replaced by '?'.
     *   A NULL argument (which in Oracle includes the empty string) yields NULL.
     *
     * Raises:
     *   VALUE_ERROR (ORA-06502) if the result does not fit the 32767-byte
     *   result buffer.
     */

    -- Code of the first table entry ('a').
    c_first_code CONSTANT PLS_INTEGER := -20319;
    -- 0xD7F9 - 65536: the last character of the pinyin-ordered GB2312 area.
    -- The final syllable 'zuo' STARTS at -10254 and runs through this code,
    -- so -10254 must not be used as the upper bound.
    c_last_code  CONSTANT PLS_INTEGER := -10247;
    -- Pinyin-ordered characters always have a trail byte of 0xA1 or above;
    -- lower trail bytes belong to GBK extension areas that the table does not
    -- describe.
    c_min_trail  CONSTANT PLS_INTEGER := 161;

    lv_spell VARCHAR2(32767);  -- accumulated result
    lv_char  VARCHAR2(30);     -- character currently being converted
    lv_hex   VARCHAR2(64);     -- GBK bytes of lv_char as uppercase hex
    lv_temp  VARCHAR2(30);     -- syllable found for lv_char, if any
    li_code  PLS_INTEGER;      -- byte1 * 256 + byte2 - 65536
BEGIN
    -- LENGTH(NULL) is NULL, and a NULL loop bound raises ORA-06502.
    IF cnString IS NULL THEN
        RETURN NULL;
    END IF;

    FOR i IN 1 .. LENGTH(cnString) LOOP
        lv_char := SUBSTR(cnString, i, 1);

        IF LENGTHB(lv_char) = 1 THEN
            -- Single-byte characters are passed through as they are.
            lv_spell := lv_spell || lv_char;
        ELSE
            -- Re-encode from the database character set to GBK, so the same
            -- path serves GBK and UTF8 databases alike. A character with no
            -- GBK form does not come back as two bytes and is rejected below.
            lv_hex  := RAWTOHEX(UTL_I18N.STRING_TO_RAW(lv_char, 'ZHS16GBK'));
            lv_temp := NULL;

            IF LENGTH(lv_hex) = 4
               AND TO_NUMBER(SUBSTR(lv_hex, 3, 2), 'XX') >= c_min_trail
            THEN
                li_code := TO_NUMBER(lv_hex, 'XXXX') - 65536;

                IF li_code BETWEEN c_first_code AND c_last_code THEN
                    -- Pick the entry with the greatest code not above li_code.
                    -- A plain MAX("name") would return the alphabetically last
                    -- name instead, which is wrong wherever the table is not
                    -- in alphabetical order ('lv' before 'luan', 'nv' before
                    -- 'nuan').
                    SELECT MAX("name") KEEP (DENSE_RANK LAST ORDER BY "code")
                      INTO lv_temp
                      FROM TABLE(getTblSpcode)
                     WHERE "code" <= li_code;
                END IF;
            END IF;

            -- Anything that could not be mapped is reported, never dropped.
            lv_spell := lv_spell || NVL(lv_temp, '?');
        END IF;
    END LOOP;

    RETURN lv_spell;
END;
5.测试一下
-- Smoke test: convert a short all-Chinese phrase to pinyin.
-- NOTE(review): expected output is presumably 'hanzizhuanpinyin' -- confirm on a live database.
SELECT getSpell('汉字转拼音')
  FROM DUAL