部分银行的核心系统(Core Banking)运行在AS/400上,而AS/400采用EBCDIC编码,因此在做渠道整合时会涉及转码问题。为了避免冷僻字无法转换的问题,这里采用GBK字符集。
package com.ebcdic; import java.io.BufferedReader; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.util.LinkedList; import java.util.Queue; import java.util.TreeMap; /** * * @author Kevin * @since jdk1.5 * */ public class EbcdicGbkConverter { final static String GBK_FILE="GBK_CVT.txt"; final static String ebcdicCharsetName="Cp1047"; private static TreeMap <Character,Integer> gbk2Ebcdic=new TreeMap<Character,Integer> (); private static TreeMap <Integer,Character> ebcdic2Gbk=new TreeMap<Integer,Character> (); private static char gbk2EbcdicChar[] ; private static int gbk2EbcdicInt[]; private static char ebcdic2GbkChar[] ; private static int ebcdic2GbkInt[]; static { try { initEbcdicGBKMapping(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * Initial the EBCDIC and GBK mapping * @throws IOException */ private static void initEbcdicGBKMapping() throws IOException{ InputStream input=EbcdicGbkConverter.class.getResource(GBK_FILE).openStream(); BufferedReader reader=new BufferedReader(new InputStreamReader(input)); String record; while((record=reader.readLine())!=null){ char gbkChar=record.charAt(0); String ebcdicChar=record.substring(1, 5); Integer ebcdicInt=Integer.valueOf(ebcdicChar,16); gbk2Ebcdic.put(gbkChar, ebcdicInt); ebcdic2Gbk.put(ebcdicInt, gbkChar); } gbk2EbcdicChar=new char[gbk2Ebcdic.size()]; gbk2EbcdicInt=new int[gbk2Ebcdic.size()]; int index=0; for(Character c:gbk2Ebcdic.keySet()){ gbk2EbcdicChar[index]=c.charValue(); gbk2EbcdicInt[index++]=gbk2Ebcdic.get(c).intValue(); } ebcdic2GbkChar=new char[ebcdic2Gbk.size()]; ebcdic2GbkInt=new int[ebcdic2Gbk.size()]; index=0; for(Integer i:ebcdic2Gbk.keySet()){ ebcdic2GbkChar[index]=ebcdic2Gbk.get(i).charValue(); ebcdic2GbkInt[index++]=i.intValue(); } } private static boolean isEnglishChar(char c){ if(c <0xFF ) 
return true; else return false; } /** * convert the string into EBCDIC format * @throws IOException */ public static byte[] stringToEbcdic(String data) throws IOException{ ByteArrayOutputStream out=new ByteArrayOutputStream(); char dataChar[]=data.toCharArray(); Queue<Integer> gbkQueue= new LinkedList<Integer>(); String eng; for(int i=0;i<dataChar.length;i++){ char c=dataChar[i]; if(isEnglishChar(c)){ if(gbkQueue.size()>0){ readGBKQueue(out,gbkQueue); } eng=new String(new char[]{c}); byte buff[]=eng.getBytes(ebcdicCharsetName); out.write(buff); }else{ //the Dichotomy is faster than hash table //seaqueue.add(gbk2Ebcdic.get(c)); //Add the Chinese Character in the queue first gbkQueue.add(midSearch(c)); } } if(gbkQueue.size()>0){ readGBKQueue(out,gbkQueue); } return out.toByteArray(); } /** * The Chinese character start with '0x0e' and end with '0x0f' in EBCDIC character set * @param out * @param queue * @throws IOException */ private static void readGBKQueue(OutputStream out,Queue <Integer>queue) throws IOException{ out.write(0x0e); Integer data; while((data=queue.poll())!=null){ int dataInt=data.intValue(); out.write((dataInt>>8)&0xff); out.write(dataInt&0xff); } out.write(0x0f); } /** * Dichotomy to search * @param c * @return */ private static int midSearch(char c){ int start=0; int end=gbk2EbcdicChar.length-1; while(start<=end){ int mid=(start+end)>>1; char midChar=gbk2EbcdicChar[mid]; if(c>midChar){ start=mid +1 ; }else if(c<midChar){ end=mid -1 ; }else{ return gbk2EbcdicInt[mid]; } } return -1; } /** * Dichotomy to search * @param c * @return */ private static char midSearchEbcdicToGBK(int c){ int start=0; int end=ebcdic2GbkInt.length-1; while(start<=end){ int mid=(start+end)>>1; int midValue=ebcdic2GbkInt[mid]; if(c>midValue){ start=mid +1 ; }else if(c<midValue){ end=mid -1 ; }else{ return ebcdic2GbkChar[mid]; } } return ' '; } /** * Convert the EBCDIC to GBK format String * @param data * @return * @throws UnsupportedEncodingException */ public static String 
ebcdicToGBK(byte data[]) throws UnsupportedEncodingException{ Queue<Integer> ebcdicQueue= new LinkedList<Integer>(); StringBuffer buff=new StringBuffer(); for(int i=0;i<data.length;i++){ int b=data[i]&0xff; if(b==0x0e){ ebcdicQueue.add(data[++i]&0xff); }else if(b==0x0f){ readEbcdicQueue(buff,ebcdicQueue); }else{ if(ebcdicQueue.size()==0) buff.append(new String(new byte[]{data[i]}, ebcdicCharsetName)); else ebcdicQueue.add(data[i]&0xff); } } return buff.toString(); } private static void readEbcdicQueue(StringBuffer buff,Queue <Integer>ebcdicQueue){ Integer data1; while((data1=ebcdicQueue.poll())!=null){ int data2=ebcdicQueue.poll(); int data=(data1<<8)+data2; char c=midSearchEbcdicToGBK(data); buff.append(c); } } /** * Convert the string to 16 Radix format */ public static String str2HexStr(String str) { return byte2HexStr(str.getBytes()); } public static String byte2HexStr(byte data[]) { char[] chars = "0123456789abcdef".toCharArray(); StringBuilder sb = new StringBuilder(""); byte[] bs = data; int bit; for (int i = 0; i < bs.length; i++) { bit = (bs[i] & 0x0f0) >> 4; sb.append(chars[bit]); bit = bs[i] & 0x0f; sb.append(chars[bit]); sb.append(" "); } return sb.toString(); } /** * @param args * @throws IOException */ public static void main(String[] args) throws IOException { // TODO Auto-generated method stub EbcdicGbkConverter con=new EbcdicGbkConverter(); byte buff[]=null; buff=EbcdicGbkConverter.stringToEbcdic("AS400 Ebcdic Converter Testing"); System.out.println("EBCDIC:["+byte2HexStr(buff)+"]"); String st=EbcdicGbkConverter.ebcdicToGBK(buff); System.out.println("Eng:"+st); buff=EbcdicGbkConverter.stringToEbcdic("AS400 Ebcdic Converter Testing 冷僻字测试-镕"); System.out.println("EBCDIC:["+byte2HexStr(buff)+"]"); st=EbcdicGbkConverter.ebcdicToGBK(buff); System.out.println("GBK:"+st); } }
备注:
GBK_CVT.txt 文件的格式(每行一条记录,示例如下):
国4d9bb9fa
第一位:中文字符
第二位到第五位:16进制的EBCDIC码
第六位到第九位:16进制的GBK码
附录:GBK与EBCDIC的对应表文件GBK_CVT.txt可从如下URL下载:
http://www.900.ibm.com/cn/support/viewdoc/detail?DocId=2222001000000