Unicode汉字内码表2

BBS水木清华站∶精华区

发信人: intranetworm (小虫), 信区: Java 
标  题: Unicode 汉字与GB内码的转换 
发信站: BBS 水木清华站 (Wed Aug 27 13:44:45 1997) 
 
这是我编的一个转换程序,使用时先将前面的码表存成文件,例如table.txt 
创建一个GBUnicode实例,new GBUnicode("table.txt") 
以后调用GB2Uni和Uni2GB作内码转换.注意GB内码是用两个字节表示的. 
 
import java.io.*; 
import java.util.Hashtable; 
 
/**
 * Converts between GB2312 double-byte hanzi codes and Unicode characters
 * using a lookup table loaded from a file.
 *
 * <p>The table file must hold exactly 6763 entries. Each entry consists of
 * the two raw GB bytes of a character, any text up to a {@code ':'},
 * optional blanks, and then the Unicode value written as a decimal number.
 *
 * <p>{@link #GB2Uni(byte, byte)} computes the array position directly from
 * the GB code instead of searching, so the entries are expected to appear in
 * ascending GB order (0xB0A1 through 0xD7F9, then 0xD8A1 through 0xF7FE).
 * {@link #Uni2GB(char)} uses the bytes stored for each entry and does not
 * depend on that order.
 */
class GBUnicode{
        /** Number of hanzi covered by the table. */
        private static final int HANZI_COUNT=6763;
        /** Code positions per GB row (low byte 0xA1..0xFE). */
        private static final int ROW_SIZE=94;
        /** Row offset (from 0xB0) of 0xD7, the last and only partially used level-1 row. */
        private static final int LAST_L1_ROW=39;
        /** Number of positions used in row 0xD7 (0xD7A1..0xD7F9). */
        private static final int LAST_L1_ROW_SIZE=89;
        /** Row offset (from 0xB0) of 0xF7, the last level-2 row. */
        private static final int LAST_ROW=71;

        /** GB high/low byte of the n-th table entry. */
        byte high[]=new byte[HANZI_COUNT],low[]=new byte[HANZI_COUNT];
        /** Unicode value of the n-th table entry. */
        char unichar[]=new char[HANZI_COUNT];
        /** Maps a Unicode {@code Character} to the {@code Integer} index of its table entry. */
        Hashtable UniGB;

        /**
         * Loads the conversion table.
         *
         * <p>A Unicode value that occurs more than once is reported on
         * standard output; the later entry replaces the earlier one in the
         * Unicode-to-GB map.
         *
         * @param table_file path of the table file
         * @throws IOException if the file cannot be read, or (as
         *         {@link EOFException}) if it ends before all entries
         *         have been read
         * @throws NumberFormatException if the text after a ':' does not start
         *         with a decimal number that fits an {@code int}
         */
        public GBUnicode(String table_file)throws IOException
        {
                InputStream tables=new BufferedInputStream(new FileInputStream(table_file));
                try {
                        UniGB=new Hashtable(7000,1);
                        for (int n=0;n<HANZI_COUNT;n++) {
                                int c;
                                // Skip blanks and control bytes; GB bytes are >= 0xA1 so they stop the loop.
                                do {
                                        c=readByte(tables);
                                } while (c<=' ');
                                high[n]=(byte)c;
                                low[n]=(byte)readByte(tables);

                                // Ignore everything up to and including the ':' separator.
                                while (readByte(tables)!=':') {
                                        // keep scanning
                                }

                                // Skip blanks in front of the number.
                                do {
                                        c=readByte(tables);
                                } while (c<=' ');

                                StringBuilder digits=new StringBuilder();
                                while (c>='0' && c<='9') {
                                        digits.append((char)c);
                                        // End of file here just terminates the number of the last entry.
                                        c=tables.read();
                                }
                                unichar[n]=(char)Integer.parseInt(digits.toString());

                                Character key=Character.valueOf(unichar[n]);
                                if (UniGB.get(key)!=null)
                                        System.out.println("Duplicated : "+unichar[n]);
                                UniGB.put(key,Integer.valueOf(n));
                        }
                } finally {
                        tables.close();
                }
        }

        /**
         * Reads one byte as an unsigned value, treating end of file as an error.
         * The value is kept as an int so that a real 0xFF byte is not mistaken
         * for the -1 end-of-file marker.
         */
        private static int readByte(InputStream in)throws IOException{
                int c=in.read();
                if (c<0)
                        throw new EOFException("GB/Unicode table ended before "+HANZI_COUNT+" entries were read");
                return c;
        }

        /**
         * Computes the table index of a GB hanzi code.
         *
         * @return the index in 0..6762, or -1 if the two bytes are not a
         *         code in 0xB0A1..0xD7F9 or 0xD8A1..0xF7FE
         */
        private int getGBindex(byte high,byte low){
                // Work on unsigned values so out-of-range bytes cannot wrap into the valid range.
                int row=(high&0xff)-0xb0;
                int col=(low&0xff)-0xa1;
                if (row<0 || row>LAST_ROW || col<0 || col>=ROW_SIZE)
                        return -1;
                if (row<LAST_L1_ROW)  // full level-1 row
                        return row*ROW_SIZE+col;
                if (row==LAST_L1_ROW) {  // row 0xD7: only the first 89 positions are used
                        if (col>=LAST_L1_ROW_SIZE)
                                return -1;
                        return row*ROW_SIZE+col;
                }
                // Level 2: subtract the 5 unused positions at the end of row 0xD7.
                return row*ROW_SIZE+col-(ROW_SIZE-LAST_L1_ROW_SIZE);
        }

        /**
         * Converts a Unicode character to its GB code.
         *
         * @param unicode the character to convert
         * @return a new two-element array {high byte, low byte}, or
         *         {@code null} if the character is not in the table
         */
        public byte[] Uni2GB(char unicode) {
                Integer index=(Integer)UniGB.get(Character.valueOf(unicode));
                if (index==null)
                        return null;
                int n=index.intValue();
                return new byte[]{high[n],low[n]};
        }

        /**
         * Converts a GB code to its Unicode character.
         *
         * @param high first (lead) byte of the GB code
         * @param low second (trail) byte of the GB code
         * @return the Unicode character, or 0 if the bytes are not a GB hanzi code
         */
        public char GB2Uni(byte high, byte low) {
                int index=getGBindex(high,low);
                if (index==-1) //not GB Chinese
                        return 0;
                return unichar[index];
        }
}

 
-- 
※ 来源:·BBS 水木清华站 bbs.net.tsinghua.edu.cn·[FROM: organ.ncic.ac.c] 

BBS水木清华站∶精华区

你可能感兴趣的:(编解码)