关于gb2312,unicode,utf-8的一些资料,大家请自行搜索。以下列举几个比较好的资源网址。
http://baike.baidu.com/view/25492.htm
http://www.utf.com.cn/article/s45
http://www.utf.com.cn/article/s74
http://www.haiyan.com/steelk/navigator/ref/gb2312/gbindex.htm
要点:
1,gb2312与unicode或者utf-8之间并不存在直接的映射关系。所以我们只能通过查表法来进行转换。
2,utf-8是unicode用于网络传输的一种形式,它与unicode之间是可以通过运算来进行转换的。
3,j2me环境使用的都是utf-8编码,但是请注意,j2me中的utf-8编码比较特殊,在整个编码前面多了两个字节,用于存放字符串的长度。
过程:
1,制作映射表gb2312-unicode,因为汉字的unicode比utf-8要小,这样做出的表也会小一些,而且对于unicode的可扩展性也强一些。
2,先将gb2312编码串通过查表,转换为unicode。
3,然后通过运算,将unicode转换为utf-8,以便在j2me环境下使用。
我修改了Herong Yang大侠的一个映射表生成函数,原文请参考 http://www.herongyang.com/gb2312/gb2312_unicode.html
它的作用是生成一个二进制的gb2312到unicode的查找表,它按照gb2312的分区,分块特性,将其对应的unicode按顺序存入指定的位置。
这样我们只需要根据gb2312的编码,计算出索引就可以获取编码对应的unicode了。
由于是修改的代码,没脸贴出来,大家有需求可以直接参考Herong Yang的文章,然后根据自己需求修改并生成自己的映射表。
这里我把自己这个转换表文件以及访问代码公开。
http://download.csdn.net/source/263609
转帖请注明。这是个傻瓜化的代码,在java中给它gb2312的byte数组,它就给你构造出字符串。
用在不支持gb2312的手机上非常方便。这个转换表的大小是15228byte,对j2me来说还是可以接受的。
如果有朋友需要沟通,可以发邮件到[email protected]
import
java.io.InputStream;
/**
 * Converts GB2312-encoded bytes into a Java {@code String} on platforms that have
 * no built-in GB2312 decoder.
 *
 * <p>The conversion is table driven. The table holds one big-endian 16-bit Unicode
 * value per GB2312 cell, laid out as 81 rows of 94 cells: GB2312 rows 1-9 followed
 * directly by rows 16-87 (rows 10-15 are not stored). Every looked-up code point is
 * encoded as UTF-8 into a scratch buffer which is finally decoded into the result.
 *
 * <p>The scratch buffer lives in instance fields, so one instance must not be used
 * by several threads at the same time.
 */
public class HGB2312 {
    /** Number of cells in one GB2312 row. */
    private static final int CELLS_PER_ROW = 94;
    /** Table size in bytes: 81 stored rows x 94 cells x 2 bytes per cell. */
    private static final int MAP_SIZE = 15228;
    /** Smallest byte value that is a valid GB2312 lead or trail byte. */
    private static final int FIRST_CODE_BYTE = 0xA1;
    /** Largest byte value that is a valid GB2312 trail byte. */
    private static final int LAST_CODE_BYTE = 0xFE;
    /** Code point emitted for input that has no table entry (NUL, as before). */
    private static final int UNMAPPED = 0;

    private byte[] map = new byte[MAP_SIZE];
    private byte[] buffer;
    private int index;

    /**
     * Loads the lookup table from the class-path resource {@code /gb2u.dat}.
     *
     * <p>If the resource is shorter than the table, the remaining entries stay
     * zero and therefore decode to NUL.
     *
     * @throws Exception if the resource is missing or cannot be read
     */
    public HGB2312() throws Exception {
        InputStream is = getClass().getResourceAsStream("/gb2u.dat");
        if (is == null) {
            throw new java.io.IOException("Resource /gb2u.dat not found");
        }
        try {
            // A single read() may deliver fewer bytes than requested, so keep
            // reading until the table is full or the stream ends.
            int filled = 0;
            while (filled < map.length) {
                int n = is.read(map, filled, map.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
        } finally {
            is.close();
        }
    }

    /**
     * Creates a converter from an already loaded lookup table.
     *
     * @param table the table contents; must be exactly 15228 bytes. The array is
     *              copied, so later changes to it do not affect this instance.
     * @throws IllegalArgumentException if {@code table} is null or has the wrong size
     */
    public HGB2312(byte[] table) {
        if (table == null || table.length != MAP_SIZE) {
            throw new IllegalArgumentException("table must be exactly " + MAP_SIZE + " bytes");
        }
        System.arraycopy(table, 0, map, 0, MAP_SIZE);
    }

    /**
     * Decodes GB2312 bytes into a string.
     *
     * <p>Bytes below 0x80 are copied as ASCII. Any other byte is treated as a lead
     * byte: if it is followed by a trail byte in 0xA1-0xFE, both bytes are consumed
     * and looked up in the table; otherwise only the lead byte is consumed. Input
     * that cannot be mapped (unknown row, bad or missing trail byte) yields a NUL
     * character instead of failing.
     *
     * @param gb the GB2312-encoded bytes
     * @return the decoded string
     * @throws Exception if the platform cannot decode UTF-8
     */
    public String gb2utf8(byte[] gb) throws Exception {
        // Worst case is 3 UTF-8 bytes per 2 input bytes, i.e. 1.5x the input.
        buffer = new byte[gb.length + gb.length / 2 + 3];
        index = 0;
        int i = 0;
        while (i < gb.length) {
            int lead = gb[i++] & 0xFF;
            if (lead < 0x80) {
                fillBuffer(lead);
                continue;
            }
            int cell = -1;
            if (i < gb.length) {
                int trail = gb[i] & 0xFF;
                if (trail >= FIRST_CODE_BYTE && trail <= LAST_CODE_BYTE) {
                    cell = trail - FIRST_CODE_BYTE;
                    i++;
                }
            }
            if (cell < 0) {
                // Truncated input or invalid trail byte: keep the following byte
                // for the next iteration so that no ASCII character is swallowed.
                fillBuffer(UNMAPPED);
            } else {
                fillBuffer(lookup(lead - FIRST_CODE_BYTE, cell));
            }
        }
        return new String(buffer, 0, index, "UTF-8");
    }

    /**
     * Returns the table entry for a zero-based GB2312 row and cell.
     *
     * @param row  zero-based row (lead byte minus 0xA1); may be out of range
     * @param cell zero-based cell in 0..93
     * @return the stored 16-bit value, or NUL when the row is not in the table
     */
    private int lookup(int row, int cell) {
        int tableRow;
        if (row >= 0 && row < 9) {
            tableRow = row;
        } else if (row >= 15 && row <= 86) {
            // Rows 10-15 are not stored, so later rows sit 6 rows earlier.
            tableRow = row - 6;
        } else {
            return UNMAPPED;
        }
        int ind = (tableRow * CELLS_PER_ROW + cell) << 1;
        return ((map[ind] & 0xFF) << 8) | (map[ind + 1] & 0xFF);
    }

    /**
     * Appends one code point in 0..0xFFFF to the scratch buffer as UTF-8.
     * Table entries are 16 bits wide, so longer UTF-8 forms can never occur.
     *
     * @param value the code point to encode
     */
    private void fillBuffer(int value) {
        if (value < 0x80) {
            buffer[index++] = (byte) value;
        } else if (value < 0x800) {
            // Two-byte form: 110xxxxx 10xxxxxx.
            buffer[index++] = (byte) (0xC0 | (value >> 6));
            buffer[index++] = (byte) (0x80 | (value & 0x3F));
        } else {
            // Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx.
            buffer[index++] = (byte) (0xE0 | (value >> 12));
            buffer[index++] = (byte) (0x80 | ((value >> 6) & 0x3F));
            buffer[index++] = (byte) (0x80 | (value & 0x3F));
        }
    }
}
/**
 * Converts GB2312-encoded bytes into a Java {@code String} on platforms that have
 * no built-in GB2312 decoder.
 *
 * <p>The conversion is table driven. The table holds one big-endian 16-bit Unicode
 * value per GB2312 cell, laid out as 81 rows of 94 cells: GB2312 rows 1-9 followed
 * directly by rows 16-87 (rows 10-15 are not stored). Every looked-up code point is
 * encoded as UTF-8 into a scratch buffer which is finally decoded into the result.
 *
 * <p>The scratch buffer lives in instance fields, so one instance must not be used
 * by several threads at the same time.
 */
public class HGB2312 {
    /** Number of cells in one GB2312 row. */
    private static final int CELLS_PER_ROW = 94;
    /** Table size in bytes: 81 stored rows x 94 cells x 2 bytes per cell. */
    private static final int MAP_SIZE = 15228;
    /** Smallest byte value that is a valid GB2312 lead or trail byte. */
    private static final int FIRST_CODE_BYTE = 0xA1;
    /** Largest byte value that is a valid GB2312 trail byte. */
    private static final int LAST_CODE_BYTE = 0xFE;
    /** Code point emitted for input that has no table entry (NUL, as before). */
    private static final int UNMAPPED = 0;

    private byte[] map = new byte[MAP_SIZE];
    private byte[] buffer;
    private int index;

    /**
     * Loads the lookup table from the class-path resource {@code /gb2u.dat}.
     *
     * <p>If the resource is shorter than the table, the remaining entries stay
     * zero and therefore decode to NUL.
     *
     * @throws Exception if the resource is missing or cannot be read
     */
    public HGB2312() throws Exception {
        InputStream is = getClass().getResourceAsStream("/gb2u.dat");
        if (is == null) {
            throw new java.io.IOException("Resource /gb2u.dat not found");
        }
        try {
            // A single read() may deliver fewer bytes than requested, so keep
            // reading until the table is full or the stream ends.
            int filled = 0;
            while (filled < map.length) {
                int n = is.read(map, filled, map.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
        } finally {
            is.close();
        }
    }

    /**
     * Creates a converter from an already loaded lookup table.
     *
     * @param table the table contents; must be exactly 15228 bytes. The array is
     *              copied, so later changes to it do not affect this instance.
     * @throws IllegalArgumentException if {@code table} is null or has the wrong size
     */
    public HGB2312(byte[] table) {
        if (table == null || table.length != MAP_SIZE) {
            throw new IllegalArgumentException("table must be exactly " + MAP_SIZE + " bytes");
        }
        System.arraycopy(table, 0, map, 0, MAP_SIZE);
    }

    /**
     * Decodes GB2312 bytes into a string.
     *
     * <p>Bytes below 0x80 are copied as ASCII. Any other byte is treated as a lead
     * byte: if it is followed by a trail byte in 0xA1-0xFE, both bytes are consumed
     * and looked up in the table; otherwise only the lead byte is consumed. Input
     * that cannot be mapped (unknown row, bad or missing trail byte) yields a NUL
     * character instead of failing.
     *
     * @param gb the GB2312-encoded bytes
     * @return the decoded string
     * @throws Exception if the platform cannot decode UTF-8
     */
    public String gb2utf8(byte[] gb) throws Exception {
        // Worst case is 3 UTF-8 bytes per 2 input bytes, i.e. 1.5x the input.
        buffer = new byte[gb.length + gb.length / 2 + 3];
        index = 0;
        int i = 0;
        while (i < gb.length) {
            int lead = gb[i++] & 0xFF;
            if (lead < 0x80) {
                fillBuffer(lead);
                continue;
            }
            int cell = -1;
            if (i < gb.length) {
                int trail = gb[i] & 0xFF;
                if (trail >= FIRST_CODE_BYTE && trail <= LAST_CODE_BYTE) {
                    cell = trail - FIRST_CODE_BYTE;
                    i++;
                }
            }
            if (cell < 0) {
                // Truncated input or invalid trail byte: keep the following byte
                // for the next iteration so that no ASCII character is swallowed.
                fillBuffer(UNMAPPED);
            } else {
                fillBuffer(lookup(lead - FIRST_CODE_BYTE, cell));
            }
        }
        return new String(buffer, 0, index, "UTF-8");
    }

    /**
     * Returns the table entry for a zero-based GB2312 row and cell.
     *
     * @param row  zero-based row (lead byte minus 0xA1); may be out of range
     * @param cell zero-based cell in 0..93
     * @return the stored 16-bit value, or NUL when the row is not in the table
     */
    private int lookup(int row, int cell) {
        int tableRow;
        if (row >= 0 && row < 9) {
            tableRow = row;
        } else if (row >= 15 && row <= 86) {
            // Rows 10-15 are not stored, so later rows sit 6 rows earlier.
            tableRow = row - 6;
        } else {
            return UNMAPPED;
        }
        int ind = (tableRow * CELLS_PER_ROW + cell) << 1;
        return ((map[ind] & 0xFF) << 8) | (map[ind + 1] & 0xFF);
    }

    /**
     * Appends one code point in 0..0xFFFF to the scratch buffer as UTF-8.
     * Table entries are 16 bits wide, so longer UTF-8 forms can never occur.
     *
     * @param value the code point to encode
     */
    private void fillBuffer(int value) {
        if (value < 0x80) {
            buffer[index++] = (byte) value;
        } else if (value < 0x800) {
            // Two-byte form: 110xxxxx 10xxxxxx.
            buffer[index++] = (byte) (0xC0 | (value >> 6));
            buffer[index++] = (byte) (0x80 | (value & 0x3F));
        } else {
            // Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx.
            buffer[index++] = (byte) (0xE0 | (value >> 12));
            buffer[index++] = (byte) (0x80 | ((value >> 6) & 0x3F));
            buffer[index++] = (byte) (0x80 | (value & 0x3F));
        }
    }
}