java char and UTF

Java 中的 char 占两个字节,内部使用 Unicode(UTF-16)编码。以 UTF-8 形式保存到文件时,会根据字符的取值范围,用 1 到 3 个字节写入文件。

首字节以 0 开始的字符为 ASCII 码的 128 个字符,用 1 byte 保存;x<=0x7F(下面的代码中 0x0000 是例外,用 2 byte 保存)

首字节以 110 开始的字符用 2 byte 保存;0x7F<x<=0x7FF

首字节以 1110 开始的字符用 3 byte 保存;x>0x7FF

不多说,上一段hsqldb里的代码。

/**
 * Encodes every char of {@code str} into {@code out.buffer}, starting at
 * {@code out.count}, using one, two or three bytes per char.
 *
 * <p>Byte layout per char value:
 * <ul>
 *   <li>0x0001..0x007F: one byte, the value itself;</li>
 *   <li>0x0000 and 0x0080..0x07FF: two bytes, {@code 110xxxxx 10xxxxxx};</li>
 *   <li>0x0800..0xFFFF: three bytes, {@code 1110xxxx 10xxxxxx 10xxxxxx}.</li>
 * </ul>
 * Chars are encoded one at a time, so each half of a surrogate pair is
 * written as its own three-byte sequence. This is the same per-char layout
 * as the "modified UTF-8" described for {@code java.io.DataOutput#writeUTF},
 * but no length prefix is written here.
 *
 * @param str the string to encode
 * @param out the destination; its {@code buffer} and {@code count} fields
 *            are updated directly
 * @return the number of bytes appended to {@code out}
 */
public static int stringToUTFBytes(String str,
                                       HsqlByteArrayOutputStream out) {

        final int length  = str.length();
        int       written = 0;

        // Ask for one byte per char plus 8 bytes of slack up front; wider
        // chars are covered by the per-iteration check further down.
        // NOTE(review): ensureRoom is presumed to grow out.buffer - confirm.
        if (out.buffer.length < out.count + length + 8) {
            out.ensureRoom(length + 8);
        }

        final char[] chars = str.toCharArray();

        for (int pos = 0; pos < length; pos++) {
            final int ch = chars[pos];

            if (ch > 0x07FF) {

                // three-byte form: 4 + 6 + 6 payload bits
                out.buffer[out.count++] = (byte) (0xE0 | ((ch >> 12) & 0x0F));
                out.buffer[out.count++] = (byte) (0x80 | ((ch >> 6) & 0x3F));
                out.buffer[out.count++] = (byte) (0x80 | (ch & 0x3F));
                written                 += 3;
            } else if (ch == 0x0000 || ch > 0x007F) {

                // two-byte form: 5 + 6 payload bits (also used for U+0000)
                out.buffer[out.count++] = (byte) (0xC0 | ((ch >> 6) & 0x1F));
                out.buffer[out.count++] = (byte) (0x80 | (ch & 0x3F));
                written                 += 2;
            } else {

                // single byte: 0x0001..0x007F
                out.buffer[out.count++] = (byte) ch;

                written++;
            }

            // Keep at least 8 free bytes so the next char (at most 3 bytes)
            // always fits before the next check.
            if (out.buffer.length < out.count + 8) {
                out.ensureRoom(length - pos + 8);
            }
        }

        return written;
    }

 

你可能感兴趣的:(java,C++,c,C#,HSQLDB)