字符的编码

package com.alibaba.china.gene.test;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;

/**
 * Simulates the transcoding steps a Chinese string goes through on its way from a browser
 * to the Java side of a web server, printing the intermediate result of every step:
 * URL-encoding, conversion to bytes, conversion to binary digits, restoring the bytes,
 * restoring the string and finally URL-decoding with matching and mismatching charsets.
 */
public class UrlEncodeTest {

    /**
     * Charset used both to turn text into bytes ({@link #getInBytes(String)}) and to turn the
     * restored bytes back into text. Encoding and decoding must use the same charset,
     * otherwise the round trip depends on the platform default encoding.
     */
    private static final String WIRE_CHARSET = "iso-8859-1";

    /**
     * Runs the whole simulation and prints every intermediate value to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        System.out.println("模拟测试中文字符从浏览器到Web服务器Java端经过的转码过程");
        System.out.println("--------------------------------------------------");

        // Step 1: URL-encode the original text once as GBK and once as UTF-8.
        String str = "中文";
        String strGbk = "";
        String strUtf8 = "";
        try {
            strGbk = URLEncoder.encode(str, "gbk");
            strUtf8 = URLEncoder.encode(str, "utf8");
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        System.out.print("中文原字符串:");
        System.out.println(str);
        System.out.println("浏览器会做一次编码,FireFox默认gbk、IE默认Utf-8:");
        System.out.print("中文对应gbk编码:");
        System.out.println(strGbk);
        System.out.print("中文对应utf-8编码:");
        System.out.println(strUtf8);
        System.out.println();

        // Step 2: convert the raw text and both URL-encoded forms to bytes.
        System.out.println("在构造Http请求头时,系统会按特定编码转成Byte流");
        System.out.print("中文原字符串转成的Bytes流:");
        byte[] bytes = getInBytes(str);
        printBytes(bytes);
        System.out.print("中文对应gbk编码转成的Bytes流:");
        byte[] bytesGbk = getInBytes(strGbk);
        printBytes(bytesGbk);
        System.out.print("中文对应utf-8编码转成的Bytes流:");
        byte[] bytesUtf8 = getInBytes(strUtf8);
        printBytes(bytesUtf8);
        System.out.println();

        // Step 3: render every byte as eight binary digits.
        System.out.println("在发送Http请求给服务器时,做网络传输时,系统都会转成二进制编码");
        System.out.print("中文原字符串Bytes流对应二进制:");
        String[] binary = printAndGetInBinary(bytes);
        System.out.print("中文对应gbk编码Bytes流对应二进制:");
        String[] binaryGbk = printAndGetInBinary(bytesGbk);
        System.out.print("中文对应utf-8编码Bytes流对应二进制:");
        String[] binaryUtf8 = printAndGetInBinary(bytesUtf8);
        System.out.println();

        // Step 4: parse the binary digits back into bytes.
        System.out.println("服务器接收到二进制,系统都会转成Bytes流");
        System.out.print("中文原字符串对应二进制还原得到Bytes流:");
        bytes = restoreBytes(binary);
        printBytes(bytes);
        System.out.print("中文对应gbk编码对应二进制还原得到Bytes流:");
        bytesGbk = restoreBytes(binaryGbk);
        printBytes(bytesGbk);
        System.out.print("中文对应utf-8编码对应二进制还原得到Bytes流:");
        bytesUtf8 = restoreBytes(binaryUtf8);
        printBytes(bytesUtf8);
        System.out.println();

        // Step 5: turn the bytes back into strings. The bytes were produced with
        // WIRE_CHARSET, so they are decoded with WIRE_CHARSET as well instead of the
        // platform default charset.
        System.out.println("应用服务器如Tomcat,默认会默认编码还原成字符串编码");
        str = decodeWireBytes(bytes);
        strGbk = decodeWireBytes(bytesGbk);
        strUtf8 = decodeWireBytes(bytesUtf8);
        System.out.print("中文原字符串Byte流还原得到的字符串编码:");
        System.out.println(str);
        System.out.print("中文对应gbk编码Byte流还原得到的字符串编码:");
        System.out.println(strGbk);
        System.out.print("中文对应utf-8编码Byte流还原得到的字符串编码:");
        System.out.println(strUtf8);
        System.out.println();

        // Step 6: URL-decode, first with the matching charset, then with the wrong one.
        try {
            System.out.println("Java应用,如Webx会按指定的编码还原字符串");
            System.out.print("中文原字符串按gbk还原后:");
            System.out.println(URLDecoder.decode(str, "gbk"));
            System.out.println("这说明如果客户端不进行编码直接发送中文给服务端,会造成信息丢失");
            System.out.print("中文对应gbk编码按gbk还原后:");
            System.out.println(URLDecoder.decode(strGbk, "gbk"));
            System.out.print("中文对应utf-8编码按utf-8还原后:");
            System.out.println(URLDecoder.decode(strUtf8, "utf-8"));
            System.out.println();

            System.out.println("Webx如果与浏览器使用的编码不一致,还原出的字符串会是乱码");
            System.out.print("中文对应gbk编码按utf-8还原后:");
            System.out.println(URLDecoder.decode(strGbk, "utf-8"));
            System.out.print("中文对应utf-8编码按gbk还原后:");
            System.out.println(URLDecoder.decode(strUtf8, "gbk"));
            System.out.println();
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }

        // Step 7: show GBK bytes reinterpreted as an iso-8859-1 string.
        try {
            System.out.println("数据库中会转成iso-8859-1编码");
            str = "中文";
            System.out.print("中文字符串原文:");
            System.out.println(str);
            byte[] gbkBytes = str.getBytes("gbk");
            System.out.print("中文字符串对应GBK的Byte流:");
            printBytes(gbkBytes);
            System.out.print("中文字符串对应Byte流转成的iso-8859-1格式字符串:");
            System.out.println(new String(gbkBytes, "iso-8859-1"));
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        System.out.println("--------------------------------------------------");
    }

    /**
     * Decodes bytes into a string using {@link #WIRE_CHARSET}, the same charset
     * {@link #getInBytes(String)} encodes with.
     *
     * @param bytes the bytes to decode; must not be {@code null}
     * @return the decoded string
     * @throws IllegalStateException if the JVM does not know {@link #WIRE_CHARSET}
     */
    private static String decodeWireBytes(byte[] bytes) {
        try {
            return new String(bytes, WIRE_CHARSET);
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("Charset not supported: " + WIRE_CHARSET, e);
        }
    }

    /**
     * Parses binary-digit strings back into bytes.
     *
     * @param binary one string of binary digits per byte; may be {@code null}
     * @return one byte per input string, or an empty array when {@code binary} is {@code null}
     * @throws NumberFormatException if an element is not a valid base-2 number
     */
    private static byte[] restoreBytes(String[] binary) {
        if (binary == null) {
            return new byte[0];
        }
        byte[] bytes = new byte[binary.length];
        for (int i = 0; i < binary.length; i++) {
            // Parsed as an int first; the narrowing cast keeps only the low eight bits.
            bytes[i] = (byte) Integer.parseInt(binary[i], 2);
        }
        return bytes;
    }

    /**
     * Converts every byte to its 8-digit binary form, prints all of them on one line
     * (without separators) and returns them.
     *
     * @param bytes the bytes to convert; may be {@code null}
     * @return one 8-character string per byte, or an empty array when {@code bytes} is
     *         {@code null} (nothing is printed in that case)
     */
    private static String[] printAndGetInBinary(byte[] bytes) {
        if (bytes == null) {
            return new String[0];
        }
        String[] binaryStrs = new String[bytes.length];
        for (int i = 0; i < bytes.length; i++) {
            binaryStrs[i] = byte2bits(bytes[i]);
        }
        for (String string : binaryStrs) {
            System.out.print(string);
        }
        System.out.println();
        return binaryStrs;
    }

    /**
     * Returns the eight binary digits of a byte, zero-padded on the left.
     *
     * @param b the byte to convert
     * @return an 8-character string consisting of {@code '0'} and {@code '1'}
     */
    public static String byte2bits(byte b) {
        int z = b;
        // Setting bit 8 guarantees at least nine digits, so the last eight always exist
        // and small positive values keep their leading zeros.
        z |= 256;
        String str = Integer.toBinaryString(z);
        int len = str.length();
        return str.substring(len - 8, len);
    }

    /**
     * Prints the signed decimal value of every byte on one line, separated by single
     * spaces so that adjacent values cannot run together. Prints nothing when
     * {@code bytes} is {@code null}.
     *
     * @param bytes the bytes to print; may be {@code null}
     */
    private static void printBytes(byte[] bytes) {
        if (bytes == null) {
            return;
        }
        StringBuilder strBuilder = new StringBuilder();
        for (int i = 0; i < bytes.length; i++) {
            if (i > 0) {
                strBuilder.append(' ');
            }
            strBuilder.append(bytes[i]);
        }
        System.out.println(strBuilder.toString());
    }

    /**
     * Encodes a string into bytes using {@link #WIRE_CHARSET} (iso-8859-1).
     *
     * @param str the string to encode; may be {@code null}
     * @return the encoded bytes, or {@code null} when {@code str} is {@code null} or the
     *         charset is not supported
     */
    protected static byte[] getInBytes(String str) {
        if (str == null) {
            return null;
        }
        byte[] bytes = null;
        try {
            // Convert to bytes using iso-8859-1.
            bytes = str.getBytes(WIRE_CHARSET);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return bytes;
    }

}

你可能感兴趣的:(编码)