Java:中文转换成 Unicode 编码,以及 Unicode 编码转换成中文

转自:一叶飘舟
http://blog.csdn.net/jdsjlzx/article/details/7058823

 

package lia.meetlucene;

import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;

/**
 * Converts text to and from its escaped form, in which every UTF-16 code unit
 * is written as a backslash, the letter 'u' and exactly four lowercase
 * hexadecimal digits (the same notation Java source files use).
 */
public class Unicode {

    /** Number of hexadecimal digits that follow the escape prefix. */
    private static final int HEX_DIGITS = 4;

    /** Total length of one escape: backslash + 'u' + four hex digits. */
    private static final int ESCAPE_LENGTH = 2 + HEX_DIGITS;

    /**
     * Demo entry point: prints the escaped form of a sample string, decodes a
     * hard-coded escape sequence back to text, and prints the index of the
     * first backslash in the sample (-1, because it contains none).
     *
     * @param args ignored
     * @throws IOException never thrown by this body; kept so the declared
     *         signature stays compatible with existing callers
     */
    public static void main(String[] args) throws IOException {
        String s = "简介";
        String tt = gbEncoding(s);
        // Print the escaped form of the sample text.
        System.out.println("unicodeBytes is: " + tt);
        // Print the text that the escape sequence stands for.
        System.out.println("对应的中文: " + decodeUnicode("\\u7b80\\u4ecb"));
        System.out.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
        System.out.println(s.indexOf("\\"));
    }

    /**
     * Encodes every UTF-16 code unit of the input as a four-digit escape.
     *
     * <p>Each code unit is always zero-padded to four hex digits, so the
     * result can be decoded unambiguously by {@link #decodeUnicode(String)}.
     * Characters outside the BMP are emitted as their two surrogate escapes.
     *
     * @param gbString text to encode; {@code null} is treated as empty
     * @return the escaped text, or an empty string for null/empty input
     */
    public static String gbEncoding(final String gbString) {
        if (gbString == null || gbString.isEmpty()) {
            return "";
        }
        final StringBuilder encoded = new StringBuilder(gbString.length() * ESCAPE_LENGTH);
        for (int i = 0; i < gbString.length(); i++) {
            final String hex = Integer.toHexString(gbString.charAt(i));
            encoded.append("\\u");
            // Left-pad to exactly four digits; a char never needs more than four.
            for (int pad = hex.length(); pad < HEX_DIGITS; pad++) {
                encoded.append('0');
            }
            encoded.append(hex);
        }
        return encoded.toString();
    }

    /**
     * Decodes four-digit escapes back into characters.
     *
     * <p>Every well-formed escape (backslash, 'u', four hex digits) is
     * replaced by the code unit it names. Any other text, including a
     * truncated or non-hex escape, is copied to the output unchanged instead
     * of raising an exception.
     *
     * @param dataStr text containing escapes; {@code null} is treated as empty
     * @return the decoded text, or an empty string for null/empty input
     */
    public static String decodeUnicode(final String dataStr) {
        if (dataStr == null || dataStr.isEmpty()) {
            return "";
        }
        final int length = dataStr.length();
        final StringBuilder decoded = new StringBuilder(length);
        int index = 0;
        while (index < length) {
            if (isEscapeAt(dataStr, index)) {
                final String hex = dataStr.substring(index + 2, index + ESCAPE_LENGTH);
                decoded.append((char) Integer.parseInt(hex, 16));
                index += ESCAPE_LENGTH;
            } else {
                decoded.append(dataStr.charAt(index));
                index++;
            }
        }
        return decoded.toString();
    }

    /** Returns true when a complete, well-formed escape starts at {@code index}. */
    private static boolean isEscapeAt(final String text, final int index) {
        if (index + ESCAPE_LENGTH > text.length()
                || text.charAt(index) != '\\'
                || text.charAt(index + 1) != 'u') {
            return false;
        }
        for (int i = index + 2; i < index + ESCAPE_LENGTH; i++) {
            if (Character.digit(text.charAt(i), 16) < 0) {
                return false;
            }
        }
        return true;
    }

}

 

 

代码详解(decodeUnicode 方法的带注释与调试输出版本):

    /**
     * Annotated walk-through of the decoder: splits the input at each escape
     * prefix (backslash followed by 'u'), parses the four hex digits that
     * follow, and appends the resulting character.
     *
     * <p>For every segment it prints a trace line with the current
     * {@code start} and {@code end} indexes. A segment that is not exactly
     * four hexadecimal digits is not parsed; a NUL character (code 0) is
     * appended in its place so that malformed input does not raise an
     * exception.
     *
     * @param dataStr text made of consecutive escapes; {@code null} or empty
     *        input yields an empty string
     * @return the decoded characters, one per segment
     */
    public static String decodeUnicode(final String dataStr) {
        final StringBuilder buffer = new StringBuilder();
        if (dataStr == null || dataStr.isEmpty()) {
            return buffer.toString();
        }
        int start = 0;
        while (start > -1) {
            // Look for the NEXT prefix. Searching from start + 2 skips the
            // two-character prefix at 'start' and guarantees end >= start + 2,
            // so the substring below can never have end < begin (searching
            // from start + 1 could, for input beginning with two backslashes).
            final int end = dataStr.indexOf("\\u", start + 2);
            // Trace output: the current segment lies between start and end.
            // indexOf returns the index of the first occurrence at or after
            // the given position, or -1 if there is no such occurrence.
            System.out.println(start + "asdfasd~~~~~~~~~~~~~~~~~~~~~``" + end);
            // Clamp so that a one-character input yields an empty segment
            // instead of an out-of-range substring.
            final int digitsFrom = Math.min(start + 2, dataStr.length());
            final int digitsTo = (end == -1) ? dataStr.length() : end;
            final String charStr = dataStr.substring(digitsFrom, digitsTo);

            // Only parse when the segment is exactly four hex digits;
            // otherwise keep the NUL placeholder to avoid a parse error.
            boolean wellFormed = charStr.length() == 4;
            for (int i = 0; wellFormed && i < charStr.length(); i++) {
                wellFormed = Character.digit(charStr.charAt(i), 16) >= 0;
            }
            char letter = 0;
            if (wellFormed) {
                letter = (char) Integer.parseInt(charStr, 16); // parse as base-16
            }
            buffer.append(letter);
            start = end;
        }
        return buffer.toString();
    }

 

你可能感兴趣的:(unicode)