package com.trs.infra.util;
import java.io.IOException;
/**
 * Static helpers for converting between Java strings (UTF-16 code units),
 * UTF-8 byte sequences and raw 16-bit byte pairs.
 */
public class CharacterConvertor {

    /** Message used for every malformed-input failure of the UTF-8 decoder. */
    private static final String INVALID_UTF8_MESSAGE = "Invalid UTF-8 format";

    /**
     * No-op entry point; the class is used through its static methods.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Intentionally empty.
    }

    /**
     * Decodes a string whose characters each carry one UTF-8 byte in their
     * low 8 bits (the high 8 bits of every char are ignored) into the text
     * those bytes represent.
     *
     * <p>Sequences of one to four bytes are supported. Code points above
     * U+FFFF are appended as a surrogate pair. Overlong encodings and
     * 3-byte encoded surrogates are not rejected.
     *
     * @param instr the byte-per-char input; must not be {@code null}
     * @return the decoded text
     * @throws IOException if a lead byte is not a valid UTF-8 lead byte, a
     *         continuation byte is missing or malformed, or the decoded value
     *         is not a valid Unicode code point
     */
    static public String convertUTF8String2Unicode(String instr)
            throws IOException {
        int length = instr.length();
        StringBuilder decoded = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            int lead = instr.charAt(i++) & 0xff;
            int codePoint;
            int trailingBytes;
            if ((lead & 0x80) == 0) {
                codePoint = lead;
                trailingBytes = 0;
            } else if ((lead & 0xE0) == 0xC0) {
                codePoint = lead & 0x1F;
                trailingBytes = 1;
            } else if ((lead & 0xF0) == 0xE0) {
                codePoint = lead & 0x0F;
                trailingBytes = 2;
            } else if ((lead & 0xF8) == 0xF0) {
                // Only the low three bits of a 4-byte lead carry payload.
                codePoint = lead & 0x07;
                trailingBytes = 3;
            } else {
                // Stray continuation byte (10xxxxxx) or a 5+ byte lead.
                throw new IOException(INVALID_UTF8_MESSAGE);
            }
            for (int k = 0; k < trailingBytes; k++) {
                codePoint = (codePoint << 6) | readContinuation(instr, i++);
            }
            // A 4-byte sequence can encode values beyond U+10FFFF.
            if (!Character.isValidCodePoint(codePoint)) {
                throw new IOException(INVALID_UTF8_MESSAGE);
            }
            decoded.appendCodePoint(codePoint);
        }
        return decoded.toString();
    }

    /**
     * Reads the UTF-8 continuation byte stored at {@code index} and returns
     * its six payload bits.
     */
    private static int readContinuation(String instr, int index)
            throws IOException {
        if (index >= instr.length()) {
            // Input ended in the middle of a multi-byte sequence.
            throw new IOException(INVALID_UTF8_MESSAGE);
        }
        int value = instr.charAt(index) & 0xff;
        if ((value & 0xC0) != 0x80) {
            throw new IOException(INVALID_UTF8_MESSAGE);
        }
        return value & 0x3F;
    }

    /**
     * Encodes a Java string as UTF-8 bytes.
     *
     * <p>Valid surrogate pairs are combined and written as a single 4-byte
     * sequence. An unpaired surrogate is written as a 3-byte sequence, the
     * same way any other char in the range U+0800..U+FFFF is.
     *
     * @param instr the text to encode; must not be {@code null}
     * @return a newly allocated array holding exactly the encoded bytes
     */
    public static byte[] convertUnicode2UTF8Byte(String instr) {
        int len = instr.length();
        // Worst case is 3 bytes per char; a surrogate pair (2 chars) needs 4.
        byte[] buffer = new byte[len * 3];
        int j = 0;
        int i = 0;
        while (i < len) {
            int cp = instr.codePointAt(i);
            i += Character.charCount(cp);
            if (cp < 0x80) {
                buffer[j++] = (byte) cp;
            } else if (cp < 0x0800) {
                buffer[j++] = (byte) (((cp >> 6) & 0x1F) | 0xC0);
                buffer[j++] = (byte) ((cp & 0x3F) | 0x80);
            } else if (cp < 0x010000) {
                buffer[j++] = (byte) (((cp >> 12) & 0x0F) | 0xE0);
                buffer[j++] = (byte) (((cp >> 6) & 0x3F) | 0x80);
                buffer[j++] = (byte) ((cp & 0x3F) | 0x80);
            } else {
                buffer[j++] = (byte) (((cp >> 18) & 0x07) | 0xF0);
                buffer[j++] = (byte) (((cp >> 12) & 0x3F) | 0x80);
                buffer[j++] = (byte) (((cp >> 6) & 0x3F) | 0x80);
                buffer[j++] = (byte) ((cp & 0x3F) | 0x80);
            }
        }
        byte[] result = new byte[j];
        System.arraycopy(buffer, 0, result, 0, j);
        return result;
    }

    /**
     * Builds a string from consecutive byte pairs, treating each pair as one
     * 16-bit char with the high-order byte first.
     *
     * <p>If the array is {@code null} or has an odd length, the text
     * {@code Encoding Error} is printed to standard output and an empty
     * string is returned.
     *
     * @param myByte the bytes to convert, two per char, high byte first
     * @return the resulting string, or an empty string on malformed input
     */
    public static String ISO106462Unicode(byte[] myByte) {
        if (myByte == null || (myByte.length & 1) != 0) {
            System.out.println("Encoding Error");
            return "";
        }
        StringBuilder sb = new StringBuilder(myByte.length / 2);
        for (int i = 0; i < myByte.length; i += 2) {
            int hi = myByte[i] & 0xff;     // high 8 bits
            int lo = myByte[i + 1] & 0xff; // low 8 bits
            sb.append((char) ((hi << 8) | lo));
        }
        return sb.toString();
    }

    /**
     * Splits every char of the string into two bytes, low-order byte first.
     *
     * <p>Note that this byte order is the opposite of the one read by
     * {@link #ISO106462Unicode(byte[])}, so the two methods are not inverses
     * of each other.
     *
     * @param s the text to convert; must not be {@code null}
     * @return an array of length {@code 2 * s.length()} with the low byte of
     *         each char followed by its high byte
     */
    public static byte[] Unicode2Byte(String s) {
        int len = s.length();
        byte[] out = new byte[len << 1];
        for (int i = 0; i < len; i++) {
            char c = s.charAt(i);
            out[2 * i] = (byte) (c & 0xff);
            out[2 * i + 1] = (byte) (c >> 8);
        }
        return out;
    }
}