按照指定字符集以及字节数截取字符串

首先看下数据库字符集:

执行 select lengthb('丁') from dual; 结果为3则数据库字符集为UTF-8,结果为2则为GBK。


package bytes;

import java.io.UnsupportedEncodingException;

/**
 * Truncates strings to a maximum number of encoded bytes without splitting a
 * multi-byte character, for the UTF-8 and GBK encodings.
 *
 * <p>Typical use is fitting text into a database column whose size is limited
 * in bytes rather than in characters.
 */
public class BytesUTFAndGBK {

    /** Charset name used by {@link #cutUTF(String, int)}. */
    private static final String UTF8 = "UTF-8";

    /** Charset name used by {@link #cutGBK(String, int)}. */
    private static final String GBK = "GBK";

    /**
     * Returns the longest prefix of {@code str} whose UTF-8 encoding occupies
     * at most {@code num} bytes, never cutting through a multi-byte sequence.
     *
     * <p>Works for 1-, 2-, 3- and 4-byte UTF-8 sequences. If {@code num} is
     * zero or negative the result is the empty string; if {@code num} is at
     * least the encoded length, all encoded bytes are kept.
     *
     * @param str the string to truncate; must not be {@code null}
     * @param num the maximum number of UTF-8 bytes to keep
     * @return the truncated string, decoded back from the retained bytes
     * @throws UnsupportedEncodingException if the UTF-8 charset is unavailable
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public String cutUTF(String str, int num) throws UnsupportedEncodingException {
        byte[] buf = str.getBytes(UTF8);

        // Clamp the requested size into [0, buf.length] so that neither the
        // array access below nor the String constructor can go out of range.
        int end = Math.max(0, Math.min(num, buf.length));

        // If the first excluded byte is a continuation byte (10xxxxxx), the cut
        // falls inside a character: back up until it sits on a lead byte.
        while (end > 0 && end < buf.length && (buf[end] & 0xC0) == 0x80) {
            end--;
        }

        return new String(buf, 0, end, UTF8);
    }

    /**
     * Returns the longest prefix of {@code str} whose GBK encoding occupies
     * at most {@code num} bytes, never cutting through a double-byte character.
     *
     * <p>If {@code num} is zero or negative the result is the empty string; if
     * {@code num} is at least the encoded length, all encoded bytes are kept.
     *
     * @param str the string to truncate; must not be {@code null}
     * @param num the maximum number of GBK bytes to keep
     * @return the truncated string, decoded back from the retained bytes
     * @throws UnsupportedEncodingException if the GBK charset is unavailable
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public String cutGBK(String str, int num) throws UnsupportedEncodingException {
        byte[] buf = str.getBytes(GBK);

        // Clamp the requested size into [0, buf.length].
        int limit = Math.max(0, Math.min(num, buf.length));

        // Walk forward one character at a time. A GBK lead byte lies in
        // 0x81..0xFE and is followed by one trail byte; every other byte value
        // stands alone. Trail bytes may be below 0x80, so a byte's sign alone
        // cannot tell where a character starts.
        int end = 0;
        while (end < limit) {
            int unsigned = buf[end] & 0xFF;
            int width = (unsigned >= 0x81 && unsigned <= 0xFE) ? 2 : 1;
            if (end + width > limit) {
                break;
            }
            end += width;
        }

        return new String(buf, 0, end, GBK);
    }

}

你可能感兴趣的:(按照指定字符集以及字节数截取字符串)