在操作文件时遇到的乱码问题

问题描述:想把一个txt文件中某些太长的句子截成几段,比如一行有上千字的句子,我想把它截断成每行100个字符。

最开始的程序是这样的:

/**
*项目:MyFlex
*$Id$
*/

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;

/**
 * Re-wraps the lines of {@code test.txt} into segments of at most 90 characters and
 * writes them to {@code out.txt}.
 *
 * <p>NOTE: this listing names no charset, so the platform default charset is used for
 * both reading and writing, and the segments are written with {@code print} (no line
 * terminators). Both choices are kept exactly as in the original listing.
 *
 * @author dragon
 */
public class ChangeLine {

    /**
     * Reads {@code test.txt} line by line, cuts every line into segments of at most
     * 90 characters and passes the segments to {@link #output2File}.
     *
     * @param args ignored
     * @throws Exception if a file cannot be opened, read or written
     */
    public static void main(String[] args) throws Exception {

        File file = new File("test.txt");

        if (!file.exists() || !file.canRead()) {
            System.out.println("找不到文件:" + file.getAbsolutePath());
            return;
        }
        // FileOutputStream creates (or truncates) the file itself, so no createNewFile() call is needed.
        File outFile = new File("out.txt");

        BufferedReader bufRdr = null;
        PrintWriter pw = null;
        try {
            // No charset argument: decoding and encoding use the platform default charset.
            InputStream inStream = new FileInputStream(file);
            bufRdr = new BufferedReader(new InputStreamReader(inStream));
            pw = new PrintWriter(new FileOutputStream(outFile));

            String buffer;
            while ((buffer = bufRdr.readLine()) != null) {
                output2File(pw, split(buffer, 90));
            }
            // PrintWriter never throws on a failed write; it only records the error.
            if (pw.checkError()) {
                throw new IOException("Failed to write " + outFile.getAbsolutePath());
            }
        } finally {
            // Closing flushes the buffered output; without it the data may never reach the file.
            if (pw != null) {
                pw.close();
            }
            if (bufRdr != null) {
                bufRdr.close();
            }
        }
    }

    /**
     * Writes every segment to {@code pw} without a line terminator and echoes it to
     * standard output prefixed with {@code "str:"}.
     *
     * @param pw  destination writer
     * @param str segments to write, in order
     * @return always {@code true}
     * @throws IOException declared for callers; not thrown by this implementation
     */
    public static boolean output2File(PrintWriter pw, String[] str) throws IOException {

        for (int i = 0; i < str.length; i++) {
            pw.print(str[i]);
            System.out.println("str:" + str[i]);
        }
        return true;
    }

    /**
     * Cuts {@code src} into consecutive segments of {@code count} characters; only the
     * last segment may be shorter. An empty {@code src} yields one empty segment, and no
     * trailing empty segment is produced when the length is an exact multiple of
     * {@code count}.
     *
     * @param src   text to cut; must not be {@code null}
     * @param count maximum number of {@code char}s per segment; must be positive
     * @return the segments in order, never empty
     * @throws IllegalArgumentException if {@code count} is not positive
     */
    public static String[] split(String src, int count) {

        if (count <= 0) {
            throw new IllegalArgumentException("count must be positive: " + count);
        }
        int length = src.length();
        // Ceiling division, written without (length + count - 1) so it cannot overflow.
        int lines = length == 0 ? 1 : length / count + (length % count == 0 ? 0 : 1);
        String[] dest = new String[lines];

        for (int i = 0; i < lines; i++) {
            int start = i * count;
            dest[i] = src.substring(start, start + Math.min(count, length - start));
        }
        return dest;
    }

}

发现打印出来的全是乱码,而且被转换后,连最基本的换行符也没有了。最后发现是由于没有设置操作流的字符集,于是加上字符集:

/**
*
*项目:MyFlex
*$Id$
*/

package cn.com.talkweb;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;

/**
 * Re-wraps the lines of {@code test.txt} into segments of at most 90 characters and
 * writes them to {@code out.txt}, reading and writing with the {@code "utf8"} charset.
 *
 * <p>NOTE: the segments are written with {@code print}, so no line terminators are
 * emitted; this is kept exactly as in the original listing.
 *
 * @author dragon
 */
public class ChangeLine {

    /**
     * Reads {@code test.txt} line by line, cuts every line into segments of at most
     * 90 characters and passes the segments to {@link #output2File}.
     *
     * @param args ignored
     * @throws Exception if a file cannot be opened, read or written
     */
    public static void main(String[] args) throws Exception {

        File file = new File("test.txt");

        if (!file.exists() || !file.canRead()) {
            System.out.println("找不到文件:" + file.getAbsolutePath());
            return;
        }
        // FileOutputStream creates (or truncates) the file itself, so no createNewFile() call is needed.
        File outFile = new File("out.txt");

        BufferedReader bufRdr = null;
        PrintWriter pw = null;
        try {
            InputStream inStream = new FileInputStream(file);
            bufRdr = new BufferedReader(new InputStreamReader(inStream, "utf8"));
            pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(outFile), "utf8"));

            String buffer;
            while ((buffer = bufRdr.readLine()) != null) {
                output2File(pw, split(buffer, 90));
            }
            // PrintWriter never throws on a failed write; it only records the error.
            if (pw.checkError()) {
                throw new IOException("Failed to write " + outFile.getAbsolutePath());
            }
        } finally {
            // Closing flushes the buffered output; without it the data may never reach the file.
            if (pw != null) {
                pw.close();
            }
            if (bufRdr != null) {
                bufRdr.close();
            }
        }
    }

    /**
     * Writes every segment to {@code pw} without a line terminator and echoes it to
     * standard output prefixed with {@code "str:"}.
     *
     * @param pw  destination writer
     * @param str segments to write, in order
     * @return always {@code true}
     * @throws IOException declared for callers; not thrown by this implementation
     */
    public static boolean output2File(PrintWriter pw, String[] str) throws IOException {

        for (int i = 0; i < str.length; i++) {
            pw.print(str[i]);
            System.out.println("str:" + str[i]);
        }
        return true;
    }

    /**
     * Cuts {@code src} into consecutive segments of {@code count} characters; only the
     * last segment may be shorter. An empty {@code src} yields one empty segment, and no
     * trailing empty segment is produced when the length is an exact multiple of
     * {@code count}.
     *
     * @param src   text to cut; must not be {@code null}
     * @param count maximum number of {@code char}s per segment; must be positive
     * @return the segments in order, never empty
     * @throws IllegalArgumentException if {@code count} is not positive
     */
    public static String[] split(String src, int count) {

        if (count <= 0) {
            throw new IllegalArgumentException("count must be positive: " + count);
        }
        int length = src.length();
        // Ceiling division, written without (length + count - 1) so it cannot overflow.
        int lines = length == 0 ? 1 : length / count + (length % count == 0 ? 0 : 1);
        String[] dest = new String[lines];

        for (int i = 0; i < lines; i++) {
            int start = i * count;
            dest[i] = src.substring(start, start + Math.min(count, length - start));
        }
        return dest;
    }

}

这样在打印出来的时候就没有乱码了,但是又发现根本无法人工添加换行符。

中间我尝试先把中文字符转换成ascii码,然后再向其添加对应的换行符,可是某些编辑器根本就不识别换行符。而且不同的操作系统和不同的编辑器都有不同的换行符。

最后,直接使用UltraEdit解决了问题。里面有一个现成的功能,正好就是之前说的那个需求。

不过这个过程让我了解到了:

1、在操作文件的时候,要注意字符集的问题,以免出现乱码!

2、了解到了ascii转中文,再由中文转ascii码的过程原理:

实现程序如下:

//字符转ascii码,网上找的

private static String convert(String str) {

        String tmp;
        StringBuffer sb = new StringBuffer(1000);
        char c;
        int i, j;
        sb.setLength(0);
        for(i = 0; i < str.length(); i++) {
            c = str.charAt(i);
            if(c > 255) {
                sb.append("//u");
                j = (c >>> 8);
                tmp = Integer.toHexString(j);
                if(tmp.length() == 1)
                    sb.append("0");
                sb.append(tmp);
                j = (c & 0xFF);
                tmp = Integer.toHexString(j);
                if(tmp.length() == 1)
                    sb.append("0");
                sb.append(tmp);
            } else {
                sb.append(c);
            }

        }
        return (new String(sb));
    }

//ascii码转字符,自己写的

    private static String reverse(String str) {

        String tmp;
        StringBuffer sb = new StringBuffer(1000);
        char c;
        int i, j, readed = 0;
        sb.setLength(0);
        String current;
        for(readed = 0; readed < str.length();) {
            if(readed + 6 <= str.length())
                current = str.substring(readed + 0, readed + 6);
            else
                current = str.substring(readed + 0, readed + 1);

            if(current.startsWith("//u")) {
                readed += 6;
                j = Integer.valueOf("" + current.substring(2, 4), 16);
                i = Integer.valueOf("" + current.substring(4, 6), 16);
                sb.append((char) ((j << 8) + i));
            } else {
                current = str.substring(readed + 0, readed + 1);
                readed += 1;
                sb.append(current);
            }

        }
        return (new String(sb));
    }

 

最后还是成功了:

/**
*
*项目:MyFlex
*$Id$
*/

package cn.com.talkweb;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;

/**
 * Re-wraps the lines of {@code test.txt} into segments of at most 90 characters and
 * writes them, one segment per line, to {@code out.txt}. Both files are read and
 * written with the {@code "utf8"} charset.
 *
 * @author dragon
 */
public class ChangeLine {

    /**
     * Reads {@code test.txt} line by line, cuts every line into segments of at most
     * 90 characters and passes the segments to {@link #output2File}.
     *
     * @param args ignored
     * @throws Exception if a file cannot be opened, read or written
     */
    public static void main(String[] args) throws Exception {

        File file = new File("test.txt");

        if (!file.exists() || !file.canRead()) {
            System.out.println("找不到文件:" + file.getAbsolutePath());
            return;
        }
        // FileOutputStream creates (or truncates) the file itself, so no createNewFile() call is needed.
        File outFile = new File("out.txt");

        BufferedReader bufRdr = null;
        PrintWriter pw = null;
        try {
            InputStream inStream = new FileInputStream(file);
            bufRdr = new BufferedReader(new InputStreamReader(inStream, "utf8"));
            pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(outFile), "utf8"));

            String buffer;
            while ((buffer = bufRdr.readLine()) != null) {
                output2File(pw, split(buffer, 90));
            }
            // PrintWriter never throws on a failed write; it only records the error.
            if (pw.checkError()) {
                throw new IOException("Failed to write " + outFile.getAbsolutePath());
            }
        } finally {
            // Closing flushes the buffered output; without it the data may never reach the file.
            if (pw != null) {
                pw.close();
            }
            if (bufRdr != null) {
                bufRdr.close();
            }
        }
    }

    /**
     * Writes every segment to {@code pw} followed by the platform line separator and
     * echoes it to standard output prefixed with {@code "str:"}.
     *
     * @param pw  destination writer
     * @param str segments to write, in order
     * @return always {@code true}
     * @throws IOException declared for callers; not thrown by this implementation
     */
    public static boolean output2File(PrintWriter pw, String[] str) throws IOException {

        for (int i = 0; i < str.length; i++) {
            // println (rather than print) is the change that finally produces the line breaks.
            pw.println(str[i]);
            System.out.println("str:" + str[i]);
        }
        return true;
    }

    /**
     * Cuts {@code src} into consecutive segments of {@code count} characters; only the
     * last segment may be shorter. An empty {@code src} yields one empty segment, and no
     * trailing empty segment is produced when the length is an exact multiple of
     * {@code count} (such a segment would show up as a spurious blank line in the output).
     *
     * @param src   text to cut; must not be {@code null}
     * @param count maximum number of {@code char}s per segment; must be positive
     * @return the segments in order, never empty
     * @throws IllegalArgumentException if {@code count} is not positive
     */
    public static String[] split(String src, int count) {

        if (count <= 0) {
            throw new IllegalArgumentException("count must be positive: " + count);
        }
        int length = src.length();
        // Ceiling division, written without (length + count - 1) so it cannot overflow.
        int lines = length == 0 ? 1 : length / count + (length % count == 0 ? 0 : 1);
        String[] dest = new String[lines];

        for (int i = 0; i < lines; i++) {
            int start = i * count;
            dest[i] = src.substring(start, start + Math.min(count, length - start));

            // Marked spot ("标记处"): the line break could instead be appended right here, e.g.
            //     dest[i] = src.substring(...) + (char) 13 + (char) 10;
            // The author observed that either 10 or 13 seems to work, but that surplus line
            // breaks then appear, sometimes even when only one of the two is appended.
        }
        return dest;
    }

}

其实把“标记处”的语句换成注释里的那句也是可以的!

你可能感兴趣的:(string,import,file,buffer,exception,output)