问题描述:想把一个txt文件中某些太长的句子截成几段,比如某一行有上千字,我想把它截断成每行100个字符。
最开始的程序是这样的:
/**
*项目:MyFlex
*$Id$
*/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
/**
 * Re-wraps every line of {@code test.txt} into segments of at most 90 characters and
 * writes the segments to {@code out.txt}.
 *
 * <p>Known limitations of this version:
 * <ul>
 *   <li>No charset is passed to the reader or the writer, so both fall back to the
 *       platform default charset; a file stored in any other encoding is decoded
 *       incorrectly and comes out garbled.</li>
 *   <li>Segments are written with {@code print}, so no line terminator is ever emitted
 *       and the original line structure is lost.</li>
 * </ul>
 *
 * @author dragon
 */
public class ChangeLine {
    /**
     * Reads {@code test.txt} line by line, splits each line and writes the pieces to
     * {@code out.txt}.
     *
     * @param args ignored
     * @throws Exception if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws Exception {
        File file = new File("test.txt");
        if (!file.exists() || !file.canRead()) {
            System.out.println("找不到文件:" + file.getAbsolutePath());
            return;
        }
        // FileOutputStream creates the file when it is missing, so no createNewFile() is needed.
        File outFile = new File("out.txt");
        // No charset argument: the platform default charset is used for decoding.
        BufferedReader bufRdr = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
        try {
            // No charset argument here either: the platform default charset is used for encoding.
            PrintWriter pw = new PrintWriter(new FileOutputStream(outFile));
            try {
                String buffer;
                while ((buffer = bufRdr.readLine()) != null) {
                    output2File(pw, split(buffer, 90));
                }
                // PrintWriter swallows I/O errors; checkError() flushes and reports them.
                if (pw.checkError()) {
                    throw new IOException("写入文件失败:" + outFile.getAbsolutePath());
                }
            } finally {
                // Closing flushes the buffered tail; without it the end of the output is lost.
                pw.close();
            }
        } finally {
            bufRdr.close();
        }
    }

    /**
     * Writes every segment to {@code pw} back to back (no line terminator) and echoes it
     * to standard output with a {@code str:} prefix.
     *
     * @param pw  destination writer
     * @param str segments to write
     * @return always {@code true}
     * @throws IOException declared for callers; this implementation does not throw it
     */
    public static boolean output2File(PrintWriter pw, String[] str) throws IOException {
        for (String segment : str) {
            pw.print(segment);
            System.out.println("str:" + segment);
        }
        return true;
    }

    /**
     * Cuts {@code src} into consecutive segments of {@code count} characters; the last
     * segment may be shorter. An empty string yields one empty segment, and no trailing
     * empty segment is produced when the length is an exact multiple of {@code count}.
     *
     * <p>Indices are UTF-16 code units, so a supplementary character can be cut between
     * its two surrogates.
     *
     * @param src   text to cut; must not be {@code null}
     * @param count maximum segment length; must be positive
     * @return the segments in order, never empty
     * @throws IllegalArgumentException if {@code count} is not positive
     */
    public static String[] split(String src, int count) {
        if (count <= 0) {
            throw new IllegalArgumentException("count must be positive: " + count);
        }
        int length = src.length();
        // Ceiling division without overflow; at least one segment so empty input maps to {""}.
        int lines = Math.max(1, length / count + (length % count == 0 ? 0 : 1));
        String[] dest = new String[lines];
        for (int i = 0; i < lines; i++) {
            int start = i * count;
            int end = start + Math.min(count, length - start);
            dest[i] = src.substring(start, end);
        }
        return dest;
    }
}
发现打印出来的全是乱码,而且被转换后,连最基本的换行符也没有了。最后发现是由于没有设置操作流的字符集,于是加上字符集:
/**
*
*项目:MyFlex
*$Id$
*/
package cn.com.talkweb;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
/**
 * Re-wraps every line of the UTF-8 file {@code test.txt} into segments of at most 90
 * characters and writes the segments, UTF-8 encoded, to {@code out.txt}.
 *
 * <p>Known limitation of this version: segments are written with {@code print}, so no
 * line terminator is ever emitted and the original line structure is lost.
 *
 * @author dragon
 */
public class ChangeLine {
    /**
     * Reads {@code test.txt} line by line, splits each line and writes the pieces to
     * {@code out.txt}.
     *
     * @param args ignored
     * @throws Exception if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws Exception {
        File file = new File("test.txt");
        if (!file.exists() || !file.canRead()) {
            System.out.println("找不到文件:" + file.getAbsolutePath());
            return;
        }
        // FileOutputStream creates the file when it is missing, so no createNewFile() is needed.
        File outFile = new File("out.txt");
        BufferedReader bufRdr =
                new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf8"));
        try {
            PrintWriter pw =
                    new PrintWriter(new OutputStreamWriter(new FileOutputStream(outFile), "utf8"));
            try {
                String buffer;
                while ((buffer = bufRdr.readLine()) != null) {
                    output2File(pw, split(buffer, 90));
                }
                // PrintWriter swallows I/O errors; checkError() flushes and reports them.
                if (pw.checkError()) {
                    throw new IOException("写入文件失败:" + outFile.getAbsolutePath());
                }
            } finally {
                // Closing flushes the buffered tail; without it the end of the output is lost.
                pw.close();
            }
        } finally {
            bufRdr.close();
        }
    }

    /**
     * Writes every segment to {@code pw} back to back (no line terminator) and echoes it
     * to standard output with a {@code str:} prefix.
     *
     * @param pw  destination writer
     * @param str segments to write
     * @return always {@code true}
     * @throws IOException declared for callers; this implementation does not throw it
     */
    public static boolean output2File(PrintWriter pw, String[] str) throws IOException {
        for (String segment : str) {
            pw.print(segment);
            System.out.println("str:" + segment);
        }
        return true;
    }

    /**
     * Cuts {@code src} into consecutive segments of {@code count} characters; the last
     * segment may be shorter. An empty string yields one empty segment, and no trailing
     * empty segment is produced when the length is an exact multiple of {@code count}.
     *
     * <p>Indices are UTF-16 code units, so a supplementary character can be cut between
     * its two surrogates.
     *
     * @param src   text to cut; must not be {@code null}
     * @param count maximum segment length; must be positive
     * @return the segments in order, never empty
     * @throws IllegalArgumentException if {@code count} is not positive
     */
    public static String[] split(String src, int count) {
        if (count <= 0) {
            throw new IllegalArgumentException("count must be positive: " + count);
        }
        int length = src.length();
        // Ceiling division without overflow; at least one segment so empty input maps to {""}.
        int lines = Math.max(1, length / count + (length % count == 0 ? 0 : 1));
        String[] dest = new String[lines];
        for (int i = 0; i < lines; i++) {
            int start = i * count;
            int end = start + Math.min(count, length - start);
            dest[i] = src.substring(start, end);
        }
        return dest;
    }
}
这样在打印出来的时候就没有乱码了,但是又发现根本无法人工添加换行符。
中间我尝试先把中文字符转换成ascii码,然后再向其添加对应的换行符,可是某些编辑器根本就不识别换行符。而且不同的操作系统和不同的编辑器都有不同的换行符。
最后,直接使用UltraEdit解决了问题。里面有一个现成的功能,正好满足之前说的那个需求。
不过这个过程让我了解到了:
1、在操作文件的时候,要注意字符集的问题,以免出现乱码!
2、了解到了ascii转中文,再由中文转ascii码的过程原理:
实现程序如下:
/**
 * Escapes every non-ASCII character of {@code str} as a backslash-u sequence followed by
 * four lowercase hexadecimal digits and leaves ASCII characters untouched (adapted from a
 * snippet found online).
 *
 * <p>Each UTF-16 code unit is escaped on its own, so a supplementary character becomes
 * two consecutive escapes. Backslashes already present in the input are copied as-is, so
 * input that already contains such a sequence cannot be told apart from an escaped
 * character afterwards.
 *
 * @param str text to escape; must not be {@code null}
 * @return the escaped text, consisting of ASCII characters only
 */
private static String convert(String str) {
    StringBuilder sb = new StringBuilder(str.length() * 2);
    for (int i = 0; i < str.length(); i++) {
        char c = str.charAt(i);
        if (c > 127) {
            // Backslash + 'u' prefix; the decoder expects exactly this 2-character prefix.
            sb.append("\\u");
            String hex = Integer.toHexString(c);
            // Left-pad to four digits so every escape has a fixed width of six characters.
            for (int pad = hex.length(); pad < 4; pad++) {
                sb.append('0');
            }
            sb.append(hex);
        } else {
            sb.append(c);
        }
    }
    return sb.toString();
}
/**
 * Decodes backslash-u escapes (a backslash, the letter {@code u} and exactly four
 * hexadecimal digits) back into the characters they stand for; all other text is copied
 * unchanged (written by the original author as the counterpart of the escaping routine).
 *
 * <p>A backslash-u that is not followed by four hexadecimal digits, including one cut off
 * by the end of the string, is copied literally instead of raising an exception.
 *
 * @param str text that may contain escapes; must not be {@code null}
 * @return the decoded text
 */
private static String reverse(String str) {
    int len = str.length();
    StringBuilder sb = new StringBuilder(len);
    int pos = 0;
    while (pos < len) {
        char ch = str.charAt(pos);
        // An escape occupies six characters: the 2-character prefix plus four hex digits.
        if (ch == '\\' && pos + 6 <= len && str.charAt(pos + 1) == 'u') {
            int code = 0;
            boolean valid = true;
            for (int k = pos + 2; k < pos + 6; k++) {
                char h = str.charAt(k);
                // Restrict to ASCII so that non-ASCII Unicode digits are not accepted as hex.
                int digit = (h < 128) ? Character.digit(h, 16) : -1;
                if (digit < 0) {
                    valid = false;
                    break;
                }
                code = (code << 4) | digit;
            }
            if (valid) {
                sb.append((char) code);
                pos += 6;
                continue;
            }
        }
        sb.append(ch);
        pos++;
    }
    return sb.toString();
}
最后还是成功了:
/**
*
*项目:MyFlex
*$Id$
*/
package cn.com.talkweb;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
/**
 * Re-wraps every line of the UTF-8 file {@code test.txt} into lines of at most 90
 * characters and writes them, UTF-8 encoded, to {@code out.txt}.
 *
 * <p>Each segment is written with {@code println}, so every segment ends with the
 * platform line separator.
 *
 * @author dragon
 */
public class ChangeLine {
    /**
     * Reads {@code test.txt} line by line, splits each line and writes the pieces to
     * {@code out.txt}, one piece per output line.
     *
     * @param args ignored
     * @throws Exception if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws Exception {
        File file = new File("test.txt");
        if (!file.exists() || !file.canRead()) {
            System.out.println("找不到文件:" + file.getAbsolutePath());
            return;
        }
        // FileOutputStream creates the file when it is missing, so no createNewFile() is needed.
        File outFile = new File("out.txt");
        BufferedReader bufRdr =
                new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf8"));
        try {
            PrintWriter pw =
                    new PrintWriter(new OutputStreamWriter(new FileOutputStream(outFile), "utf8"));
            try {
                String buffer;
                while ((buffer = bufRdr.readLine()) != null) {
                    output2File(pw, split(buffer, 90));
                }
                // PrintWriter swallows I/O errors; checkError() flushes and reports them.
                if (pw.checkError()) {
                    throw new IOException("写入文件失败:" + outFile.getAbsolutePath());
                }
            } finally {
                // Closing flushes the buffered tail; without it the end of the output is lost.
                pw.close();
            }
        } finally {
            bufRdr.close();
        }
    }

    /**
     * Writes every segment to {@code pw} on its own line and echoes it to standard output
     * with a {@code str:} prefix.
     *
     * @param pw  destination writer
     * @param str segments to write
     * @return always {@code true}
     * @throws IOException declared for callers; this implementation does not throw it
     */
    public static boolean output2File(PrintWriter pw, String[] str) throws IOException {
        for (String segment : str) {
            // println (rather than print) is the change that makes the line breaks appear.
            pw.println(segment);
            System.out.println("str:" + segment);
        }
        return true;
    }

    /**
     * Cuts {@code src} into consecutive segments of {@code count} characters; the last
     * segment may be shorter. An empty string yields one empty segment (so blank input
     * lines survive), and no trailing empty segment is produced when the length is an
     * exact multiple of {@code count} (which would otherwise print a spurious blank line).
     *
     * <p>Indices are UTF-16 code units, so a supplementary character can be cut between
     * its two surrogates.
     *
     * @param src   text to cut; must not be {@code null}
     * @param count maximum segment length; must be positive
     * @return the segments in order, never empty
     * @throws IllegalArgumentException if {@code count} is not positive
     */
    public static String[] split(String src, int count) {
        if (count <= 0) {
            throw new IllegalArgumentException("count must be positive: " + count);
        }
        int length = src.length();
        // Ceiling division without overflow; at least one segment so empty input maps to {""}.
        int lines = Math.max(1, length / count + (length % count == 0 ? 0 : 1));
        String[] dest = new String[lines];
        for (int i = 0; i < lines; i++) {
            int start = i * count;
            int end = start + Math.min(count, length - start);
            // Marked spot ("标记处"): an alternative to println is to append the line break
            // here, e.g. src.substring(start, end) + (char) 13 + (char) 10. Per the original
            // note, either 13 (CR) or 10 (LF) seemed to work, but surplus line breaks showed
            // up, sometimes even when only one of the two was appended.
            dest[i] = src.substring(start, end);
        }
        return dest;
    }
}
其实把“标记处”的语句换成注释里的那一句也是可以的!