程序如下:
import java.io.*;
import java.net.URL;
import java.util.StringTokenizer;
public class HTMLParse {
//***************分析新闻文章开始***************
private static String URL = "http://sports.tom.com/1019/1042/2005313-552400.html";//(new JSpider()).getnewsLink();
// Http连接
// 获取HTML代码,分析后依次返回获取的HTML行
public static String getHtml() {
BufferedReader reader = null;
String line = "";
String title = "";
String time = "";
String content = "";
String temp = "";
try {
URL url = new URL(URL); // Create the URL
reader = new BufferedReader(new InputStreamReader(url.openStream()));
line = reader.readLine();
while(line != null) {
if (line.indexOf("<!--title-->") != -1) {
title = line;
}
if (line.indexOf("<!--time-->") != -1) {
time = line;
}
if (line.indexOf("<!--content-->") != -1) {
content = line;
}
System.out.println(line);
line = reader.readLine();
}
temp = title + time + content;
//System.out.println(temp);
} catch (Exception e) {
System.err.println(e);
} finally {
try {
reader.close();
} catch (Exception e) {
}
}
return temp;
}
// 将获取的HTML保存到文件中
public static void saveToHtml(String line) {
try {
// 获取HTML,生成一个文件
PrintWriter NewFile = new PrintWriter(new BufferedWriter(
new FileWriter(getName())));
// 向文件中写入获取的HTML
NewFile.print(line);
NewFile.close();
} catch (IOException e) {
e.printStackTrace();
}
}
// 获取要保存的文件名
public static String getName() {
StringTokenizer st = new StringTokenizer(URL, "/");
int ct = st.countTokens() - 1;
for (int i = 0; i < ct; i++) {
st.nextToken();
if (i == (ct - 1)) {
return st.nextToken();
}
}
return null;
}
//***************分析新闻文章结束***************
public static void main(String[] args) {
HTMLParse t = new HTMLParse();
//t.setURL("http://sports.tom.com/1019/1094/2005119-512557.html");
System.out.println(URL);
saveToHtml(getHtml());
}
}
如果URL地址是http://sports.tom.com/1019/1042/2005313-552400.html,那么print出来的是乱码,如果把URL改成http://www.tcren.org/bbs,print出来的就是正确的,再把URL改成http://sports.tom.com/1019/1042/2005313-552400.html,就可以print正确的了
但是如果再改成http://sports.tom.com/1019/1094/2005119-512557.html,又是乱码
如果把reader = new BufferedReader(new InputStreamReader(url.openStream()));改成reader = new BufferedReader(new InputStreamReader(url.openStream(), "gb231"));也没用
啊~~~怎么回事??