很多图书馆网站都提供较为完整的网页版《中图分类法》,不同的分类目录分别位于不同的链接页面上。
为此专门写了一个遍历此类网站的Java程序,下面以“中国图书馆分类法网站”为例。
数据库创建表脚本:
-- Target table for the crawler below (SQL Server DDL): one row per extracted entry.
-- Both columns are nullable and no key or unique constraint is declared,
-- so nothing in the schema prevents duplicate rows.
CREATE TABLE [zhtclass] (
-- Receives the first INSERT parameter: the text taken from the page's "code" span.
[type] [nvarchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
-- Receives the second INSERT parameter: the link text that follows the code.
[detail] [nvarchar] (400) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO
Java代码:
import java.io.IOException;
import java.net.URL;
import java.net.URLConnection;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
/**
 * Crawls the category pages of a web edition of the Chinese Library
 * Classification and stores every (code, name) pair it finds in the
 * {@code zhtclass} table.
 *
 * <p>Each page is fetched and parsed completely before anything is written to
 * the database, so a network failure in the middle of a page can never leave
 * behind rows that a later retry would insert a second time.
 */
public class Exec {

    /** Page URL prefix; the numeric page id is appended to it. */
    private static final String BASE_URL = "http://www.ztflh.com/?c=";
    /** First page id to visit (inclusive). */
    private static final int FIRST_ID = 1;
    /** Last page id to visit (exclusive). */
    private static final int END_ID = 45837;
    /** Connect and read timeout for one HTTP request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 10000;
    /** How many times one page is requested before it is skipped. */
    private static final int MAX_ATTEMPTS = 3;

    // The six markers below can be adjusted to match the HTML of the target site.
    /** Text shown on pages that have no sub-categories; such pages are skipped. */
    private static final String NULL_FLAG = "没有下级分类";
    /** Opening of the category list; scanning starts right after it. */
    private static final String FIRST_FLAG_ALPH =
            "<ul id=\"list\" class=\"cent\" style=\"list-style:none;\"><li>";
    /** Marker preceding a classification code. */
    private static final String START_FLAG_ALPH = "<span class=\"code\">";
    /** Marker following a classification code. */
    private static final String END_FLAG_ALPH = "</span>";
    /** Marker preceding a category name (the end of the anchor's opening tag). */
    private static final String START_FLAG_NAME = "\">";
    /** Marker following a category name. */
    private static final String END_FLAG_NAME = "</a>";

    /**
     * Opens the database connection, crawls all pages and releases the JDBC
     * resources afterwards. If the driver cannot be loaded or the connection
     * cannot be established, the problem is reported and the crawl is not
     * started (there would be nowhere to store the results).
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        Connection con = null;
        PreparedStatement pstm = null;
        try {
            Class.forName("com.microsoft.jdbc.sqlserver.SQLServerDriver");
            con = DriverManager.getConnection(
                    "jdbc:microsoft:sqlserver://localhost:1433;DatabaseName=LIB",
                    "sa", "");
            pstm = con.prepareStatement("insert into zhtclass values(?,?)");
            crawl(pstm);
        } catch (ClassNotFoundException e) {
            System.out.println("JDBC driver not found: " + e.getLocalizedMessage());
        } catch (SQLException e) {
            System.out.println("Database setup failed: " + e.getLocalizedMessage());
        } finally {
            if (pstm != null) {
                try {
                    pstm.close();
                } catch (SQLException ignored) {
                    // Nothing useful can be done about a failed close at shutdown.
                }
            }
            if (con != null) {
                try {
                    con.close();
                } catch (SQLException ignored) {
                    // Nothing useful can be done about a failed close at shutdown.
                }
            }
        }
    }

    /**
     * Visits every page id in {@code [FIRST_ID, END_ID)}, printing the id as a
     * progress indicator. A page that cannot be fetched or stored is reported
     * and skipped, so a single bad page cannot stall the whole run.
     */
    private static void crawl(PreparedStatement pstm) {
        for (int i = FIRST_ID; i < END_ID; i++) {
            System.out.println(i);
            String html = fetchPageWithRetry(i);
            if (html == null) {
                continue;
            }
            try {
                store(pstm, parseEntries(html));
            } catch (SQLException e) {
                System.out.println("Page " + i + ": insert failed: "
                        + e.getLocalizedMessage());
            }
        }
    }

    /**
     * Requests one page up to {@code MAX_ATTEMPTS} times.
     *
     * @return the page text, or {@code null} if every attempt failed
     */
    private static String fetchPageWithRetry(int id) {
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            try {
                return fetchPage(id);
            } catch (IOException e) {
                System.out.println("Page " + id + ": attempt " + attempt + " of "
                        + MAX_ATTEMPTS + " failed: " + e.getLocalizedMessage());
            }
        }
        return null;
    }

    /**
     * Downloads one page as UTF-8 text. Lines are concatenated without
     * separators, which is the form the markers are matched against.
     *
     * @throws IOException if connecting or reading fails, including a failure
     *                     in the middle of the response body
     */
    private static String fetchPage(int id) throws IOException {
        URL url = new URL(BASE_URL + id);
        URLConnection urlConn = url.openConnection();
        urlConn.setReadTimeout(TIMEOUT_MILLIS);
        urlConn.setConnectTimeout(TIMEOUT_MILLIS);
        // No request body is written, so doOutput is left at its default.
        urlConn.connect();
        Scanner in = new Scanner(urlConn.getInputStream(), "UTF-8");
        try {
            StringBuilder sb = new StringBuilder();
            while (in.hasNextLine()) {
                sb.append(in.nextLine());
            }
            // Scanner hides read errors and simply reports end of input;
            // surface them so a truncated page is retried, not parsed.
            IOException readError = in.ioException();
            if (readError != null) {
                throw readError;
            }
            return sb.toString();
        } finally {
            in.close();
        }
    }

    /**
     * Extracts the (code, name) pairs from one page.
     *
     * <p>Package-private so it can be exercised without network or database.
     *
     * @param html page text; may be {@code null}
     * @return one two-element array {@code {code, name}} per entry, both
     *         trimmed; empty if the page is {@code null}, carries the
     *         "no sub-categories" text, or contains no complete entry.
     *         Parsing stops at the first entry whose closing markers are
     *         missing; the entries found before it are still returned.
     */
    static List<String[]> parseEntries(String html) {
        List<String[]> entries = new ArrayList<String[]>();
        if (html == null || html.indexOf(NULL_FLAG) >= 0) {
            return entries;
        }
        // Start after the list opening when present; otherwise scan the whole
        // page rather than starting at an arbitrary offset.
        int listStart = html.indexOf(FIRST_FLAG_ALPH);
        int pos = listStart < 0 ? 0 : listStart + FIRST_FLAG_ALPH.length();
        while (true) {
            int codeStart = html.indexOf(START_FLAG_ALPH, pos);
            if (codeStart < 0) {
                break;
            }
            codeStart += START_FLAG_ALPH.length();
            int codeEnd = html.indexOf(END_FLAG_ALPH, codeStart);
            if (codeEnd < 0) {
                break;
            }
            int nameStart = html.indexOf(START_FLAG_NAME, codeEnd);
            if (nameStart < 0) {
                break;
            }
            nameStart += START_FLAG_NAME.length();
            int nameEnd = html.indexOf(END_FLAG_NAME, nameStart);
            if (nameEnd < 0) {
                break;
            }
            entries.add(new String[] {
                html.substring(codeStart, codeEnd).trim(),
                html.substring(nameStart, nameEnd).trim()
            });
            pos = nameEnd;
        }
        return entries;
    }

    /** Inserts every entry as one row: code first, name second. */
    private static void store(PreparedStatement pstm, List<String[]> entries)
            throws SQLException {
        for (String[] entry : entries) {
            pstm.setString(1, entry[0]);
            pstm.setString(2, entry[1]);
            pstm.execute();
        }
    }
}