package bbt.servleet.request;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
public class Paqu {
public static void main(String[] args) throws IOException, InterruptedException {
query(null);
}
/**
* 查询.
*
* @param address
* @throws IOException
* @throws InterruptedException
*/
public static void query(String address) throws IOException, InterruptedException {
String defaultAddress = "http://www.23us.so/files/article/html/12/12920/4999702.html"; //默认地址
if (null != address) {
defaultAddress = address;
}
Document document = Jsoup.connect(defaultAddress).get(); // 获取链接
Element element = document.getElementById("footlink"); //取下一页,使用前台js
List
boolean isNext = false;
String nextaddress = null;
for (Node node : nodeList) {
if (node.toString().contains("下一页")) {
String pageAddress = node.attr("abs:href"); //绝对url封装
nextaddress = pageAddress;
isNext = true;
}
}
uploadPage(document);
if (isNext) {
Thread.sleep(2000); // 防止请求过繁忙,封锁ip;
query(nextaddress);
}
}
/**
* 下载.
*
* @param document
* 文件
* @throws IOException
*/
public static void uploadPage(Document document) throws IOException {
String bookTitle = document.title().replace("?", "");//截取特殊字符
String bookContent = document.getElementById("contents").html();
byte[] byteContent = bookContent.getBytes(); //获取小说正体内容
File file = new File("/Users/apple-w/Downloads/xiaoqiang/book");
if (!file.exists()) {
file.mkdirs();
}
String path = "/Users/apple-w/Downloads/xiaoqiang/book" + "/" + bookTitle.split("-")[0] + ".html";
File outputFile = new File(path);
FileOutputStream fos = new FileOutputStream(outputFile);
fos.write(byteContent);
fos.close();
System.err.println(path);
}
}