使用Jsoup+AsyncTask爬取网页资源

笔记目标:
1.练习使用Jsoup的相关功能
2.练习使用AsyncTask异步任务
3.复习Android相关知识点

工具:
1.MyEclipse + Android Studio

下面贴出代码:

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/**
 * Command-line crawler: walks every page of one listing section and prints the
 * {@code src} attribute of each image found inside the posts.
 *
 * <p>The page count is derived from the pagination links on the first page
 * (number of links + 1). {@code TYPE} is declared outside this class.
 */
public class Test {
    private static final String BASE_URL = "http://www.xxx.xxx";// site root
    private static final String DETAILS_URL = "123.htm";// detail page
    private static int currentPageNum = 1;// page currently being crawled
    private static int totalPageNum = 0;// total number of pages
    private static int resCount = 0;// total number of image resources found

    /**
     * Crawls all pages of the section and prints one image URL per line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            int firstPageNum = currentPageNum;
            Document firstPage = Jsoup.connect(pageUrl(firstPageNum)).get();
            totalPageNum = firstPage.select("div #page-links").select("a")
                    .size() + 1;
            for (int page = firstPageNum; page <= totalPageNum; page++) {
                currentPageNum = page;
                // The first page was already downloaded to read the
                // pagination, so reuse it instead of fetching it again.
                Document doc = (page == firstPageNum) ? firstPage
                        : Jsoup.connect(pageUrl(page)).get();
                // Iterate the images themselves; bounding the loop by the
                // anchor count and indexing into the image list overruns as
                // soon as one anchor has no <img> child.
                for (Element image : doc.select("div .post").select(
                        "p a img")) {
                    String imageSrc = image.attr("src");
                    System.out.println(imageSrc);
                    resCount++;
                }
            }
        } catch (IOException e) {
            // Any failed request aborts the crawl; report which page failed.
            System.err.println("Failed to fetch page " + currentPageNum
                    + " of " + totalPageNum);
            e.printStackTrace();
        }
    }

    /**
     * Builds the URL of one listing page.
     *
     * <p>NOTE(review): the original counted pages in the {@code TYPE.anime}
     * section but crawled a different section with that count. A single
     * section is used for both now - confirm this is the intended one.
     *
     * @param pageNum 1-based page number
     * @return absolute URL of that page
     */
    private static String pageUrl(int pageNum) {
        return BASE_URL + "/" + TYPE.anime + "/" + DETAILS_URL + "/"
                + pageNum;
    }
}

你可能感兴趣的:(使用Jsoup+AsyncTask爬取网页资源)