使用 Jsoup 获取 CSDN 文章链接和标题的方法

附件:jsoup-1.8.1.jar



import org.jsoup.Jsoup;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;

/**
 * Example program that lists the links found inside one container element of
 * a web page.
 *
 * <p>{@link #reslove(String)} scans the element with id {@code article_list}
 * and {@link #reslovepage(String)} scans the element with id
 * {@code article_content}. Each link is printed to standard output in the
 * form {@code [text](absolute-url)}.
 */
public class ListLinksFromURL {

    /** Page fetched by {@link #main(String[])} when no URL argument is supplied. */
    private static final String DEFAULT_URL =
            "http://blog.csdn.net/fei20121106/article/category/2924169";

    /** Browser-like user agent sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31";

    /**
     * Entry point.
     *
     * @param args optional; when present, {@code args[0]} is the URL to fetch,
     *             otherwise {@link #DEFAULT_URL} is used
     * @throws IOException if the page cannot be fetched or lacks the expected container
     */
    public static void main(String[] args) throws IOException {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        print("Fetching %s...", url);

        reslove(url);
    }

    /**
     * Fetches {@code url} and prints every link inside the element with id
     * {@code article_list}, from the last link in document order to the first.
     *
     * <p>Links whose text is exactly "阅读" or "评论" are not printed
     * (presumably the per-article read/comment counters - TODO confirm). The
     * printed count is the total number of links found, including skipped ones.
     *
     * @param url address of the page to scan
     * @throws IOException if the page cannot be fetched or has no
     *                     {@code article_list} element
     */
    public static void reslove(String url) throws IOException {
        Elements links = fetchContainer(url, "article_list").select("a[href]");

        print("\nLinks: (%d)", links.size());
        // Walk backwards so the last link on the page is printed first.
        for (int i = links.size() - 1; i >= 0; --i) {
            Element link = links.get(i);
            if (!matchesAny(link.text(), "阅读", "评论")) {
                print("- [%s](%s)", link.text(), link.attr("abs:href"));
            }
        }
    }

    /**
     * Fetches {@code url} and prints every link inside the element with id
     * {@code article_content}, in document order.
     *
     * <p>Links whose text is exactly "copy" or "view plain" are not printed.
     * Link text longer than 35 characters is shortened. The printed count is
     * the total number of links found, including skipped ones.
     *
     * @param url address of the page to scan
     * @throws IOException if the page cannot be fetched or has no
     *                     {@code article_content} element
     */
    public static void reslovepage(String url) throws IOException {
        Elements links = fetchContainer(url, "article_content").select("a[href]");

        print("\nLinks: (%d)", links.size());
        for (Element link : links) {
            if (!matchesAny(link.text(), "copy", "view plain")) {
                print(" - [%s](%s)", trim(link.text(), 35), link.attr("abs:href"));
            }
        }
    }

    /**
     * Downloads {@code url} and returns the element with the given id.
     *
     * @throws IOException if the request fails or no element has that id
     */
    private static Element fetchContainer(String url, String elementId) throws IOException {
        Document doc = Jsoup.connect(url).userAgent(USER_AGENT).get();
        Element container = doc.getElementById(elementId);
        if (container == null) {
            // getElementById yields null for a missing id; fail with context
            // instead of a bare NullPointerException further down.
            throw new IOException("No element with id \"" + elementId + "\" found at " + url);
        }
        return container;
    }

    /** Returns true when {@code text} equals one of {@code labels}. */
    private static boolean matchesAny(String text, String... labels) {
        for (String label : labels) {
            if (label.equals(text)) {
                return true;
            }
        }
        return false;
    }

    /** Formats {@code msg} with {@code args} and writes it as one line to standard output. */
    private static void print(String msg, Object... args) {
        System.out.println(String.format(msg, args));
    }

    /**
     * Shortens {@code s} to at most {@code width} characters; a shortened
     * string ends with "." in place of the removed tail.
     */
    private static String trim(String s, int width) {
        if (s.length() <= width) {
            return s;
        }
        if (width <= 0) {
            // Nothing fits; avoids substring(0, -1) throwing.
            return "";
        }
        return s.substring(0, width - 1) + ".";
    }
}

你可能感兴趣的:(Jsoup获取csdn文章链接和标题的方法)