利用Java的OkHttp和JTable实现简单网络爬虫和显示

任务:

1.爬取网页元素:https://www.angular.cn/events

2.找到网页中的table内容

3.利用Java的JTable进行显示

import java.awt.*;
import java.io.IOException;
import java.util.ArrayList;
import javax.swing.*;
import okhttp3.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class demo {

    public static ArrayList > list= new ArrayList >();
     /**
     * 通过url下载网页内容
     * @param url 网页链接
     * @return 返回网页所有内容
     */
    public static String downloadHtml(String url) {
        String body = null;
        OkHttpClient client = new OkHttpClient();
        //请求
        Request request = new Request.Builder().url(url).build();
        //发起请求
        try {
            Response response = client.newCall(request).execute();
            body = new String(response.body().bytes());

        } catch (Exception e) {
            e.printStackTrace();
        }
        return body;//取得目标
    }

    //找到对应元素存于list中
    public static ArrayList> Find_activity(String html) throws Exception {
        ArrayList > list = new ArrayList>();
        ArrayList list_name = new ArrayList();
        ArrayList list_place = new ArrayList();
        ArrayList list_date = new ArrayList();
        Document doc = Jsoup.parse(html);
        Elements elements = doc.getElementsByClass("is-full-width");
        for (Element element : elements) {
            if (element.text() != null && !"".equals(element.text())) {
                Elements es = element.select("tr");
                for (Element tdelement : es) {
                    Elements event = tdelement.select("th");
                    // System.out.println(event.get(0).text());
                    list_name.add(event.get(0).text());
                    Elements tdes = tdelement.select("td");
                    for(int i = 0; i < tdes.size(); i++){
                        if(i%2 == 0){
                            list_place.add(tdes.get(i).text());
                        }else{
                            list_date.add(tdes.get(i).text());
                        }
                    }
                }
            }
        }
        //list_name多于一个元素“活动”剔除
        list_name.remove(0);
        list.add(list_name);
        list.add(list_place);
        list.add(list_date);
        return list;
    }

    public ArrayList > download_getcontentes() {
        demo d = new demo();
        try {
            String body = new demo().downloadHtml("https://www.angular.cn/events");
            list = d.Find_activity(body);
        } catch (Exception e) {
        }
        return list;
    }

    static class jtable extends JFrame {
        ArrayList list1 = list.get(0);
        ArrayList list2 = list.get(1);
        ArrayList list3 = list.get(2);

        private String[][] data = {
                {list1.get(0), list2.get(0), list3.get(0)},
                {list1.get(1), list2.get(1), list3.get(1)},
                {list1.get(2), list2.get(2), list3.get(2)},
                {list1.get(3), list2.get(3), list3.get(3)},
                {list1.get(4), list2.get(4), list3.get(4)}
        };

        private String[] dataTitle = {"活动","地点","时间"};
        private JTable jtable = new JTable(data, dataTitle);
        private JScrollPane jscrollpane = new JScrollPane(jtable);

        public jtable() {
            setTitle("Angular活动");
            setBounds(500, 1000, 600, 600);
            setVisible(true);
            setFont(new Font("楷体", 0, 50));
            setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
            add(jscrollpane, BorderLayout.CENTER);
        }
    }

    public static void main(String[] args) {
        //爬取网页元素放入链表当中
        new demo().download_getcontentes();
        //用jtable显示内容
        new jtable();
    }
}	

 

你可能感兴趣的:(Java)