jsoup爬虫图文实战

jsoup爬虫实战

爬取地址:http://wufazhuce.com/

这是一个很不错的网站,每天会更新一句鸡汤和一幅配图。下面演示如何用 jsoup 爬取最近 7 天的鸡汤及配图。

  • 完整接口示范

    import com.example.api.util.Res;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    import org.springframework.stereotype.Controller;
    import org.springframework.web.bind.annotation.RequestMapping;
    import org.springframework.web.bind.annotation.ResponseBody;
    
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.LinkedHashMap;
    
    /**
     * Spring MVC controller that scrapes the home-page carousel of
     * http://wufazhuce.com/ and returns its entries as JSON.
     *
     * @author Ktry
     * @since 2020/3/19 23:29
     */
    @Controller
    public class OneGe {

        /**
         * Downloads the home page and extracts one entry per carousel slide.
         *
         * <p>Each entry is a map with the keys {@code img} (the image {@code src}
         * attribute), {@code text} (the quote) and {@code time} (the texts of the
         * {@code dom} and {@code may} elements joined by a space). The number of
         * entries is whatever the carousel currently contains (seven in the sample
         * output of this article).
         *
         * @return a {@code Res} with code {@code "200"} and the list of entries on
         *         success, or code {@code "-1"} and an error message when the page
         *         cannot be fetched or the carousel container is missing
         */
        @ResponseBody
        @RequestMapping("imgText")
        public Res imgText() {
            Document doc;
            try {
                doc = Jsoup.connect("http://wufazhuce.com/").get();
            } catch (IOException e) {
                // Network or HTTP failure: report it to the caller instead of propagating.
                return new Res("-1","访问错误");
            }

            // getElementById returns null when the id is absent (e.g. the site changed its layout).
            Element body = doc.getElementById("carousel-one");
            if (body == null) {
                return new Res("-1","解析错误");
            }

            Elements imgs = body.getElementsByClass("fp-one-imagen");
            Elements texts = body.getElementsByClass("fp-one-cita");
            Elements times1 = body.getElementsByClass("dom");
            Elements times2 = body.getElementsByClass("may");

            // The four lists are read in lockstep; stop at the shortest one so an
            // incomplete slide cannot cause an IndexOutOfBoundsException.
            int count = Math.min(
                    Math.min(imgs.size(), texts.size()),
                    Math.min(times1.size(), times2.size()));

            ArrayList<LinkedHashMap<String, String>> entries = new ArrayList<>(count);
            for (int i = 0; i < count; i++) {
                // LinkedHashMap keeps img/text/time in insertion order in the JSON output.
                LinkedHashMap<String, String> entry = new LinkedHashMap<>();
                entry.put("img", imgs.get(i).attr("src"));
                entry.put("text", texts.get(i).text());
                entry.put("time", times1.get(i).text() + " " + times2.get(i).text());
                entries.add(entry);
            }

            return new Res("200", entries);
        }
    }
    

    结果集实体类

    import lombok.AllArgsConstructor;
    import lombok.Data;
    import lombok.NoArgsConstructor;
    import lombok.ToString;
    
    /**
     * Generic response envelope: a status code plus an arbitrary payload.
     *
     * <p>Accessors, equals/hashCode/toString and both constructors are generated
     * by the Lombok annotations below.
     * NOTE(review): {@code @Data} already implies {@code @ToString}, so the
     * explicit annotation looks redundant — confirm before removing.
     *
     * @author Ktry
     * @since 2020/3/19 23:57
     */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @ToString
    public class Res{
        // Status code as a string; the controller in this article uses "200" for success and "-1" for failure.
        private String code;
        // Payload: the result list on success, or an error message string on failure.
        private Object data;
    }
    

    效果

    {
      "code": "200",
      "data": [
        {
          "img": "http://image.wufazhuce.com/FtwQJesJhVV0Ho_iaanwPF4QnDPw",
          "text": "我并不期待人生可以过得很顺利,但我希望碰到人生难关的时候,自己可以是它的对手。",
          "time": "21 Mar 2020"
        },
        {
          "img": "http://image.wufazhuce.com/FobC3u_uHKxmnc8gf_kOc6loL-gv",
          "text": "当一个人不能拥有的时候,他唯一能做的便是不要忘记。",
          "time": "20 Mar 2020"
        },
        {
          "img": "http://image.wufazhuce.com/Fp-WZpBGvXVtnDTpIH3IuQDtnAQN",
          "text": "改变心态只需一分钟,而这一分钟却能改变一整天。",
          "time": "19 Mar 2020"
        },
        {
          "img": "http://image.wufazhuce.com/Fm-faU1mWIBGdREYoq_SxbueMx8q",
          "text": "我们每个人都是宇宙的囚徒。",
          "time": "18 Mar 2020"
        },
        {
          "img": "http://image.wufazhuce.com/Frjvh22RpfARajcvPKinwhwsPHOM",
          "text": "对世间的一切事物报以虚无的态度其实是轻松的,真正困难的是如何勇敢地介入其中。​​​",
          "time": "17 Mar 2020"
        },
        {
          "img": "http://image.wufazhuce.com/Fnpd4sv1WSdFfTZ7pFO-I9fD2610",
          "text": "有人总说:已经晚了。实际上,现在就是最好的时光。对于一个真正有所追求的人来说,生命的每个时期都是年轻的、及时的。",
          "time": "16 Mar 2020"
        },
        {
          "img": "http://image.wufazhuce.com/Fj8isdfGOFm9RQULX4p41wPsG9JW",
          "text": "维持日常生活,就是抗压的最好药方。",
          "time": "15 Mar 2020"
        }
      ]
    }
    

你可能感兴趣的:(jsoup爬虫图文实战)