spring boot jsoup 爬取数据

pom.xml

    <properties>
        <maven.compiler.source>17</maven.compiler.source>
        <maven.compiler.target>17</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.16.1</version>
        </dependency>
    </dependencies>

ChinaBrand.java

package com.jm.bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;

import java.util.List;

/**
 * Plain data holder for the brand details scraped from a brand detail page.
 *
 * <p>Accessors, {@code equals}/{@code hashCode}/{@code toString} and both
 * constructors are generated by Lombok. Because of
 * {@code @Accessors(chain = true)} the generated setters return {@code this},
 * so calls can be chained. The all-args constructor takes its parameters in
 * field declaration order, so do not reorder the fields.
 */
@Data
@Accessors(chain = true)
@NoArgsConstructor
@AllArgsConstructor
public class ChinaBrand {
    /** Logo image location ({@code src} attribute of the brand logo image). */
    private String logo;
    /** Brand name ({@code alt} attribute of the brand logo image). */
    private String name;
    /** Company text shown on the brand page. */
    private String company;
    /** Text of the first detail field on the page; presumably the brand's region. */
    private String area;
    /** Text of the second detail field on the page; presumably a founding date, kept as raw text. */
    private String date;
    /** Text of the third detail field on the page; presumably the industry/category. */
    private String industry;
    /** {@code src} values of the images found in the brand description. */
    private List<String> images;
    /** Brand description; the scraper stores the concatenated HTML of the description paragraphs. */
    private String info;
}

JsoupServiceImpl.java

package com.jm.service.impl.jsoup;

import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;
import com.jm.bean.ChinaBrand;
import com.jm.service.i.jsoup.JsoupService;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Service;

import java.util.ArrayList;
import java.util.List;

@Slf4j
@Service
public class JsoupServiceImpl implements JsoupService {

    public static void main(String[] args) {
        JsoupService jsoupService = new JsoupServiceImpl();
        jsoupService.brand();
    }

    @Override
    public Boolean brand() {
        //分类页
        //https://www.chinapp.com/brand/184
        //品牌详情页
        //https://www.chinapp.com/pinpai/3.html


        String target = "https://www.chinapp.com/pinpai/3.html";
        try {
            Document doc = Jsoup.connect(target)
                    .ignoreContentType(true)
                    .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36")
                    .timeout(300000)
                    .header("referer", "www.chinapp.com")
                    .get();
            Elements elements = doc.select(".brandleft img");

            ChinaBrand brand = new ChinaBrand();
            //jsoup 与jquery 标签选择器 一样获取标签
            this.setLogAndName(brand, doc, ".brandleft img");
            this.setCompany(brand, doc, ".company_name_center p");
            this.setAreaAndDateAndIndustry(brand, doc, ".brandCon");
            this.setImages(brand, doc, "#paracontent img");
            this.setInfo(brand, doc, "#paracontent p");

            System.out.println(JSON.toJSONString(brand));
        } catch (Exception e) {
            e.printStackTrace();
        }
        return Boolean.FALSE;
    }

    private void setInfo(ChinaBrand brand, Document doc, String tag) {
        Elements elements = doc.select(tag);
        StringBuilder sb = new StringBuilder();
        for (Element e : elements) {
            sb.append(e.toString());
        }
        brand.setInfo(sb.toString());
    }

    private void setImages(ChinaBrand brand, Document doc, String tag) {
        List<String> images = new ArrayList<>(10);
        Elements elements = doc.select(tag);
        for (Element e : elements) {
            String image = e.attr("src");
            images.add(image);
        }
        brand.setImages(images);
    }

    private void setAreaAndDateAndIndustry(ChinaBrand brand, Document doc, String tag) {
        JSONObject json = new JSONObject();
        Elements elements = doc.select(tag);
        for (Element e : elements) {
            Element class0 = e.getElementsByClass("jiucuo").get(0);
            String area = class0.child(0).text();
            brand.setArea(area);

            Element class1 = e.getElementsByClass("jiucuo").get(1);
            String date = class1.child(0).text();
            brand.setDate(date);

            Element class2 = e.getElementsByClass("jiucuo").get(2);
            String industry = class2.child(0).text();
            brand.setIndustry(industry);
        }
    }

    private void setCompany(ChinaBrand brand, Document doc, String tag) {
        JSONObject json = new JSONObject();
        Elements elements = doc.select(tag);
        for (Element e : elements) {
            String company = e.text();
            brand.setCompany(company);
        }
    }

    public void setLogAndName(ChinaBrand brand, Document doc, String tag) {
        JSONObject json = new JSONObject();
        Elements elements = doc.select(tag);
        for (Element e : elements) {
            String log = e.attr("src");
            String name = e.attr("alt");
            brand.setLogo(log);
            brand.setName(name);
        }
    }
}

你可能感兴趣的:(spring,boot,java,后端)