基于jsoup获取全国省市区区域编码

基于jsoup获取全国省市区区域编码

本文获取全国省市区区域编码路径:http://www.mca.gov.cn//article/sj/xzqh/2020/202006/202008310601.shtml

基于jsoup获取全国省市区区域编码_第1张图片


文章目录

  • 基于jsoup获取全国省市区区域编码
  • 前言
  • 一、jsoup是什么?
  • 二、使用步骤
    • 1.引入库
    • 2.ChinaRegionsInfo.java 实体类
    • 3.爬取省市区区域代码实例
  • 总结


前言

省市区区域编码


一、jsoup是什么?

jsoup是一个用于解析HTML的Java库,可以从URL、文件或字符串中读取HTML,并通过DOM遍历或CSS选择器提取其中的数据。

二、使用步骤

1.引入库

  <!-- jsoup: used to parse HTML -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.11.2</version>
        </dependency>

2.ChinaRegionsInfo.java 实体类

/**
 * Mutable data holder describing one administrative region entry:
 * its code, its name, its level type and the code of its parent region.
 */
public class ChinaRegionsInfo {

    /** Administrative region code. */
    private String code;

    /** Administrative region name. */
    private String name;

    /** Administrative region type: 1 = province, 2 = city, 3 = district or county. */
    private Integer type;

    /** Code of the administrative region one level above this one. */
    private String parentCode;

    // ---- accessors -------------------------------------------------------

    /** @return the administrative region code */
    public String getCode() {
        return this.code;
    }

    /** @return the administrative region name */
    public String getName() {
        return this.name;
    }

    /** @return the region type (1, 2 or 3 as described on the field) */
    public Integer getType() {
        return this.type;
    }

    /** @return the code of the parent region */
    public String getParentCode() {
        return this.parentCode;
    }

    // ---- mutators --------------------------------------------------------

    /** @param code the administrative region code to store */
    public void setCode(String code) {
        this.code = code;
    }

    /** @param name the administrative region name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @param type the region type to store */
    public void setType(Integer type) {
        this.type = type;
    }

    /** @param parentCode the parent region code to store */
    public void setParentCode(String parentCode) {
        this.parentCode = parentCode;
    }

    /**
     * Renders all four fields in the form
     * {@code ChinaRegionsInfo{code='..', name='..', type=.., parentCode='..'}}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("ChinaRegionsInfo{");
        text.append("code='").append(code).append('\'');
        text.append(", name='").append(name).append('\'');
        text.append(", type=").append(type);
        text.append(", parentCode='").append(parentCode).append('\'');
        text.append('}');
        return text.toString();
    }
}

3.爬取省市区区域代码实例

  // Address of the web page to scrape
    private static final String URL = "http://www.mca.gov.cn//article/sj/xzqh/2020/202006/202008310601.shtml";

    /**
     * Downloads the page at {@code URL}, walks the rows of its first {@code <tbody>}
     * and prints the collected province / city / district entries as a JSON array.
     *
     * <p>Row classification follows the page markup: a code cell carrying the CSS class
     * {@code xl7030796} is a province or a city (a city when the name cell's HTML contains
     * {@code span}); any other non-empty code cell is a district or county.
     *
     * @param args unused
     * @throws IOException if the page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        List<ChinaRegionsInfo> regionsInfoList = new ArrayList<>();
        // Fetch and parse the page.
        Document document = Jsoup.connect(URL).get();
        // first() yields null when the page has no <tbody>; get(0) would throw
        // IndexOutOfBoundsException and make the null check below pointless.
        Element element = document.getElementsByTag("tbody").first();
        String provinceCode = ""; // code of the most recent province row
        String cityCode = "";     // code of the most recent city row within that province
        if (Objects.nonNull(element)) {
            Elements trs = element.getElementsByTag("tr");
            // Rows 0-2 are skipped, as in the original scraper (presumably header rows).
            for (int i = 3; i < trs.size(); i++) {
                Elements tds = trs.get(i).getElementsByTag("td");
                if (tds.size() < 3) {
                    continue;
                }
                Element codeCell = tds.get(1); // administrative region code
                Element nameCell = tds.get(2); // administrative region name
                String code = codeCell.text();
                if (!StringUtils.isNotEmpty(code)) {
                    continue;
                }
                String name = nameCell.text();

                if (!codeCell.classNames().contains("xl7030796")) {
                    // District or county: parent is the current city, or the province
                    // when no city row has been seen inside this province.
                    String parent = StringUtils.isNotEmpty(cityCode) ? cityCode : provinceCode;
                    regionsInfoList.add(buildRegion(code, name, 3, parent));
                } else if (nameCell.toString().contains("span")) {
                    // City level.
                    regionsInfoList.add(buildRegion(code, name, 2, provinceCode));
                    cityCode = code;
                } else {
                    // Province level.
                    regionsInfoList.add(buildRegion(code, name, 1, ""));
                    provinceCode = code;
                    // Forget the previous province's last city; otherwise districts of a
                    // province without city rows would be attached to that stale city.
                    cityCode = "";
                }
            }
        }
        // Print the result.
        System.out.println(JSONArray.toJSONString(regionsInfoList));
    }

    /** Creates a region entry populated with the given code, name, type and parent code. */
    private static ChinaRegionsInfo buildRegion(String code, String name, int type, String parentCode) {
        ChinaRegionsInfo region = new ChinaRegionsInfo();
        region.setCode(code);
        region.setName(name);
        region.setType(type);
        region.setParentCode(parentCode);
        return region;
    }

总结

本文只是一个简单的获取省市区区域编码的案例!非原创!原创是谁俺也忘了!俺很久以前看见了一篇博文!通过那篇博文开启了俺对jsoup的启蒙!感谢!

你可能感兴趣的:(java)