百度爬虫爬取官方失信被执行人信息

2018.9.28-修改:使用FastJson解析数据

1.爬取地址及使用的编码:

// Charset name passed to URLEncoder when encoding query-parameter values.
private static final String UTF_8 = "UTF-8";
// Endpoint the crawler sends its GET request to; query parameters are appended after a "?".
private static final String SPIDER_URL = "https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php";

2.爬取数据并直接以字符串形式返回(注意:读取 HTTP 响应时若不指定编码,中文会出现乱码;解决方法是在构造读取流时显式指定 UTF-8,即 new InputStreamReader(response.getEntity().getContent(), StandardCharsets.UTF_8)):

/**
 * Sends a GET request to {@code DishonestUtil.SPIDER_URL} and returns the raw response body.
 *
 * <p>Every entry of {@code params} is appended to the URL as {@code key=value}, with the value
 * URL-encoded using {@code DishonestUtil.UTF_8}. Keys are appended as-is (not encoded).
 *
 * @param params query parameters; may be {@code null} or empty, in which case the bare endpoint
 *               URL is requested. A {@code null} value is sent as an empty string.
 * @return the response body decoded as UTF-8; lines are concatenated without line terminators
 * @throws MyException if the encoding name is not supported, or if executing the request or
 *                     reading the response fails with an {@link IOException}
 */
private static String getStringDishonest(Map<String, String> params) {
    // Build the query string: key=value pairs joined with '&'.
    StringBuilder query = new StringBuilder();
    if (params != null) {
        for (Map.Entry<String, String> entry : params.entrySet()) {
            // URLEncoder.encode rejects null, so an absent value is sent as an empty one.
            String value = entry.getValue() == null ? "" : entry.getValue();
            if (query.length() > 0) {
                query.append("&");
            }
            query.append(entry.getKey());
            query.append("=");
            try {
                query.append(URLEncoder.encode(value, DishonestUtil.UTF_8));
            } catch (UnsupportedEncodingException e) {
                throw new MyException("失信人查询编码异常:"+e);
            }
        }
    }

    // Without parameters, request the bare endpoint instead of passing a null URL to HttpGet.
    String fullUrl = DishonestUtil.SPIDER_URL;
    if (query.length() > 0) {
        fullUrl = DishonestUtil.SPIDER_URL + "?" + query;
    }

    HttpClient client = HttpClients.custom().build();
    BufferedReader bufferedReader = null;
    try {
        HttpResponse response = client.execute(new HttpGet(fullUrl));
        // Decode explicitly as UTF-8; relying on the platform default garbles Chinese text.
        bufferedReader = new BufferedReader(
                new InputStreamReader(response.getEntity().getContent(), StandardCharsets.UTF_8));
        StringBuilder body = new StringBuilder();
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            body.append(line);
        }
        return body.toString();
    } catch (IOException e) {
        logger.error("失信工具IO异常:"+e.getMessage());
        throw new MyException("失信工具IO异常:"+e);
    } finally {
        // Closing is best-effort: a failure here is logged but must not mask the result.
        if (bufferedReader != null) {
            try {
                bufferedReader.close();
            } catch (IOException e) {
                logger.error("失信工具IO关闭异常:"+e.getMessage());
            }
        }
        // The client built above owns a connection pool; release it so connections do not leak.
        if (client instanceof java.io.Closeable) {
            try {
                ((java.io.Closeable) client).close();
            } catch (IOException e) {
                logger.error("失信工具IO关闭异常:"+e.getMessage());
            }
        }
    }
}

3.将返回的字符串转换为 JSON,再逐条封装到实体类中:

/**
 * Queries the dishonest-judgment-debtor list and maps each returned record to a
 * {@link DishonestCustomerInfo}.
 *
 * <p>The response is expected to be JSONP ({@code callback({...});}); the wrapper is stripped
 * before parsing. Records are read from {@code data[0].result}. The returned objects carry the
 * {@code performedName} and {@code cardNumber} that were passed in, not the values found in the
 * individual records.
 *
 * @param performedName name to search for; {@code null} is sent as an empty value
 * @param cardNumber    certificate number to search for; {@code null} is sent as an empty value
 * @param areaName      province to search in; {@code null} is sent as an empty value
 * @return the mapped records; never {@code null}, empty when the response contains no records
 */
public static List<DishonestCustomerInfo> listDishonest(String performedName, String cardNumber, String areaName) {

    // Query parameters. Null search terms become empty strings so URL-encoding cannot fail.
    Map<String, String> map = new HashMap<>();
    map.put("resource_id", "6899");
    map.put("query", "失信被执行人名单");
    map.put("cardNum", cardNumber == null ? "" : cardNumber);
    map.put("iname", performedName == null ? "" : performedName);
    map.put("areaName", areaName == null ? "" : areaName);
    map.put("ie", "utf-8");
    map.put("oe", "utf-8");
    map.put("format", "json");
    map.put("t", "1524537973200");
    map.put("cb", "jQuery110207319777455577083_1524537959352");
    map.put("_", "1524537959354");

    List<DishonestCustomerInfo> infoList = new ArrayList<>();

    // Fetch, unwrap the JSONP callback, and parse once (the parsed object is reused for logging).
    String strResult = dishonestUnwrapJsonp(getStringDishonest(map));
    JSONObject root = JSONObject.parseObject(strResult);
    logger.info("FastJson转换字符串为对象:"+ root);
    if (root == null) {
        return infoList;
    }

    JSONArray data = root.getJSONArray("data");
    if (data == null || data.isEmpty()) {
        return infoList;
    }
    JSONObject firstEntry = data.getJSONObject(0);
    JSONArray results = firstEntry == null ? null : firstEntry.getJSONArray("result");
    if (results == null) {
        return infoList;
    }

    for (int i = 0; i < results.size(); i++) {
        JSONObject record = results.getJSONObject(i);
        logger.info("JSON Object["+ i +"]:"+record);
        if (record == null) {
            continue;
        }

        DishonestCustomerInfo info = new DishonestCustomerInfo();
        info.setPerformedName(performedName);
        info.setCardNumber(cardNumber);
        // NOTE(review): gender is read under the key "sexy", as in the original code —
        // presumably the API's own field name; confirm against a live response.
        info.setSex(dishonestField(record, "sexy"));
        info.setAge(dishonestAge(dishonestField(record, "age")));
        info.setAreaName(dishonestField(record, "areaName"));
        info.setCourtName(dishonestField(record, "courtName"));
        info.setGistId(dishonestField(record, "gistId"));
        info.setRegisterDate(dishonestField(record, "regDate"));
        info.setCaseCode(dishonestField(record, "caseCode"));
        info.setGistInstitution(dishonestField(record, "gistUnit"));
        info.setDuty(dishonestField(record, "duty"));
        info.setPerformance(dishonestField(record, "performance"));
        info.setConcreteReason(dishonestField(record, "disruptTypeName"));
        info.setType(dishonestField(record, "type"));
        info.setPublishedAt(dishonestPublishedAt(dishonestField(record, "publishDateStamp")));
        infoList.add(info);
    }
    return infoList;
}

/** Strips a JSONP wrapper ({@code name( ... )}) if present; plain JSON is returned trimmed. */
private static String dishonestUnwrapJsonp(String response) {
    if (response == null) {
        return null;
    }
    String trimmed = response.trim();
    // Already bare JSON: parentheses inside string values must not be mistaken for a wrapper.
    if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
        return trimmed;
    }
    int open = trimmed.indexOf('(');
    int close = trimmed.lastIndexOf(')');
    if (open >= 0 && close > open) {
        return trimmed.substring(open + 1, close);
    }
    return trimmed;
}

/** Returns the trimmed string value stored under {@code key}, or {@code null} when absent. */
private static String dishonestField(JSONObject record, String key) {
    String value = record.getString(key);
    return value == null ? null : value.trim();
}

/** Parses an age value; returns {@code null} when it is missing, blank, or not an integer. */
private static Integer dishonestAge(String raw) {
    if (raw == null || raw.isEmpty()) {
        return null;
    }
    try {
        return Integer.valueOf(raw);
    } catch (NumberFormatException e) {
        return null;
    }
}

/**
 * Converts the record's publish stamp to the stored value by scaling it by 1000 (equivalent to
 * the original "append 000"); returns {@code null} when it is missing, blank, or not numeric.
 */
private static Long dishonestPublishedAt(String raw) {
    if (raw == null || raw.isEmpty()) {
        return null;
    }
    try {
        return Long.parseLong(raw) * 1000L;
    } catch (NumberFormatException e) {
        return null;
    }
}

其中读入的类:

package model;

import lombok.Data;

import java.io.Serializable;

/**
 * One dishonest-judgment-debtor record.
 *
 * <p>String setters store a trimmed copy of their argument and pass {@code null} through
 * unchanged. Lombok's {@code @Data} is kept from the original; the accessors below are written
 * out explicitly.
 */
@Data
public class DishonestCustomerInfo implements Serializable {
    private static final long serialVersionUID = 2952636877575478687L;

    // Record identifier. NOTE(review): presumably a database primary key — confirm.
    private Long id;

    // Name of the person or organisation subject to enforcement.
    private String performedName;

    // ID-card number or organisation code.
    private String cardNumber;

    // Gender.
    private String sex;

    // Age.
    private Integer age;

    // Province.
    private String areaName;

    // Enforcing court.
    private String courtName;

    // Document number of the basis for enforcement.
    private String gistId;

    // Case filing date.
    private String registerDate;

    // Case number.
    private String caseCode;

    // Institution that issued the basis for enforcement.
    private String gistInstitution;

    // Obligation determined by the effective legal document.
    private String duty;

    // How far the subject has performed the obligation.
    private String performance;

    // Specific circumstances of the dishonest behaviour.
    private String concreteReason;

    // Dishonesty category.
    private String type;

    // Publication timestamp. NOTE(review): presumably epoch milliseconds — confirm with the producer.
    private Long publishedAt;

    // NOTE(review): presumably record creation / last-update timestamps — confirm.
    private Long createdAt;

    private Long updatedAt;

    /** Returns {@code value} trimmed, or {@code null} when {@code value} is {@code null}. */
    private static String trimOrNull(String value) {
        return value == null ? null : value.trim();
    }

    public Long getId() {
        return id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    public String getPerformedName() {
        return performedName;
    }

    public void setPerformedName(String performedName) {
        this.performedName = trimOrNull(performedName);
    }

    public String getCardNumber() {
        return cardNumber;
    }

    public void setCardNumber(String cardNumber) {
        this.cardNumber = trimOrNull(cardNumber);
    }

    public String getSex() {
        return sex;
    }

    public void setSex(String sex) {
        this.sex = trimOrNull(sex);
    }

    /**
     * Returns the age.
     *
     * <p>The original class only had {@link #getAge(Object)}, which is not a bean-style getter,
     * so property-based tooling could not read {@code age}.
     */
    public Integer getAge() {
        return this.age;
    }

    /**
     * Returns the age; the argument is ignored.
     *
     * @param age unused
     * @deprecated kept only so existing callers keep compiling; use {@link #getAge()}.
     */
    @Deprecated
    public Integer getAge(Object age) {
        return this.age;
    }

    public void setAge(Integer age) {
        this.age = age;
    }

    public String getAreaName() {
        return areaName;
    }

    public void setAreaName(String areaName) {
        this.areaName = trimOrNull(areaName);
    }

    public String getCourtName() {
        return courtName;
    }

    public void setCourtName(String courtName) {
        this.courtName = trimOrNull(courtName);
    }

    public String getGistId() {
        return gistId;
    }

    public void setGistId(String gistId) {
        this.gistId = trimOrNull(gistId);
    }

    public String getRegisterDate() {
        return registerDate;
    }

    public void setRegisterDate(String registerDate) {
        this.registerDate = trimOrNull(registerDate);
    }

    public String getCaseCode() {
        return caseCode;
    }

    public void setCaseCode(String caseCode) {
        this.caseCode = trimOrNull(caseCode);
    }

    public String getGistInstitution() {
        return gistInstitution;
    }

    public void setGistInstitution(String gistInstitution) {
        this.gistInstitution = trimOrNull(gistInstitution);
    }

    public String getDuty() {
        return duty;
    }

    public void setDuty(String duty) {
        this.duty = trimOrNull(duty);
    }

    public String getPerformance() {
        return performance;
    }

    public void setPerformance(String performance) {
        this.performance = trimOrNull(performance);
    }

    public String getConcreteReason() {
        return concreteReason;
    }

    public void setConcreteReason(String concreteReason) {
        this.concreteReason = trimOrNull(concreteReason);
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = trimOrNull(type);
    }

    public Long getPublishedAt() {
        return publishedAt;
    }

    public void setPublishedAt(Long publishedAt) {
        this.publishedAt = publishedAt;
    }

    public Long getCreatedAt() {
        return createdAt;
    }

    public void setCreatedAt(Long createdAt) {
        this.createdAt = createdAt;
    }

    public Long getUpdatedAt() {
        return updatedAt;
    }

    public void setUpdatedAt(Long updatedAt) {
        this.updatedAt = updatedAt;
    }
}

类中字段对应官方的含义:

被执行人姓名/名称 performed_name
身份证号码/组织机构代码 card_number
性别 sex
年龄 age
省份 area_name
执行法院 court_name
执行依据文号 gist_id
立案时间 register_date
案号 case_code
做出执行依据单位 gist_institution
生效法律文书确定的义务 duty
被执行人的履行情况 performance
失信被执行人行为具体情形 concrete_reason
失信类别 type
发布时间戳 published_at

 

4.爬取效果:

百度爬虫爬取官方失信被执行人信息_第1张图片

 

 

完成此文参考博文:https://blog.csdn.net/sin_404/article/details/80141058

你可能感兴趣的:(百度爬虫爬取官方失信被执行人信息)