2018.9.28-修改:使用FastJson解析数据
1.爬取地址及使用的编码:
/** Charset name passed to {@code URLEncoder.encode} when encoding query parameter values. */
private static final String UTF_8 = "UTF-8";
/** Base URL of the Baidu endpoint that is queried; the encoded query string is appended after a {@code ?}. */
private static final String SPIDER_URL = "https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php";
2.请求接口并将响应体以字符串形式直接返回(注意:读取HTTP响应时若不显式指定编码,中文会出现乱码;解决方法是在创建读取器时指定UTF-8:new InputStreamReader(response.getEntity().getContent(), StandardCharsets.UTF_8))
/**
 * Queries the remote endpoint and returns the raw response body as a string.
 *
 * <p>The query string is built from {@code params}: keys are appended as-is and values are
 * URL-encoded with {@code DishonestUtil.UTF_8}. It is appended to
 * {@code DishonestUtil.SPIDER_URL}, an HTTP GET is issued, and the body is decoded as UTF-8.
 * The body is read line by line, so line terminators are not part of the returned string.
 *
 * @param params query parameters; may be {@code null} or empty, in which case the bare
 *     endpoint URL is requested. A {@code null} value is sent as an empty value and
 *     non-String values are converted with {@code String.valueOf}.
 * @return the response body concatenated into a single string
 * @throws MyException if the encoding is not supported or the request / read fails
 */
private static String getStringDishonest(Map<?, ?> params) {
    // Build the request parameters as key=value pairs joined by '&'
    StringBuilder query = new StringBuilder();
    if (params != null) {
        for (Map.Entry<?, ?> entry : params.entrySet()) {
            if (query.length() > 0) {
                query.append("&");
            }
            Object value = entry.getValue();
            // URLEncoder.encode rejects null, so a missing value is sent as an empty one
            String text = value == null ? "" : String.valueOf(value);
            query.append(entry.getKey());
            query.append("=");
            try {
                query.append(URLEncoder.encode(text, DishonestUtil.UTF_8));
            } catch (UnsupportedEncodingException e) {
                throw new MyException("失信人查询编码异常:"+e);
            }
        }
    }
    // Assemble the full URL; fall back to the bare endpoint when there is nothing to append
    String fullUrl = query.length() > 0
            ? DishonestUtil.SPIDER_URL + "?" + query
            : DishonestUtil.SPIDER_URL;
    HttpGet httpGet = new HttpGet(fullUrl);

    // Fully qualified so that no additional import is required by this method
    org.apache.http.impl.client.CloseableHttpClient client = HttpClients.custom().build();
    org.apache.http.client.methods.CloseableHttpResponse response = null;
    BufferedReader bufferedReader = null;
    try {
        // Execute the request
        response = client.execute(httpGet);
        // Decode the body explicitly as UTF-8 to avoid garbled Chinese text
        bufferedReader = new BufferedReader(
                new InputStreamReader(response.getEntity().getContent(), StandardCharsets.UTF_8));
        StringBuilder body = new StringBuilder();
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            body.append(line);
        }
        return body.toString();
    } catch (IOException e) {
        logger.error("失信工具IO异常:"+e.getMessage());
        throw new MyException("失信工具IO异常:"+e);
    } finally {
        // Release in reverse order of acquisition; close failures are logged, never thrown
        closeQuietly(bufferedReader);
        closeQuietly(response);
        closeQuietly(client);
    }
}

/**
 * Closes {@code resource} if it is non-null, logging (not propagating) any failure.
 *
 * @param resource the resource to close; may be {@code null}
 */
private static void closeQuietly(java.io.Closeable resource) {
    if (resource == null) {
        return;
    }
    try {
        resource.close();
    } catch (IOException e) {
        logger.error("失信工具IO关闭异常:"+e.getMessage());
    }
}
3.将返回的字符串转换为JSON,再封装到实体类中:
/**
 * Queries the dishonest judgment debtor list and maps each returned record to a
 * {@link DishonestCustomerInfo}.
 *
 * <p>The response is expected to be a JSONP payload ({@code callback(...)}); the wrapper is
 * stripped before parsing. Records are read from {@code data[0].result}. The name and card
 * number on each returned object are the values passed in by the caller, not values taken
 * from the response. A field that is absent from a record is stored as {@code null}; an
 * {@code age} or {@code publishDateStamp} that is not numeric is logged and stored as
 * {@code null} instead of aborting the whole query.
 *
 * @param performedName name of the person or organisation to look up
 * @param cardNumber    identity / organisation code to look up; {@code null} is sent as empty
 * @param areaName      province to filter by; {@code null} is sent as empty
 * @return the matching records; empty (never {@code null}) when nothing was found
 */
public static List<DishonestCustomerInfo> listDishonest(String performedName, String cardNumber, String areaName) {
    // Query conditions
    Map<String, Object> query = new HashMap<>();
    query.put("resource_id", "6899");
    query.put("query", "失信被执行人名单");
    query.put("cardNum", nullToEmpty(cardNumber));
    query.put("iname", nullToEmpty(performedName));
    query.put("areaName", nullToEmpty(areaName));
    query.put("ie", "utf-8");
    query.put("oe", "utf-8");
    query.put("format", "json");
    // NOTE(review): "t", "cb" and "_" are fixed values, presumably copied from a captured
    // browser request; "cb" looks like the JSONP callback name - confirm before changing.
    query.put("t", "1524537973200");
    query.put("cb", "jQuery110207319777455577083_1524537959352");
    query.put("_", "1524537959354");

    List<DishonestCustomerInfo> infoList = new ArrayList<>();

    // Fetch the raw body and unwrap the JSONP callback
    String json = stripJsonpWrapper(getStringDishonest(query));
    if (json == null || json.trim().isEmpty()) {
        return infoList;
    }

    // Parse once and reuse the result for both logging and extraction
    JSONObject root = JSONObject.parseObject(json);
    logger.info("FastJson转换字符串为对象:"+ root);
    if (root == null) {
        return infoList;
    }
    JSONArray data = root.getJSONArray("data");
    if (data == null || data.isEmpty()) {
        return infoList;
    }
    JSONObject firstEntry = data.getJSONObject(0);
    JSONArray results = firstEntry == null ? null : firstEntry.getJSONArray("result");
    if (results == null) {
        return infoList;
    }

    for (int i = 0; i < results.size(); i++) {
        JSONObject item = results.getJSONObject(i);
        logger.info("JSON Object["+ i +"]:"+item);
        if (item == null) {
            continue;
        }
        // Map the record onto the custom model class
        DishonestCustomerInfo info = new DishonestCustomerInfo();
        info.setPerformedName(performedName);
        info.setCardNumber(cardNumber);
        info.setSex(trimmedText(item, "sexy"));
        info.setAge(parseIntegerOrNull(trimmedText(item, "age")));
        info.setAreaName(trimmedText(item, "areaName"));
        info.setCourtName(trimmedText(item, "courtName"));
        info.setGistId(trimmedText(item, "gistId"));
        info.setRegisterDate(trimmedText(item, "regDate"));
        info.setCaseCode(trimmedText(item, "caseCode"));
        info.setGistInstitution(trimmedText(item, "gistUnit"));
        info.setDuty(trimmedText(item, "duty"));
        info.setPerformance(trimmedText(item, "performance"));
        info.setConcreteReason(trimmedText(item, "disruptTypeName"));
        info.setType(trimmedText(item, "type"));
        // Same result as appending "000" to the digits: the stamp is scaled by 1000
        Long publishStamp = parseLongOrNull(trimmedText(item, "publishDateStamp"));
        info.setPublishedAt(publishStamp == null ? null : publishStamp * 1000L);
        infoList.add(info);
    }
    return infoList;
}

/** Returns {@code value}, or the empty string when it is {@code null}. */
private static String nullToEmpty(String value) {
    return value == null ? "" : value;
}

/**
 * Strips a JSONP wrapper ({@code name( ... )} with an optional trailing {@code ;}) and returns
 * the payload; a body that already starts with a JSON object/array, or has no wrapper, is
 * returned unchanged.
 */
private static String stripJsonpWrapper(String body) {
    if (body == null) {
        return null;
    }
    String trimmed = body.trim();
    if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
        return trimmed;
    }
    int open = trimmed.indexOf('(');
    int close = trimmed.lastIndexOf(')');
    if (open < 0 || close <= open) {
        return trimmed;
    }
    return trimmed.substring(open + 1, close);
}

/** Returns the trimmed string form of {@code source[key]}, or {@code null} when it is absent. */
private static String trimmedText(JSONObject source, String key) {
    Object value = source.get(key);
    return value == null ? null : String.valueOf(value).trim();
}

/** Parses {@code text} as an {@code Integer}; returns {@code null} (and logs) when it is blank or not numeric. */
private static Integer parseIntegerOrNull(String text) {
    if (text == null || text.isEmpty()) {
        return null;
    }
    try {
        return Integer.valueOf(text);
    } catch (NumberFormatException e) {
        logger.error("失信人数据数字解析异常:"+text);
        return null;
    }
}

/** Parses {@code text} as a {@code Long}; returns {@code null} (and logs) when it is blank or not numeric. */
private static Long parseLongOrNull(String text) {
    if (text == null || text.isEmpty()) {
        return null;
    }
    try {
        return Long.valueOf(text);
    } catch (NumberFormatException e) {
        logger.error("失信人数据数字解析异常:"+text);
        return null;
    }
}
其中用于封装查询结果的实体类:
package model;
import lombok.Data;
import java.io.Serializable;
/**
 * One record from the dishonest judgment debtor list.
 *
 * <p>String setters store a trimmed copy of their argument and keep {@code null} as
 * {@code null}.
 */
@Data
public class DishonestCustomerInfo implements Serializable {
    private static final long serialVersionUID = 2952636877575478687L;

    /** Record identifier. */
    private Long id;
    /** Name of the person or organisation subject to enforcement. */
    private String performedName;
    /** Identity card number or organisation code. */
    private String cardNumber;
    /** Sex. */
    private String sex;
    /** Age. */
    private Integer age;
    /** Province. */
    private String areaName;
    /** Court carrying out the enforcement. */
    private String courtName;
    /** Document number of the enforcement basis. */
    private String gistId;
    /** Date the case was filed. */
    private String registerDate;
    /** Case number. */
    private String caseCode;
    /** Institution that issued the enforcement basis. */
    private String gistInstitution;
    /** Obligation determined by the effective legal document. */
    private String duty;
    /** How far the obligation has been performed. */
    private String performance;
    /** Specific circumstances of the dishonest conduct. */
    private String concreteReason;
    /** Dishonesty category. */
    private String type;
    /** Publication timestamp. NOTE(review): presumably epoch milliseconds - confirm with the producer. */
    private Long publishedAt;
    /** Creation timestamp. NOTE(review): unit not visible here - confirm. */
    private Long createdAt;
    /** Last-update timestamp. NOTE(review): unit not visible here - confirm. */
    private Long updatedAt;

    /** Returns {@code value} trimmed, or {@code null} when {@code value} is {@code null}. */
    private static String trimOrNull(String value) {
        return value == null ? null : value.trim();
    }

    public Long getId() {
        return id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    public String getPerformedName() {
        return performedName;
    }

    public void setPerformedName(String performedName) {
        this.performedName = trimOrNull(performedName);
    }

    public String getCardNumber() {
        return cardNumber;
    }

    public void setCardNumber(String cardNumber) {
        this.cardNumber = trimOrNull(cardNumber);
    }

    public String getSex() {
        return sex;
    }

    public void setSex(String sex) {
        this.sex = trimOrNull(sex);
    }

    /**
     * Returns the age.
     *
     * @return the age, or {@code null} when it is not set
     */
    public Integer getAge() {
        return age;
    }

    /**
     * Returns the age; the argument is ignored.
     *
     * @param age unused
     * @return the age, or {@code null} when it is not set
     * @deprecated retained only for existing callers; use {@link #getAge()} instead.
     */
    @Deprecated
    public Integer getAge(Object age) {
        return this.age;
    }

    public void setAge(Integer age) {
        this.age = age;
    }

    public String getAreaName() {
        return areaName;
    }

    public void setAreaName(String areaName) {
        this.areaName = trimOrNull(areaName);
    }

    public String getCourtName() {
        return courtName;
    }

    public void setCourtName(String courtName) {
        this.courtName = trimOrNull(courtName);
    }

    public String getGistId() {
        return gistId;
    }

    public void setGistId(String gistId) {
        this.gistId = trimOrNull(gistId);
    }

    public String getRegisterDate() {
        return registerDate;
    }

    public void setRegisterDate(String registerDate) {
        this.registerDate = trimOrNull(registerDate);
    }

    public String getCaseCode() {
        return caseCode;
    }

    public void setCaseCode(String caseCode) {
        this.caseCode = trimOrNull(caseCode);
    }

    public String getGistInstitution() {
        return gistInstitution;
    }

    public void setGistInstitution(String gistInstitution) {
        this.gistInstitution = trimOrNull(gistInstitution);
    }

    public String getDuty() {
        return duty;
    }

    public void setDuty(String duty) {
        this.duty = trimOrNull(duty);
    }

    public String getPerformance() {
        return performance;
    }

    public void setPerformance(String performance) {
        this.performance = trimOrNull(performance);
    }

    public String getConcreteReason() {
        return concreteReason;
    }

    public void setConcreteReason(String concreteReason) {
        this.concreteReason = trimOrNull(concreteReason);
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = trimOrNull(type);
    }

    public Long getPublishedAt() {
        return publishedAt;
    }

    public void setPublishedAt(Long publishedAt) {
        this.publishedAt = publishedAt;
    }

    public Long getCreatedAt() {
        return createdAt;
    }

    public void setCreatedAt(Long createdAt) {
        this.createdAt = createdAt;
    }

    public Long getUpdatedAt() {
        return updatedAt;
    }

    public void setUpdatedAt(Long updatedAt) {
        this.updatedAt = updatedAt;
    }
}
类中字段对应官方的含义:
被执行人姓名/名称 | performed_name |
身份证号码/组织机构代码 | card_number |
性别 | sex |
年龄 | age |
省份 | area_name |
执行法院 | court_name |
执行依据文号 | gist_id |
立案时间 | register_date |
案号 | case_code |
做出执行依据单位 | gist_institution |
生效法律文书确定的义务 | duty |
被执行人的履行情况 | performance |
失信被执行人行为具体情形 | concrete_reason |
失信类别 | type |
发布时间戳 | published_at |
4.爬取效果:
完成此文参考博文:https://blog.csdn.net/sin_404/article/details/80141058