// NOTE: web-scrape residue removed here (an unrelated recruiting banner that preceded the package declaration).
package cn.com.czj.front.service.common;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Date;
import java.util.List;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang.StringUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import cn.com.czj.base.entity.data.DataPinganerEntity;
import cn.com.czj.base.entity.data.DataUrlEntity;
import cn.com.czj.front.dao.DataPingAnerDao;
import cn.com.czj.front.dao.DataUrlDao;
import cn.com.czj.front.dao.UrlContentDao;
import cn.com.czj.front.utils.http.SslUtils;
import cn.com.easy.utils.HttpClientUtils;
/**
 * Crawls Ping An data.
 *
 * @author linwk 2016-10-25
 */
@Service
public class CrawerUserPingAnService {

    /** Prefix removed from every stored URL so that only its query string remains. */
    private static final String RETURN_SELECT_PREFIX = "http://life.pingan.com/kehufuwu/fuwugongju/return_select.shtml?";

    /** Endpoint asked for the {@code sign} value that belongs to a query string. */
    private static final String SIGN_URL = "http://life.pingan.com/binfenxiari/signOfAgent.do?";

    /** Endpoint asked for one page of agent records (JSONP response). */
    private static final String AGENT_QUERY_URL = "https://sales.pa18.com/life/toolbox.queryAgentsManualSelection.shtml?";

    /** Exact response body returned when the signed link is rejected as invalid. */
    private static final String INVALID_LINK_RESPONSE = "success_jsoncallback({\"RESFLAG\":\"N\",\"errMsg\":\"无效链接\"})";

    /** Exact response body returned when the signed link has timed out. */
    private static final String LINK_TIMEOUT_RESPONSE = "success_jsoncallback({\"RESFLAG\":\"N\",\"errMsg\":\"链接超时\"})";

    /** Number of pages attempted until a response reports the real {@code totalPageSize}. */
    private static final int DEFAULT_PAGE_COUNT = 10;

    /** The connection connect timeout in milliseconds. */
    private static final int CONNECT_TIME_OUT = 60000;

    /** The connection read timeout in milliseconds. */
    private static final int READ_TIME_OUT = 60000;

    /** Source of the URLs to crawl. */
    @Autowired
    private DataUrlDao dataUrlDao;

    /** Persistence for the crawled agent records. */
    @Autowired
    private DataPingAnerDao dataPinganerDao;

    // Injected but not referenced by any code in this class.
    @Autowired
    private DownLoadPicService DownLoadPicService;

    /** Content DAO; injected but not referenced by any code in this class. */
    @Autowired
    private UrlContentDao urlContentDao;

    /**
     * Crawls every URL returned by {@code dataUrlDao.findAll()}: requests a signature for the
     * URL's query string, then walks the result pages and stores each agent whose mobile number
     * is not stored yet.
     *
     * @throws Exception if loading the URLs or requesting/parsing the initial signature of a URL
     *                   fails; failures while processing an individual result page are reported
     *                   on stderr and do not abort the crawl
     */
    public void doCrawerService() throws Exception {
        List<DataUrlEntity> urls = dataUrlDao.findAll();
        if (CollectionUtils.isEmpty(urls)) {
            return;
        }
        for (int i = 0; i < urls.size(); i++) {
            DataUrlEntity urlEntity = urls.get(i);
            int progress = i * 100 / urls.size();

            // Keep only the query-string part of the stored URL.
            String pcString = StringUtils.replace(urlEntity.getUrl(), RETURN_SELECT_PREFIX, "");
            System.out.println(pcString);

            String signatureSource = buildSignatureSource(pcString);
            String signature = requestSignature(signatureSource);
            if (signature == null) {
                // Blank signature response: nothing can be queried for this URL.
                continue;
            }

            crawlPages(pcString, signatureSource, signature, progress);
            System.out.println("保存进度**********************" + progress + "%*********************");
        }
    }

    /**
     * Walks the result pages of one URL and stores the agents found on each page.
     *
     * @param pcString               query string taken from the stored URL
     * @param initialSignatureSource signed query string to start with
     * @param initialSignature       signature matching {@code initialSignatureSource}
     * @param progress               progress percentage printed after each stored record
     */
    private void crawlPages(String pcString, String initialSignatureSource, String initialSignature, int progress) {
        String signatureSource = initialSignatureSource;
        String signature = initialSignature;
        int totalPages = DEFAULT_PAGE_COUNT;
        int page = 0;
        while (totalPages > page) {
            page++;
            try {
                String content = extracted(signatureSource, signature, page);
                if (StringUtils.equals(content, INVALID_LINK_RESPONSE)) {
                    System.out.println("无效链接");
                    continue;
                }
                if (StringUtils.equals(content, LINK_TIMEOUT_RESPONSE)) {
                    System.out.println("链接超时");
                    // Request a fresh signature; replace the current pair only once the new
                    // signature is known, so source and signature always stay consistent.
                    String freshSource = buildSignatureSource(pcString);
                    String freshSignature = requestSignature(freshSource);
                    if (freshSignature == null) {
                        continue;
                    }
                    signatureSource = freshSource;
                    signature = freshSignature;
                    content = extracted(signatureSource, signature, page);
                }
                if (StringUtils.isBlank(content)) {
                    continue;
                }

                JSONObject json = new JSONObject(unwrapJsonp(content));
                JSONObject pageBean = json.getJSONObject("pageBean");
                int reportedPages = pageBean.getInt("totalPageSize");
                System.out.println("*******************************************totalPageSize***************************************************");
                System.out.println("totalPageSize: " + reportedPages + " totalResults: " + pageBean.getInt("totalResults"));
                System.out.println("*******************************************totalPageSize***************************************************");
                if (reportedPages <= 0) {
                    // No results for this URL: stop paging.
                    totalPages = 0;
                    continue;
                }
                totalPages = reportedPages;
                saveAgents(json.getJSONArray("resultList"), progress);
            } catch (Exception e) {
                // Best effort per page: report the failure and move on to the next page.
                System.err.println("Failed to process page " + page + " for [" + pcString + "]: " + e);
            }
        }
    }

    /**
     * Stores every agent of a result list whose mobile number is not stored yet.
     * The SELFINTRODUCE, AGENTID and HOMEALIAS fields are not persisted.
     *
     * @param agents   the {@code resultList} array of one response page
     * @param progress progress percentage printed after each stored record
     * @throws Exception if a record lacks one of the copied keys or persistence fails
     */
    private void saveAgents(JSONArray agents, int progress) throws Exception {
        for (int k = 0; k < agents.length(); k++) {
            JSONObject source = agents.getJSONObject(k);
            DataPinganerEntity agent = new DataPinganerEntity();
            agent.setDEPTNAME(source.get("DEPTNAME").toString());
            agent.setTEL(source.get("TEL").toString());
            agent.setSEX(source.get("SEX").toString());
            agent.setEMAIL(source.get("EMAIL").toString());
            agent.setNAME(source.get("NAME").toString());
            agent.setDESCRIPTION(source.get("DESCRIPTION").toString());
            agent.setHEADSHOT(source.get("HEADSHOT").toString());
            agent.setMOBILE(source.get("MOBILE").toString());
            agent.setHOMEADDR(source.get("HOMEADDR").toString());

            if (dataPinganerDao.countByMobile(agent.getMOBILE()) > 0) {
                System.out.println("重复了");
            } else {
                dataPinganerDao.save(agent);
                System.out.println("保存进度**********************" + progress + "%*********************");
            }
        }
    }

    /**
     * Builds the query string that gets signed: the URL's own parameters plus empty
     * region/sex/age filters, the current time and a random 5-digit number.
     *
     * @param pcString query string taken from the stored URL
     * @return the query string to sign
     */
    private static String buildSignatureSource(String pcString) {
        return pcString + "&regionCode=&sex=&age=&currentTime=" + new Date().getTime()
                + "&roundRex=" + RandomStringUtils.random(5, "1234567890");
    }

    /**
     * Requests the signature for a query string.
     *
     * @param signatureSource query string produced by {@link #buildSignatureSource(String)}
     * @return the {@code sign} value of the response, or {@code null} if the response was blank
     * @throws Exception if the request fails or the response is not JSON containing {@code sign}
     */
    private String requestSignature(String signatureSource) throws Exception {
        String urlString = SIGN_URL + signatureSource + "&_=" + new Date().getTime();
        String response = HttpClientUtils.get(urlString);
        if (StringUtils.isBlank(response)) {
            return null;
        }
        return getJsonName(new JSONObject(response), "sign");
    }

    /**
     * Strips the JSONP wrapper, returning the text between the first {@code (} and the last
     * {@code )}.
     *
     * @param jsonp the JSONP response body
     * @return the wrapped JSON text
     * @throws IllegalArgumentException if the body is not wrapped in parentheses
     */
    private static String unwrapJsonp(String jsonp) {
        int start = jsonp.indexOf("(");
        int end = jsonp.lastIndexOf(")");
        if (start < 0 || end <= start) {
            throw new IllegalArgumentException("Response is not a JSONP payload: " + jsonp);
        }
        return jsonp.substring(start + 1, end);
    }

    /**
     * Fetches one page of agents, retrying the request once when the first response is blank.
     *
     * @param signatureSource signed query string
     * @param signature       signature matching {@code signatureSource}
     * @param j               1-based page number, sent as {@code currentPage}
     * @return the raw response body (may still be blank after the retry)
     * @throws Exception if the HTTP request fails
     */
    private String extracted(String signatureSource, String signature, int j) throws Exception {
        String urlString = AGENT_QUERY_URL + signatureSource + "&signature=" + signature + "&pageSize=100"
                + "&jsoncallback=success_jsoncallback" + "&_=" + new Date().getTime() + "&currentPage=" + j;
        System.out.println(urlString);

        String contentString = HttpClientUtils.get(urlString);
        System.out.println(contentString);
        if (StringUtils.isBlank(contentString)) {
            // Single retry on a blank response.
            contentString = HttpClientUtils.get(urlString);
            System.out.println(contentString);
        }
        return contentString;
    }

    /**
     * Reads a string property from a JSON object.
     *
     * @param demoJson the JSON object
     * @param tString  the key to read
     * @return the string stored under the key
     * @throws JSONException if the key is absent or its value is not a string
     */
    private static String getJsonName(JSONObject demoJson, String tString) throws JSONException {
        return demoJson.getString(tString);
    }

    /**
     * Performs a plain HTTP(S) GET and returns the body decoded as GBK, with the line
     * terminators removed (lines are concatenated directly).
     *
     * @param url the address to fetch; surrounding whitespace is trimmed
     * @return the response body without line terminators
     * @throws Exception if the URL is malformed or the connection/read fails
     */
    public static String doGet(String url) throws Exception {
        BufferedReader in = null;
        try {
            URL realUrl = new URL(url.trim());
            if ("https".equalsIgnoreCase(realUrl.getProtocol())) {
                try {
                    // NOTE(review): presumably disables TLS certificate validation - confirm
                    // that this is acceptable for the hosts being fetched.
                    SslUtils.ignoreSsl();
                } catch (Exception e) {
                    // Best effort: carry on with the default SSL configuration.
                    System.err.println("Could not relax SSL checks for " + realUrl + ": " + e);
                }
            }

            URLConnection connection = realUrl.openConnection();
            connection.setConnectTimeout(CONNECT_TIME_OUT);
            connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36");
            connection.setReadTimeout(READ_TIME_OUT);
            connection.connect();

            in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "GBK"));
            StringBuilder body = new StringBuilder();
            String line;
            while ((line = in.readLine()) != null) {
                body.append(line);
            }

            String result = body.toString();
            System.out.println("**************************************************************************************************************");
            System.out.println(result);
            System.out.println("**************************************************************************************************************");
            return result;
        } finally {
            if (in != null) {
                in.close();
            }
        }
    }
}