json 加密解密爬虫

2019独角兽企业重金招聘Python工程师标准>>> hot3.png

package cn.com.czj.front.service.common;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Date;
import java.util.List;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang.StringUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import cn.com.czj.base.entity.data.DataPinganerEntity;
import cn.com.czj.base.entity.data.DataUrlEntity;
import cn.com.czj.front.dao.DataPingAnerDao;
import cn.com.czj.front.dao.DataUrlDao;
import cn.com.czj.front.dao.UrlContentDao;
import cn.com.czj.front.utils.http.SslUtils;
import cn.com.easy.utils.HttpClientUtils;

/**
 * 抓平安数据
 * 
 * [@author](https://my.oschina.net/arthor) linwk 2016年10月25日
 * 
 */
[@Service](https://my.oschina.net/service)
public class CrawerUserPingAnService {

	/**
	 * 网址
	 * 
	 */
	@Autowired
	private DataUrlDao dataUrlDao;

	@Autowired
	private DataPingAnerDao dataPinganerDao;

	@Autowired
	private DownLoadPicService DownLoadPicService;

	/**
	 * 内容
	 * 
	 */
	@Autowired
	private UrlContentDao urlContentDao;

	public void doCrawerService() throws Exception {
		// 网页
		List urls = dataUrlDao.findAll();
		if (CollectionUtils.isNotEmpty(urls)) {

			for (int i = 0; i < urls.size(); i++) {
				// 获取分页 循环
				int pageSize = 10;
				// 加密
				DataUrlEntity tDataUrlEntity = urls.get(i);
				// 地址
				tDataUrlEntity.getUrl();
				// 组合
				// System.out.println(tDataUrlEntity.getUrl());
				String pcString = StringUtils.replace(tDataUrlEntity.getUrl(), "http://life.pingan.com/kehufuwu/fuwugongju/return_select.shtml?", "");
				System.out.println(pcString);
				String signatureSource = pcString + "®ionCode=&sex=&age=¤tTime=" + new Date().getTime() + "&roundRex=" + RandomStringUtils.random(5, "1234567890");
				String urlString = "http://life.pingan.com/binfenxiari/signOfAgent.do?" + signatureSource + "&_=" + new Date().getTime();
				// System.out.println(urlString);
				String aString = HttpClientUtils.get(urlString);
				if (StringUtils.isBlank(aString)) {
					continue;
				}
				// System.out.println(aString);
				JSONObject demoJson = new JSONObject(aString);
				String sign = getJsonName(demoJson, "sign");
				String signature = sign;
				// System.out.println(sign);
				int j = 0;
				while(pageSize > j) {
					j++;
					try {
						String contentString = extracted(signatureSource, signature, j);
						if (StringUtils.equals(contentString, "success_jsoncallback({\"RESFLAG\":\"N\",\"errMsg\":\"无效链接\"})")) {
							System.out.println("无效链接");
							continue;
						}
						if (StringUtils.equals(contentString, "success_jsoncallback({\"RESFLAG\":\"N\",\"errMsg\":\"链接超时\"})")) {
							System.out.println("链接超时");
							signatureSource = pcString + "®ionCode=&sex=&age=¤tTime=" + new Date().getTime() + "&roundRex=" + RandomStringUtils.random(5, "1234567890");
							urlString = "http://life.pingan.com/binfenxiari/signOfAgent.do?" + signatureSource + "&_=" + new Date().getTime();
							// System.out.println(urlString);
							aString = HttpClientUtils.get(urlString);
							if (StringUtils.isNotBlank(aString)) {
								continue;
							}
							// System.out.println(aString);
							demoJson = new JSONObject(aString);
							sign = getJsonName(demoJson, "sign");
							signature = sign;
							// System.out.println(sign);
							contentString = extracted(signatureSource, signature, j);
						}
						if (StringUtils.isNotBlank(contentString)) {
							// 解析保存到用户
							int start = contentString.indexOf("(");
							String newJson = contentString.substring(start + 1, contentString.lastIndexOf(")"));// 组装成新的Json数据

							//System.out.println("*******************************************新的数值***************************************************");
							//System.out.println(newJson);
							//System.out.println("*******************************************新的数值***************************************************");

							JSONObject jo = new JSONObject(newJson);
							JSONObject pageBean = jo.getJSONObject("pageBean");

							// System.out.println("\n将Json数据解析为Map:");
							System.out.println("*******************************************totalPageSize***************************************************");
							System.out.println("totalPageSize: " + pageBean.getInt("totalPageSize") + " totalResults: " + pageBean.getInt("totalResults"));
							System.out.println("*******************************************totalPageSize***************************************************");

							if (pageBean.getInt("totalPageSize") > 0) {
								pageSize = pageBean.getInt("totalPageSize");
							} else {
								pageSize = 0;
								continue;
							}

							JSONArray jsonStrArray = jo.getJSONArray("resultList");
							for (int k = 0; k < jsonStrArray.length(); k++) {
								JSONObject dataPinganerEntity = jsonStrArray.getJSONObject(k);
								DataPinganerEntity dataPinganerEntity2 = new DataPinganerEntity();
								dataPinganerEntity2.setDEPTNAME(dataPinganerEntity.get("DEPTNAME").toString());
								// dataPinganerEntity2.setSELFINTRODUCE(dataPinganerEntity.get("SELFINTRODUCE").toString());
								dataPinganerEntity2.setTEL(dataPinganerEntity.get("TEL").toString());
								dataPinganerEntity2.setSEX(dataPinganerEntity.get("SEX").toString());
								dataPinganerEntity2.setEMAIL(dataPinganerEntity.get("EMAIL").toString());
								// dataPinganerEntity2.setAGENTID(dataPinganerEntity.get("AGENTID").toString());
								dataPinganerEntity2.setNAME(dataPinganerEntity.get("NAME").toString());
								dataPinganerEntity2.setDESCRIPTION(dataPinganerEntity.get("DESCRIPTION").toString());
								dataPinganerEntity2.setHEADSHOT(dataPinganerEntity.get("HEADSHOT").toString());
								// dataPinganerEntity2.setHOMEALIAS(dataPinganerEntity.get("HOMEALIAS").toString());
								dataPinganerEntity2.setMOBILE(dataPinganerEntity.get("MOBILE").toString());
								dataPinganerEntity2.setHOMEADDR(dataPinganerEntity.get("HOMEADDR").toString());
								// System.out.println(dataPinganerEntity2.getNAME());
								int count = dataPinganerDao.countByMobile(dataPinganerEntity2.getMOBILE());
								if (count > 0) {
									System.out.println("重复了");
								} else {
									dataPinganerDao.save(dataPinganerEntity2);
									System.out.println("保存进度**********************" + i * 100 / urls.size() + "%*********************");
								}
							}

						}

					} catch (Exception e) {
						continue;
					}
				}
				System.out.println("保存进度**********************" + i * 100 / urls.size() + "%*********************");
			}
		}

	}

	private String extracted(String signatureSource, String signature, int j) throws Exception {
		String urlString;
		// var
		// urls="http://sales.pa18.com/life/toolbox.queryAgentsManualSelection.shtml?provinceCode="+province+"&cityCode="+citys+"®ionCode="+area+"&sex="+sex+"&age="+age+"¤tTime="+currentTime*1+"&roundRex="+sui_num*1+"&signature="+signature;
		urlString = "https://sales.pa18.com/life/toolbox.queryAgentsManualSelection.shtml?" + signatureSource + "&signature=" + signature + "&pageSize=100"
				+ "&jsoncallback=success_jsoncallback" + "&_=" + new Date().getTime() + "¤tPage=" + j;
		// // 获取内容
		System.out.println(urlString);
		String contentString = "";
		// HttpUtils.doGet(urlString);
		// System.out.println(contentString);
		// contentString = doGet(urlString);
		contentString = HttpClientUtils.get(urlString);
		System.out.println(contentString);
		// 获取内容
		if (StringUtils.isBlank(contentString)) {
			// contentString = doGet(urlString);
			contentString = HttpClientUtils.get(urlString);
			System.out.println(contentString);
			// contentString =
			// DownLoadPicService.getHTML(urlString);
			// contentString=URLEncoder.encode(contentString,
			// "UTF-8");
			// System.out.println(contentString);
		}
		return contentString;
	}

	private static String getJsonName(JSONObject demoJson, String tString) throws JSONException {
		return demoJson.getString(tString);
	}

	/** the connection connect time out in millionseconds */
	private static final int CONNECT_TIME_OUT = 60000;
	/** the connection read time out in millionseconds */
	private static final int READ_TIME_OUT = 60000;

	public static String doGet(String url) throws Exception {
		String result = "";
		BufferedReader in = null;
		try {
			URL realUrl = new URL(url.trim());
			if ("https".equalsIgnoreCase(realUrl.getProtocol())) {
				try {
					SslUtils.ignoreSsl();
				} catch (Exception e) {

				}
			}
			// open connection
			URLConnection connection = realUrl.openConnection();
			connection.setConnectTimeout(CONNECT_TIME_OUT);
			connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36");
			// connection.setRequestProperty("Content-Type",
			// "text/json;charset=UTF-8");
			connection.setReadTimeout(READ_TIME_OUT);
			// connect
			connection.connect();
			// define BufferedReader to read input content
			in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "GBK"));
			String line;
			while ((line = in.readLine()) != null) {
				result += line;
			}
			StringBuffer buffer = new StringBuffer();
			char[] buf = new char[64];
			int count = 0;
			while ((count = in.read(buf)) != -1) {
				buffer.append(buffer, 0, count);
			}
			System.out.println("**************************************************************************************************************");
			System.out.println(result);
			System.out.println("**************************************************************************************************************");
			System.out.println(buffer.toString());
			System.out.println("**************************************************************************************************************");
		} finally {
			if (in != null) {
				in.close();
			}
		}
		return result;
	}

}

转载于:https://my.oschina.net/u/1052192/blog/776101

你可能感兴趣的:(json 加密解密爬虫)