Java 爬取周边信息的解决方案

日前用 Java 爬虫爬取某些地理位置的周边信息时,发现大部分网站的周边信息都是先加载页面、再用 JS 调用 API 来获取的,而且不同网站的具体实现方式各不相同。所以我这边的解决方案是:先根据搜索目标的名称及其所在区域调用百度 API 获取经纬度,再根据经纬度搜索周边信息。具体接口请查看百度地图提供的公开 API:http://lbsyun.baidu.com/index.php?title=webapi

package com.jrzh.thread;

import java.math.BigDecimal;
import java.net.URLEncoder;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.alibaba.fastjson.JSONObject;
import com.jrzh.bean.jwd.JwdBean;
import com.jrzh.bean.jwd.JwdInfo;
import com.jrzh.common.exception.ProjectException;
import com.jrzh.framework.bean.SessionUser;
import com.jrzh.mvc.model.reptile.ReptileNewHouseModel;
import com.jrzh.mvc.service.reptile.manager.ReptileServiceManage;
import com.jrzh.tools.MyPayUtils;

/**
 * Background job that fills in missing latitude/longitude values on
 * {@link ReptileNewHouseModel} records.
 *
 * <p>For every house without coordinates it asks the Baidu place-suggestion
 * API for candidates (by city + house name, falling back to the house's
 * alternative name), picks the best candidate for the house's district and
 * persists the coordinates through the reptile service.
 */
public class LatitudeLongitudeThread extends Thread {

	static Log log = LogFactory.getLog(LatitudeLongitudeThread.class);

	/** Baidu place-suggestion endpoint. */
	private static final String SUGGESTION_URL = "http://api.map.baidu.com/place/v2/suggestion";

	/** Baidu application keys, tried in order (placeholder value, fill in real keys). */
	private static final String[] APP_KEYS = { "key的数组" };

	/**
	 * Response message on which the next application key is tried.
	 * NOTE(review): presumably signals an exhausted quota for the current key - confirm.
	 */
	private static final String TRY_NEXT_KEY_MESSAGE = "302";

	/** Response message of a successful suggestion request. */
	private static final String OK_MESSAGE = "ok";

	/** Pause between two houses so the API is not flooded with requests. */
	private static final long PAUSE_BETWEEN_HOUSES_MILLIS = 300;

	private ReptileServiceManage reptileServiceManage;

	public LatitudeLongitudeThread(ReptileServiceManage reptileServiceManage) {
		this.reptileServiceManage = reptileServiceManage;
	}

	/**
	 * Loads all houses and resolves coordinates for those that have none.
	 * A failure on one house is logged and does not stop the remaining houses;
	 * an interrupt stops the job and leaves the thread's interrupt flag set.
	 */
	@Override
	public void run() {
		List<ReptileNewHouseModel> houses;
		try {
			houses = reptileServiceManage.reptileNewHouseService.findAll();
		} catch (ProjectException e) {
			log.error("Failed to load houses for the coordinate update", e);
			return;
		}

		if (houses != null) {
			for (ReptileNewHouseModel house : houses) {
				if (house.getLatitude() != null && house.getLongitude() != null) {
					// Coordinates already known, nothing to do.
					continue;
				}
				try {
					updateCoordinates(house);
					Thread.sleep(PAUSE_BETWEEN_HOUSES_MILLIS);
				} catch (InterruptedException e) {
					// Restore the flag so whoever interrupted us can observe it, then stop.
					Thread.currentThread().interrupt();
					log.warn("Coordinate update interrupted, stopping early");
					break;
				} catch (Exception e) {
					log.error("Failed to update coordinates for house " + house.getHouseName(), e);
				}
			}
		}
		log.info("一手房经纬度结束");
	}

	/**
	 * Looks up and stores the coordinates of a single house.
	 * Does nothing when the API yields no candidate or no usable coordinates.
	 *
	 * @param house the house to update
	 * @throws Exception if persisting the house fails or a coordinate is not a valid number
	 */
	private void updateCoordinates(ReptileNewHouseModel house) throws Exception {
		List<JwdInfo> candidates = getJwd(house.getHouseCity(), house.getHouseName());
		if (isEmpty(candidates) && house.getHouseOtherName() != null) {
			// Nothing found under the primary name: retry with the alternative name.
			candidates = getJwd(house.getHouseCity(), house.getHouseOtherName());
		}
		if (isEmpty(candidates)) {
			return;
		}

		String[] coordinates = getJwd(candidates, house.getHousePart(), house.getHouseName());
		if (coordinates == null) {
			return;
		}
		String lat = coordinates[0];
		String lng = coordinates[1];
		log.info(lat + "----" + lng);
		if (lat != null && lng != null) {
			house.setLatitude(new BigDecimal(lat));
			house.setLongitude(new BigDecimal(lng));
			reptileServiceManage.reptileNewHouseService.edit(house, SessionUser.getSystemUser());
		}
	}

	/**
	 * Queries the Baidu place-suggestion API for places matching {@code position}
	 * inside {@code city}.
	 *
	 * @param city     region to restrict the search to
	 * @param position name of the place to search for
	 * @return the suggestions returned by the API, or {@code null} when an argument is
	 *         blank, the request fails, or no application key yields an "ok" response
	 */
	public static List<JwdInfo> getJwd(String city, String position) {
		if (StringUtils.isBlank(city) || StringUtils.isBlank(position)) {
			return null;
		}
		try {
			// Both values are typically non-ASCII, so they must be encoded for the query string.
			String query = URLEncoder.encode(position, "utf-8");
			String region = URLEncoder.encode(city, "utf-8");
			for (String appKey : APP_KEYS) {
				String url = SUGGESTION_URL + "?query=" + query + "&region=" + region
						+ "&city_limit=true&output=json&ak=" + appKey;
				// Timeout arguments kept from the original call; their units are defined by MyPayUtils.
				String requestContent = MyPayUtils.sendHttpByGet(url, 2000000, 20000000);
				JwdBean jwdBean = JSONObject.parseObject(requestContent, JwdBean.class);
				if (jwdBean == null) {
					return null;
				}
				if (StringUtils.equals(jwdBean.getMessage(), TRY_NEXT_KEY_MESSAGE)) {
					// This key is rejected: move on to the next configured key.
					continue;
				}
				if (StringUtils.equals(jwdBean.getMessage(), OK_MESSAGE)) {
					return jwdBean.getResult();
				}
				return null;
			}
		} catch (Exception e) {
			log.error("Place suggestion request failed for " + position, e);
		}
		return null;
	}

	/**
	 * Picks the coordinates of the best candidate, in this order of preference:
	 * <ol>
	 * <li>a candidate in district {@code part} whose name equals {@code houserName};</li>
	 * <li>a candidate in district {@code part} whose name contains {@code houserName};</li>
	 * <li>the first candidate of the list.</li>
	 * </ol>
	 *
	 * @param list       candidates returned by the suggestion API
	 * @param part       district the house belongs to; the district passes are skipped when null
	 * @param houserName house name to match; the district passes are skipped when null
	 * @return {@code {lat, lng}} (elements may be null when the candidate carries no
	 *         location), or {@code null} when {@code list} is null or empty
	 */
	public static String[] getJwd(List<JwdInfo> list, String part, String houserName) {
		if (isEmpty(list)) {
			return null;
		}
		if (part != null && houserName != null) {
			for (JwdInfo info : list) {
				if (inDistrict(info, part) && StringUtils.equals(info.getName(), houserName)) {
					return coordinatesOf(info);
				}
			}
			for (JwdInfo info : list) {
				if (inDistrict(info, part) && info.getName() != null && info.getName().contains(houserName)) {
					return coordinatesOf(info);
				}
			}
		}
		return coordinatesOf(list.get(0));
	}

	/** Returns whether the candidate's district is known and contains {@code part}. */
	private static boolean inDistrict(JwdInfo info, String part) {
		return info.getDistrict() != null && info.getDistrict().contains(part);
	}

	/** Extracts {@code {lat, lng}} from a candidate; both are null when it has no location. */
	private static String[] coordinatesOf(JwdInfo info) {
		if (info.getLocation() == null) {
			return new String[] { null, null };
		}
		return new String[] { info.getLocation().getLat(), info.getLocation().getLng() };
	}

	/** Returns whether the list is null or has no elements. */
	private static boolean isEmpty(List<?> list) {
		return list == null || list.isEmpty();
	}
}

获取完经纬度之后,再用一个定时任务根据经纬度去爬取周边信息:

package com.jrzh.thread;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.alibaba.fastjson.JSONObject;
import com.jrzh.bean.testBean;
import com.jrzh.common.exception.ProjectException;
import com.jrzh.contants.Contants;
import com.jrzh.framework.bean.SessionUser;
import com.jrzh.mvc.model.reptile.ReptileNewHouseModel;
import com.jrzh.mvc.search.reptile.ReptileNewHouseSearch;
import com.jrzh.mvc.service.reptile.manager.ReptileServiceManage;
import com.jrzh.tools.MyPayUtils;

/**
 * Background job that collects nearby points of interest (banks, hospitals,
 * schools, shops, transport, postal services) for every house that already
 * has coordinates, using the Baidu place-search API, and stores the place
 * names on the {@link ReptileNewHouseModel}.
 */
public class FindNearByThread extends Thread {

	static Log log = LogFactory.getLog(FindNearByThread.class);

	/** Baidu place-search endpoint. */
	private static final String SEARCH_URL = "http://api.map.baidu.com/place/v2/search";

	/** Baidu application keys (placeholder value, fill in real keys); the first one is used. */
	private static final String[] APP_KEYS = { "key数组" };

	/** Search radius around the house, sent as the API's {@code radius} parameter. */
	private static final String RADIUS = "2000";

	/** Response format requested from the API. */
	private static final String OUTPUT_FORMAT = "json";

	/** Separator placed between place names of one category. */
	private static final String NAME_SEPARATOR = "、";

	private ReptileServiceManage reptileServiceManage;

	public FindNearByThread(ReptileServiceManage reptileServiceManage) {
		this.reptileServiceManage = reptileServiceManage;
	}

	/**
	 * Loads all houses and refreshes the nearby information of each one.
	 * A failure on one house is logged and does not stop the remaining houses.
	 */
	@Override
	public void run() {
		List<ReptileNewHouseModel> houses;
		try {
			houses = reptileServiceManage.reptileNewHouseService.list(new ReptileNewHouseSearch());
		} catch (ProjectException e) {
			log.error("Failed to load houses for the nearby-information update", e);
			return;
		}
		if (houses == null) {
			return;
		}
		for (ReptileNewHouseModel model : houses) {
			try {
				this.getNearBy(model);
			} catch (Exception e) {
				log.error("Failed to update nearby information for house " + model.getHouseName(), e);
			}
		}
	}

	/**
	 * Searches every category of {@code Contants.NEAR_BY_LIST} around the house and
	 * stores the found place names on the model. Houses without coordinates are skipped.
	 * Categories that yield no result leave the corresponding model field untouched.
	 *
	 * @param model the house to update
	 */
	public void getNearBy(ReptileNewHouseModel model) {
		if (model.getLatitude() == null || model.getLongitude() == null) {
			// No coordinates, nothing to search around.
			return;
		}

		List<String> bank = new ArrayList<String>();
		List<String> hospital = new ArrayList<String>();
		List<String> school = new ArrayList<String>();
		List<String> kindergarten = new ArrayList<String>();
		List<String> mall = new ArrayList<String>();
		List<String> supermarket = new ArrayList<String>();
		List<String> postOffice = new ArrayList<String>();
		List<String> courier = new ArrayList<String>();
		List<String> bus = new ArrayList<String>();
		List<String> subway = new ArrayList<String>();

		String location = model.getLatitude() + "," + model.getLongitude();

		for (String category : Contants.NEAR_BY_LIST) {
			List<String> target;
			if (StringUtils.equals(category, "银行")) {
				target = bank;
			} else if (StringUtils.equals(category, "医院")) {
				target = hospital;
			} else if (StringUtils.equals(category, "商场")) {
				target = mall;
			} else if (StringUtils.equals(category, "超市")) {
				target = supermarket;
			} else if (StringUtils.equals(category, "邮政")) {
				target = postOffice;
			} else if (StringUtils.equals(category, "快递")) {
				target = courier;
			} else if (StringUtils.equals(category, "学校")) {
				target = school;
			} else if (StringUtils.equals(category, "幼儿园")) {
				target = kindergarten;
			} else if (StringUtils.equals(category, "公交")) {
				target = bus;
			} else if (StringUtils.equals(category, "地铁")) {
				target = subway;
			} else {
				// No model field for this category, so there is no point in querying it.
				continue;
			}
			target.addAll(fetchNames(category, location));
		}

		try {
			String kindergartenText = describe(null, kindergarten);
			if (kindergartenText != null) {
				model.setKindergarten(kindergartenText);
			}
			String bankText = describe(null, bank);
			if (bankText != null) {
				model.setBank(bankText);
			}
			String hospitalText = describe(null, hospital);
			if (hospitalText != null) {
				model.setHospital(hospitalText);
			}
			String schoolText = describe(null, school);
			if (schoolText != null) {
				model.setSchool(schoolText);
			}

			// Paired categories share one field: "<first> <second>", or whichever one has results.
			String powerCenter = combine(describe("超市:", supermarket), describe("商场:", mall));
			if (powerCenter != null) {
				model.setPowerCenter(powerCenter);
			}
			String traffic = combine(describe("公交:", bus), describe("地铁:", subway));
			if (traffic != null) {
				model.setTraffic(traffic);
			}
			String post = combine(describe("邮政:", postOffice), describe("快递:", courier));
			if (post != null) {
				model.setPost(post);
			}

			reptileServiceManage.reptileNewHouseService.edit(model, SessionUser.getSystemUser());
			log.info(model.getHouseName() + "更新周边信息!");
		} catch (Exception e) {
			log.error("Failed to save nearby information for house " + model.getHouseName(), e);
		}
	}

	/**
	 * Runs one place search and returns the names of the places found.
	 *
	 * @param category search keyword
	 * @param location {@code "lat,lng"} centre of the search
	 * @return the non-blank place names in response order; empty when the request fails,
	 *         the response is blank or it carries no {@code results}
	 */
	private static List<String> fetchNames(String category, String location) {
		List<String> names = new ArrayList<String>();
		try {
			// The keyword is non-ASCII and must be encoded before it goes into the query string.
			String url = SEARCH_URL + "?query=" + java.net.URLEncoder.encode(category, "utf-8") + "&location="
					+ location + "&radius=" + RADIUS + "&output=" + OUTPUT_FORMAT + "&ak=" + APP_KEYS[0];
			// Timeout arguments kept from the original call; their units are defined by MyPayUtils.
			String requestContent = MyPayUtils.sendHttpByGet(url, 2000000, 20000000);
			if (StringUtils.isBlank(requestContent)) {
				log.info("Empty nearby-search response for category " + category);
				return names;
			}

			JSONObject response = JSONObject.parseObject(requestContent);
			Object results = response == null ? null : response.get("results");
			if (results == null) {
				return names;
			}
			List<testBean> places = JSONObject.parseArray(results.toString(), testBean.class);
			if (places == null) {
				return names;
			}
			for (testBean place : places) {
				if (place != null && StringUtils.isNotBlank(place.getName())) {
					names.add(place.getName());
				}
			}
		} catch (Exception e) {
			// Best effort: one failing category must not prevent the others from being stored.
			log.error("Nearby search failed for category " + category, e);
		}
		return names;
	}

	/**
	 * Joins the names with the name separator, optionally prefixed by a label.
	 *
	 * @param label prefix such as {@code "商场:"}, or null for none
	 * @param names place names of one category
	 * @return the joined text, or null when there are no names
	 */
	private static String describe(String label, List<String> names) {
		if (names.isEmpty()) {
			return null;
		}
		StringBuilder text = new StringBuilder();
		if (label != null) {
			text.append(label);
		}
		for (int i = 0; i < names.size(); i++) {
			if (i > 0) {
				text.append(NAME_SEPARATOR);
			}
			text.append(names.get(i));
		}
		return text.toString();
	}

	/** Joins two optional texts with a space; returns the non-null one, or null when both are null. */
	private static String combine(String first, String second) {
		if (first == null) {
			return second;
		}
		if (second == null) {
			return first;
		}
		return first + " " + second;
	}

}



你可能感兴趣的:(jsoup,爬虫)