日前用 Java 爬虫爬取某些地点的周边信息时,发现大部分网站的周边信息都是先加载页面,再由 JS 调用 API 动态获取的,而且不同网站的具体实现方式各不相同。所以我的解决方案是:先根据搜索目标的名称及其所在区域调用百度 API 获取经纬度,再根据经纬度搜索周边信息。具体接口请查看百度地图提供的公开 Web API:http://lbsyun.baidu.com/index.php?title=webapi
package com.jrzh.thread;
import java.math.BigDecimal;
import java.net.URLEncoder;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.alibaba.fastjson.JSONObject;
import com.jrzh.bean.jwd.JwdBean;
import com.jrzh.bean.jwd.JwdInfo;
import com.jrzh.common.exception.ProjectException;
import com.jrzh.framework.bean.SessionUser;
import com.jrzh.mvc.model.reptile.ReptileNewHouseModel;
import com.jrzh.mvc.service.reptile.manager.ReptileServiceManage;
import com.jrzh.tools.MyPayUtils;
/**
 * Background job that fills in missing latitude/longitude values on new-house records.
 *
 * <p>For every record without coordinates it queries the Baidu Place "suggestion" web API with
 * the record's city and name (falling back to the record's alternative name), picks the best
 * matching suggestion and stores its coordinates through the service layer.
 *
 * <p>The job takes no lock; if only one instance may run at a time, the caller has to ensure it.
 */
public class LatitudeLongitudeThread extends Thread {
    static Log log = LogFactory.getLog(LatitudeLongitudeThread.class);

    /** API keys tried in order; the next key is used when the current one is rejected. */
    private static final String[] APP_KEYS = { "key的数组" };

    /** Pause after each lookup, in milliseconds, to throttle calls to the remote API. */
    private static final long REQUEST_INTERVAL_MILLIS = 300L;

    private ReptileServiceManage reptileServiceManage;

    /**
     * @param reptileServiceManage service facade used to load and update house records
     */
    public LatitudeLongitudeThread(ReptileServiceManage reptileServiceManage) {
        this.reptileServiceManage = reptileServiceManage;
    }

    /**
     * Loads all house records and resolves coordinates for those that have none. A failure on
     * one record is logged and does not stop the remaining records; an interrupt stops the job.
     */
    @Override
    public void run() {
        List<ReptileNewHouseModel> houses;
        try {
            // Load every record; records that already have coordinates are skipped below.
            houses = reptileServiceManage.reptileNewHouseService.findAll();
        } catch (ProjectException e) {
            log.error("Failed to load house records for coordinate lookup", e);
            return;
        }
        if (houses == null) {
            return;
        }
        for (ReptileNewHouseModel house : houses) {
            if (house.getLatitude() != null && house.getLongitude() != null) {
                // Coordinates already present, nothing to do.
                continue;
            }
            try {
                updateCoordinates(house);
                // Throttle after every attempt, successful or not, because each one hits the API.
                Thread.sleep(REQUEST_INTERVAL_MILLIS);
            } catch (InterruptedException e) {
                // Restore the flag so whoever interrupted this thread can observe it, then stop.
                Thread.currentThread().interrupt();
                log.warn("Coordinate lookup interrupted, stopping");
                return;
            } catch (Exception e) {
                log.error("Failed to update coordinates for house " + house.getHouseName(), e);
            }
        }
        log.info("一手房经纬度结束");
    }

    /**
     * Looks up coordinates for one record and persists them when both values were found.
     *
     * @param house record without coordinates
     * @throws Exception whatever the service layer throws while saving
     */
    private void updateCoordinates(ReptileNewHouseModel house) throws Exception {
        List<JwdInfo> candidates = getJwd(house.getHouseCity(), house.getHouseName());
        if (candidates == null || candidates.isEmpty()) {
            // Nothing found under the primary name: retry with the alternative name, if any.
            if (house.getHouseOtherName() == null) {
                return;
            }
            candidates = getJwd(house.getHouseCity(), house.getHouseOtherName());
            if (candidates == null || candidates.isEmpty()) {
                return;
            }
        }
        String[] latLng = getJwd(candidates, house.getHousePart(), house.getHouseName());
        if (latLng == null) {
            return;
        }
        String lat = latLng[0];
        String lng = latLng[1];
        log.info(lat + "----" + lng);
        if (lat != null && lng != null) {
            house.setLatitude(new BigDecimal(lat));
            house.setLongitude(new BigDecimal(lng));
            reptileServiceManage.reptileNewHouseService.edit(house, SessionUser.getSystemUser());
        }
    }

    /**
     * Queries the Baidu Place suggestion API for places named {@code position} inside
     * {@code city}.
     *
     * @param city     region to restrict the search to
     * @param position place name to search for
     * @return the suggestions returned by the API, or {@code null} when an argument is blank,
     *         the request fails, or the API does not answer with message {@code "ok"}
     */
    public static List<JwdInfo> getJwd(String city, String position) {
        if (StringUtils.isBlank(city) || StringUtils.isBlank(position)) {
            return null;
        }
        try {
            // Both values go into the query string, so both must be percent-encoded.
            String encodedPosition = URLEncoder.encode(position, "utf-8");
            String encodedCity = URLEncoder.encode(city, "utf-8");
            for (String appkey : APP_KEYS) {
                String url = "http://api.map.baidu.com/place/v2/suggestion?query=" + encodedPosition
                        + "&region=" + encodedCity + "&city_limit=true&output=json&ak=" + appkey;
                String requestContent = MyPayUtils.sendHttpByGet(url, 2000000, 20000000);
                JwdBean jwdBean = JSONObject.parseObject(requestContent, JwdBean.class);
                if (jwdBean == null) {
                    return null;
                }
                // NOTE(review): "302" is treated as "this key is rejected, try the next one".
                // Confirm that JwdBean really exposes that code through getMessage().
                if (StringUtils.equals(jwdBean.getMessage(), "302")) {
                    continue;
                }
                if (StringUtils.equals(jwdBean.getMessage(), "ok")) {
                    return jwdBean.getResult();
                }
                return null;
            }
        } catch (Exception e) {
            log.error("Suggestion lookup failed for city=" + city + ", position=" + position, e);
        }
        return null;
    }

    /**
     * Picks the coordinates of the best suggestion, trying in order:
     * <ol>
     * <li>a suggestion whose district contains {@code part} and whose name equals
     * {@code houserName};</li>
     * <li>a suggestion whose district contains {@code part} and whose name contains
     * {@code houserName};</li>
     * <li>the first suggestion that carries a location at all.</li>
     * </ol>
     *
     * @param list       suggestions to choose from
     * @param part       district the house belongs to
     * @param houserName house name to match
     * @return {@code {lat, lng}}, or {@code null} when {@code list} is null/empty or no
     *         suggestion has a location
     */
    public static String[] getJwd(List<JwdInfo> list, String part, String houserName) {
        if (list == null || list.isEmpty()) {
            return null;
        }
        String[] match = findInDistrict(list, part, houserName, true);
        if (match == null) {
            match = findInDistrict(list, part, houserName, false);
        }
        if (match != null) {
            return match;
        }
        // Best effort: fall back to the first suggestion that has coordinates.
        for (JwdInfo info : list) {
            String[] latLng = locationOf(info);
            if (latLng != null) {
                return latLng;
            }
        }
        return null;
    }

    /**
     * Returns the coordinates of the first suggestion inside {@code part} whose name matches
     * {@code houserName}, either exactly or by substring, or {@code null} when there is none.
     */
    private static String[] findInDistrict(List<JwdInfo> list, String part, String houserName,
            boolean exact) {
        for (JwdInfo info : list) {
            // StringUtils.contains is null-safe, unlike String.contains.
            if (info == null || !StringUtils.contains(info.getDistrict(), part)) {
                continue;
            }
            boolean nameMatches = exact
                    ? StringUtils.equals(info.getName(), houserName)
                    : StringUtils.contains(info.getName(), houserName);
            if (!nameMatches) {
                continue;
            }
            String[] latLng = locationOf(info);
            if (latLng != null) {
                return latLng;
            }
        }
        return null;
    }

    /** Returns {@code {lat, lng}} of a suggestion, or {@code null} when it has no location. */
    private static String[] locationOf(JwdInfo info) {
        if (info == null || info.getLocation() == null) {
            return null;
        }
        return new String[] { info.getLocation().getLat(), info.getLocation().getLng() };
    }
}
获取完经纬度之后,再用一个定时器根据经纬度爬取周边信息。
package com.jrzh.thread;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.alibaba.fastjson.JSONObject;
import com.jrzh.bean.testBean;
import com.jrzh.common.exception.ProjectException;
import com.jrzh.contants.Contants;
import com.jrzh.framework.bean.SessionUser;
import com.jrzh.mvc.model.reptile.ReptileNewHouseModel;
import com.jrzh.mvc.search.reptile.ReptileNewHouseSearch;
import com.jrzh.mvc.service.reptile.manager.ReptileServiceManage;
import com.jrzh.tools.MyPayUtils;
/**
 * Background job that collects nearby points of interest for every house record that has
 * coordinates.
 *
 * <p>For each record it runs one Baidu Place "search" request per category in
 * {@code Contants.NEAR_BY_LIST}, joins the returned place names per category and stores the
 * resulting strings on the record through the service layer.
 *
 * <p>The job takes no lock; if only one instance may run at a time, the caller has to ensure it.
 */
public class FindNearByThread extends Thread {
    static Log log = LogFactory.getLog(FindNearByThread.class);

    /** API keys available for the search request; only the first one is used. */
    private static final String[] APP_KEYS = { "key数组" };

    /** Search radius around the house passed to the API as the {@code radius} parameter. */
    private static final String RADIUS = "2000";

    /** Response format requested from the API. */
    private static final String OUTPUT_TYPE = "json";

    /** Separator written after every place name; the trailing one is removed before saving. */
    private static final String SEPARATOR = "、";

    private ReptileServiceManage reptileServiceManage;

    /**
     * @param reptileServiceManage service facade used to load and update house records
     */
    public FindNearByThread(ReptileServiceManage reptileServiceManage) {
        this.reptileServiceManage = reptileServiceManage;
    }

    /**
     * Loads all house records and refreshes the nearby information of each one. A failure on
     * one record is logged and does not stop the remaining records.
     */
    @Override
    public void run() {
        ReptileNewHouseSearch rnhSearch = new ReptileNewHouseSearch();
        List<ReptileNewHouseModel> houses;
        try {
            // Load every house record.
            houses = reptileServiceManage.reptileNewHouseService.list(rnhSearch);
        } catch (ProjectException e) {
            log.error("Failed to load house records for nearby lookup", e);
            return;
        }
        if (houses == null) {
            return;
        }
        for (ReptileNewHouseModel model : houses) {
            try {
                this.getNearBy(model);
            } catch (Exception e) {
                log.error("Failed to collect nearby information for house " + model.getHouseName(), e);
            }
        }
    }

    /**
     * Searches every configured category around the house and saves the joined place names.
     * Records without latitude or longitude are skipped. Fields for which nothing was found are
     * left untouched; the record is saved in any case.
     *
     * @param model house record to enrich
     */
    public void getNearBy(ReptileNewHouseModel model) {
        if (model.getLatitude() == null || model.getLongitude() == null) {
            return; // no coordinates, nothing to search around
        }
        StringBuilder postKD = new StringBuilder();
        StringBuilder postYZ = new StringBuilder();
        StringBuilder subway = new StringBuilder();
        StringBuilder bus = new StringBuilder();
        StringBuilder hospital = new StringBuilder();
        StringBuilder school = new StringBuilder();
        StringBuilder kindergarten = new StringBuilder();
        StringBuilder bank = new StringBuilder();
        StringBuilder sc = new StringBuilder();
        StringBuilder cs = new StringBuilder();
        String latitude = String.valueOf(model.getLatitude());
        String longitude = String.valueOf(model.getLongitude());
        String appkey = APP_KEYS[0];

        // One request per category; a failing category must not discard the others.
        for (String category : Contants.NEAR_BY_LIST) {
            for (testBean place : searchNearby(category, latitude, longitude, appkey)) {
                if (place == null || StringUtils.isBlank(place.getName())) {
                    continue; // skip entries without a usable name instead of storing "null"
                }
                String name = place.getName();
                if (StringUtils.equals(category, "银行")) {
                    appendName(bank, null, name);
                } else if (StringUtils.equals(category, "医院")) {
                    appendName(hospital, null, name);
                } else if (StringUtils.equals(category, "商场")) {
                    appendName(sc, "商场:", name);
                } else if (StringUtils.equals(category, "超市")) {
                    appendName(cs, "超市:", name);
                } else if (StringUtils.equals(category, "邮政")) {
                    appendName(postYZ, "邮政:", name);
                } else if (StringUtils.equals(category, "快递")) {
                    appendName(postKD, "快递:", name);
                } else if (StringUtils.equals(category, "学校")) {
                    appendName(school, null, name);
                } else if (StringUtils.equals(category, "幼儿园")) {
                    appendName(kindergarten, null, name);
                } else if (StringUtils.equals(category, "公交")) {
                    appendName(bus, "公交:", name);
                } else if (StringUtils.equals(category, "地铁")) {
                    appendName(subway, "地铁:", name);
                }
            }
        }

        // Persist the collected values.
        try {
            if (kindergarten.length() > 0) {
                model.setKindergarten(withoutTrailingSeparator(kindergarten));
            }
            if (bank.length() > 0) {
                model.setBank(withoutTrailingSeparator(bank));
            }
            if (hospital.length() > 0) {
                model.setHospital(withoutTrailingSeparator(hospital));
            }
            if (school.length() > 0) {
                model.setSchool(withoutTrailingSeparator(school));
            }
            // Combined fields: supermarkets + malls, bus + subway, postal + courier.
            String powerCenter = joinNonBlank(withoutTrailingSeparator(cs), withoutTrailingSeparator(sc));
            if (StringUtils.isNotBlank(powerCenter)) {
                model.setPowerCenter(powerCenter);
            }
            String traffic = joinNonBlank(withoutTrailingSeparator(bus), withoutTrailingSeparator(subway));
            if (StringUtils.isNotBlank(traffic)) {
                model.setTraffic(traffic);
            }
            String post = joinNonBlank(withoutTrailingSeparator(postYZ), withoutTrailingSeparator(postKD));
            if (StringUtils.isNotBlank(post)) {
                model.setPost(post);
            }
            reptileServiceManage.reptileNewHouseService.edit(model, SessionUser.getSystemUser());
            log.info(model.getHouseName() + "更新周边信息!");
        } catch (Exception e) {
            log.error("Failed to save nearby information for house " + model.getHouseName(), e);
        }
    }

    /**
     * Runs one place search around the given coordinates.
     *
     * @return the parsed {@code results} entries; an empty list when the request fails, the
     *         response is blank or malformed, or it carries no {@code results}
     */
    private List<testBean> searchNearby(String query, String latitude, String longitude, String appkey) {
        List<testBean> none = new ArrayList<testBean>();
        try {
            // The keyword is non-ASCII text, so it is percent-encoded before it goes into the URL.
            String url = "http://api.map.baidu.com/place/v2/search?query="
                    + java.net.URLEncoder.encode(query, "utf-8") + "&location=" + latitude + "," + longitude
                    + "&radius=" + RADIUS + "&output=" + OUTPUT_TYPE + "&ak=" + appkey;
            String requestContent = MyPayUtils.sendHttpByGet(url, 2000000, 20000000);
            if (StringUtils.isBlank(requestContent)) {
                log.warn("Empty response for nearby search, query=" + query);
                return none;
            }
            JSONObject response = JSONObject.parseObject(requestContent);
            if (response == null || response.get("results") == null) {
                return none;
            }
            List<testBean> places = JSONObject.parseArray(response.get("results") + "", testBean.class);
            return places == null ? none : places;
        } catch (Exception e) {
            log.error("Nearby search failed, query=" + query, e);
            return none;
        }
    }

    /**
     * Appends {@code name} plus the separator to {@code target}, writing {@code label} first
     * when the buffer is still empty and a label is given.
     */
    private static void appendName(StringBuilder target, String label, String name) {
        if (label != null && target.length() == 0) {
            target.append(label);
        }
        target.append(name).append(SEPARATOR);
    }

    /** Returns the buffer content without its last character, or "" for an empty buffer. */
    private static String withoutTrailingSeparator(StringBuilder source) {
        return source.length() == 0 ? "" : source.substring(0, source.length() - 1);
    }

    /** Joins two values with a single space, returning just one of them when the other is blank. */
    private static String joinNonBlank(String first, String second) {
        if (StringUtils.isNotBlank(first) && StringUtils.isNotBlank(second)) {
            return first + " " + second;
        }
        return StringUtils.isNotBlank(first) ? first : second;
    }
}