java爬虫实现百度地图数据爬取

本项目实现对百度地图地点检索结果的抓取:通过百度地图Place API(地点检索接口)按地区和关键词查询,并把返回的名称、地址、电话、经纬度等信息存入数据库。Web层使用Servlet,数据访问层使用MyBatis,数据库为MySQL。话不多说,上代码。

1.DAO层数据

package dao;
import java.util.List;
import dto.CityCodeDTO;
import dto.PageInfoDTO;
/**
 * MyBatis mapper interface for the crawled place records and the city-code lookup.
 *
 * <p>Instances are obtained through {@code SqlSession.getMapper(PageInfoDAO.class)}; the SQL
 * behind each method lives in the mapper XML.
 *
 * @author Administrator
 */
public interface PageInfoDAO {

    /**
     * Stores one crawled place record.
     *
     * @param pageInfo the record to insert
     */
    void save(PageInfoDTO pageInfo);

    /**
     * Returns one page of stored records.
     *
     * @param param query argument carrying {@code wordKey}, {@code startIndex} and {@code pageSize}
     * @return the rows of the requested page
     */
    // NOTE(review): element type assumed to be PageInfoDTO (the mapper's resultType is not
    // visible here) - confirm against the mapper XML. Generics are erased, so callers that
    // still assign the result to a raw List keep compiling.
    List<PageInfoDTO> findPageData(PageInfoDTO param);

    /**
     * Returns the record count used by the page to compute the number of pages.
     *
     * @return the number of stored records
     */
    Integer getPageCount();

    /**
     * Looks up the Baidu city code for the given area.
     *
     * @param cityCode lookup key; either {@code code} or {@code city} is set by the callers
     * @return the matching city code
     */
    // NOTE(review): presumably returns null when no row matches - confirm against the mapper.
    Integer getCityCode(CityCodeDTO cityCode);
}

2.百度地图使用的城市编码与电话区号不同,因此用一个DTO封装"区号/城市名"这一查询条件,用来查出对应的百度城市编码

package dto;
import java.io.Serializable;
public class CityCodeDTO implements Serializable {

    private static final long serialVersionUID = -7429099700161706593L;

    /** Numeric area code typed by the user; callers set this when the input is all digits. */
    private String code;

    /** City name typed by the user; callers set this when the input is not numeric. */
    private String city;

    /** @return the area code, or {@code null} when it was never set */
    public String getCode() {
        return this.code;
    }

    /** @param code the area code to look up */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the city name, or {@code null} when it was never set */
    public String getCity() {
        return this.city;
    }

    /** @param city the city name to look up */
    public void setCity(String city) {
        this.city = city;
    }

    /** Renders both fields for debugging output. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("CityCodeDTO [code=").append(code);
        text.append(", city=").append(city);
        text.append("]");
        return text.toString();
    }
}

3.实体类文件(内部含有数据库调取参数)

package dto;
import java.io.Serializable;
import java.util.Date;
/**
 * Entity holding one place record shown on the page, plus the paging/query arguments that are
 * handed to the mapper.
 *
 * @author Administrator
 */
public class PageInfoDTO implements Serializable {

    private static final long serialVersionUID = -8667380964768477281L;

    // ---- stored record ---------------------------------------------------------------------

    /** Record id. */
    private int id;
    /** Place name. */
    private String name;
    /** Place address. */
    private String address;
    /** Place telephone number. */
    private String telephone;
    /** Street-view image id of the place. */
    private String street_id;
    /** Latitude of the place. */
    private Double lat;
    /** Longitude of the place. */
    private Double lng;
    /** Validity flag of the record. */
    private Integer valid;

    // ---- audit columns ---------------------------------------------------------------------

    private Date createdTime;
    private String createdName;
    private Date modifiedTime;
    private String modifiedName;

    // ---- query arguments -------------------------------------------------------------------

    /** Offset of the first row of the requested page. */
    private Integer startIndex;
    /** Number of rows per page. */
    private Integer pageSize;
    /** Search key; callers build it from the city code followed by the keyword. */
    private String wordKey;

    /** Creates an empty record. */
    public PageInfoDTO() {
    }

    /** Creates a record with the basic place fields filled in. */
    public PageInfoDTO(int id, String name, String address, String telephone) {
        this.id = id;
        this.name = name;
        this.address = address;
        this.telephone = telephone;
    }

    public int getId() {
        return this.id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getAddress() {
        return this.address;
    }

    public void setAddress(String address) {
        this.address = address;
    }

    public String getTelephone() {
        return this.telephone;
    }

    public void setTelephone(String telephone) {
        this.telephone = telephone;
    }

    public String getStreet_id() {
        return this.street_id;
    }

    public void setStreet_id(String street_id) {
        this.street_id = street_id;
    }

    public Double getLat() {
        return this.lat;
    }

    public void setLat(Double lat) {
        this.lat = lat;
    }

    public Double getLng() {
        return this.lng;
    }

    public void setLng(Double lng) {
        this.lng = lng;
    }

    public Integer getValid() {
        return this.valid;
    }

    public void setValid(Integer valid) {
        this.valid = valid;
    }

    public Date getCreatedTime() {
        return this.createdTime;
    }

    public void setCreatedTime(Date createdTime) {
        this.createdTime = createdTime;
    }

    public String getCreatedName() {
        return this.createdName;
    }

    public void setCreatedName(String createdName) {
        this.createdName = createdName;
    }

    public Date getModifiedTime() {
        return this.modifiedTime;
    }

    public void setModifiedTime(Date modifiedTime) {
        this.modifiedTime = modifiedTime;
    }

    public String getModifiedName() {
        return this.modifiedName;
    }

    public void setModifiedName(String modifiedName) {
        this.modifiedName = modifiedName;
    }

    public Integer getStartIndex() {
        return this.startIndex;
    }

    public void setStartIndex(Integer startIndex) {
        this.startIndex = startIndex;
    }

    public Integer getPageSize() {
        return this.pageSize;
    }

    public void setPageSize(Integer pageSize) {
        this.pageSize = pageSize;
    }

    public String getWordKey() {
        return this.wordKey;
    }

    public void setWordKey(String wordKey) {
        this.wordKey = wordKey;
    }

    /** Renders every field, in a fixed order, for debugging output. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("PageInfoDTO [id=");
        text.append(id);
        text.append(", name=").append(name);
        text.append(", address=").append(address);
        text.append(", telephone=").append(telephone);
        text.append(", street_id=").append(street_id);
        text.append(", lat=").append(lat);
        text.append(", lng=").append(lng);
        text.append(", valid=").append(valid);
        text.append(", createdTime=").append(createdTime);
        text.append(", modifiedTime=").append(modifiedTime);
        text.append(", createdName=").append(createdName);
        text.append(", startIndex=").append(startIndex);
        text.append(", pageSize=").append(pageSize);
        text.append(", wordKey=").append(wordKey);
        text.append(", modifiedName=").append(modifiedName);
        text.append("]");
        return text.toString();
    }
}

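4.mapper文件

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//ibatis.apache.org//DTD Mapper 3.0//EN" "http://ibatis.apache.org/dtd/ibatis-3-mapper.dtd">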





<!--
  Inserts one crawled place. "insert ignore" makes MySQL skip a row that violates a unique key
  instead of failing the statement. valid, the two timestamps and the two user names are fixed
  values, not parameters.
  The table name is written in lower case to match the CREATE TABLE statement; MySQL table names
  are case-sensitive on case-sensitive file systems.
-->
insert ignore into pageinf
(
wordKey,
name,
address,
telephone,
street_id,
lat,
lng,
valid,
createdTime,
createdName,
modifiedTime,
modifiedName
)
values
(
#{wordKey},
#{name},#{address},#{telephone},#{street_id},#{lat},#{lng},
0,
now(),
'zt',
now(),
'zt'
)




 


5.servlet文件

package servlet;


import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;


import org.apache.ibatis.session.SqlSession;
import org.apache.ibatis.session.SqlSessionFactory;
import org.apache.ibatis.session.SqlSessionFactoryBuilder;


import dao.PageInfoDAO;
import dto.CityCodeDTO;
import dto.PageInfoDTO;
import net.sf.json.JSONObject;
import util.LngAndLatUtil;


/**
 * Servlet implementation class MapServlet
 */
public class MapServlet extends HttpServlet {
private static final long serialVersionUID = 1L;

protected void service(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {


// 设置编码格式
request.setCharacterEncoding("UTF-8");
response.setContentType("text/html;charset=UTF-8");
PrintWriter out = response.getWriter();


// 请求地址解析
String uri = request.getRequestURI();
String action = uri.substring(uri.lastIndexOf("/"), uri.lastIndexOf("."));


if ("/query".equals(action)) {


// 处理query.do请求实现数据入库
String area = request.getParameter("area");
String word = request.getParameter("word");
LngAndLatUtil util = new LngAndLatUtil();
JSONObject obj = util.getLngAndLat(area, word);
out.println(obj);

} else if ("/list".equals(action)) {


// 处理list请求实现页面分页显示数据
// 获取dao对象
SqlSessionFactoryBuilder ssfd = new SqlSessionFactoryBuilder();
SqlSessionFactory ssf = ssfd
.build(LngAndLatUtil.class.getClassLoader().getResourceAsStream("SqlMapConfig.xml"));
SqlSession session = ssf.openSession();
PageInfoDAO dao = session.getMapper(PageInfoDAO.class);


// 获取页面参数
String area = request.getParameter("area");
String word = request.getParameter("word");
System.out.println("area"+area);
Integer pageCurrent = Integer.valueOf(request.getParameter("pageCurrent"));
if (pageCurrent == null) {
pageCurrent = 1;
}
int pageSize = 10;
int startIndex = (pageCurrent - 1) * 10;
PageInfoDTO param = new PageInfoDTO();
param.setPageSize(pageSize);
param.setStartIndex(startIndex); 

//判断area是区号,还是名称
CityCodeDTO cityCode =new CityCodeDTO();
Pattern p = Pattern.compile("[0-9]*");
Matcher m = p.matcher(area);
if (m.matches()) {
cityCode.setCode(area);
System.out.println(666);
}else{
cityCode.setCity(area);
}
Integer code = getCityCode(cityCode);

param.setWordKey(code+word);
System.out.println(code+word);

// 获取查询数据
List data = dao.findPageData(param);
Integer count = dao.getPageCount();


// 封装传输数据
Map map = new HashMap();
map.put("count", count);
map.put("data", data);
JSONObject obj = JSONObject.fromObject(map);



// 将数据传给页面
out.println(obj);
}
}
public static Integer getCityCode(CityCodeDTO cityCode){
    SqlSessionFactoryBuilder ssfd = new SqlSessionFactoryBuilder();
SqlSessionFactory ssf = ssfd.build(LngAndLatUtil.class.
getClassLoader().getResourceAsStream("SqlMapConfig.xml"));
SqlSession session = ssf.openSession();
PageInfoDAO dao = session.getMapper(PageInfoDAO.class);
Integer code = dao.getCityCode(cityCode);
return code;
    }
}

6.从页面上获取数据的工具类

package util;


import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


import org.apache.ibatis.session.SqlSession;
import org.apache.ibatis.session.SqlSessionFactory;
import org.apache.ibatis.session.SqlSessionFactoryBuilder;


import dao.PageInfoDAO;
import dto.CityCodeDTO;
import dto.PageInfoDTO;
import net.sf.json.JSONObject;


public class LngAndLatUtil{

/**
* 爬取数据存入数据库
* @param address 查询区域
* @param param查询关键词
* @return json字符串
*/
public static JSONObject getLngAndLat(String area, String word) {
Integer code = getCode(area);
JSONObject obj = null;
for (int i = 0; i < 10; i++) {
String url = "http://api.map.baidu.com/place/v2/search?query=" + word + "&tag=&scope=2®ion=" + code
+ "&output=json&page_size=10&page_num=" + i + "&ret_coordtype=gcj02ll&ak=申请百度地图获取AK";
String str = loadJSON(url);
obj = JSONObject.fromObject(str);

//数据解析
if (obj.get("status").toString().equals("0")) {
List list = obj.getJSONArray("results");

//遍历数据实现存储
for (JSONObject jx : list) {

String name = (String) jx.get("name");
String addre = (String) jx.get("address");
String telephone = (String) jx.get("telephone");
Double lat = (Double) jx.getJSONObject("location").get("lat");
Double lng = (Double) jx.getJSONObject("location").get("lng");
String street_id = (String) jx.get("street_id");
PageInfoDTO pi = new PageInfoDTO();
pi.setName(name);
pi.setAddress(addre);
pi.setStreet_id(street_id);
pi.setTelephone(telephone);
pi.setLat(lat);
pi.setLng(lng);
Integer citycode = getCode(area);

pi.setWordKey(citycode+word);
getDao(pi);
}


}
}
return obj;
}
private static Integer getCode(String area) {
//判断area是区号,还是名称
CityCodeDTO cityCode =new CityCodeDTO();
Pattern p = Pattern.compile("[0-9]*");
Matcher m = p.matcher(area);
if (m.matches()) {
cityCode.setCode(area);
}else{
cityCode.setCity(area);
}
Integer code = getCityCode(cityCode);
return code;
}
public static Integer getCityCode(CityCodeDTO cityCode){
    SqlSessionFactoryBuilder ssfd = new SqlSessionFactoryBuilder();
SqlSessionFactory ssf = ssfd.build(LngAndLatUtil.class.
getClassLoader().getResourceAsStream("SqlMapConfig.xml"));
SqlSession session = ssf.openSession();
PageInfoDAO dao = session.getMapper(PageInfoDAO.class);
Integer code = dao.getCityCode(cityCode);
return code;
    }
/**
* 实现存储数据
* @param pi 需要存储的数据
*/
    public static void getDao(PageInfoDTO pi){
    SqlSessionFactoryBuilder ssfd = new SqlSessionFactoryBuilder();
SqlSessionFactory ssf = ssfd.build(LngAndLatUtil.class.
getClassLoader().getResourceAsStream("SqlMapConfig.xml"));
SqlSession session = ssf.openSession();
PageInfoDAO dao = session.getMapper(PageInfoDAO.class);
dao.save(pi);
session.commit();
session.close();
    }
    
    /**
     * 根据地址(url)获取页面数据
     * @param uri
     * @return
     */
    public static String loadJSON (String uri) {
        StringBuilder json = new StringBuilder();
        try {
            URL url = new URL(uri);
            URLConnection yc = url.openConnection();
            BufferedReader in = new BufferedReader(new InputStreamReader(yc.getInputStream(),"utf-8"));
            String inputLine = null;
            while ( (inputLine = in.readLine()) != null) {
                json.append(inputLine);
            }
            in.close();
        } catch (MalformedURLException e) {
        } catch (IOException e) {
        }
        return json.toString();
    }
}

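7.mybatis的配置文件

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE configuration PUBLIC "-//ibatis.apache.org//DTD Config 3.0//EN" "http://ibatis.apache.org/dtd/ibatis-3-config.dtd">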



















8.web.xml文件



  baiduMap
  
 
   
    MapServlet
    servlet.MapServlet
   
        character
        utf-8
   

 

 
    MapServlet
    *.do
 



9.页面js

// Once the DOM is ready, wire the pager buttons (delegated through #body) and the search button.
$(function(){
    var pagerButtons = '#nextPage,#prePage,#firstPage,#lastPage';
    var container = $("#body");
    container.on('click', pagerButtons, changePageData);
    $(".query").on('click', getData);
});
/**
 * Click handler of the four pager buttons: works out the new page number from the clicked
 * button's value, stores it on #body and reloads the table.
 */
function changePageData(){
    var PAGE_SIZE = 10;
    var container = $("#body");
    // "pageCount" holds the `count` value of the last list.do response; it is unset until then.
    var total = container.data("pageCount") || 0;
    var lastPage = Math.max(1, Math.ceil(total / PAGE_SIZE));
    var pageCurrent = container.data("pageCurrent") || 1;

    switch ($(this).val()) {
    case "首页":
        pageCurrent = 1;
        break;
    case "上一页":
        pageCurrent = Math.max(1, pageCurrent - 1);
        break;
    case "下一页":
        // Clamped: the page number used to grow past the last page without limit.
        pageCurrent = Math.min(lastPage, pageCurrent + 1);
        break;
    case "尾页":
        pageCurrent = lastPage;
        break;
    }

    container.data("pageCurrent", pageCurrent);
    getPageData();
}
/**
 * Click handler of the search button: asks query.do to crawl and store the results for the
 * entered area and keyword, then shows the first page of the stored rows.
 */
function getData(){
    var param = {
        "area": $(".area").val(),
        "word": $(".word").val()
    };
    $.getJSON("query.do", param, function(){
        // A new search starts on page 1; the page number of the previous search used to
        // carry over and could point past the end of the new result set.
        $("#body").data("pageCurrent", 1);
        getPageData();
    });
}
/**
 * Requests the current page (page 1 when none is stored on #body) from list.do for the
 * entered area and keyword and renders the returned rows.
 */
function getPageData(){
    var url = "list.do";
    console.log(url);
    var storedPage = $("#body").data("pageCurrent");
    var param = {
        "pageCurrent": storedPage ? storedPage : 1,
        "area": $(".area").val(),
        "word": $(".word").val()
    };
    console.log(345);
    $.getJSON(url, param, function(response){
        setTableRows(response);
        console.log(666);
    });
}
/**
 * Renders one page of rows into the table body and remembers the total count on #body.
 *
 * @param map response of list.do: {count: number, data: array of records}
 */
function setTableRows(map){
    var tbody = $(".tBody");
    $("#body").data("pageCount", map.count);
    tbody.empty();

    // Builds a cell with .text() so that values coming from the server are never parsed as HTML.
    function cell(value){
        return $("<td></td>").text(value == null ? "" : String(value));
    }

    var rows = map.data || [];
    // Index loop: for..in would also visit any enumerable property added to the array.
    for (var i = 0; i < rows.length; i++) {
        var row = rows[i];
        // NOTE(review): the original markup was lost; class "tr" is assumed from the `.tr td`
        // rule in the stylesheet - confirm against the page.
        var tr = $("<tr class='tr'></tr>");
        tr.append(cell(row.name));
        tr.append(cell(row.address));
        tr.append(cell(row.telephone));
        tr.append(cell(row.street_id));
        // lng is the longitude (经度) and lat the latitude (纬度); the labels used to be swapped.
        tr.append($("<td></td>").append(
            $("<span></span>").text("经度:" + row.lng),
            $("<br/>"),
            $("<span></span>").text("纬度:" + row.lat)));
        tr.append(cell(row.valid));
        // Empty cell under the seventh ("操作") header column.
        tr.append($("<td></td>"));
        tbody.append(tr);
    }
}
10.页面



Insert title here








 


















名称地址电话街景图地址经纬度有效性操作










11.页面css

@charset "utf-8";

/* Reset default spacing. The closing brace of this rule was missing, which made the parser
   swallow the rules that follow it. */
*{
margin:0;
padding:0;
}

/* Search form */
.getData{
width:500px;
margin:20px auto 10px auto;
}
.tableBody{
width:1000px !important;
}
/* Result table: the earlier "width:auto" was overridden by the later width and is removed;
   "padding:0 auto" is replaced because auto is not a valid padding value. */
#table{
margin:20px auto 10px auto;
padding:0;
width:1000px;
}
#body{
width:1000px;
margin:0 auto;
}
.th{
width: 80px;
}
.tr{
width:300px;
}
.tr td{
width:150px;
text-align:center;
}
/* Pager */
.foot{
width:300px;
margin:0 auto;
}
.page{
margin-right:30px;
}

12.项目所涉及的jar包


  4.0.0
  com.3b
  baiduMap
  0.0.1-SNAPSHOT
  war
 
 
  net.sf.json-lib
  json-lib
  2.2.3
 

 
  javax.servlet
  javax.servlet-api
  3.1.0
 

 
  org.mybatis
  mybatis
  3.2.8
 

 
  junit
  junit
  3.8.1
 

 
  com.oracle
  ojdbc14
  10.2.0.4.0
  pom.lastUpdated
 

 
  mysql
  mysql-connector-java
  5.1.40
 

 


13.sql语句

-- Foreign-key checks are switched off while the table is dropped and recreated,
-- and switched back on at the end of the script.
SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
-- Table structure for pageinf
-- ----------------------------
DROP TABLE IF EXISTS `pageinf`;
-- `id` is AUTO_INCREMENT because the insert statement never supplies it: without it the first
-- row takes the implicit default 0 and every later "insert ignore" collides on the primary key
-- and is dropped without an error.
-- The unique key on `name` is what lets "insert ignore" skip places that are already stored.
CREATE TABLE `pageinf` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `wordKey` varchar(255) DEFAULT NULL,
  `name` varchar(255) DEFAULT NULL,
  `address` varchar(500) DEFAULT NULL,
  `telephone` varchar(255) DEFAULT NULL,
  `street_id` varchar(500) DEFAULT NULL,
  `lat` varchar(255) DEFAULT NULL,
  `lng` varchar(255) DEFAULT NULL,
  `valid` int(11) DEFAULT NULL,
  `createdTime` datetime DEFAULT NULL,
  `createdName` varchar(255) DEFAULT NULL,
  `modifiedTime` datetime DEFAULT NULL,
  `modifiedName` varchar(255) DEFAULT NULL,
  PRIMARY KEY (`id`),
  UNIQUE KEY `bdname` (`name`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
SET FOREIGN_KEY_CHECKS=1;





 

你可能感兴趣的:(java爬虫,疯狂代码)