在百度地图上爬取某市所有的公交车线路

一、爬取目标

爬取某市所有公交线路的站点经纬度数据及整个公交线路的轨迹数据。

二、爬取过程使用的工具包

jsoup、d3.v3

三、爬取某市的公交线路名

例如爬取海口市的公交线路数据,首先到 https://haikou.8684.cn/line1 上爬取海口市所有公交线路名的集合。

在百度地图上爬取某市所有的公交车线路_第1张图片

该段爬虫代码:

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;



public class Bus {
	/**
	 * Requests the given URL through jsoup with an HTTP GET and returns the
	 * resulting document.
	 *
	 * @param url address of the page to request
	 * @return the document produced by jsoup for that page
	 * @throws IOException if the jsoup request fails
	 */
	public Document getDoc(String url) throws IOException{
		return Jsoup.connect(url).get();
	}
	public static void main(String []args) throws IOException{
		String Urls[]={"https://haikou.8684.cn/line1",
	             "https://haikou.8684.cn/line2",
	             "https://haikou.8684.cn/line3",
	             "https://haikou.8684.cn/line4",
	             "https://haikou.8684.cn/line5",
	             "https://haikou.8684.cn/line6"};
		String typs[]={"市区线路","快速公交","夜班线路","旅游公交","停运","机场巴士"};
		Bus bus=new Bus();
		Map> busmap=new HashMap<>(); 
		int i=0;
		ArrayList buslist=new ArrayList<>(); 
		for(String url : Urls){
			Document doc=bus.getDoc(url);
			Elements elements=doc.getElementsByClass("cc-content");
			
//			busmap.put(typs[i], buslist);
			for(Element e :elements)
			{
				
				Elements elements2=e.getElementsByClass("clearfix").get(0).getElementsByTag("a");
				for(Element e2:elements2)
				{
					String busname=e2.text().toString();
					buslist.add(busname);
//					System.out.println(busname);
				}
				
			}
//			i++;
			
		}
		System.out.println(buslist);
		String path="d:"+File.separator+"1"+File.separator;
    	File file=new File(path+"Bus"+".csv");
    	if(!file.getParentFile().exists())//父目录不存在
		{
			file.getParentFile().mkdirs();//创建父目录
		}
		if(file.exists())
		{
			file.delete();
		}
		try(PrintWriter out=new PrintWriter(new File(path+"Bus"+".csv"),"UTF-8");){
			StringBuilder buffers=new StringBuilder();
			int index=0;
			buffers.append("name");
			buffers.append("\n");
			int x=0;
			for(String d:buslist){
				buffers.append("海口"+d.toString());
				if(x

在爬取到所有公交线路名集合后,借助百度地图的 JavaScript API 接口,获取每一条公交线路的站点经纬度以及线路轨迹的经纬度数据,并将其返回到后台;后台用一个静态变量保存每次返回的数据,待所有公交线路的数据爬取完毕后,再统一保存到文件中。

前端代码如下:

其中用到了 d3 来加载公交线路名数据,并使用了 jQuery 包。



	
		
		
		
		
		
		
	
	
		

后台代码如下:


import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

/**
 * Base servlet that dispatches every request to a handler method selected by
 * the {@code method} request parameter.
 *
 * <p>The handler is looked up by name on the concrete class of this servlet and
 * must accept {@code (HttpServletRequest, HttpServletResponse)}. When the
 * parameter is missing or blank, the handler named {@code execute} is used.
 */
public class BaseAction extends HttpServlet{
	private static final long serialVersionUID = 1L;

	/** Handler name used when the request carries no {@code method} parameter. */
	private static final String DEFAULT_METHOD = "execute";

	/**
	 * Handles GET requests exactly like POST requests.
	 *
	 * @param request  the incoming request
	 * @param response the response to populate
	 * @throws ServletException if the selected handler fails or cannot be invoked
	 * @throws IOException      if the selected handler fails with an I/O error
	 */
	@Override
	protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		// doPost applies the UTF-8 request/response settings itself, so they are not repeated here.
		this.doPost(request, response);
	}

	/**
	 * Applies UTF-8 encoding to request and response, then invokes the handler
	 * named by the {@code method} parameter.
	 *
	 * <p>An unknown handler name is answered with HTTP 404. Exceptions thrown by
	 * the handler are propagated to the container instead of being swallowed.
	 *
	 * @param request  the incoming request
	 * @param response the response to populate
	 * @throws ServletException if the handler throws a non-I/O exception or
	 *                          cannot be invoked reflectively
	 * @throws IOException      if the handler throws an {@code IOException}
	 */
	@Override
	protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		request.setCharacterEncoding("utf-8");
		response.setCharacterEncoding("utf-8");
		// The original spelled the parameter "charest", which is not a valid content-type parameter.
		response.setContentType("text/html;charset=UTF-8");
		String methodStr=request.getParameter("method");
		if(null==methodStr||methodStr.trim().isEmpty()){
			methodStr=DEFAULT_METHOD;
		}

		try {
			// NOTE(review): getDeclaredMethod + setAccessible lets a client invoke ANY method
			// declared on the concrete class with this signature, including private ones.
			// Consider restricting dispatch to an explicit allow-list of handler names.
			Method method=this.getClass().getDeclaredMethod(methodStr, HttpServletRequest.class,HttpServletResponse.class);
			method.setAccessible(true);
			method.invoke(this, request,response);
		} catch (NoSuchMethodException e) {
			// The requested handler does not exist: report it instead of returning an empty 200.
			response.sendError(HttpServletResponse.SC_NOT_FOUND, "Unknown action method");
		} catch (InvocationTargetException e) {
			// Unwrap so the container sees the handler's real failure.
			Throwable cause=e.getCause();
			if(cause instanceof IOException){
				throw (IOException) cause;
			}
			if(cause instanceof ServletException){
				throw (ServletException) cause;
			}
			throw new ServletException("Action method '"+methodStr+"' failed", cause);
		} catch (SecurityException | IllegalAccessException | IllegalArgumentException e) {
			throw new ServletException("Cannot invoke action method '"+methodStr+"'", e);
		}
	}

}



import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Map;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

/**
 * Servlet action that collects the data strings posted by the front end and
 * writes them to a file once the front end signals completion.
 *
 * <p>Each request carries a {@code data} parameter. Requests whose
 * {@code finshed} parameter is not the literal {@code "finshed"} are stored in
 * a static map (used as a set of distinct strings); a request with
 * {@code finshed=finshed} triggers the file dump.
 */
public class BusAction extends BaseAction{

	private static final long serialVersionUID = 1L;

	/**
	 * Distinct data strings received so far; the value is an unused placeholder.
	 * Shared across requests, so every access is guarded by {@code synchronized (list)}.
	 */
	private static final Map<String,Integer> list=new HashMap<>();

	/**
	 * Stores one posted data string, or dumps everything collected so far when
	 * the request is flagged as finished.
	 *
	 * <p>Requests without a non-empty {@code data} parameter are ignored. For a
	 * stored item the response body is {@code "succes"}; nothing is written to
	 * the response for the finishing request.
	 *
	 * @param request  request carrying the {@code data} and {@code finshed} parameters
	 * @param response response used to acknowledge a stored item
	 * @throws ServletException declared for the reflective dispatch signature
	 * @throws IOException      if the response cannot be written or the dump file
	 *                          cannot be written
	 */
	public void getsaveData(HttpServletRequest request,HttpServletResponse response)throws ServletException, IOException{
		response.setCharacterEncoding("utf-8");
		// The original spelled the parameter "charest", which is not a valid content-type parameter.
		response.setContentType("text/html;charset=UTF-8");
		request.setCharacterEncoding("utf-8");
		String finshed=request.getParameter("finshed");
		String data=request.getParameter("data");
		System.out.println(data);
		if(data==null||data.isEmpty()){
			return;
		}
		// Constant-first comparison: a missing "finshed" parameter means "not finished"
		// instead of raising a NullPointerException.
		if(!"finshed".equals(finshed)){
			synchronized(list){
				list.put(data,1);
			}
			response.getWriter().println("succes");
			return;
		}
		write();
	}

	/**
	 * Writes the string form of all collected keys to {@code d:\1\aBus.txt}
	 * (UTF-8), creating the parent directory if needed and replacing any
	 * existing file.
	 *
	 * @throws IOException if the file cannot be opened or written
	 */
	private void write() throws IOException{
		String path="d:"+File.separator+"1"+File.separator;
		File file=new File(path+"aBus"+".txt");
		File parent=file.getParentFile();
		if(!parent.exists()){
			// Create the parent directory; a failure surfaces when the file is opened below.
			parent.mkdirs();
		}
		String snapshot;
		synchronized(list){
			snapshot=list.keySet().toString();
		}
		// Opening the PrintWriter truncates an existing file, so no explicit delete is needed.
		try(PrintWriter out=new PrintWriter(file,"UTF-8")){
			out.println(snapshot);
			// PrintWriter swallows write errors internally; surface them explicitly.
			if(out.checkError()){
				throw new IOException("Failed to write "+file.getPath());
			}
		}
		System.out.println("完成");
	}
}

最终结果效果图

在百度地图上爬取某市所有的公交车线路_第2张图片

 

你可能感兴趣的:(爬虫)