首先,源代码下载地址:http://down.51cto.com/data/2270088
github: https://github.com/whsgzcy/Data_From_GaoDe
然后,如果有问题,或者积分不够,可以发送邮件到我的邮箱[email protected],我可以把源程序分享给大家。很高兴能和各位同行分享;不过如果是打电话要我上厕所,我可是要骂人的哟~
最后,直接上代码。
package com.iwant.download2geodata;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import javax.naming.InitialContext;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import com.iwant.download2geodata.data.ShopInfo;
import com.iwant.download2geodata.data.ShopList;
import com.iwant.download2geodata.data.TemplateData;
import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import net.sf.json.JSONArray;
import net.sf.json.JSONException;
import net.sf.json.JSONObject;
/**
 * Crawls every result page of an AMap (Gaode) POI search and exports the shops that were found
 * to an Excel ({@code .xls}) workbook.
 *
 * <p>The request URL is built as {@link #murl} + page number + {@link #nurl}, so only the page
 * number changes between requests. Paging starts at {@link #pagernum} and stops at the first
 * page that reports {@code count == 0}, carries an empty result list, or cannot be fetched or
 * parsed. Whatever was collected up to that point is then written to the workbook once.
 *
 * <p>The scraping approach is deliberately simple so that it stays easy to maintain.
 * Created 2016-12-17.
 *
 * @author whsgzcy
 */
public class GeoGetDataThread extends Thread {

    /** Workbook file that {@link #run()} creates and fills. */
    private static final String OUTPUT_FILE = "C:/Users/suzhe/Desktop/test2.xls";

    /** Name of the single sheet inside {@link #OUTPUT_FILE}. */
    private static final String SHEET_NAME = "sheet1";

    /** Header titles of the exported sheet, in column order (columns 0 to 11). */
    private static final String[] COLUMN_TITLES = {"店铺名称", "店主", "店铺简介", "店铺电话", "地址", "经度", "纬度",
            "101充电桩", "10有充电桩", "照片", "是否提供×××", "图片链接"};

    /** Number of the next page to request; incremented after every processed page. */
    public int pagernum = 1;

    /** URL part placed in front of the page number. */
    public String murl = "http://ditu.amap.com/service/poiInfo?query_type=TQUERY&pagesize=20&pagenum=";

    /**
     * URL part placed after the page number. The {@code keywords} value is URL-encoded text;
     * {@code city} and {@code geoobj} presumably restrict the search area (confirm against the
     * AMap service before changing them).
     */
    public String nurl = "&qii=true&cluster_state=5&need_utd=true&utd_sceneid=1000&"
            + "div=PC1000&addr_poi_merge=true&is_classify=true&"
            + "city=321200&geoobj=119.848676%7C32.40107%7C119.93279%7C32.527222&keywords=%E7%94%B5%E5%8A%A8%E8%BD%A6";

    /** Workbook most recently created or loaded by this instance. */
    private HSSFWorkbook workbook = null;

    /** Wrapper that receives the collected shops once paging has finished. */
    private ShopList shopList = new ShopList();

    /** All shops collected so far, in the order the service returned them. */
    private List<ShopInfo> shopInfoList = new ArrayList<ShopInfo>();

    /**
     * Fetches all result pages starting at {@link #pagernum}, then writes the collected shops
     * to {@link #OUTPUT_FILE}. Nothing is written when no shop was collected.
     */
    @Override
    public void run() {
        fetchAllPages();
        shopList.setmShopInfoList(shopInfoList);
        if (shopInfoList.isEmpty()) {
            System.err.println("no shop data collected, nothing to export");
            return;
        }
        // The workbook is written once, after paging, instead of once per page.
        createExcel(OUTPUT_FILE, SHEET_NAME, COLUMN_TITLES);
        exportShops(OUTPUT_FILE, SHEET_NAME);
    }

    /**
     * Requests page after page and appends every shop to {@link #shopInfoList}.
     *
     * <p>No delay is applied between requests. Paging ends on a failed request, on
     * {@code count == 0}, on an empty result list, or on a response that lacks the expected
     * fields; shops collected before that point are kept.
     */
    private void fetchAllPages() {
        try {
            for (;; pagernum++) {
                String url = murl + pagernum + nurl;
                System.out.println(url);
                JSONObject page = HttpRequestUtil.getJsonObject(url);
                if (page == null) {
                    // getJsonObject reports every failure as null.
                    System.err.println("request failed, paging stopped at page " + pagernum);
                    break;
                }
                int count = page.getInt("count");
                if (count == 0) {
                    break;
                }
                System.out.println("count = " + count);
                JSONArray listArray = page.getJSONArray("data").getJSONObject(0).getJSONArray("list");
                if (listArray.size() == 0) {
                    // Guards against looping forever when count stays positive past the last page.
                    break;
                }
                for (int i = 0; i < listArray.size(); i++) {
                    shopInfoList.add(parseShop(listArray.getJSONObject(i)));
                    System.out.println("数据读写中");
                }
            }
        } catch (JSONException e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts one entry of the response's {@code list} array into a {@link ShopInfo}.
     *
     * @param line JSON object describing a single shop
     * @return the populated shop; absent text fields become empty strings and the shop always
     *         carries exactly one {@link TemplateData} whose {@code pic_info} is never null
     */
    private ShopInfo parseShop(JSONObject line) {
        ShopInfo shopInfo = new ShopInfo();
        shopInfo.setName(line.optString("name", ""));
        shopInfo.setTel(line.optString("tel", ""));
        shopInfo.setAddress(line.optString("address", ""));
        shopInfo.setLongitude(line.optString("longitude", ""));
        shopInfo.setLatitude(line.optString("latitude", ""));

        String picInfo = "";
        JSONObject templateData = line.optJSONObject("templateData");
        if (templateData != null && !templateData.isNullObject() && templateData.has("pic_info")) {
            picInfo = templateData.getString("pic_info");
        }
        TemplateData template = new TemplateData();
        template.setPic_info(picInfo);

        // Every shop owns its template list; a list shared by all shops would have to be
        // indexed by the shop's position, which breaks as soon as the lists diverge.
        List<TemplateData> templates = new ArrayList<TemplateData>();
        templates.add(template);
        shopInfo.setTemplateData(templates);
        return shopInfo;
    }

    /**
     * Loads the workbook at {@code fileDir}, writes one row per collected shop below the header
     * row and saves the workbook back to the same file.
     *
     * @param fileDir   path of an existing workbook that already contains the header row
     * @param sheetName name of the sheet to fill
     */
    private void exportShops(String fileDir, String sheetName) {
        try {
            try (FileInputStream in = new FileInputStream(fileDir)) {
                workbook = new HSSFWorkbook(in);
            }
            HSSFSheet sheet = workbook.getSheet(sheetName);
            if (sheet == null || sheet.getRow(0) == null) {
                System.err.println("sheet or header row missing in " + fileDir + ", nothing exported");
                return;
            }
            for (int i = 0; i < shopInfoList.size(); i++) {
                // Row 0 is the header, so shop i goes to row i + 1.
                fillShopRow(sheet.createRow(i + 1), shopInfoList.get(i), i + 1);
            }
            try (FileOutputStream out = new FileOutputStream(fileDir)) {
                workbook.write(out);
            }
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
        }
    }

    /**
     * Fills the twelve columns of one shop row.
     *
     * @param row        freshly created sheet row
     * @param shop       shop to export
     * @param shopNumber 1-based position of the shop, used to build the local picture file name
     */
    private static void fillShopRow(Row row, ShopInfo shop, int shopNumber) {
        String picInfo = pictureInfoOf(shop);
        boolean hasPicture = !picInfo.equals("");

        row.createCell(0).setCellValue(shop.getName());
        row.createCell(1).setCellValue("无");
        row.createCell(2).setCellValue("无");
        row.createCell(3).setCellValue(shop.getTel());
        row.createCell(4).setCellValue(shop.getAddress());
        row.createCell(5).setCellValue(shop.getLongitude());
        row.createCell(6).setCellValue(shop.getLatitude());
        row.createCell(7).setCellValue("0");
        row.createCell(8).setCellValue("0");
        row.createCell(9).setCellValue(hasPicture ? "taizhou/" + shopNumber + ".jpg" : "");
        row.createCell(10).setCellValue("暂不提供×××");
        row.createCell(11).setCellValue(picInfo);
    }

    /**
     * Returns the {@code pic_info} of the shop's first template entry.
     *
     * @param shop shop whose template data is inspected
     * @return the picture info, or an empty string when the shop has none
     */
    private static String pictureInfoOf(ShopInfo shop) {
        List<?> templates = shop.getTemplateData();
        if (templates == null || templates.isEmpty()) {
            return "";
        }
        String picInfo = ((TemplateData) templates.get(0)).getPic_info();
        return picInfo == null ? "" : picInfo;
    }

    /**
     * Appends one sample row ({@code "111"}, {@code "222"}, {@code "333"} in the first three
     * columns) to an existing workbook and saves it back to the same file.
     *
     * <p>Failures to read or write the file are printed and the method returns without
     * modifying the file.
     *
     * @param fileDir   path of the existing workbook
     * @param sheetName name of the sheet to append to; it must contain a header row
     */
    public void writeToExcel(String fileDir, String sheetName) {
        try {
            try (FileInputStream in = new FileInputStream(new File(fileDir))) {
                workbook = new HSSFWorkbook(in);
            }
            HSSFSheet sheet = workbook.getSheet(sheetName);
            if (sheet == null) {
                System.err.println("sheet " + sheetName + " not found in " + fileDir);
                return;
            }
            HSSFRow titleRow = sheet.getRow(0);
            if (titleRow == null) {
                System.err.println("header row missing in sheet " + sheetName);
                return;
            }
            // getLastRowNum() is the index of the last row, so the new row goes one below it.
            Row row = sheet.createRow(sheet.getLastRowNum() + 1);
            if (titleRow.getLastCellNum() > 0) {
                row.createCell(0).setCellValue("111");
                row.createCell(1).setCellValue("222");
                row.createCell(2).setCellValue("333");
            }
            try (FileOutputStream out = new FileOutputStream(fileDir)) {
                workbook.write(out);
            }
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a new workbook with a single sheet whose first row holds the given titles and
     * writes it to {@code fileDir}, replacing any existing file.
     *
     * @param fileDir   path of the workbook file to create
     * @param sheetName name of the sheet to create
     * @param titleRow  header titles, one per column; {@code null} yields an empty header row
     */
    public void createExcel(String fileDir, String sheetName, String[] titleRow) {
        workbook = new HSSFWorkbook();
        // A workbook without any sheet produces a file that Excel refuses to open.
        Sheet sheet = workbook.createSheet(sheetName);
        Row header = sheet.createRow(0);
        if (titleRow != null) {
            for (int i = 0; i < titleRow.length; i++) {
                Cell cell = header.createCell(i);
                cell.setCellValue(titleRow[i]);
            }
        }
        try (FileOutputStream out = new FileOutputStream(fileDir)) {
            workbook.write(out);
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints a UTF-8 text file to standard output line by line, for example a saved service
     * response used for offline testing.
     *
     * @param filePath path of the file to print
     */
    public static void readTxtFile(String filePath) {
        if (filePath == null || !new File(filePath).isFile()) {
            System.out.println("找不到指定的文件");
            return;
        }
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(filePath), "UTF-8"))) {
            String lineTxt;
            while ((lineTxt = reader.readLine()) != null) {
                System.out.println(lineTxt);
            }
        } catch (IOException e) {
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }
    }

    /**
     * Starts the crawl on a new thread.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        GeoGetDataThread dg = new GeoGetDataThread();
        dg.start();
    }
}
package com.iwant.download2geodata;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import net.sf.json.JSONObject;
/**
 * Minimal HTTP helper that fetches a URL and parses the response body as JSON.
 * Created 2016-12-17.
 *
 * @author whsgzcy
 */
public class HttpRequestUtil {

    /** Maximum time to wait for the TCP connection, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MILLIS = 5000;

    /** Maximum time to wait for response data before giving up, in milliseconds. */
    private static final int READ_TIMEOUT_MILLIS = 10000;

    /**
     * Sends a plain GET request and parses the UTF-8 response body into a JSON object.
     *
     * @param url address to request
     * @return the parsed response, or {@code null} when the request fails, the server answers
     *         with a non-2xx status or no body, or the body is not valid JSON
     */
    public static JSONObject getJsonObject(String url) {
        HttpClient httpClient = new DefaultHttpClient();
        try {
            HttpParams httpParams = httpClient.getParams();
            HttpConnectionParams.setConnectionTimeout(httpParams, CONNECT_TIMEOUT_MILLIS);
            // Without a socket timeout a stalled server would block the caller forever.
            HttpConnectionParams.setSoTimeout(httpParams, READ_TIMEOUT_MILLIS);

            HttpResponse response = httpClient.execute(new HttpGet(url));
            int status = response.getStatusLine().getStatusCode();
            if (status < 200 || status >= 300 || response.getEntity() == null) {
                System.err.println("unexpected response (HTTP " + status + ") for " + url);
                return null;
            }

            StringBuilder builder = new StringBuilder();
            try (BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(response.getEntity().getContent(), "utf-8"))) {
                for (String s = bufferedReader.readLine(); s != null; s = bufferedReader.readLine()) {
                    builder.append(s);
                }
            }
            return JSONObject.fromObject(builder.toString());
        } catch (Exception e) {
            // Callers treat null as "no data", so every failure is reported that way.
            e.printStackTrace();
            return null;
        } finally {
            // Releases the sockets held by this one-shot client.
            httpClient.getConnectionManager().shutdown();
        }
    }
}
package com.iwant.download2geodata.data;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable value holder for one shop returned by the POI search.
 * Every property is a plain read/write bean property without validation.
 * Created 2016-12-17.
 *
 * @author whsgzcy
 */
public class ShopInfo implements Serializable {

    /** Shop name. */
    private String name;

    /** Contact telephone number. */
    private String tel;

    /** Street address. */
    private String address;

    /** Longitude, kept as the text delivered by the service. */
    private String longitude;

    /** Latitude, kept as the text delivered by the service. */
    private String latitude;

    /** Picture-related template entries attached to the shop. */
    private List templateData;

    /** @return the shop name, or {@code null} if none was set */
    public String getName() {
        return this.name;
    }

    /** @param name the shop name to store */
    public void setName(final String name) {
        this.name = name;
    }

    /** @return the telephone number, or {@code null} if none was set */
    public String getTel() {
        return this.tel;
    }

    /** @param tel the telephone number to store */
    public void setTel(final String tel) {
        this.tel = tel;
    }

    /** @return the address, or {@code null} if none was set */
    public String getAddress() {
        return this.address;
    }

    /** @param address the address to store */
    public void setAddress(final String address) {
        this.address = address;
    }

    /** @return the longitude text, or {@code null} if none was set */
    public String getLongitude() {
        return this.longitude;
    }

    /** @param longitude the longitude text to store */
    public void setLongitude(final String longitude) {
        this.longitude = longitude;
    }

    /** @return the latitude text, or {@code null} if none was set */
    public String getLatitude() {
        return this.latitude;
    }

    /** @param latitude the latitude text to store */
    public void setLatitude(final String latitude) {
        this.latitude = latitude;
    }

    /** @return the stored template list itself (not a copy), or {@code null} if none was set */
    public List getTemplateData() {
        return this.templateData;
    }

    /** @param templateData the template list to store; the reference is kept as is */
    public void setTemplateData(final List templateData) {
        this.templateData = templateData;
    }
}
package com.iwant.download2geodata.data;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable wrapper around the list of shops collected by the crawler.
 * Created 2016-12-17.
 *
 * @author whsgzcy
 */
public class ShopList implements Serializable {

    /** The wrapped shop entries; {@code null} until a list has been assigned. */
    private List mShopInfoList;

    /**
     * Replaces the wrapped list.
     *
     * @param mShopInfoList the list to wrap; the reference is stored without copying
     */
    public void setmShopInfoList(final List mShopInfoList) {
        this.mShopInfoList = mShopInfoList;
    }

    /**
     * Gives access to the wrapped list.
     *
     * @return the stored list itself (not a copy), or {@code null} if none was assigned
     */
    public List getmShopInfoList() {
        return this.mShopInfoList;
    }
}
package com.iwant.download2geodata.data;
import java.io.Serializable;
/**
 * Serializable holder for the {@code pic_info} value of a shop's template data.
 * Created 2016-12-17.
 *
 * @author whsgzcy
 */
public class TemplateData implements Serializable {

    /** Picture information text; {@code null} until assigned. */
    private String pic_info;

    /**
     * Stores the picture information.
     *
     * @param pic_info the value to store; kept unchanged
     */
    public void setPic_info(final String pic_info) {
        this.pic_info = pic_info;
    }

    /**
     * Reads the picture information.
     *
     * @return the stored value, or {@code null} if none was assigned
     */
    public String getPic_info() {
        return this.pic_info;
    }
}
很简单,懂java的应该都能看懂。在此提出一个设计思路:我在代码中注释掉了一段线程休眠(sleep)的逻辑,它原本用于放缓请求频率,防止我的IP被封。如果加入sleep,抓取线程会在休眠期间暂停并让出CPU,其他线程(例如主线程)上的逻辑可以在这段时间继续执行。我是一次性抓取成功,就懒得加了,有兴趣的同学可以将这块加上,thanks a lot。