数据爬取

（一）地理偏移

高德地图：GCJ-02 坐标系（火星坐标系）

百度地图：BD-09坐标系（百度坐标系）
不同地图定位偏移以及坐标系转换
可以根据公式转换，也可以用地图产品的API。

bd_encrypt 将 GCJ-02 坐标转换成 BD-09 坐标， bd_decrypt 反之。

void bd_encrypt(double gg_lat, double gg_lon, double &bd_lat, double &bd_lon)
{
   double x = gg_lon, y = gg_lat;
   double z = sqrt(x * x + y * y) + 0.00002 * sin(y * x_pi);
   double theta = atan2(y, x) + 0.000003 * cos(x * x_pi);
   bd_lon = z * cos(theta) + 0.0065;
   bd_lat = z * sin(theta) + 0.006;
}

void bd_decrypt(double bd_lat, double bd_lon, double &gg_lat, double &gg_lon)
{
   double x = bd_lon - 0.0065, y = bd_lat - 0.006;
   double z = sqrt(x * x + y * y) - 0.00002 * sin(y * x_pi);
   double theta = atan2(y, x) - 0.000003 * cos(x * x_pi);
   gg_lon = z * cos(theta);
   gg_lat = z * sin(theta);
}

（二）每日限额

高德地图10w

（三）程序

from urllib.parse import quote
from urllib import request
import json
import xlwt
import requests

amap_web_key = '9a33b151429e45ba24d1e71b55a7c282'
poi_search_url = "http://restapi.amap.com/v3/place/text"
poi_boundary_url = "https://ditu.amap.com/detail/get/detail"

# 根据城市名称和分类关键字获取poi数据
def getpois(cityname, keywords):
    i = 1
    poilist = []
    while True:  # 使用while循环不断分页获取数据
        result = getpoi_page(cityname, keywords, i)
        result = json.loads(result)  # 将字符串转换为json
        if result['count'] == '0':
            break
        poilist.extend(result['pois'])
        i = i + 1
    return poilist

# 单页获取pois
def getpoi_page(cityname, keywords, page):
    req_url = poi_search_url + "?key=" + amap_web_key + '&extensions=all&keywords=' + quote(
        keywords) + '&city=' + quote(cityname) + '&citylimit=true' + '&offset=25' + '&page=' + str(
        page) + '&output=json'
    data = ''
    with request.urlopen(req_url) as f:
        data = f.read()
        data = data.decode('utf-8')
    return data

# 根据id获取边界数据
def getBounById(id):
    req_url = poi_boundary_url + "?id=" + id
    with request.urlopen(req_url) as f:
        data = f.read()
        data = data.decode('utf-8')
        dataList = []
        datajson = json.loads(data)  # 将字符串转换为json
        print(datajson)
        datajson = datajson['data']
        datajson = datajson['spec']
        if len(datajson) == 1:
            return dataList
        if datajson.get('mining_shape') != None:
            datajson = datajson['mining_shape']
            shape = datajson['shape']
            dataArr = shape.split(';')

            for i in dataArr:
                innerList = []
                f1 = float(i.split(',')[0])
                innerList.append(float(i.split(',')[0]))
                innerList.append(float(i.split(',')[1]))
                dataList.append(innerList)
        return dataList

数据爬取

你可能感兴趣的:(数据爬取)