# Python: look up longitude/latitude for an address or company name by querying the Amap (Gaode), Baidu Maps and Tencent Maps APIs
import logging
import os
import urllib.parse

import pandas as pd
import requests

from utils.geolocataion_converting import gcj02_to_wgs84
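'''
This file sends an address to the Amap (Gaode), Baidu Maps and Tencent Maps
APIs to look up its longitude/latitude, compares the distance between the
coordinates returned by the three services as well as the distance between the
coordinates obtained from the company name and from the address, and checks
the confidence value reported by each API.
NOTE(review): no distance comparison is implemented in the code visible in
this file - confirm whether it lives elsewhere.
'''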
# Browser-like User-Agent string sent with every geocoding request.
_BROWSER_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
)

# HTTP headers shared by all three provider lookups.
headers = {'User-Agent': _BROWSER_USER_AGENT}
def location_amap(address, row):
    """Geocode ``address`` with the Amap (Gaode) geocoding web service.

    Args:
        address: Free-text query (address or company name); it is URL-quoted
            before being placed in the request URL.
        row: Not used by the lookup; kept so existing callers keep working.

    Returns:
        A ``(level, lon, lat)`` tuple. On success ``level`` is the ``level``
        field of the first geocode and ``lon``/``lat`` are the first two
        comma-separated parts of its ``location`` field (still strings).
        ``('', 0, 0)`` is returned when the response status is not ``'1'``,
        when no geocode is returned, or when the request or parsing fails.
    """
    try:
        # NOTE(review): the ``key`` query parameter is empty in this file;
        # presumably a real API key has to be filled in - confirm.
        url = 'https://restapi.amap.com/v3/geocode/geo?address={}&key='.format(
            urllib.parse.quote(address))
        # A timeout keeps one stalled connection from hanging the whole run.
        r = requests.get(url, headers=headers, timeout=10)
        dic = r.json()
        if dic['status'] == '1':
            geocode = dic['geocodes']
            if geocode:
                best_match = geocode[0]
                lon_lat = best_match['location'].split(',')
                return best_match['level'], lon_lat[0], lon_lat[1]
    except Exception as exc:
        # Best-effort lookup: any network, JSON or schema problem is reported
        # as "no result" so the caller can fall through to the next provider.
        # Returning the sentinel (instead of writing a row and re-entering
        # start_write() here) keeps exactly one output row per input row and
        # always gives the caller a 3-tuple to unpack.
        logging.getLogger(__name__).warning(
            'Amap geocoding failed for %r: %s', address, exc)
    return '', 0, 0
def location_baidu(address, row):
    """Geocode ``address`` with the Baidu Maps geocoding v3 web service.

    The request asks for GCJ-02 coordinates (``ret_coordtype=gcj02ll``).

    Args:
        address: Free-text query (address or company name); it is URL-quoted
            before being placed in the request URL.
        row: Not used by the lookup; kept so existing callers keep working.

    Returns:
        A ``(comprehension, lon, lat)`` tuple taken from the ``result``
        object of the response (``comprehension``, ``location.lng``,
        ``location.lat``). ``(0, 0, 0)`` is returned when the response
        status is not ``0`` or when the request or parsing fails.
    """
    try:
        # NOTE(review): the ``ak`` query parameter is empty in this file;
        # presumably a real API key has to be filled in - confirm.
        url = 'http://api.map.baidu.com/geocoding/v3/?address={}&' \
              'ret_coordtype=gcj02ll&ak=&output=json'.format(
                  urllib.parse.quote(address))
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url, headers=headers, timeout=10)
        dic = response.json()
        if dic['status'] == 0:
            result = dic['result']
            location = result['location']
            return result['comprehension'], location['lng'], location['lat']
    except Exception as exc:
        # Best-effort lookup: any network, JSON or schema problem is reported
        # as "no result" so the caller can fall through to the next provider.
        # Returning the sentinel (instead of writing a row and re-entering
        # start_write() here) keeps exactly one output row per input row and
        # always gives the caller a 3-tuple to unpack.
        logging.getLogger(__name__).warning(
            'Baidu geocoding failed for %r: %s', address, exc)
    return 0, 0, 0
def location_tx(address, row):
    """Geocode ``address`` with the Tencent Maps geocoder v1 web service.

    Args:
        address: Free-text query (address or company name); it is URL-quoted
            before being placed in the request URL.
        row: Not used by the lookup; kept so existing callers keep working.

    Returns:
        A ``(reliability, lon, lat)`` tuple taken from the ``result`` object
        of the response (``reliability``, ``location.lng``,
        ``location.lat``). ``(0, 0, 0)`` is returned when the response
        status is not ``0`` or when the request or parsing fails.
    """
    try:
        # NOTE(review): the ``key`` query parameter is empty in this file;
        # presumably a real API key has to be filled in - confirm.
        url = 'https://apis.map.qq.com/ws/geocoder/v1/?address={}&' \
              'key=&output=json'.format(
                  urllib.parse.quote(address))
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url, headers=headers, timeout=10)
        dic = response.json()
        if dic['status'] == 0:
            result = dic['result']
            location = result['location']
            return result['reliability'], location['lng'], location['lat']
    except Exception as exc:
        # Best-effort lookup: any network, JSON or schema problem is reported
        # as "no result". The caller (tx_write) already records a placeholder
        # row for that case, so nothing is written from here and the caller
        # always receives a 3-tuple to unpack.
        logging.getLogger(__name__).warning(
            'Tencent geocoding failed for %r: %s', address, exc)
    return 0, 0, 0
def wrong_write(row):
    """Append ``row`` to the output CSV with placeholder geocoding values.

    Positional column 12 is set to ``0`` and positional columns 10 and 11 are
    set to ``0.0`` in the first (only) row of ``row``; the frame is then
    appended, without header or index, to the CSV named by the module-level
    ``path``.

    Args:
        row: One-row DataFrame; it is modified in place.
    """
    placeholders = ((12, 0), (10, 0.0), (11, 0.0))
    for column, value in placeholders:
        row.iloc[0, column] = value
    row.to_csv(path, header=False, index=False, mode='a')
def bd_write(company, address, row):
    """Try the Baidu lookup for ``company`` and append the row on success.

    The result is accepted only when the ``comprehension`` value returned by
    ``location_baidu`` is greater than 60. An accepted coordinate pair is
    passed through ``gcj02_to_wgs84`` and stored in positional columns 10
    (longitude) and 11 (latitude); ``comprehension`` goes to column 12. The
    row is then appended to the CSV named by the module-level ``path``.

    Args:
        company: Query string sent to the Baidu lookup.
        address: Accepted for signature parity with the other writers; unused.
        row: One-row DataFrame; modified in place when the result is accepted.

    Returns:
        ``True`` if the row was written, ``False`` if the result was rejected
        (nothing is written in that case).
    """
    score, gcj_lon, gcj_lat = location_baidu(company, row)
    if not (score > 60):
        return False

    wgs_lon, wgs_lat = gcj02_to_wgs84(float(gcj_lon), float(gcj_lat))
    row.iloc[0, 12] = score
    row.iloc[0, 10] = wgs_lon
    row.iloc[0, 11] = wgs_lat
    row.to_csv(path, header=False, index=False, mode='a')
    return True
def tx_write(company, address, row):
    """Try the Tencent lookup for ``company`` and always append one row.

    This is the last step of the provider cascade. When the ``reliability``
    value returned by ``location_tx`` is greater than 6, the coordinates are
    passed through ``gcj02_to_wgs84`` and stored in positional columns 10
    (longitude) and 11 (latitude), ``reliability`` goes to column 12, and the
    row is appended to the CSV named by the module-level ``path``. Otherwise
    ``wrong_write`` appends the row with placeholder values.

    Args:
        company: Query string sent to the Tencent lookup.
        address: Accepted for signature parity with the other writers; unused.
        row: One-row DataFrame; modified in place.
    """
    reliability, lon_tx, lat_tx = location_tx(company, row)
    if reliability > 6:
        # Convert only an accepted result, as bd_write/gd_write do; the
        # (0, 0) failure sentinel is never fed to the converter.
        lon_tx_wgs, lat_tx_wgs = gcj02_to_wgs84(float(lon_tx), float(lat_tx))
        row.iloc[0, 12] = reliability
        row.iloc[0, 10] = lon_tx_wgs
        row.iloc[0, 11] = lat_tx_wgs
        row.to_csv(path, header=False, index=False, mode='a')
    else:
        wrong_write(row)
def gd_write(company, address, row):
    """Try the Amap lookup for ``company`` and append the row on success.

    The result is accepted only when the match ``level`` returned by
    ``location_amap`` is one of '兴趣点', '门牌号' or '单元号'. An accepted
    coordinate pair is passed through ``gcj02_to_wgs84`` and stored in
    positional columns 10 (longitude) and 11 (latitude); ``level`` goes to
    column 12. The row is then appended to the CSV named by the module-level
    ``path``.

    Args:
        company: Query string sent to the Amap lookup.
        address: Accepted for signature parity with the other writers; unused.
        row: One-row DataFrame; modified in place when the result is accepted.

    Returns:
        ``True`` if the row was written, ``False`` if the result was rejected
        (nothing is written in that case).
    """
    accepted_levels = ('兴趣点', '门牌号', '单元号')
    match_level, gcj_lon, gcj_lat = location_amap(company, row)
    if match_level not in accepted_levels:
        return False

    wgs_lon, wgs_lat = gcj02_to_wgs84(float(gcj_lon), float(gcj_lat))
    row.iloc[0, 12] = match_level
    row.iloc[0, 10] = wgs_lon
    row.iloc[0, 11] = wgs_lat
    row.to_csv(path, header=False, index=False, mode='a')
    return True
def start_write(start):
    """Geocode rows ``start`` .. ``end - 1`` of the loaded input table.

    Reads the module-level ``address_company`` DataFrame and ``end`` row
    count (both set by ``find_coordinate``). For each row the value in
    positional column 9 is used as the query, and the providers are tried in
    order: Baidu, then Amap, then Tencent. The Tencent step always writes a
    row, so every processed input row produces one output row.

    Args:
        start: Index label of the first row to process.
    """
    for n in range(start, end):
        # ``DataFrame.ix`` was removed in pandas 1.0; ``.loc[n:n]`` is the
        # label-based equivalent and still yields a one-row DataFrame.
        # Copy it so the writers' in-place edits act on an independent frame
        # rather than on a slice of the source table.
        row = address_company.loc[n:n].copy()
        # NOTE(review): ``address`` is always empty here, so the truncation
        # below never fires, and the writers ignore it anyway. Presumably it
        # was meant to be read from an address column - confirm.
        address = ''
        company = row.iat[0, 9]
        if bd_write(company, address, row):
            continue
        # Cut the address after the first building marker of each kind.
        for marker in ('号', '栋', '幢'):
            if marker in address:
                address = address.split(marker)[0] + marker
        if not gd_write(company, address, row):
            tx_write(company, address, row)
# Read the Excel input and resume geocoding into the output CSV.
def find_coordinate(address_company_file, write_path):
    """Load the input workbook and geocode every row not yet written.

    Sets the module-level ``path``, ``address_company`` and ``end`` that the
    writer functions and ``start_write`` read. The number of data rows
    already present in ``write_path`` is used as the index of the first row
    to process, so an interrupted run can be resumed.

    Args:
        address_company_file: Workbook path; the sheet at position 1 (the
            second sheet) is read.
        write_path: Output CSV that result rows are appended to.
    """
    global address_company
    global end
    global path
    path = write_path
    address_company = pd.read_excel(address_company_file, sheet_name=1)
    try:
        finish_data = pd.read_csv(write_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # First run: there is nothing to resume from. Seed the output with a
        # header line so that later resumes, which read the file with the
        # default "first line is the header" rule, count data rows correctly.
        address_company.head(0).to_csv(write_path, index=False)
        start = 0
    else:
        start = finish_data.shape[0]
    end = address_company.shape[0]
    start_write(start)
def get_geo_company(company, file_path, one):
    """Geocode a single company and append its row to ``file_path``.

    Sets the module-level ``path`` to ``file_path`` and then tries Baidu,
    Amap and Tencent in that order, stopping at the first writer that reports
    success. ``company`` is passed as both the company and the address
    argument of each writer.

    Args:
        company: Query string to geocode.
        file_path: Output CSV the row is appended to.
        one: One-row DataFrame passed to the writers.
    """
    global path
    path = file_path
    if bd_write(company, company, one):
        return
    if gd_write(company, company, one):
        return
    tx_write(company, company, one)
if __name__ == '__main__':
    # Script entry point: input table and resumable output CSV. Both paths
    # are the bare placeholder '.csv' in this file.
    source_table = '.csv'
    output_csv = '.csv'
    find_coordinate(address_company_file=source_table, write_path=output_csv)
# If you need help with this code or would like it explained, please send the author a private message.