# python_使用地址或公司名_爬虫爬取高德地图_百度地图_腾讯地图经纬度

# python_使用地址或公司名_爬虫爬取高德地图_百度地图_腾讯地图经纬度

import os
import pandas as pd
import urllib.parse
import requests
from utils.geolocataion_converting import gcj02_to_wgs84

'''
    此文件用于使用地址到高德地图API、百度地图API和腾讯地图API去找经纬度,
    判断三者抓到经纬度的距离,以及用公司与地址抓到的经纬度之间的距离,并判断API返回的置信度
'''
# HTTP request headers sent with every geocoding request in this file.
# The User-Agent is a fixed desktop-Chrome string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
}


def location_amap(address, row, timeout=10):
    """Geocode *address* through the AMap (Gaode) REST geocoding endpoint.

    Args:
        address: Free-text address or company name; it is URL-quoted before
            being placed in the query string.
        row: Unused. Kept so existing callers keep working.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``(level, lon, lat)`` taken from the first geocode hit, where ``lon``
        and ``lat`` are the two strings of the comma-separated ``location``
        field. ``('', 0, 0)`` when the API reports no match or when the
        request or response parsing fails.
    """
    not_found = ('', 0, 0)
    try:
        # NOTE(review): the `key=` query parameter is empty in this file;
        # presumably a real API key has to be filled in -- confirm.
        url = 'https://restapi.amap.com/v3/geocode/geo?address={}&key='.format(
            urllib.parse.quote(address))
        response = requests.get(url, headers=headers, timeout=timeout)
        payload = response.json()
        if payload['status'] != '1':
            return not_found
        geocodes = payload['geocodes']
        if not geocodes:
            return not_found
        best = geocodes[0]
        parts = best['location'].split(',')
        return best['level'], parts[0], parts[1]
    except (requests.RequestException, ValueError, KeyError, IndexError,
            TypeError):
        # A failed lookup is reported as "no match" so the caller can fall
        # through to the next provider. The previous handler wrote the row,
        # re-entered start_write() recursively and then returned None, which
        # made the caller's tuple unpacking raise TypeError.
        return not_found


def location_baidu(address, row, timeout=10):
    """Geocode *address* through the Baidu Maps geocoding v3 endpoint.

    The request asks for GCJ-02 coordinates (``ret_coordtype=gcj02ll``).

    Args:
        address: Free-text address or company name; it is URL-quoted before
            being placed in the query string.
        row: Unused. Kept so existing callers keep working.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``(comprehension, lon, lat)`` from the response's ``result`` object.
        ``(0, 0, 0)`` when the API status is not ``0`` or when the request or
        response parsing fails.
    """
    not_found = (0, 0, 0)
    try:
        # NOTE(review): the `ak=` query parameter is empty in this file;
        # presumably a real API key has to be filled in -- confirm.
        url = 'http://api.map.baidu.com/geocoding/v3/?address={}&' \
              'ret_coordtype=gcj02ll&ak=&output=json'.format(
            urllib.parse.quote(address))
        response = requests.get(url, headers=headers, timeout=timeout)
        payload = response.json()
        if payload['status'] != 0:
            return not_found
        result = payload['result']
        point = result['location']
        return result['comprehension'], point['lng'], point['lat']
    except (requests.RequestException, ValueError, KeyError, IndexError,
            TypeError):
        # A failed lookup is reported as "no match" so the caller can fall
        # through to the next provider. The previous handler wrote the row,
        # re-entered start_write() recursively and then returned None, which
        # made the caller's tuple unpacking raise TypeError.
        return not_found



def location_tx(address, row, timeout=10):
    """Geocode *address* through the Tencent Maps geocoder v1 endpoint.

    Args:
        address: Free-text address or company name; it is URL-quoted before
            being placed in the query string.
        row: Unused. Kept so existing callers keep working.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``(reliability, lon, lat)`` from the response's ``result`` object.
        ``(0, 0, 0)`` when the API status is not ``0`` or when the request or
        response parsing fails.
    """
    not_found = (0, 0, 0)
    try:
        # NOTE(review): the `key=` query parameter is empty in this file;
        # presumably a real API key has to be filled in -- confirm.
        url = 'https://apis.map.qq.com/ws/geocoder/v1/?address={}&' \
              'key=&output=json'.format(
            urllib.parse.quote(address))
        response = requests.get(url, headers=headers, timeout=timeout)
        payload = response.json()
        if payload['status'] != 0:
            return not_found
        result = payload['result']
        point = result['location']
        return result['reliability'], point['lng'], point['lat']
    except (requests.RequestException, ValueError, KeyError, IndexError,
            TypeError):
        # A failed lookup is reported as "no match"; the caller then records
        # the row as failed. The previous handler wrote the row itself,
        # re-entered start_write() recursively and then returned None, which
        # made the caller's tuple unpacking raise TypeError.
        return not_found

def wrong_write(row):
    """Mark *row* as a failed lookup and append it to the output CSV.

    Column 12 of the first row is set to 0 and columns 10 and 11 to 0.0 --
    the same columns the ``*_write`` helpers fill with the confidence value,
    longitude and latitude. The frame is then appended, without header or
    index, to the file named by the module-level ``path``.
    """
    placeholders = ((12, 0), (10, 0.0), (11, 0.0))
    for column, value in placeholders:
        row.iloc[0, column] = value
    row.to_csv(path, mode='a', index=False, header=False)

def bd_write(company, address, row):
    """Try to geocode *company* with Baidu and, on success, append the row.

    A result counts as a success only when Baidu's ``comprehension`` score is
    above 60. The coordinates are then passed through ``gcj02_to_wgs84`` and
    stored in columns 10 (lon) and 11 (lat), with the score in column 12,
    before the row is appended to the CSV named by the module-level ``path``.

    Args:
        company: Text sent to the geocoder.
        address: Accepted but not used by this function.
        row: One-row DataFrame that is updated in place and written out.

    Returns:
        True if the row was written, False otherwise.
    """
    score, gcj_lon, gcj_lat = location_baidu(company, row)
    if not score > 60:
        return False

    wgs_lon, wgs_lat = gcj02_to_wgs84(float(gcj_lon), float(gcj_lat))
    for column, value in ((12, score), (10, wgs_lon), (11, wgs_lat)):
        row.iloc[0, column] = value
    row.to_csv(path, mode='a', index=False, header=False)
    return True


def tx_write(company, address, row):
    """Geocode *company* with Tencent and always append the row to the CSV.

    When Tencent's ``reliability`` is above 6, the coordinates are passed
    through ``gcj02_to_wgs84`` and stored in columns 10 (lon) and 11 (lat),
    with the reliability in column 12. Otherwise the row is recorded as a
    failed lookup via ``wrong_write``.

    Args:
        company: Text sent to the geocoder.
        address: Accepted but not used by this function.
        row: One-row DataFrame that is updated in place and written out.
    """
    reliability, lon_tx, lat_tx = location_tx(company, row)
    if reliability > 6:
        # Convert only on the success path, as bd_write/gd_write do, so the
        # (0, 0) placeholder of a failed lookup is never fed to the converter.
        lon_tx_wgs, lat_tx_wgs = gcj02_to_wgs84(float(lon_tx), float(lat_tx))
        row.iloc[0, 12] = reliability
        row.iloc[0, 10] = lon_tx_wgs
        row.iloc[0, 11] = lat_tx_wgs
        row.to_csv(path, header=False, index=False, mode='a')
    else:
        wrong_write(row)


def gd_write(company, address, row):
    """Try to geocode *company* with AMap and, on success, append the row.

    A result counts as a success only when AMap's match ``level`` is one of
    the three accepted level strings listed below. The coordinates are then
    passed through ``gcj02_to_wgs84`` and stored in columns 10 (lon) and
    11 (lat), with the level in column 12, before the row is appended to the
    CSV named by the module-level ``path``.

    Args:
        company: Text sent to the geocoder.
        address: Accepted but not used by this function.
        row: One-row DataFrame that is updated in place and written out.

    Returns:
        True if the row was written, False otherwise.
    """
    accepted_levels = ('兴趣点', '门牌号', '单元号')
    level, gcj_lon, gcj_lat = location_amap(company, row)
    if level not in accepted_levels:
        return False

    wgs_lon, wgs_lat = gcj02_to_wgs84(float(gcj_lon), float(gcj_lat))
    for column, value in ((12, level), (10, wgs_lon), (11, wgs_lat)):
        row.iloc[0, column] = value
    row.to_csv(path, mode='a', index=False, header=False)
    return True


def start_write(start):
    """Geocode rows ``start`` .. ``end - 1`` of the module-level input frame.

    For each row the company name is read from column 9 and the providers are
    tried in order: Baidu, then AMap, then Tencent. The first two report
    success with a boolean; Tencent is the last resort and is called only when
    both of them return False.

    Args:
        start: Position of the first row to process. ``end`` and
            ``address_company`` are module-level globals set by
            ``find_coordinate``.
    """
    for n in range(start, end):
        # DataFrame.ix was removed in pandas 1.0. Take the n-th row
        # positionally as a one-row frame (assumes the default RangeIndex, so
        # label n == position n -- confirm). The copy keeps the in-place edits
        # made by the write helpers off the source frame.
        row = address_company.iloc[n:n + 1].copy()
        address = ''
        company = row.iat[0, 9]
        if bd_write(company, address, row):
            continue
        # Cut the address after the first building-number marker. `address`
        # is always '' at this point, so this currently changes nothing.
        for marker in ('号', '栋', '幢'):
            if marker in address:
                address = address.split(marker)[0] + marker
        if gd_write(company, address, row):
            continue
        tx_write(company, address, row)


# Read the Excel input and resume geocoding after the rows already written.
def find_coordinate(address_company_file, write_path):
    """Load the input workbook and geocode every row not yet in the output.

    Sets the module-level globals ``path`` (output CSV), ``address_company``
    (sheet at index 1 of the workbook) and ``end`` (its row count), then
    starts processing at the row whose position equals the number of data
    rows ``pandas.read_csv`` finds in the existing output file.

    Args:
        address_company_file: Path of the Excel workbook to read.
        write_path: Path of the output CSV; it is read here, so it must
            already exist.
    """
    global address_company, end, path
    path = write_path
    address_company = pd.read_excel(address_company_file, sheet_name=1)
    rows_done = len(pd.read_csv(write_path).index)
    end = len(address_company.index)
    start_write(rows_done)

def get_geo_company(company, file_path, one):
    """Geocode a single company and append its row to *file_path*.

    Sets the module-level ``path`` to *file_path*, then tries Baidu and AMap
    in that order; if neither writes the row, Tencent is called as the last
    resort. The company name is passed as both the company and the address
    argument of each helper.

    Args:
        company: Company name to geocode.
        file_path: Output CSV the helpers append to.
        one: One-row DataFrame that is updated in place and written out.
    """
    global path
    path = file_path
    for provider_write in (bd_write, gd_write):
        if provider_write(company, company, one):
            return
    tx_write(company, company, one)


# Script entry point: resume geocoding from the input file into the output
# file. Both paths are bare '.csv' placeholders here.
# NOTE(review): find_coordinate reads its first argument with pd.read_excel,
# so `file` presumably has to name an Excel workbook -- confirm.
if __name__ == '__main__':
    file = '.csv'
    finsh_file = '.csv'
    find_coordinate(file, finsh_file)

# 如需要帮忙或讲解代码,请私聊我!

# 你可能感兴趣的:(python爬虫)