爬取地铁站数据,计算两点距离

爬取地铁站数据,计算两点距离

      • 爬取地铁站数据代码
      • 计算两点距离

效果图:
爬取地铁站数据,计算两点距离_第1张图片

爬取地铁站数据代码

import json
import requests
from bs4 import BeautifulSoup

# Browser-style User-Agent header passed to every requests.get call below.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}

def get_message(ID, cityname, name):
    """
    Fetch the subway data of one city and append its stations to subway.csv.

    Each station becomes one row: city, line_name, site_name, gd_lon, gd_lat.
    The header row is written only when the file is still empty, so calling
    this function once per city no longer scatters header rows through the data.

    Args:
        ID: city id used in the amap service URL (string).
        cityname: city name used in the amap service URL (string).
        name: display name of the city, written to the ``city`` column.

    Raises:
        requests.RequestException: the request failed, timed out, or returned
            an HTTP error status.
        ValueError: the response body is not valid JSON.
    """
    url = 'http://map.amap.com/service/subway?_1555502190153&srhdata=' + ID + '_drw_' + cityname + '.json'
    # A timeout keeps one stalled request from hanging the whole crawl.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()
    result = json.loads(response.text)

    # Open the output once per city instead of once per station.
    with open('subway.csv', 'a+', encoding='gbk') as f:
        # Move to the end explicitly so tell() reflects the current file size.
        f.seek(0, 2)
        if f.tell() == 0:
            f.write(','.join(['city', 'line_name', 'site_name', 'gd_lon', 'gd_lat']) + '\n')

        for line in result['l']:
            # A non-empty 'la' marks a branch line; append it in parentheses.
            branch = line.get('la')
            line_name = line['ln'] + '(' + branch + ')' if branch else line['ln']
            for station in line['st']:
                print(name, line_name, station['n'])
                # 'sl' holds "lon,lat".
                coords = station['sl'].split(',')
                gd_lon, gd_lat = coords[0], coords[1]
                f.write(','.join([name, line_name, station['n'], gd_lon, gd_lat]) + '\n')

def get_city():
    """
    Read the city list from the amap subway index page and crawl every city.

    For each ``<a>`` element found in the "city-list fl" and "more-city-list"
    containers, get_message is called with the element's ``id`` attribute,
    its ``cityname`` attribute and its text.

    Raises:
        requests.RequestException: the request failed, timed out, or returned
            an HTTP error status.
        IndexError: one of the two city containers is missing from the page.
    """
    url = 'http://map.amap.com/subway/index.html?&1100'
    # A timeout keeps a stalled request from hanging the crawl.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()
    # Decode the raw bytes as UTF-8 directly. Re-encoding response.text as
    # ISO-8859-1 only works when requests guessed ISO-8859-1 and raises
    # UnicodeEncodeError for any other guessed encoding.
    html = response.content.decode('utf-8')
    soup = BeautifulSoup(html, 'lxml')

    # Both containers hold the same kind of <a> entries, so share one loop.
    containers = (
        soup.find_all(class_="city-list fl")[0],
        soup.find_all(class_="more-city-list")[0],
    )
    for container in containers:
        for link in container.find_all('a'):
            # id -> city id, cityname -> name used in the URL, text -> display name
            get_message(link['id'], link['cityname'], link.get_text())

# Start the crawl only when this file is run as a script, not when it is imported.
if __name__ == '__main__':
    get_city()

计算两点距离

可以使用地球上的两点间距离公式(Haversine formula)计算经纬度之间的距离。以下是使用 Python 实现这个公式的示例代码,其中参数 lat1、lon1 和 lat2、lon2 分别表示两个经纬度值。

from math import radians, cos, sin, asin, sqrt

def haversine(lat1, lon1, lat2, lon2, r=6371):
    """
    Return the great-circle distance between two points on a sphere.

    Args:
        lat1, lon1: latitude and longitude of the first point, in decimal degrees.
        lat2, lon2: latitude and longitude of the second point, in decimal degrees.
        r: sphere radius; the result is in the same unit. Defaults to 6371,
            the mean Earth radius in kilometres.

    Returns:
        The distance along the sphere's surface, in the unit of ``r``.
    """
    # Convert degrees to radians.
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # Differences in longitude and latitude.
    dlon = lon2 - lon1
    dlat = lat2 - lat1

    # Haversine formula.
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Rounding can push a slightly above 1 for near-antipodal points, which
    # would make asin raise ValueError; clamp it to the valid domain.
    a = min(1.0, a)
    c = 2 * asin(sqrt(a))
    distance = c * r

    return distance

其中,r 为地球平均半径,单位为千米。 haversine 函数返回的 distance 即为两个经纬度之间的距离,单位为千米。
比如说计算北京市中关村的经纬度 (39.983766,116.309427) 和 北京市北京站的经纬度 (39.904737,116.427231) 之间的距离,可以这样调用 haversine 函数:

# Arguments are passed as (lat1, lon1, lat2, lon2): latitude before longitude.
distance = haversine(39.983766, 116.309427, 39.904737, 116.427231)

# Print the distance with two decimal places.
print(f"The distance between the two points is: {distance:.2f} km.")

输出结果为:
The distance between the two points is: 13.34 km.

你可能感兴趣的:(python,大数据,爬虫)