【百度地图】使用百度地图API爬取小区名字并获取小区范围边界

获取小区名字

# -*- coding: utf-8 -*-

import numpy as np  # 导入数值计算扩展模块
import requests     # 导入网页请求模块
import pymysql      # 导入pymysql模块
import time, math

# Open the MySQL connection shared by the rest of this script through the
# module-level names `db` and `cursor`.
print("connecting mysql......\n")
# Keyword arguments are used because PyMySQL 1.0+ no longer accepts
# host/user/password/database as positional arguments.
db = pymysql.connect(host="localhost", user="root", password="123456",
                     database="community", charset='utf8')
print("connect successfully,start creating table community_zh in database community\n")
cursor = db.cursor()  # cursor used for every statement below

# Start from an empty table on every run.
cursor.execute("DROP TABLE IF EXISTS community_zh")
c_sql = """CREATE TABLE community_zh(
    province char(10),
    city char(10),
    area char(10),
    community char(30),
    address char(50),
    lat float(9,6),
    lng float(9,6),
    uid char(25),
    detail int(1)  
)"""

cursor.execute(c_sql)   # create the table
# The stored values contain Chinese text, so the table is converted to utf8.
cursor.execute("ALTER TABLE community_zh CONVERT TO CHARACTER SET utf8 COLLATE utf8_general_ci;")
print("table community_zh has been created\n")


# Number of grid *points* per axis. `unit` points delimit `unit - 1` cells,
# so the search area is split into 59 x 59 rectangles (not 60 x 60).
unit = 60
# Latitude grid lines, rounded to 6 decimal places.
lat_partion = [round(x, 6) for x in np.linspace(21.874881, 22.418767, unit)]
# Longitude grid lines, rounded to 6 decimal places.
lng_partion = [round(y, 6) for y in np.linspace(113.112379, 113.641301, unit)]

def get_community(ak=""):
    """Crawl community POIs grid cell by grid cell and store them in MySQL.

    For grid rows 53..58 and columns 0..58 of the module-level
    ``lat_partion`` / ``lng_partion`` grid, the Baidu Place search endpoint is
    queried page by page for the keyword "小区". Every returned POI is inserted
    into ``community_zh`` through the module-level ``cursor`` and ``db``.

    Args:
        ak: Value sent as the ``ak`` query parameter (the Baidu API key).
            Defaults to an empty string, which is what the original script
            sent; the original comment notes the key has to be requested
            from Baidu.
    """
    search_url = "http://api.map.baidu.com/place/v2/search"
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
    page_size = 20
    insert_data = ("INSERT INTO community_zh(province, city, area, community, address, lat, lng, uid, detail)"
                   "VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s)")

    for i in range(53, 59):      # row index into lat_partion
        for j in range(0, 59):   # column index into lng_partion
            # Rectangle as "lat_min,lng_min,lat_max,lng_max".
            bounds = "%s,%s,%s,%s" % (lat_partion[i], lng_partion[j],
                                      lat_partion[i + 1], lng_partion[j + 1])
            page_num = 0         # pages are numbered from 0

            while True:
                # Passing the query as `params` lets requests assemble and
                # encode the URL. The original built it by hand with a
                # backslash continuation inside a string literal, which
                # embedded a run of spaces in front of "&bounds=".
                params = {
                    "query": "小区",
                    "page_size": page_size,
                    "page_num": page_num,
                    "bounds": bounds,
                    "output": "json",
                    "ak": ak,
                }
                # The timeout keeps one stalled connection from hanging the crawl.
                response = requests.get(search_url, params=params, headers=header, timeout=30)
                time.sleep(1)    # throttle: at most one request per second

                answer = response.json()
                status = answer.get('status')
                if status != 0:
                    # Check the status before touching "total"/"results":
                    # an error response is not guaranteed to carry them.
                    print("* The rectangle (%d,%d) request failed: status=%s message=%s"
                          % (i, j, status, answer.get('message')))
                    break

                results = answer.get('results') or []
                total = int(answer.get('total', 0))
                max_page = math.ceil(total / page_size)

                print("\n# Rectangle (%d,%d)  Page %s" % (i, j, page_num))
                print("# Amount: %s" % len(results))
                print("# Total: %s" % total)

                for poi in results:
                    province = poi['province']
                    if province == "澳门特别行政区":
                        # Skip only this record. A `break` here would also
                        # discard every later result on the same page.
                        continue
                    location = poi['location']
                    community_data = [
                        province,
                        poi['city'],
                        poi['area'],
                        poi['name'],       # community name
                        poi['address'],
                        location['lat'],
                        location['lng'],
                        poi['uid'],        # unique identifier of the POI
                        poi['detail'],     # whether detail content exists
                    ]
                    print(community_data)
                    cursor.execute(insert_data, community_data)
                db.commit()  # one commit per page instead of one per row

                page_num += 1
                # Stop once the last page has been handled, so no extra
                # request is spent on a page that is known to be empty.
                if not results or page_num >= max_page:
                    break

        
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    get_community()

获取小区边界

# -*- coding: utf-8 -*-

import requests     # 导入网页请求模块
import pymysql      # 导入pymysql模块
import time, math


# Convert a Baidu metre coordinate into a Baidu longitude/latitude pair.
def meter2Degree(x, y, ak=""):
    """Convert one point through the Baidu ``geoconv`` web service.

    The request is sent with ``from=6&to=5``.

    Args:
        x: X value of the point; strings and numbers are both accepted.
        y: Y value of the point; strings and numbers are both accepted.
        ak: Value appended to the ``ak`` query parameter (the Baidu API key).
            Defaults to an empty string, which reproduces the original URL.

    Returns:
        A ``(lng, lat)`` tuple taken from the ``x`` and ``y`` fields of the
        first entry in the response's ``result`` list.

    Raises:
        ValueError: If the response carries no ``result`` entries.
    """
    # str() lets numeric input work; plain "+" would raise TypeError on it.
    url = ("http://api.map.baidu.com/geoconv/v1/?coords=" + str(x) + "," + str(y)
           + "&from=6&to=5&output=json&ak=" + ak)
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
    # The timeout keeps one stalled connection from blocking the caller forever.
    response = requests.get(url, headers=header, timeout=30)
    answer = response.json()

    result = answer.get("result")
    if not result:
        # Report the whole response instead of failing with a bare
        # KeyError/IndexError that hides what the service answered.
        raise ValueError("geoconv returned no result for (%s, %s): %r" % (x, y, answer))

    return result[0]["x"], result[0]["y"]

# Extract the Baidu metre coordinates from a geo string and convert them into
# a longitude/latitude string.
def coordinateToPoints(coordinates):
    """Turn a geo string into ``"lng,lat;lng,lat;..."``.

    Only the segment between the first and second ``-`` is used. It is read
    as a flat list of alternating x/y values, and each pair is converted with
    ``meter2Degree``.

    Args:
        coordinates: The raw geo string; may be ``None`` or empty.

    Returns:
        One ``"lng,lat;"`` chunk per converted pair, concatenated. An empty
        string is returned when the input is empty, has no ``-``, or has no
        comma-separated values after the ``-``.
    """
    # str.index() raises ValueError when the separator is missing (it never
    # returns -1), so membership tests are used for the "not present" case.
    if not coordinates or "-" not in coordinates:
        return ""

    payload = coordinates.split("-")[1]
    if not payload or "," not in payload:
        return ""

    # Treat ";" as a separator rather than deleting it: deleting would fuse
    # the numbers on either side of an inner ";". Empty tokens (such as the
    # one after a trailing ";") are dropped.
    values = [v for v in payload.replace(";", ",").split(",") if v.strip()]

    chunks = []
    # zip() pairs x with y and ignores an unpaired trailing value, which
    # previously caused an IndexError.
    for x, y in zip(values[0::2], values[1::2]):
        lng, lat = meter2Degree(x, y)
        chunks.append(str(lng) + "," + str(lat) + ";")

    return "".join(chunks)

# Fetch the border of a POI.
def getBorder(uid):
    """Return the border of the POI identified by ``uid`` as a point string.

    The map.baidu.com ``qt=ext`` endpoint is queried and the ``geo`` field of
    the response's ``content`` object is passed to ``coordinateToPoints``.

    Args:
        uid: POI identifier, appended to the request URL as a string.

    Returns:
        The string produced by ``coordinateToPoints``, or an empty string
        when the response has no usable ``content``/``geo`` value.
    """
    url = "http://map.baidu.com/?pcevaname=pc4.1&qt=ext&ext_ver=new&l=12&uid=" + uid
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
    # The timeout keeps one stalled connection from blocking the caller forever.
    response = requests.get(url, headers=header, timeout=30)
    answer = response.json()

    # A response without a dict-shaped "content" simply means "no border";
    # it should not abort the caller with a KeyError.
    content = answer.get("content")
    if not isinstance(content, dict):
        return ""

    geo = content.get("geo")
    if not geo:  # missing, None or empty string
        return ""

    return coordinateToPoints(geo)


if __name__ == "__main__":
    # Read every (community name, uid) pair collected by the crawler script.
    print("connecting mysql......\n")
    # Keyword arguments are used because PyMySQL 1.0+ no longer accepts
    # host/user/password/database as positional arguments.
    db = pymysql.connect(host="localhost", user="root", password="123456",
                         database="community", charset='utf8')
    # This script only reads the table; the original message was copied from
    # the crawler and wrongly announced that the table was being created.
    print("connect successfully,start reading table community_zh in database community\n")
    try:
        cursor = db.cursor()
        cursor.execute("select community,uid from community_zh")
        rows = cursor.fetchall()
    finally:
        # All rows are in memory now, so the connection can be released
        # before the long-running download loop starts.
        db.close()

    # Resume marker: rows are skipped until this community name is seen, and
    # the output file is opened in append mode to continue an earlier run.
    start_flag = False
    # Explicit UTF-8 so the Chinese names are written the same way on every
    # platform. NOTE(review): if an existing file was produced with another
    # default encoding, convert it before appending.
    with open("./community_border.txt", "a", encoding="utf-8") as wf:
        for name, uid in rows:
            if name == "香溪兴苑(小区)":
                start_flag = True
            if not start_flag:
                continue

            points = getBorder(uid)
            time.sleep(1)  # throttle: at most one border lookup per second
            content = name + "\t" + points + "\n"
            print(content)
            wf.write(content)

你可能感兴趣的:(技术:网络爬虫)