Python爬虫——urllib_ajax请求的post请求

爬取肯德基官网的门店位置信息(如果返回的 rowcount 全是 -1000,请先检查 POST 表单的字段名是否为 cname,而不一定是网站进不去了):

import urllib.request
import urllib.parse

def create_request(page):
    """Build the POST request for one page of the KFC store-list query.

    Args:
        page: Page index sent as the ``pageIndex`` form field.

    Returns:
        A ``urllib.request.Request`` whose body is the url-encoded form.
        Because a body is supplied, urllib sends the request as a POST.
    """
    base_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.200.400 QQBrowser/11.8.5310.400',
    }

    # The form field must be spelled ``cname`` (matching ``op=cname`` in the
    # URL). It used to be misspelled ``canme``, so the city name was never
    # sent under the ``cname`` key.
    data = {
        'cname': '北京',
        'pid': '',
        'pageIndex': page,
        'pageSize': '10'
    }

    # A POST body must be bytes: url-encode the form, then encode as UTF-8.
    data = urllib.parse.urlencode(data).encode('utf-8')
    request = urllib.request.Request(base_url, data, headers)

    return request
def get_content(request):
    """Send the request and return the response body as text.

    Args:
        request: A ``urllib.request.Request`` (or URL string) to open.

    Returns:
        The full response body decoded as UTF-8.
    """
    # The context manager closes the response (and its connection) even if
    # reading or decoding raises; previously it was never closed.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')

def down_load(page, content):
    """Write one page of fetched content to ``files/kfc<page>.json``.

    Args:
        page: Page number; used to build the output file name.
        content: Text to write; the file is encoded as UTF-8.
    """
    import os  # local import: keeps this block independent of file-level imports

    path = 'files/kfc' + str(page) + '.json'
    # Create the output directory on first use instead of failing with
    # FileNotFoundError when ``files/`` does not exist yet.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # ``with`` flushes and closes the file even if the write raises;
    # previously the handle was never closed.
    with open(path, 'w', encoding='utf-8') as fp:
        fp.write(content)

if __name__ == '__main__':
    # Inclusive range of result pages to crawl.
    first_page, last_page = 1, 10
    for page_no in range(first_page, last_page + 1):
        # Build the customised request, fetch the response text, and save
        # it to a local file for this page.
        down_load(page_no, get_content(create_request(page_no)))

你可能感兴趣的:(Python爬虫,python,爬虫)