# Scrape store-location data from the official KFC website (the site can no longer be accessed, so every response now contains only -1000):
import os
import urllib.parse
import urllib.request
def create_request(page, city='北京', page_size=10):
    """Build the POST request for one page of the KFC store-list endpoint.

    Args:
        page: Page index sent as the ``pageIndex`` form field.
        city: City name sent as the ``cname`` form field.
        page_size: Number of entries requested per page (``pageSize``).

    Returns:
        A ``urllib.request.Request`` carrying the URL-encoded form body and a
        browser-like ``User-Agent`` header.
    """
    base_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.200.400 QQBrowser/11.8.5310.400',
    }
    form = {
        # The endpoint is queried with ``op=cname``, so the city must be sent
        # under the key ``cname``; the previous spelling ``canme`` meant the
        # city filter never reached the server.
        'cname': city,
        'pid': '',
        'pageIndex': page,
        'pageSize': page_size,
    }
    # Supplying a bytes body makes urllib issue a POST instead of a GET.
    body = urllib.parse.urlencode(form).encode('utf-8')
    return urllib.request.Request(base_url, body, headers)
def get_content(request):
    """Send *request* and return the response body decoded as UTF-8 text.

    Args:
        request: A ``urllib.request.Request`` (or URL string) to open.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the underlying connection even when
    # reading or decoding fails.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')
def down_load(page, content):
    """Write *content* to ``files/kfc<page>.json`` as UTF-8 text.

    Args:
        page: Page number used to build the output file name.
        content: Text to store in the file.
    """
    # Create the output directory on first use so a fresh checkout does not
    # fail with FileNotFoundError.
    os.makedirs('files', exist_ok=True)
    # ``with`` guarantees the data is flushed and the handle is closed.
    with open('files/kfc' + str(page) + '.json', 'w', encoding='utf-8') as fp:
        fp.write(content)
if __name__ == '__main__':
    # Both page bounds are inclusive.
    first_page, last_page = 1, 10
    for page_no in range(first_page, last_page + 1):
        # Build the customized request, fetch the response body, and save it
        # to a local file named after the page number.
        down_load(page_no, get_content(create_request(page_no)))