最近在看B站上的视频学习资料,此文是关于requests模块的一些使用实例。
import requests
# Browser-like User-Agent sent with every request below (UA spoofing).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/80.0.3987.163 Safari/537.36'
    ),
}

# Seconds to wait on a server before giving up; without an explicit timeout
# a stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

# 1. Download the HTML source of the Sogou home page.
url = 'https://www.sogou.com/'
response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of saving an error page as if it were the result.
response.raise_for_status()
# Decode the body as UTF-8 explicitly so the saved text is not garbled.
response.encoding = 'utf-8'
with open('./sougou.html', 'w', encoding='utf-8') as f:
    f.write(response.text)
print('download successfully')

# 2. A minimal page collector: save the Sogou search results for a keyword.
word = input('enter a key word:')
url = 'https://www.sogou.com/web'
params = {
    'query': word,
}
# Apply the spoofed User-Agent to the request headers.
response = requests.get(
    url=url,
    params=params,
    headers=headers,
    timeout=REQUEST_TIMEOUT,
)
response.raise_for_status()
# Manually override the response encoding to avoid garbled characters.
response.encoding = 'utf-8'
page_text = response.text
filename = word + '.html'
with open(filename, 'w', encoding='utf-8') as f:
    f.write(page_text)
print(word, '下载成功')

# 3. Fetch detailed movie data from Douban (the data is loaded dynamically).
# Ranking page: https://movie.douban.com/typerank?type_name=%E5%96%9C%E5%89%A7&type=24&interval_id=100:90&action=
url = 'https://movie.douban.com/j/chart/top_list'
# Query-string parameters kept in a dict so they are easy to change.
params = {
    'type': '24',
    'interval_id': '100:90',
    'action': '',
    'start': '0',
    'limit': '20',
}
response = requests.get(
    url=url,
    headers=headers,
    params=params,
    timeout=REQUEST_TIMEOUT,
)
response.raise_for_status()
# .json() parses the JSON body into Python objects; it is iterated below as a
# sequence of records that each provide 'title' and 'score'.
page_text = response.json()
for dic in page_text:
    name = dic['title']
    score = dic['score']
    # f-string formatting works whether the score arrives as a str or a number.
    print(f'{name}:{score}')

# 4. Query KFC restaurant locations: http://www.kfc.com.cn/kfccda/storelist/index.aspx
url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
# NOTE(review): the loop below is cut off in the middle of its `data` dict at
# the end of this line, so it is kept verbatim (commented out) rather than
# completed by guesswork. Restore it together with its remaining part.
# for page_num in range(1,8): data = { 'cname':'', 'pid': '', 'keyword': '深圳',