Python 爬取 ajax 返回的数据

 1 from urllib.request import quote, unquote
 2 import random
 3 import requests
 4 
 5 #quote 将单个字符串编码转化为 %xx 的形式
 6 # strip() 方法用于移除字符串头尾指定的字符(默认为空格或换行符)或字符序列
 7 baidu_cat = quote('总榜').strip();
 8 
 9 refer_url = 'https://data.wxb.com/rankArticle'
10 ajax_url = 'https://data.wxb.com/rank/article?baidu_cat=%s&baidu_tag=&page=1&pageSize=50&type=2&order='%baidu_cat
11 
12 headers = {
13     'Accept': 'application/json',
14     'Accept-Encoding': 'gzip, deflate, br',
15     'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
16     'Connection': 'keep-alive',
17     'Host': 'data.wxb.com',
18     'Referer': refer_url,
19     'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
20 }
21 
22 try:
23     resp = requests.get(ajax_url, headers=headers)
24     if resp.status_code == 200:
25         print(resp.json())  #解析内容为json返回
26 except requests.ConnectionError as e:
27     print('Error',e.args) #输出异常信息
28 
29 result = resp.json()
30 for item in result['data']:
31     print('url:',item['url'])
32     print('title:',item['title'])
33 
34 print("程序结束")
35 
36 
37 
38 ''' 
39 pip install requests[security]
40 
41 这样会额外的安装如下3个包:
42 pyOpenSSL
43 cryptography
44 idna
45 
46 '''

 

你可能感兴趣的:(Python 爬取 ajax 返回的数据)