import json
import re

import requests
# ---------------------------- GET request ----------------------------
url = 'http://top.hengyan.com/dianji/default.aspx?'

# Query-string parameters for the GET request are passed as a dict.
params = {
    'p': 1,
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
}

# Arguments used below:
#   url:     target URL
#   params:  GET query parameters
#   headers: request headers
#
# Full list of request parameters:
#   method:          HTTP method
#   url:             target URL to request
#   params:          GET query parameters (dict)
#   data:            POST parameters (dict)
#   json:            POST parameters (JSON)
#   headers:         request headers (dict): User-Agent, Cookies, Referer, ...
#   cookies:         cookies to send with the request (dict)
#   files:           files to upload (dict)
#   auth:            authentication
#   timeout:         request timeout
#   allow_redirects: bool, whether to follow redirects; defaults to True
#   proxies:         proxy settings (dict)
#   verify:          CA certificate verification; defaults to True
response = requests.get(
    url=url,
    params=params,
    headers=headers,
    # Without a timeout the call can block forever on an unresponsive host.
    timeout=10,
)

# HTML source of the page (decoded text).
html = response.text
# Raw binary body of the page.
b_content = response.content
# HTTP status code of the response.
code = response.status_code
# Response headers.
response_headers = response.headers
# URL of the request as reported by the response object.
url = response.url
print(code, html)

# Cookie information (e.g. to capture the cookies after simulating a
# website login with requests).
cookies = response.cookies
print(cookies)

# Convert the RequestsCookieJar into a plain dict.
cookies_dict = requests.utils.dict_from_cookiejar(cookies)
print(cookies_dict)

# Convert the dict back into a RequestsCookieJar.
cookiejar_obj = requests.utils.cookiejar_from_dict(cookies_dict)
print(cookiejar_obj)
# ---------------------------- POST request ----------------------------
# Target site: Jiayuan (search.jiayuan.com).
url = 'http://search.jiayuan.com/v2/search_v2.php'

# Build the request headers.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
}

# Form parameters sent in the POST body.
form_data = {
    'sex': 'f',
    'key': '',
    'stc': '1:11,2:20.28,23:1',
    'sn': 'default',
    'sv': '1',
    'p': 1,
    'f': 'search',
    'listStyle': 'bigPhoto',
    'pri_uid': '0',
    'jsversion': 'v5',
}

response = requests.post(
    url=url,
    data=form_data,
    headers=headers,
    # Without a timeout the call can block forever on an unresponsive host.
    timeout=10,
)

if response.status_code == 200:
    print(response.text)

    # response.json() (i.e. json.loads on the body) fails here with
    # "json.decoder.JSONDecodeError: Expecting value: line 1 column 1
    # (char 0)" because the body is not a well-formed JSON string.
    # The pattern below instead pulls out the text between the
    # ##jiayser## markers and parses only that part.
    pattern = re.compile(r'##jiayser##(.*?)##jiayser##//', re.S)
    match = pattern.search(response.text)
    if match is None:
        # Report a missing wrapper explicitly instead of failing with an
        # opaque IndexError on an empty findall() result.
        print('No ##jiayser## wrapped JSON payload found in the response')
    else:
        json_str = match.group(1)
        json_data = json.loads(json_str)
        print(type(json_data))
        print(json_data)