Request使用学习笔记

python编码和解码

# Encoding and decoding must use the same codec.

# Sample text for the round trip (the snippet originally used `data`
# without ever defining it, which raised NameError).
data = '你好'

# Convert str -> bytes.
bytes_data = data.encode(encoding='gbk')
print(type(bytes_data))
print(bytes_data)

# Convert bytes -> str.
str_data = bytes_data.decode(encoding='gbk')
print(type(str_data))
print(str_data)

requests简单使用

import requests

# Baidu home page, and the logo image to download.
url = 'http://www.baidu.com'
url2 = 'https://www.baidu.com/img/bd_logo1.png'

# Send a request and fetch the content of the Baidu home page.
resp = requests.get(url)

# The output file is a .png, so it must receive the image bytes from url2;
# previously the home page's HTML was written into it.
logo_resp = requests.get(url2)
with open('baidulogo.png', 'wb') as f:
    f.write(logo_resp.content)

# headers = {
#     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
# }

# Send the request without a User-Agent header
# resp = requests.get(url)
# # Send the request with a User-Agent header
# resp1 = requests.get(url,headers=headers)
#
# # What comes back is a response object
# # print(type(resp))
#
# # Get the status code of the response
# print(resp.status_code)
# Get the response source as str; the text attribute guesses the encoding itself
# print(resp.text)
# print(type(resp.text))
# Change the response encoding to utf8
# resp.encoding = 'utf8'
# print(resp.text)

# Look at the raw response body; content is of type bytes
# print(resp.content)

# Decode the response bytes as utf8
# print(resp.content.decode())

# Look at the request object attached to the response
# print(resp.request)
# # Look at the response headers
# print(resp.headers)
# Look at the request headers that were sent
# print(resp.request.headers)

# Without a user agent, Baidu does not return the real page source
# print(len(resp.text))
# print(len(resp1.text))

# Write the Baidu page source to a file
# with open('baidu.html','wb') as f:
#     f.write(resp1.content)

# Baidu page source fetched without a User-Agent
# with open('baidu1.html','wb') as f:
#     f.write(resp.content)

import requests

url = 'https://www.baidu.com/s?'

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

kw = {
    'wd': 'python'
}
# The `params` argument passes query-string parameters to the GET request.
resp = requests.get(url, headers=headers, params=kw)

# Write with an explicit UTF-8 encoding: the body was decoded as UTF-8, and
# the platform default codec (e.g. GBK on Chinese Windows) may be unable to
# encode some of its characters, raising UnicodeEncodeError.
with open('baidu_params.html', 'w', encoding='utf-8') as f:
    f.write(resp.content.decode())

import requests

url = 'http://www.baidu.com'

# Free proxy
proxies = {
    # 'http':'http://27.46.21.44:8888'
    'http': 'http://117.127.0.209:8080'
}
# Paid proxy (credentials are embedded in the proxy URL)
# proxies = {
#     'http':'http://username:password@host:8080'
# }

# Send the request without a proxy.
# A timeout is set so an unresponsive server cannot block the script forever.
resp1 = requests.get(url, timeout=10)
print(resp1.status_code)

# Send the request through the proxy. Free proxies are frequently dead, so
# the timeout turns an indefinite hang into a requests exception.
resp2 = requests.get(url, proxies=proxies, timeout=10)
print(resp2.status_code)

模拟登录，使用cookie

import re

import requests

# Goal: simulate a logged-in request by sending the cookie in the headers.
url = 'http://www.renren.com/438718956/profile'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    'Cookie': '测试数据cookie ',
}

resp = requests.get(url, headers=headers)

# Decode the body and print every occurrence of the marker text found in it.
page_text = resp.content.decode('utf-8')
print(re.findall('测试数据', page_text))

import re

import requests

# Goal: simulate a logged-in request by passing the cookie as a dict.
url = 'http://www.renren.com/438718956/profile'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}
temp = '测试数据cookie'
ck = {}
# Convert the "k1=v1; k2=v2" cookie string into a dict.
for x in temp.split('; '):
    # Split on the first '=' only: cookie values may themselves contain '='
    # (e.g. base64 padding), and splitting on every '=' kept just the tail.
    pair = x.split('=', 1)
    key, value = pair[0], pair[-1]
    ck[key] = value

# print(ck)
# The cookie is not sent through headers here; the dict is passed via the
# `cookies` parameter instead.
resp = requests.get(url, headers=headers, cookies=ck)
print(re.findall('测试数据', resp.content.decode()))

模拟登录，使用session

import re

import requests

url = 'http://www.renren.com/PLogin.do'

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}

# Login form fields (placeholders for the account and password).
data = {
    'email': '账号',
    'password': '密码',
}

# Build a Session object and use it to send the POST request that logs in.
session = requests.Session()
resp = session.post(url, headers=headers, data=data)
print(resp)

# Decode the body and print every occurrence of the marker text found in it.
page_text = resp.content.decode('utf-8')
print(re.findall('测试数据', page_text))

requests 小技巧

import requests
from requests.utils import cookiejar_from_dict, dict_from_cookiejar

url = 'http://www.baidu.com'
resp = requests.get(url)

# Keep the cookies Baidu returned (a cookiejar object) and show them.
cookies = resp.cookies
print(cookies)

# cookiejar -> dict
dict_cookies = dict_from_cookiejar(cookies)
print(dict_cookies)

# dict -> cookiejar
cookie_jar = cookiejar_from_dict(dict_cookies)
print(cookie_jar)

import requests
# For re-sending requests
from retrying import retry

url = 'https://www.12306.cn/mormhweb/'
# verify is on by default; turning it off disables SSL certificate checking,
# which is needed for some sites whose certificates are not internationally
# recognised.
# resp = requests.get(url,verify=False)
# Exercise: use a free Xici proxy to send a GET request to Baidu; it succeeds
# without the proxy and fails with it. Use timeout.
# Conclusion: major internet companies can also see the free proxy IPs; that
# does not mean the IP is unusable, only that it cannot reach Baidu.
resp = requests.get(url)

# Decode the body as UTF-8 and print the page source.
page_text = resp.content.decode('utf-8')
print(page_text)

import json

data = {"小明": "程序员"}

# Serialise to a JSON string; when the data contains Chinese text,
# ensure_ascii must be false to keep the characters unescaped.
json_data = json.dumps(data, ensure_ascii=False)

# Parse the JSON string back into a dict.
dict_data = json.loads(json_data)

# Print the original dict, its JSON string, and the parsed dict, one per line.
print(data, json_data, dict_data, sep='\n')

# Methods that operate on file objects
# f = open('data.json','w')
# First argument is the data, second argument is the file object
# json.dump(dict_data,f,ensure_ascii=False) # dump converts the dict to a JSON string and writes it to the file

# Open the json file
# f2 = open('data.json','r',encoding='utf8')
# print('*'*20)
# # load converts the JSON data into a dict
# print(json.load(f2))

Request使用学习笔记

python编码和解码

request简单使用

模拟登录，使用cookie

模拟登录，使用session

requests 小技巧

你可能感兴趣的:(Request使用学习笔记)