import urllib.request
# Send a GET request. The context manager closes the HTTP response (and its
# underlying socket) as soon as the body has been read.
with urllib.request.urlopen("http://www.baidu.com") as response:
    # Decode the raw bytes as UTF-8 so Chinese text in the page is not
    # garbled, then print the page source.
    print(response.read().decode('utf-8'))
# Send a POST request (used to simulate a login that submits a username and password).
# httpbin.org is used as the test endpoint.
import urllib.parse  # urlencode() turns key/value pairs into a form-encoded string

# Form payload: URL-encode the key/value pairs, then encode the resulting
# string to UTF-8 because the request body must be bytes, not str.
data = urllib.parse.urlencode({"hello": "world"}).encode("utf-8")
# Supplying `data` makes urlopen() issue a POST; the context manager closes
# the response once the body has been printed.
with urllib.request.urlopen("http://httpbin.org/post", data=data) as response:
    print(response.read().decode('utf-8'))
# Timeout demo: allow the server only 0.01 seconds to answer.
import socket  # needed for socket.timeout, raised when the timeout hits during read()

try:
    with urllib.request.urlopen("http://httpbin.org/post", timeout=0.01) as response:
        print(response.read().decode('utf-8'))
except (urllib.error.URLError, socket.timeout):
    # A timeout while connecting arrives wrapped in URLError; a timeout while
    # reading the body is raised as socket.timeout directly, so catch both.
    print("time out!")
url = "https://httpbin.org/post"
# NOTE: "……" is a placeholder left in the original notes; replace it with a
# real browser User-Agent string before running (HTTP header values must be
# Latin-1 encodable to be sent).
headers = {"User-Agent": "……"}
data = urllib.parse.urlencode({"hello": "world"}).encode("utf-8")
# Bundle URL, body and headers into one Request so the call looks like it
# comes from a real browser. HTTP method names are case-sensitive and are
# sent verbatim, so the method must be the upper-case token "POST".
req = urllib.request.Request(url=url, data=data, headers=headers, method="POST")
# The context manager closes the response after the body has been printed.
with urllib.request.urlopen(req) as response:
    print(response.read().decode("utf-8"))
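# The User-Agent value can be found in the browser's developer tools, under the Network tab (inspect the request headers of any request).
# [Screenshot unavailable: the original image (image-20220204161745986.png) failed to transfer from the source site.]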
# Crawl the pages
def getData(baseurl):
    """Fetch ten consecutive listing pages and return the collected data.

    Args:
        baseurl: URL prefix; the page offset (0, 25, ..., 225) is appended
            to it as a string to form each page URL.

    Returns:
        The list of collected records. Nothing is appended to it yet, so
        the list is currently always empty.
    """
    datalist = []
    for i in range(0, 10):  # request the page-fetching function 10 times
        url = baseurl + str(i * 25)
        html = askURL(url)  # source of the fetched page; not parsed yet
    return datalist
# Fetch the content of one given URL
def askURL(url):
    """Request *url* with a browser-like header and return the body as text.

    Args:
        url: The address to fetch.

    Returns:
        The response body decoded as UTF-8, or an empty string when the
        request fails with ``urllib.error.URLError`` (the error's status
        code and/or reason are printed in that case).
    """
    # Header used to disguise the request as coming from a browser.
    # NOTE: "……" is a placeholder left in the original notes; replace it with
    # a real browser User-Agent string before fetching over HTTP (header
    # values must be Latin-1 encodable to be sent).
    head = {
        "User-Agent": "……"
    }
    request = urllib.request.Request(url, headers=head)  # carry the headers along
    # Default result, so the final return is well-defined even when the
    # request fails and the except branch runs.
    html = ""
    try:
        # The context manager closes the response once the body is read.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode("utf-8")  # page source as text
    except urllib.error.URLError as e:
        if hasattr(e, "code"):
            print(e.code)  # HTTP status code, present on HTTPError
        if hasattr(e, "reason"):
            print(e.reason)  # why the request did not succeed
    return html