Python-requests
import requests
def getHTMLText(url, timeout=10):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the call indefinitely.

    Returns:
        The decoded page text, or the integer 0 when the request fails or
        the server answers with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Convert 4xx/5xx answers into an exception so they take the failure path.
        r.raise_for_status()
        # Decode with the encoding guessed from the body content.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-related failures map to the 0 sentinel; Ctrl-C and
        # programming errors are no longer silently swallowed.
        return 0
if __name__ == "__main__":
    # Download a JD product page; getHTMLText reports failure as the integer 0.
    url = "https://item.jd.com/100011743024.html#crumb-wrap"
    page = getHTMLText(url)
    if not page == 0:
        print(page)
我们继续使用上面的代码,但是把状态码检查 r.raise_for_status() 注释掉,并打印返回的状态码。
import requests
def getHTMLText(url, timeout=10):
    """Fetch *url* and return its body as text, whatever the HTTP status is.

    The status check (``raise_for_status``) is deliberately not performed
    here, so the body of an error response (for example a 503 page) is
    still returned and its status code is printed.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded response text, or the integer 0 when the request itself
        fails (connection error, timeout, invalid URL, ...).
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Decode with the encoding guessed from the body content.
        r.encoding = r.apparent_encoding
        # Show the status so a rejected request is visible to the reader.
        print(r.status_code)
        return r.text
    except requests.RequestException:
        # Only request-related failures map to the 0 sentinel; Ctrl-C and
        # programming errors are no longer silently swallowed.
        return 0
if __name__ == "__main__":
    # Request an Amazon product page without any custom headers.
    url = "https://www.amazon.cn/dp/B06XFR194H"
    body = getHTMLText(url)
    # The integer 0 is the failure sentinel of getHTMLText.
    if not body == 0:
        print(body)
可以看到我们并不能成功访问,返回了状态码“503”:
503
抱歉,我们只是想确认一下当前访问者并非自动程序。为了达到最佳效果,请确保您浏览器上的 Cookie 已启用。
使用下面这段代码,在Request消息头中加入“user-agent”字段,即可成功取得网页的内容。
import requests
def getHTMLText(url, timeout=10):
    """Fetch *url* while identifying as a browser and return the page text.

    A ``user-agent`` request header is sent with the request in place of
    the default one used by the requests library.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded page text, or the integer 0 when the request fails or
        the server answers with an HTTP error status.
    """
    try:
        kv = {'user-agent': 'Mozilla/5.0'}
        r = requests.get(url, headers=kv, timeout=timeout)
        # Convert 4xx/5xx answers into an exception so they take the failure path.
        r.raise_for_status()
        # Decode with the encoding guessed from the body content.
        r.encoding = r.apparent_encoding
        print(r.status_code)
        return r.text
    except requests.RequestException:
        # Only request-related failures map to the 0 sentinel; Ctrl-C and
        # programming errors are no longer silently swallowed.
        return 0
if __name__ == "__main__":
    # Same Amazon page as before, now fetched by the header-sending getHTMLText.
    url = "https://www.amazon.cn/dp/B06XFR194H"
    content = getHTMLText(url)
    # Print only when the download did not return the 0 failure sentinel.
    if not content == 0:
        print(content)
首先要找到该链接所提交的键值对。
通过params来提交搜索键值对。
import requests
def getHTMLText(url, timeout=10):
    """Send a search for "python" to *url* and return the result page text.

    The keyword is passed as the ``wd`` query parameter and a desktop
    browser ``user-agent`` header is sent with the request. The final
    request URL and the status code are printed.

    Args:
        url: Address of the search endpoint.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded page text, or the integer 0 when the request fails or
        the server answers with an HTTP error status.
    """
    try:
        kv1 = {'user-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'}
        kv2 = {'wd': 'python'}
        r = requests.get(url, headers=kv1, params=kv2, timeout=timeout)
        # Show the URL actually requested, including the encoded query string.
        print(r.request.url)
        # Convert 4xx/5xx answers into an exception so they take the failure path.
        r.raise_for_status()
        # Decode with the encoding guessed from the body content.
        r.encoding = r.apparent_encoding
        print(r.status_code)
        return r.text
    except requests.RequestException:
        # Only request-related failures map to the 0 sentinel; Ctrl-C and
        # programming errors are no longer silently swallowed.
        return 0
if __name__ == "__main__":
    # Baidu serves search results from the /s path; sending the "wd" keyword
    # to the bare home page does not run a search.
    url = "http://www.baidu.com/s"
    t = getHTMLText(url)
    # getHTMLText reports failure as the integer 0.
    if t != 0:
        print(t)
import requests
def getHTMLText(url, timeout=10):
    """Send a search for "python" to *url* and return the result page text.

    The keyword is passed as the ``q`` query parameter and a desktop
    browser ``user-agent`` header is sent with the request. The final
    request URL and the status code are printed.

    Args:
        url: Address of the search endpoint.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded page text, or the integer 0 when the request fails or
        the server answers with an HTTP error status.
    """
    try:
        kv1 = {'user-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'}
        # Google reads the search term from "q"; "oq" alone does not run a search.
        kv2 = {'q': 'python'}
        r = requests.get(url, headers=kv1, params=kv2, timeout=timeout)
        # Show the URL actually requested, including the encoded query string.
        print(r.request.url)
        # Convert 4xx/5xx answers into an exception so they take the failure path.
        r.raise_for_status()
        # Decode with the encoding guessed from the body content.
        r.encoding = r.apparent_encoding
        print(r.status_code)
        return r.text
    except requests.RequestException:
        # Only request-related failures map to the 0 sentinel; Ctrl-C and
        # programming errors are no longer silently swallowed.
        return 0
if __name__ == "__main__":
    # Run the search against Google's search endpoint.
    url = "http://www.google.com/search"
    result = getHTMLText(url)
    # The integer 0 is the failure sentinel of getHTMLText.
    if not result == 0:
        print(result)
import requests
def getJPG(url, timeout=10):
    """Download the resource at *url* and return its raw bytes.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as ``bytes``, or the integer 0 when the request
        fails or the server answers with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Convert 4xx/5xx answers into an exception so they take the failure path.
        r.raise_for_status()
        # No text encoding is set: r.content is the undecoded body, so
        # charset detection over image bytes would be wasted work.
        return r.content
    except requests.RequestException:
        # Only request-related failures map to the 0 sentinel; Ctrl-C and
        # programming errors are no longer silently swallowed.
        return 0
if __name__ == "__main__":
    url = "https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1584007598907&di=c47132e3d2b868fd67160cae2772ab48&imgtype=0&src=http%3A%2F%2Fbpic.588ku.com%2Felement_origin_min_pic%2F16%2F09%2F16%2F0857db430110251.jpg"
    t = getJPG(url)
    # Open the output file only after a successful download, so a failed
    # request neither creates an empty a.jpg nor truncates an existing one.
    if t != 0:
        # The context manager closes the file even if the write fails.
        with open('a.jpg', 'wb') as f:
            f.write(t)
Tags: requests