Python3 抓取网页需要用到urllib.request模块
import urllib.error
import urllib.parse
import urllib.request
def download(url, free_proxy=None, user_agent='test', num_retries=2, data=None):
    """Fetch *url* and return the response body decoded as UTF-8.

    Args:
        url: Address to fetch.
        free_proxy: Optional proxy address. When given, requests for the
            URL's own scheme are routed through it.
        user_agent: Value sent in the ``User-Agent`` request header.
        num_retries: How many more attempts to make after a 5xx HTTP error.
        data: Optional request body (bytes); passed straight to
            ``urllib.request.Request``.

    Returns:
        The decoded page text, or ``None`` when the download failed and no
        retry succeeded.

    Raises:
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    print("download开始", url)
    # The header name must use a hyphen; with "User_agent" urllib does not
    # recognise it and still sends its default "Python-urllib/x.y" agent.
    headers = {"User-Agent": user_agent}
    request = urllib.request.Request(url, data, headers=headers)

    if free_proxy:
        scheme = urllib.parse.urlsplit(url).scheme
        # Passing the handler to build_opener replaces the default
        # ProxyHandler instead of stacking a second one next to it.
        opener = urllib.request.build_opener(
            urllib.request.ProxyHandler({scheme: free_proxy})
        )
    else:
        opener = urllib.request.build_opener()

    try:
        # The context manager closes the connection once the body is read.
        with opener.open(request) as response:
            body = response.read()
    except urllib.error.URLError as e:
        print("download error", e.reason)
        # Only HTTPError carries a status code; plain URLError does not.
        code = getattr(e, 'code', None)
        if num_retries > 0 and code is not None and 500 <= code < 600:
            # The recursive call already returns decoded text (or None), so
            # return it as-is; `data` is forwarded so a POST stays a POST.
            return download(url, free_proxy, user_agent, num_retries - 1, data)
        return None

    return body.decode('utf-8')
# Demo: call download() on a sample page and print whatever it returns.
url = "http://www.thefaceshop.com.cn/store-locations"
print(download(url=url))
运行结果: