from urllib.error import URLError
from urllib.request import ProxyHandler, build_opener

# Route both HTTP and HTTPS requests through the local corporate proxy.
proxy = 'cmproxy.gmcc.net:8081'  # use local proxy
proxy_handler = ProxyHandler({
    'http': 'http://' + proxy,
    'https': 'https://' + proxy,
})
opener = build_opener(proxy_handler)

try:
    # Fetch the page through the proxy. Close the response explicitly so the
    # underlying socket is not leaked (the original left it open).
    response = opener.open('http://xueshu.baidu.com/')
    try:
        print(response.read())
    finally:
        response.close()
except URLError as e:
    # Network / proxy failures surface as URLError; report the cause only.
    print(e.reason)
# Example:
import pandas as pd  # BUG FIX: `pd` was used below but pandas was never imported

from urllib.error import URLError
from urllib.request import ProxyHandler, build_opener

# Route both HTTP and HTTPS requests through the local corporate proxy.
proxy = 'cmproxy.gmcc.net:8081'  # use local proxy
proxy_handler = ProxyHandler({
    'http': 'http://' + proxy,
    'https': 'https://' + proxy,
})
opener = build_opener(proxy_handler)

try:
    # NOTE(review): the original comment says "scrape all 177 pages", but the
    # range only covers 1..4 and the URL is constant — each iteration re-fetches
    # the same page and picks table [i]. Preserved as-is; confirm intent.
    for i in range(1, 5):
        # url = 'http://s.askci.com/stock/a/?reportTime=2020-08-01&pageNum=%s' % (str(i))
        url = 'https://s.askci.com/data/economy/00001/1/#te_b_tltie1'
        # Close the response explicitly so the socket is not leaked.
        response = opener.open(url)
        try:
            tb = pd.read_html(response)[i]
        finally:
            response.close()
        # BUG FIX: with mode='a' every chunk is appended, so writing the header
        # each time (header=1) interleaved header rows into the data. Emit the
        # header only for the first chunk.
        tb.to_csv(r'1.csv', mode='a', encoding='utf_8_sig',
                  header=(i == 1), index=False)
        print('第' + str(i - 1) + '页抓取完成')
except URLError as e:
    # Network / proxy failures surface as URLError; report the cause only.
    print(e.reason)