Python 爬虫主要需要用到标准库 urllib。
方法1
import ssl
import urllib.parse
import urllib.request

# SECURITY: this swaps the process-wide default HTTPS context factory for one
# that performs NO certificate verification, so every later urlopen() call in
# this process accepts any certificate. Acceptable for a throwaway scraping
# demo only; do not copy into code that handles sensitive data.
ssl._create_default_https_context = ssl._create_unverified_context


def gethtml_https(url, headers, coding):
    """Fetch *url* with custom request headers and return the decoded body.

    Args:
        url: Address to request.
        headers: Mapping of HTTP header names to values sent with the request.
        coding: Name of the text encoding used to decode the response bytes.

    Returns:
        The response body decoded to ``str`` using *coding*.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
        UnicodeDecodeError: If the body is not valid in *coding*.
    """
    req = urllib.request.Request(url=url, headers=headers)
    # The context manager closes the response even if read/decode fails.
    with urllib.request.urlopen(req) as response:
        return response.read().decode(coding)
方法2
#导入包
import urllib.request
#函数
def main():
    """Script entry point: run the fetch-and-print demo."""
    preservation()
def gethtml_http(url):
    """Download *url* and return its body decoded as UTF-8.

    The request uses a 5-second timeout. On failure the function prints
    "超时" and returns ``None`` instead of raising.

    Args:
        url: Address to fetch (anything ``urllib.request.urlopen`` accepts).

    Returns:
        The decoded page text, or ``None`` when the request failed.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Local import keeps this block self-contained; a timeout while reading
    # the body surfaces as socket.timeout rather than URLError.
    import socket

    try:
        # The context manager closes the connection even if read/decode fails.
        with urllib.request.urlopen(url, timeout=5) as response:
            return response.read().decode("utf-8")
    except (urllib.error.URLError, socket.timeout):
        # URLError covers every request failure (DNS, refused connection,
        # HTTP error status), not only timeouts; the message is kept as-is.
        print("超时")
        # Explicit failure value: the old code fell through to a `return` of
        # an unassigned local and crashed with UnboundLocalError.
        return None
def preservation():
    """Fetch the Baidu home page over HTTP and print the result."""
    page = gethtml_http("http://www.baidu.com")
    print(page)
#import os
#os.rename("内部储存\hhh.py\baidu.txt","内部储存\hhh.py\baidu.html")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()