代理列表

import urllib.request as urllib2
import urllib
import random

url = "http://www.baidu.com/"

# Pool of User-Agent strings to rotate through; the same pattern could later
# be used for a list of proxies.
# Long entries are split using adjacent string literals instead of a backslash
# inside the literal: a backslash continuation keeps the next line's leading
# indentation as part of the string, which would corrupt the header value.
ua_list = [
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
    "Opera/9.80 (Windows NT 10.0; U; en) Presto/2.8.131 Version/11.11",
    "Mozilla/5.0 (Windows NT 10.0; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) "
    "Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) "
    "Presto/2.8.131 Version/11.11",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
]

# Pick one entry at random to use as the User-Agent for this run.
user_agent = random.choice(ua_list)

# Build the request object for the target URL.
request = urllib2.Request(url)

# add_header() attaches a single HTTP header to the request.
request.add_header("User-Agent", user_agent)

# Send the request. The with-block closes the HTTP response (and its
# underlying connection) as soon as the body has been read, even if reading
# or decoding raises.
with urllib2.urlopen(request) as response:
    html = response.read().decode('UTF-8')

# get_header() returns the value of a header already set on the request.
# Request stores header names via str.capitalize(), so the lookup key must be
# spelled "User-agent" (first letter upper-case, the rest lower-case).
print(request.get_header("User-agent"))

你可能感兴趣的:(Python基础和应用)