4.浏览器伪装技术

 1.用户代理(User-Agent)伪装

# Pool of browser User-Agent strings; one is picked at random per opener.
uapools = [
    (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
    ),
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134"
    ),
]

def ua(uapools):
    """Install a global urllib opener that sends a random User-Agent.

    Picks one entry from ``uapools`` at random, prints it, and installs an
    opener whose only default header is that ``user-agent`` value, so later
    ``urllib.request.urlopen`` calls use it.
    """
    picked_agent = random.choice(uapools)
    print(picked_agent)

    agent_opener = urllib.request.build_opener()
    # Assigning addheaders replaces the opener's default header list.
    agent_opener.addheaders = [('user-agent', picked_agent)]
    urllib.request.install_opener(agent_opener)

2.IP 地址伪装

1. 可以去爬一些免费的 IP

2. 自己去申请(稳定)

# Take one proxy address out of the pool and route HTTP requests through it.
thisip = ippools.pop()
proxy = urllib.request.ProxyHandler({"http":thisip})
opener = urllib.request.build_opener(proxy,urllib.request.HTTPHandler)
# The function is install_opener; the misspelled "install_openner" raised
# AttributeError.
urllib.request.install_opener(opener)

# Candidate User-Agent header values used when building an opener.
uapools = [
    "Mozilla/5.0 (Windows NT 10.0; WOW64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/63.0.3239.132 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134",
]

# Shared pool of proxy addresses (strings such as "127.0.0.1:8888"); starts empty.
ippools = []


def update_ippools(ippools):
    """Refill the given proxy pool in place.

    Placeholder: the body is empty, so the pool is currently left unchanged.
    """
    pass


def ua_ip_pools(uapools, iplist=ippools, parameter=0):
    """Install a global urllib opener with a random User-Agent and an HTTP proxy.

    Args:
        uapools: Sequence of User-Agent strings; one is chosen at random.
        iplist: Mutable list of proxy addresses. Defaults to the module-level
            ``ippools`` list. In pool mode the chosen address is removed from
            this list so it is not handed out again.
        parameter: Proxy selection mode. ``0`` uses the fixed address
            ``"127.0.0.1:8888"``; ``1`` picks a random address from ``iplist``.

    Raises:
        IndexError: In pool mode, if ``iplist`` is still empty after
            ``update_ippools`` has been asked to refill it.
    """
    thisip = ""
    # parameter=0: use the fixed local address; parameter=1: use the IP pool.
    # NOTE(review): any other value leaves the proxy address empty — confirm
    # whether that should be rejected instead.
    if parameter == 0:
        thisip = "127.0.0.1:8888"
    elif parameter == 1:
        if not iplist:
            update_ippools(iplist)
        if not iplist:
            # Same exception type random.choice raises on an empty sequence,
            # with a message that names the actual problem.
            raise IndexError("proxy pool is empty; cannot choose a proxy address")
        thisip = random.choice(iplist)
        print('current ip:{}'.format(thisip))
        # Remove from the list that was actually passed in. The original
        # removed from the global ``ippools``, which fails for a caller-supplied list.
        iplist.remove(thisip)

    # Only the "http" scheme is routed through the proxy.
    proxy = urllib.request.ProxyHandler({"http": thisip})
    thisua = random.choice(uapools)
    headers = ("user-agent", thisua)
    opener = urllib.request.build_opener(proxy, urllib.request.HTTPHandler)
    opener.addheaders = [headers]
    urllib.request.install_opener(opener)

 

你可能感兴趣的:(爬虫,python)