采用cookie模拟登录csdn网站

有些网站需要填写账户和密码,如果直接爬取网页信息,自然是进不去的,这里采用cookiejar工具来实现这个目的

import urllib.request, urllib.parse, urllib.error
import http.cookiejar
import ppretty
from bs4 import  BeautifulSoup

if __name__ == '__main__':
    # Script flow:
    #   1. POST the login form through an opener that records cookies.
    #   2. Save the recorded cookies to cookie.txt and print them.
    #   3. Reuse the same opener (and therefore the same cookies) to fetch
    #      the profile page, then print the <head> of the parsed document.
    LOGIN_URL = 'https://passport.csdn.net/account/login'
    values = {'user': '**********', 'password': '**********'}
    postdata = urllib.parse.urlencode(values).encode()
    user_agent = r'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'
    headers = {'User-Agent': user_agent, 'Connection': 'keep-alive'}

    # The cookie jar stores the cookies returned by the first HTTP request;
    # they are written to cookie.txt further down.
    cookie_filename = 'cookie.txt'
    cookie = http.cookiejar.MozillaCookieJar(cookie_filename)
    handler = urllib.request.HTTPCookieProcessor(cookie)
    opener = urllib.request.build_opener(handler)

    request = urllib.request.Request(LOGIN_URL, postdata, headers)
    try:
        # The context manager closes the connection even if reading fails.
        with opener.open(request) as response:
            # The body itself is not used; it is only drained.
            response.read()
    except urllib.error.HTTPError as e:
        # HTTPError is the URLError subclass that carries a status code.
        print(e.code, ':', e.reason)
    except urllib.error.URLError as e:
        # A plain URLError (e.g. connection failure) has no `code`
        # attribute, so only the reason can be reported.
        print(e.reason)

    # Save the cookies to cookie.txt, including session cookies
    # (ignore_discard) and already-expired ones (ignore_expires).
    cookie.save(ignore_discard=True, ignore_expires=True)
    print(cookie)
    for item in cookie:
        print('Name = ' + item.name)
        print('Value = ' + item.value)

    get_url = 'https://i.csdn.net/#/uc/profile'
    get_request = urllib.request.Request(get_url, headers=headers)
    # Second request: the opener sends the cookies captured by the first
    # request along with it.
    with opener.open(get_request) as get_response:
        # Parse while the response is still open; BeautifulSoup reads from
        # the file-like response object.
        soup = BeautifulSoup(get_response, 'lxml')
    print(soup.head)

你可能感兴趣的:(Python)