【微博爬虫】获取微博登录cookie(带验证码识别)

近期公司买的微博账号,在登录的时候总会出现验证码,所以我就想着出一个带验证码识别的微博登录教程。

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
from log import get_logger
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from verification_code import get_verify_code

# Module-level logger obtained from the project's `log` helper, keyed by this module's name.
logger = get_logger(__name__)


class WebDriver(object):
    """Headless Chrome wrapper that logs in to Weibo and collects cookies.

    The instance owns one Selenium Chrome session (``self.driver``).
    ``login_weibo`` always quits that session before returning, so an
    instance cannot be reused after a login attempt.
    """

    # XPath of the submit button inside the first login form on the page.
    _LOGIN_BUTTON_XPATH = (
        '//div[@class="W_login_form"][1]/div[@class="info_list login_btn"]'
        '/a[@action-type="btn_submit"]'
    )
    # XPath of the captcha row; its inline style is inspected for "none".
    _VERIFY_BOX_XPATH = '//div[contains(@class,"info_list verify")]'
    # Class name of the element whose presence is treated as "logged in".
    _LOGGED_IN_CLASS = 'gn_name'
    # Maximum number of captcha attempts before giving up.
    _MAX_VERIFY_ATTEMPTS = 5

    def __init__(self, user_agent=None, proxy=None):
        """Start a headless Chrome session.

        Args:
            user_agent: Optional User-Agent string passed to Chrome.
            proxy: Optional proxy address passed to Chrome's
                ``--proxy-server`` switch.
        """
        chrome_options = Options()
        for flag in ('--headless', '--no-sandbox', '--disable-gpu',
                     '--disable-dev-shm-usage'):
            chrome_options.add_argument(flag)
        if user_agent:
            chrome_options.add_argument('user-agent=%s' % user_agent)
        if proxy:
            # The switch must start with two ASCII hyphens (not an en dash),
            # otherwise Chrome does not parse it as a command-line switch.
            chrome_options.add_argument('--proxy-server=%s' % proxy)
        # `options=` is the supported keyword; `chrome_options=` was
        # deprecated and later removed from Selenium.
        self.driver = webdriver.Chrome(options=chrome_options)
        self.driver.set_window_size(1400, 700)

    def download_page(self, url):
        """Load *url* in the browser and return the page source.

        NOTE(review): the second log line says the browser is being closed,
        but this method does not close it; confirm which one is intended.
        """
        logger.info('打开浏览器,访问:%s' % url)
        self.driver.get(url)
        html = self.driver.page_source
        logger.info('关闭浏览器')
        return html

    def login_weibo(self, url, username, password):
        """Log in to Weibo and return the session cookies.

        Opens *url*, fills in the credentials, and if the logged-in marker
        does not appear, tries up to five times to recognise and submit the
        captcha. The browser is always quit before this method returns or
        raises.

        Args:
            url: Address of the login page.
            username: Account name typed into the ``loginname`` field.
            password: Password typed into the ``password`` field.

        Returns:
            A dict mapping cookie name to cookie value on success, or
            ``None`` when the login fails.
        """
        logger.info('打开浏览器,访问:%s' % url)
        try:
            # Navigation errors still propagate to the caller, but the
            # `finally` below now guarantees the browser is not leaked.
            self.driver.get(url)
            return self._perform_login(username, password)
        finally:
            logger.info('关闭浏览器')
            self.close()

    def _perform_login(self, username, password):
        """Fill in and submit the login form; return cookies or ``None``."""
        try:
            WebDriverWait(self.driver, 20).until(
                EC.presence_of_element_located((By.ID, 'loginname')))
            # Type the user name.
            logger.info('输入用户名: %s' % username)
            time.sleep(1)
            self.location_element_by_id('loginname', username)
            # Type the password; only a fixed mask is logged so the real
            # credential never reaches the log output.
            logger.info('输入密码: %s' % '******')
            time.sleep(1)
            self.location_element_by_name('password', password)
            # Submit the form.
            logger.info('点击登陆')
            time.sleep(1)
            self._click_login()
            if not self._wait_logged_in(10):
                logger.info('%s有验证码' % username)
                if not self._pass_captcha(username):
                    logger.error('登陆失败')
                    return None
            logger.info('登陆成功')
            cookies = {item['name']: item['value']
                       for item in self.driver.get_cookies()}
            logger.info("获取到的cookie:%s" % cookies)
            return cookies
        except Exception as e:
            logger.error('登陆失败:%s' % username)
            logger.error(str(e))
            return None

    def _click_login(self):
        """Click the submit button of the login form."""
        self.location_element_by_xpath(self._LOGIN_BUTTON_XPATH, click=True)

    def _wait_logged_in(self, timeout):
        """Return True if the logged-in marker appears within *timeout* s."""
        # Imported here so the class does not depend on a file-level import.
        from selenium.common.exceptions import TimeoutException
        try:
            WebDriverWait(self.driver, timeout).until(
                EC.presence_of_element_located(
                    (By.CLASS_NAME, self._LOGGED_IN_CLASS)))
        except TimeoutException:
            # Only a timeout means "not logged in yet"; any other error
            # propagates and is reported as a login failure by the caller.
            return False
        return True

    def _pass_captcha(self, username):
        """Try to get past the captcha; return True when the loop succeeds."""
        for attempt in range(1, self._MAX_VERIFY_ATTEMPTS + 1):
            logger.info('第%s次验证' % attempt)
            # The captcha row is considered hidden when its inline style
            # contains "none".
            box = self.location_element_by_xpath(self._VERIFY_BOX_XPATH)
            box_style = box.get_attribute('style') or ''
            if 'none' in box_style:
                logger.info('验证码识别成功')
                return True

            # Screenshot the page and recognise the captcha.
            verify_code = self.save_img(username)

            # Type the recognised captcha text.
            logger.info('输入验证码: %s' % verify_code)
            time.sleep(1)
            self.location_element_by_name('verifycode', verify_code)

            # Submit the form again.
            time.sleep(1)
            self._click_login()
            if self._wait_logged_in(5):
                return True
        return False

    def _locate(self, by, locator, value=None, click=False):
        """Find one element, optionally type *value* and/or click it."""
        # `find_element(by, value)` is available in Selenium 3 and 4, unlike
        # the removed `find_element_by_*` shortcuts.
        element = self.driver.find_element(by, locator)
        if value:
            element.send_keys(value)
        if click:
            element.click()
        return element

    def location_element_by_xpath(self, xpath, value=None, click=False):
        """Locate an element by XPath; see ``_locate`` for *value*/*click*."""
        return self._locate(By.XPATH, xpath, value, click)

    def location_element_by_tag_name(self, tag_name, value=None, click=False):
        """Locate an element by tag name; see ``_locate``."""
        return self._locate(By.TAG_NAME, tag_name, value, click)

    def location_element_by_name(self, name, value=None, click=False):
        """Locate an element by its ``name`` attribute; see ``_locate``."""
        return self._locate(By.NAME, name, value, click)

    def location_element_by_id(self, id, value=None, click=False):
        """Locate an element by its ``id`` attribute; see ``_locate``."""
        return self._locate(By.ID, id, value, click)

    def save_img(self, username):
        """Screenshot the window to ``<username>.png`` and recognise it.

        Returns:
            Whatever ``get_verify_code`` returns for that screenshot file.
        """
        screenshot_path = '%s.png' % username
        self.driver.save_screenshot(screenshot_path)
        return get_verify_code(screenshot_path)

    def close(self):
        """Quit the underlying browser session."""
        self.driver.quit()


if __name__ == '__main__':
    import os

    # Read the credentials from the environment so that the script runs
    # without undefined names and no password is hard-coded in the source.
    username = os.environ.get('WEIBO_USERNAME')
    password = os.environ.get('WEIBO_PASSWORD')
    if not username or not password:
        raise SystemExit(
            'Set WEIBO_USERNAME and WEIBO_PASSWORD before running this script.')

    driver = WebDriver()
    driver.login_weibo('https://weibo.com/login?', username, password)

微博登录使用的是selenium模拟浏览器的方式,原理就是访问微博登录页面,输入用户名和密码,判断是否出现验证码,有验证码的话先进行验证码识别再登录。

get_verify_code:从截图中裁剪出验证码区域并保存,调用predict识别后删除临时图片
predict:识别验证码函数,大家可以看我出的[微博验证码识别系列教程](https://blog.csdn.net/hbk5241?spm=1010.2135.3001.5343),里面有对该函数的讲解
from PIL import Image
from model.predict import predict
import os
def get_verify_code(img_name, box=(2165, 480, 2362, 541)):
    """Crop the captcha out of a screenshot and return the recognised text.

    The screenshot file is overwritten with the cropped region, passed to
    ``predict``, and then deleted, even when cropping or prediction fails.

    Args:
        img_name: Path of the screenshot image file.
        box: ``(left, upper, right, lower)`` crop rectangle in screenshot
            pixels. The default is the region that used to be hard-coded.
            NOTE(review): presumably tied to one particular screen size or
            scale factor; confirm before relying on it elsewhere.

    Returns:
        Whatever ``predict`` returns for the cropped image file.
    """
    try:
        # The context manager closes the source image (and its file handle)
        # before the same path is overwritten with the cropped region.
        with Image.open(img_name) as screenshot:
            captcha = screenshot.crop(box)
            captcha.load()  # make sure the pixels are read before closing
        captcha.save(img_name)
        captcha.close()
        return predict(img_name)
    finally:
        # Do not leave the temporary screenshot behind on failure.
        if os.path.exists(img_name):
            os.remove(img_name)

你可能感兴趣的:(微博验证码,验证码识别,微博,python,selenium)