爬虫模拟登陆手机验证码_Python+scrapy爬虫之模拟登陆

一、126,163邮箱模拟登陆

爬虫模拟登陆手机验证码_Python+scrapy爬虫之模拟登陆_第1张图片
# -*- coding:utf-8 -*-
"""Simulated login to a 126/163 mailbox with Selenium-driven Chrome."""
import time

from selenium import webdriver
from selenium.webdriver.common.by import By


def login126_or_163emall(url, driver_path="D:chromedriver.exe"):
    """Log in to the mailbox at *url* and print the first browser cookie.

    The account name and the password are read interactively from stdin.

    :param url: login page to open, e.g. "https://mail.163.com/".
    :param driver_path: value passed to ``webdriver.Chrome`` as
        ``executable_path``.
        NOTE(review): the default looks like it lost its path separator
        after the drive letter -- confirm the real chromedriver location.
        NOTE(review): recent Selenium releases configure the driver path
        through a ``Service`` object instead of ``executable_path`` --
        confirm against the installed Selenium version.
    :return: the first cookie dict reported by the browser, or ``None``
        when the browser reports no cookies.
    """
    login_name = input("请输入账号:")
    login_password = input("请输入密码:")

    # Start the Chrome instance that is automated below.
    driver = webdriver.Chrome(executable_path=driver_path)
    try:
        driver.maximize_window()
        driver.get(url=url)
        time.sleep(10)

        # Switch the form to password login.
        driver.find_element(By.ID, "lbNormal").click()

        # The login form is nested in an iframe, so move the driver's
        # context into that frame before looking up the input fields.
        login_frame = driver.find_element(
            By.CSS_SELECTOR, "iframe[id^='x-URS-iframe']"
        )
        driver.switch_to.frame(login_frame)

        # Account box: the "@126.com" suffix does not need to be typed.
        driver.find_element(By.NAME, "email").send_keys(login_name)
        driver.find_element(By.NAME, "password").send_keys(login_password)
        time.sleep(3)

        driver.find_element(By.ID, "dologin").click()
        time.sleep(5)

        # get_cookies() returns a list of cookie dicts; only the first one
        # is reported. Guard against an empty list instead of raising
        # IndexError.
        all_cookies = driver.get_cookies()
        cookies = all_cookies[0] if all_cookies else None
        print(cookies)
        time.sleep(10)
        return cookies
    finally:
        # Always shut the browser down, even when a step above fails, so
        # no Chrome/chromedriver process is left behind.
        driver.quit()


if __name__ == '__main__':
    # url = "https://mail.126.com/"     # 126 mailbox url
    url = "https://mail.163.com/"       # 163 mailbox url
    login126_or_163emall(url=url)

输出的cookies如下:

"""{'domain': '.163.com', 'expiry': 4717308714, 'httpOnly': False, 'name': '_ntes_nnid', 'path': '/', 'secure': False, 'value': 'cf36cf83b0562fccb3ab872f3b1dfa4c,1563708714807'} """

二、B站模拟登陆

爬虫模拟登陆手机验证码_Python+scrapy爬虫之模拟登陆_第2张图片
"""Simulated Bilibili login that solves the Geetest slider captcha."""
import time
import random
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Distance from the slider's left edge to the captcha image's left edge.
border = 6


class CrackGeetest:
    """Drive Chrome through the Bilibili login page and its slider captcha."""

    def __init__(self, driver_path=r"D:chromedriver.exe"):
        """Start Chrome, maximise the window and prepare a 5 second wait.

        :param driver_path: chromedriver location handed to ``webdriver.Chrome``.
            NOTE(review): the default looks like it lost its path separator
            after the drive letter -- confirm the real location.
        """
        self.url = 'https://passport.bilibili.com/login'
        self.browser = webdriver.Chrome(driver_path)
        self.browser.maximize_window()
        self.wait = WebDriverWait(self.browser, timeout=5)

    def close(self):
        """Close the browser window, then quit and stop chromedriver."""
        self.browser.close()
        self.browser.quit()

    def get_geetest_image(self, name='captcha.png'):
        """Screenshot the page and return the region covered by the captcha.

        The full screenshot and the cropped captcha are also written to disk.

        :param name: file name used for the cropped captcha image.
        :return: the cropped captcha as a PIL image.
        """
        img = self.wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/div[2]/div[2]/div[6]/div/div[1]/div[1]/div/a/div[1]')))
        time.sleep(2)
        location = img.location     # element position on the page
        size = img.size             # element width/height
        print(location,size)
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        print('验证码位置', top, bottom, left, right)
        # Take the screenshot as PNG bytes and decode it in memory.
        screenshot = Image.open(BytesIO(self.browser.get_screenshot_as_png()))
        # NOTE(review): both save paths look like they lost their path
        # separators -- confirm the intended directory.
        screenshot.save(r"D:photoimagescreenshot.png" )
        # Cut the screenshot down to the captcha's bounding box.
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(r"D:photoimage%s"%name)
        return captcha

    def get_gap(self, img1, img2):
        """Return the x coordinate of the first column where the images differ.

        Columns are scanned left to right starting at x = 60; when no pixel
        differs, 60 is returned.

        :param img1: first captcha image (its size bounds the scan).
        :param img2: second captcha image, compared pixel by pixel.
        :return: x offset of the first differing column.
        """
        # Slider width plus the slider-to-image left margin: columns before
        # this offset are skipped.
        left = 60
        for i in range(left, img1.size[0]):
            for j in range(img1.size[1]):
                if not self.is_pixel_equal(img1, img2, i, j):
                    return i
        return left

    def is_pixel_equal(self, img1, img2, x, y):
        """Tell whether both images have (nearly) the same colour at (x, y).

        Two pixels count as equal when each of the first three channels
        differs by less than 60.
        """
        pix1 = img1.load()[x, y]
        pix2 = img2.load()[x, y]
        threshold = 60
        return all(abs(pix1[c] - pix2[c]) < threshold for c in range(3))

    def get_track(self, distance):
        """Build the list of per-step x offsets used to drag the slider.

        The motion accelerates (a = 2) until three quarters of *distance*
        and decelerates (a = -3) afterwards, aiming 5 pixels past *distance*;
        ``move_to_gap`` later moves back by a net 5 pixels.

        Each step is emitted as the difference between consecutive rounded
        positions and the last position is capped at the target, so the
        steps sum to the rounded target instead of drifting through
        per-step rounding and a final overshoot.

        :param distance: distance to the gap in pixels.
        :return: list of integer x offsets.
        """
        track = []
        current = 0                     # exact position reached so far
        emitted = 0                     # integer position already in `track`
        mid = distance * 3 / 4          # where deceleration starts
        t = random.randint(2, 3) / 10   # time slice per step
        v = 0                           # current velocity
        distance += 5                   # deliberate overshoot
        while current < distance:
            a = 2 if current < mid else -3
            v0 = v
            v = v0 + a * t                          # v = v0 + a*t
            current += v0 * t + 1 / 2 * a * t * t   # x = v0*t + 0.5*a*t^2
            position = round(min(current, distance))
            track.append(position - emitted)
            emitted = position
        return track

    def move_to_gap(self, slider, tracks):
        """Drag *slider* along *tracks*, step back slightly, then release.

        :param slider: the slider element to press and drag.
        :param tracks: x offsets to apply; they are applied in random order
            and the caller's list is left untouched.
        """
        steps = list(tracks)
        random.shuffle(steps)
        # Press and hold the left mouse button on the slider.
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in steps:
            # Move relative to the current pointer position.
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        # Imitate a human overshooting the gap and coming back: five one-pixel
        # steps to the left, separated by short fixed or random pauses.
        step_back = ActionChains(self.browser).move_by_offset(xoffset=-1, yoffset=0)
        for pause in (0.015, None, 0.04, None, 0.019):
            time.sleep(random.randint(6, 10) / 10 if pause is None else pause)
            step_back.perform()
        time.sleep(random.randint(6, 10) / 10)
        ActionChains(self.browser).move_by_offset(xoffset=1, yoffset=0).perform()
        # Imitate the jitter that happens when the mouse button is released.
        ActionChains(self.browser).move_by_offset(xoffset=-3, yoffset=0).perform()
        ActionChains(self.browser).move_by_offset(xoffset=2, yoffset=0).perform()
        time.sleep(0.5)
        # Release the mouse button.
        ActionChains(self.browser).release().perform()

    def _attempt_login(self):
        """Run one complete login + captcha attempt; raises on any failure."""
        self.browser.get(self.url)
        username_input = self.wait.until(EC.presence_of_all_elements_located((By.XPATH,'//*[@id="login-username"]')))[0]
        password_input = self.wait.until(EC.presence_of_all_elements_located((By.XPATH,'//*[@id="login-passwd"]')))[0]
        username_input.send_keys("15612345678")
        password_input.send_keys("1234567890")
        # Clicking the login button makes the captcha appear.
        login_button = self.wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="geetest-wrap"]/ul/li[5]/a[1]')))
        login_button.click()
        # Wait until the captcha container is present.
        self.wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/div[2]/div[2]/div[6]/div')))
        slider = self.wait.until(EC.element_to_be_clickable((By.XPATH, '/html/body/div[2]/div[2]/div[6]/div/div[1]/div[2]/div[2]')))
        # First capture: the captcha as initially displayed
        # (presumably the picture with the gap -- TODO confirm).
        image1 = self.get_geetest_image('captcha1.png')
        # The captcha picture is cut into many tiles, so use JavaScript to
        # change what the canvases display before the second capture.
        # Option 1: force the full-background canvas to be displayed.
        element = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'canvas.geetest_canvas_fullbg.geetest_fade.geetest_absolute')))
        self.browser.execute_script("arguments[0].style=arguments[1]",element,"display: block;")
        # Option 2 (the one the original author recommends): clear the inline
        # style of the canvases directly.
        # self.browser.execute_script('document.querySelectorAll("canvas")[2].style=""')  # captcha with the missing piece
        # self.browser.execute_script('document.querySelectorAll("canvas")[3].style=""')  # complete captcha
        # Second capture, taken after the canvas style change.
        image2 = self.get_geetest_image('captcha2.png')
        gap = self.get_gap(image1, image2)
        # Subtract the slider-to-image left margin.
        gap -= border
        print('滑块的位置', gap)
        track = self.get_track(gap)
        self.move_to_gap(slider, track)
        time.sleep(1)
        # Wait for the page to report that verification succeeded.
        success = self.wait.until(EC.text_to_be_present_in_element((By.XPATH, '/html/body/div[2]/div[2]/div[3]/div[2]'), '验证成功'))
        print(success)
        time.sleep(5)

    def crack(self, max_attempts=3):
        """Log in and solve the captcha, retrying a bounded number of times.

        The browser is closed when the method finishes, whether it succeeded
        or not.

        :param max_attempts: maximum number of attempts (at least one is made).
        :raises Exception: the error of the last attempt when all attempts fail.
        """
        attempts = max(1, max_attempts)
        try:
            for attempt in range(1, attempts + 1):
                try:
                    self._attempt_login()
                    return
                except Exception:
                    # Bounded retry instead of unbounded recursion; Ctrl-C is
                    # no longer swallowed because only Exception is caught.
                    print('Failed-Retry')
                    if attempt == attempts:
                        raise
        finally:
            self.close()


if __name__ == '__main__':
    crack = CrackGeetest()
    crack.crack()

由于哔哩哔哩验证码是极验的滑动验证码,验证码图片是由很多个小块图片碎片组合而成,所以解决办法如下:

# ========= Run JavaScript in the current window =========
# The original captcha picture is cut into many tiles, so the canvases are
# restyled from JavaScript before the second screenshot is taken.
# Option 1: locate the full-background canvas and force it to be displayed.
element = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'canvas.geetest_canvas_fullbg.geetest_fade.geetest_absolute')))
self.browser.execute_script("arguments[0].style=arguments[1]",element,"display: block;")
# Option 2 (the one the author recommends): clear the inline style of the
# canvases directly.
# self.browser.execute_script('document.querySelectorAll("canvas")[2].style=""')  # captcha with the missing piece
# self.browser.execute_script('document.querySelectorAll("canvas")[3].style=""')  # complete captcha

最后就是运行了,运行结果就不展示了。测试了一下,成功率还是特别高的。

你可能感兴趣的:(爬虫模拟登陆手机验证码)