Python 爬虫 - 验证码识别

import time
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from chaojiying import main1

# Chrome start-up options. Uncomment the add_argument line to run headless.
chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('--headless')
# Pass the options through `options=`; the older `chrome_options=` keyword is
# deprecated and is rejected by current Selenium releases.
browser = webdriver.Chrome(options=chrome_options)

# Requested browser window size, in pixels.
screen_width = 1400
screen_height = 700

browser.set_window_size(screen_width, screen_height)
# Explicit wait: blocks until a condition on a specific node holds, giving up
# after 5 seconds.
wait = WebDriverWait(browser, 5)


# Capture everything currently shown inside the browser window.
def get_big_image():
    """Return a screenshot of the browser window as a PIL image.

    The PNG bytes reported by the module-level ``browser`` are decoded
    in memory; nothing is written to disk here.
    """
    png_bytes = browser.get_screenshot_as_png()
    return Image.open(BytesIO(png_bytes))


def get_captha_position():
    """Return the captcha element's bounding box as ``(x1, y1, x2, y2)``.

    Waits until ``#captchaImg`` is present, then combines the location and
    size reported for that element. The box and the element's width/height
    are also printed for debugging.
    """
    locator = (By.CSS_SELECTOR, '#captchaImg')
    element = wait.until(EC.presence_of_element_located(locator))

    # Read each property once and keep the original order (location, size).
    origin = element.location
    dims = element.size

    left, top = origin['x'], origin['y']
    box_width, box_height = dims['width'], dims['height']
    right = left + box_width
    bottom = top + box_height

    print(left, top, right, bottom)
    print(box_width, box_height)
    return left, top, right, bottom


def get_page(username='[email protected]', password='ypc426069'):
    """Open the 10086 login page and submit the form with a solved captcha.

    Steps: load the login URL, click ``#mail_login_2``, type the credentials,
    screenshot the window, crop the ``#captchaImg`` region out of that
    screenshot, hand the cropped file to ``main1`` and type its result into
    ``#inputCode`` before clicking ``#submit_bt``.

    Side effects: writes ``mobile_full_image.png`` and ``mobile_captcha.png``
    to the current working directory and prints debug values.

    Args:
        username: Account name typed into ``#e_name``.
        password: Password typed into ``#e_pwd``.

    Raises:
        selenium.common.exceptions.TimeoutException: if an awaited element
            does not appear (or become clickable) within the wait timeout.
    """
    # NOTE(review): the default credentials are kept only so existing
    # ``get_page()`` calls behave as before; callers should pass their own
    # values instead of relying on secrets committed to the source.
    url = 'https://login.10086.cn/html/login/login.html?channelID=12002&backUrl=https%3A%2F%2Fshop.10086.cn%2Fmall_280_280.html%3Fforcelogin%3D1#'
    browser.get(url)

    # Presumably switches the page to the account/password login form.
    button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#mail_login_2')))
    button.click()

    input_username = wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '#e_name')))
    input_password = wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '#e_pwd')))
    input_username.clear()
    input_username.send_keys(username)
    input_password.clear()
    input_password.send_keys(password)
    # Fixed pause before the screenshot; presumably lets the captcha render.
    time.sleep(3)

    full_image = get_big_image()
    print(full_image.width, full_image.height)
    full_image.save('mobile_full_image.png')

    # Element coordinates are reported in CSS pixels while the screenshot is
    # in device pixels. The display scale factor is the same on both axes, so
    # one ratio is applied to x AND y (previously only x was scaled, which
    # cropped the wrong rows on scaled/HiDPI displays).
    scale = browser.execute_script('return window.devicePixelRatio') or 1
    print(scale)

    # Top-left and bottom-right corners of the captcha, in screenshot pixels.
    x1, y1, x2, y2 = (round(value * scale) for value in get_captha_position())
    print(x1, y1, x2, y2)

    captha_img = full_image.crop((x1, y1, x2, y2))
    captha_img.save('mobile_captcha.png')
    # main1 comes from the local chaojiying module; it receives the image
    # path and its return value is typed into the captcha field as-is.
    captha_str = main1('mobile_captcha.png')
    print(captha_str)

    input_code = wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '#inputCode')))
    input_code.clear()
    input_code.send_keys(captha_str)

    button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#submit_bt')))
    button.click()




def main():
    """Script entry point: run the login flow once."""
    # get_page() has no return value, so its result is not bound to a name.
    get_page()


# Run the login flow only when this file is executed as a script.
if __name__ == '__main__':
    main()

你可能感兴趣的:(Python 爬虫 - 验证码识别)