使用 Selenium 模拟登录古诗文网(含验证码)

selenium 模拟登陆 古诗文网 含验证码_第1张图片

ocr.py / 阿里云市场
import base64
import json
import urllib.request
from urllib import parse
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

def get_code(image_path='./code.jpg'):
    """Recognize the text of a local captcha image through the OCR web API.

    The image is read from disk, base64-encoded as a JPEG data URI and
    POSTed as a form-encoded ``image`` field to the Aliyun Market OCR
    endpoint.

    Args:
        image_path: Path of the image to recognize. Defaults to
            ``'./code.jpg'``.

    Returns:
        The ``words`` value of the first entry in the response's ``result``
        list, or ``None`` when the response body is empty.

    Raises:
        OSError: If the image file cannot be opened or read.
        urllib.error.URLError: If the HTTP request fails.
        KeyError, IndexError: If the JSON response does not contain
            ``result[0]['words']``.
    """
    # Endpoint address; change it here if the API documentation says so.
    url = 'https://imgurlocr.market.alicloudapi.com/urlimages'
    # NOTE(review): the credential is hard-coded in source; consider loading
    # it from configuration or the environment instead.
    appcode = 'c657ecb2f1cd4f779ff4f8bf3ebb0af1'

    # Build the form body from the local image; `with` guarantees the file
    # handle is released even if reading fails.
    with open(image_path, 'rb') as fp:
        encoded = base64.b64encode(fp.read()).decode()
    bodys = {'image': 'data:image/jpeg;base64,' + encoded}

    # Supplying a data payload makes urllib issue a POST request.
    post_data = urllib.parse.urlencode(bodys).encode(encoding='UTF8')
    request = urllib.request.Request(url, post_data)
    # Content-Type as required by the API.
    request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
    request.add_header('Authorization', 'APPCODE ' + appcode)

    # NOTE(review): certificate and hostname verification are disabled, as in
    # the original code; this exposes the APPCODE to interception.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    # Close the HTTP response deterministically once the body is read.
    with urllib.request.urlopen(request, context=ctx) as response:
        content = response.read()

    if not content:
        return None

    res = json.loads(content.decode('UTF-8'))
    return res['result'][0]['words']
模拟登录
import time

from selenium import webdriver

from .ocr import get_code

# Location of the chromedriver executable handed to Selenium.
chrome_path = '/Users/apple/soft/chromedriver'
driver = webdriver.Chrome(executable_path=chrome_path)

# Open the login page.
driver.get('https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx')

# Type the account and then the password, pausing one second after each.
for field_id, text in (('email', '[email protected]'), ('pwd', 'python123_')):
    driver.find_element_by_id(field_id).send_keys(text)
    time.sleep(1)

# Save a screenshot of the captcha element to the path get_code() reads.
captcha_image = driver.find_element_by_id('imgCode')
captcha_image.screenshot('./code.jpg')
time.sleep(1)

# Ask the OCR helper for the captcha text.
code = get_code()

# Fill in the captcha.
code_box = driver.find_element_by_id('code')
code_box.send_keys(code)
time.sleep(1)

# Click the login button.
login_button = driver.find_element_by_id('denglu')
login_button.click()

你可能感兴趣的:(spider,Python)