python 除了使用selenium还可以使用pyppeteer,模拟登录淘宝

import random
import asyncio
from pyppeteer import launch
from retrying import retry


async def main(user, pw, url):
    browser = await launch({
                            "headless": False,
                            # 重新指定临时数据路径,解决windows系统 OSError: Unable to remove Temporary User Data报错问题
                            "userDataDir": r"F:\temporary",
                            # 有头的不要加这句话,容易导致浏览器进程无法结束
                            # "args": ['--no-sandbox'], 
                            'dumpio': True})  # "dumpio": True 解决浏览器卡住问题
    """浏览器启动的时候,自动使用cookies信息或者缓存填写了账号输入框,通过系新建上下文,可以解决多个浏览器数据共享的问题,暂时没想到其他的解决方案"""
    context = await browser.createIncognitoBrowserContext()
    page = await context.newPage()
    # page = await browser.newPage() # 启动个新的浏览器页面
    # 新定义的注入js函数每次导航或者加载新的页面时会自动执行js注入 比起page.evaluate()每打开一个页面都要单独注入一次好用
    await inject_js(page)
    await page.setUserAgent(
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) \
        Chrome/72.0.3626.109 Safari/537.36')
    # goto到指定网页并且等到网络空闲为止
    await page.goto(url, {"waitUntil": 'networkidle2'})
    await page.type("#TPL_username_1", user, {"delay": input_time_random()-50})  # 毫秒
    await page.type("#TPL_password_1", pw, {"delay": input_time_random()})
    await asyncio.sleep(random.random()+0.5)  # 毫秒
    # 判断是否有滑块
    slider = await page.Jeval("#nocaptcha", "node => node.style")
    if slider:
        print("slider appear")
        flag, page = await mouse_slide(page=page)
        fresh = ""
        # 处理刷新重新验证的情况
        try:
            fresh = await page.Jeval(".errloading", "node => node.textContent")
        except:
            pass
        if fresh:
            # 刷新滑块验证
            await page.hover("a[href='javascript:noCaptcha.reset(1)']")
            await page.mouse.down()
            await page.mouse.up()
            await asyncio.sleep(random.uniform(1, 2))
            try:
                # page.J相当于page.querySelector()
                await page.J(".nc-lang-cnt[data-nc-lang='_startTEXT']")
                print("refresh slider success: begin verify slide again...")
                # 二次滑块验证
                flag, page = await mouse_slide(page=page)
                if flag:
                    return await click_login(page)
                else:
                    print("second verify slider faild: quit")
                    return
            except:
                print("refresh slider failed: please check your code")
                return
        if flag:
            return await click_login(page)
        else:
            print("login failed: please check out your code")
            return
    else:
        print("No slider")
        return await click_login(page)

# 点击登陆
async def click_login(page):
    """ 
    	1 作为一个整体,因是异步,导航等待触发,去执行点击操作,点击操作完,触发导航结束
        2 只有导航结束才能进行登陆结果判断
        3 看到其他大神写的是先按回车键,不是很明白,模拟回车键相当于登陆了,页面跳转后执行点击就会报错,所以这儿直接把回车操作干掉,有点疑惑,还希望懂的老哥指点一下 
    """
    await asyncio.gather(page.waitForNavigation(), page.evaluate("document.getElementById('J_SubmitStatic').click()"))
    # 检测点击登陆后是否出现滑块验证
    try:
        slider = await page.Jeval("#nocaptcha", "node => node.style")
        if slider:
            print("after click login slider appear")
            flag, page = await mouse_slide(page)
            if flag:
                # 点击登陆出现滑块验证,密码框是被清空的,所以需要重新输入密码
                await page.type("#TPL_password_1", pwd, {"delay": input_time_random()})
                await asyncio.gather(page.waitForNavigation(),
                                     page.evaluate("document.getElementById('J_SubmitStatic').click()"))
                try:
                    await page.waitForSelector(".account-id", {"timeout": 15000})
                    print(page.url)
                    return await get_cookie(page)
                except:
                    print("timeout wait for element: .account-id")
                    return
    except:
        pass

    # 检测是否有账号密码错误
    try:
        global error 
        error = await page.Jeval("#J_Message .error", "node => node.textContent")
        print("error:", error)
        print("account_info:", username)
    except Exception as e:
        error = None
    finally:
        if error:
            print("确保账号安全重新输入")
        else:
            try:
                # 等待登陆成功页面某一元素的出现
                await page.waitForSelector(".account-id", {"timeout": 10000})
                print(page.url)
                return await get_cookie(page)
            except:
                print("timeout wait for element: .account-id")


#def retry_if_result_none(result):
#   return result is None

# 这儿写重试没实际作用,如果需要的话自己加上
#@retry(retry_on_result=retry_if_result_none)
async def mouse_slide(page=None, frame=None):
    try:
        if frame:
            await frame.hover("#nc_1_n1z")
        else:
            await page.hover("#nc_1_n1z")
            await page.mouse.down()
            await page.mouse.move(2000, 0, {"delay": random.randint(1000, 2000)})
            # await page.mouse.move(2000, 0, {"steps": random.randint(50, 100)})
            await page.mouse.up()
    except Exception as e:
        print("mouse error retry... or please check your code")
        return None
    else:
        await asyncio.sleep(random.uniform(1, 2))
        # 判断是否通过
        slider_result = ""
        try:
            slider_result = await page.Jeval(".nc-lang-cnt", "node => node.textContent")
        except Exception as e:
            pass
        if slider_result != "验证通过":
            print("verify fail error info:", slider_result)
            return None, page
        else:
            print("verify pass")
            return 1, page


async def get_cookie(page):
    cookies_list = await page.cookies()
    cookies = ""
    for cookie in cookies_list:
        str_cookie = "{0}={1};"
        str_cookie = str_cookie.format(cookie["name"], cookie["value"])
        cookies += str_cookie
    return cookies


def input_time_random():
    return random.randint(100, 151)


async def inject_js(page):
    js1 = "() =>{ Object.defineProperties(navigator,{ webdriver:{ get: () => undefined } }) }"
    js2 = "() =>{ window.navigator.chrome = { runtime: {},  }; }"
    js3 = "() =>{ Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); }"
    js4 = "() =>{ Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5,6], }); }"
    await page.evaluateOnNewDocument(js1)
    await page.evaluateOnNewDocument(js2)
    await page.evaluateOnNewDocument(js3)
    await page.evaluateOnNewDocument(js4)


if __name__ == '__main__':
    username = "xxxxxxxxxx"
    pwd = "xxxxxxxxxxx"
    url = "https://login.taobao.com/member/login.jhtml?style=mini&css_style=b2b&from=b2b&full_redirect=true&redirect_url=https://login.1688.com/member/jump.htm?target=https://login.1688.com/member/marketSigninJump.htm?Done=http://login.1688.com/member/taobaoSellerLoginDispatch.htm®=%20http://member.1688.com/member/join/enterprise_join.htm?lead=http://login.1688.com/member/taobaoSellerLoginDispatch.htm&leadUrl=http://login.1688.com/member/"
    loop = asyncio.get_event_loop()
    coroutine = main(username, pwd, url)
    task = asyncio.ensure_future(coroutine)
    loop.run_until_complete(task)
    if task.result():
        print(task.result())

注:  成功登录后会自动关闭

你可能感兴趣的:(Python)