携程滑块,点选文字识别

文章目录

    • code_datawhale05_xiecheng.py
    • result
    • code_datawhale06_bzhan_login.py
    • result
    • END

code_datawhale05_xiecheng.py

携程滑块,点选文字识别

from selenium import webdriver
from selenium.webdriver import ActionChains
import time
import base64
from PIL import Image
from aip import AipOcr

"""
https://blog.csdn.net/weixin_41311624/article/details/86522119
利用Python3安装aip
https://www.jianshu.com/p/d59b47bc4812
利用Python3安装aip

https://blog.csdn.net/qq_38787214/article/details/87902291
Python3 super().__init__()测试及理解

https://www.cnblogs.com/duanwandao/p/9802795.html

https://blog.csdn.net/pythoncsdn111/article/details/96453839
https://blog.csdn.net/qq_42992919/article/details/98483845

https://blog.csdn.net/hhy1107786871/article/details/88342976


"""


# 破解携程反爬验证
class unlockScrapy(object):
    """Solve the captchas shown on the Ctrip login page.

    Two kinds of challenge are handled:

    * a slider that has to be dragged to the right (``unlockScroll``);
    * a "click the characters in order" picture (``pic_main``), solved by
      sending both captcha images to the Baidu OCR service.

    Switching to QR-code login and back makes the slider appear; once it has
    appeared, submitting a wrong password and logging in again triggers the
    click-in-order captcha.
    """

    def __init__(self, driver, app_id=None, api_key=None, secret_key=None):
        """Store the Selenium driver and build the Baidu OCR client.

        Args:
            driver: Selenium WebDriver already showing the login page.
            app_id: Baidu OCR app id; falls back to the built-in value.
            api_key: Baidu OCR API key; falls back to the built-in value.
            secret_key: Baidu OCR secret key; falls back to the built-in value.
        """
        super(unlockScrapy, self).__init__()
        # Selenium driver
        self.driver = driver
        # NOTE(security): these fallback credentials are committed in source.
        # They are kept only so existing callers keep working; pass your own
        # via the keyword arguments and rotate the ones below.
        self.WAPPID = app_id if app_id is not None else '17062614'
        self.WAPPKEY = (api_key if api_key is not None
                        else 'E15mYUgfBRVV3ohVVZZVcCCc')
        self.WSECRETKEY = (secret_key if secret_key is not None
                           else 'ClxgLmf2U0DwgX9mSvZG7v4zInrrCT92')
        # Baidu OCR SDK client
        self.WCLIENT = AipOcr(self.WAPPID, self.WAPPKEY, self.WSECRETKEY)
        print("5" * 10)

    # Slider element class: cpt-img-double-right-outer
    def unlockScroll(self):
        """Drag the slider captcha to the right, if one is present.

        Prints "无滑块" when the slider element is missing or the drag fails.
        """
        try:
            print("1" * 10)
            # [0] raises IndexError when no slider is on the page.
            scrollElement = self.driver.find_elements_by_class_name(
                'cpt-img-double-right-outer')[0]
            print("2" * 10)
            ActionChains(self.driver).click_and_hold(
                on_element=scrollElement).perform()
            # Move in three steps instead of one jump.
            for xoffset, yoffset in ((30, 10), (100, 20), (200, 50)):
                ActionChains(self.driver).move_to_element_with_offset(
                    to_element=scrollElement,
                    xoffset=xoffset, yoffset=yoffset).perform()
            # A drag must end with a release; otherwise the mouse button
            # stays pressed for every later action on this driver.
            ActionChains(self.driver).release().perform()
            print("滑块破解成功")
        except Exception:
            # Best effort: absence of the slider is the expected common case.
            print("无滑块")

    @staticmethod
    def _save_data_uri(src, path):
        """Decode the base64 payload of a ``data:`` URI and write it to path."""
        payload = src.split(',')[1]
        # Re-add stripped '=' padding so b64decode does not fail with
        # "Incorrect padding"; a no-op when the payload is already padded.
        payload += '=' * (-len(payload) % 4)
        with open(path, "wb") as fh:
            # The url-safe decoder also accepts the standard '+' and '/'.
            fh.write(base64.urlsafe_b64decode(payload))

    def downloadImg(self):
        """Save the small prompt image and the large click image to disk.

        Both ``<img>`` elements carry base64 ``data:`` URIs in ``src``; they
        are written to ``code.jpeg`` (prompt) and ``checkCode.jpeg`` (target).
        """
        time.sleep(1)
        # src of the small prompt image
        codeSrc = self.driver.find_element_by_xpath(
            "//*[@id='sliderddnormal-choose']/div[2]/div[1]/img"
        ).get_attribute("src")
        print(codeSrc)
        print("6" * 10)
        # src of the large image that has to be clicked
        checkSrc = self.driver.find_element_by_xpath(
            "//*[@id='sliderddnormal-choose']/div[2]/div[3]/img"
        ).get_attribute("src")
        print("7" * 10)
        print(codeSrc.split(','))

        self._save_data_uri(codeSrc, "code.jpeg")
        self._save_data_uri(checkSrc, "checkCode.jpeg")

    # Image.point(fn) applies fn to every pixel value, see
    # https://www.cnblogs.com/kongzhagen/p/6295925.html
    def chageImgLight(self):
        """Multiply every pixel value of both captcha images by 4, in place.

        This brightens the images before OCR (it is not a true binarisation).
        """
        for path in ("code.jpeg", "checkCode.jpeg"):
            im = Image.open(path)
            im.point(lambda p: p * 4).save(path)

    def getFile(self, filePath):
        """Return the raw bytes of the file at ``filePath``."""
        with open(filePath, 'rb') as fp:
            print("8读取图片" * 2)
            return fp.read()

    # Baidu OCR request options used below:
    #   language_type    - recognition language, CHN_ENG = mixed Chinese/English
    #   detect_direction - whether to detect the image orientation
    # The response carries the recognised lines in 'words_result'.
    def iTow(self):
        """Recognise the text of the small prompt image (``code.jpeg``).

        Returns:
            The recognised text, or the sentinel string ``'error'`` when the
            OCR call fails or the response has no ``words_result``.
        """
        try:
            print("开始识别小图...")
            op = {'language_type': 'CHN_ENG', 'detect_direction': 'true'}
            res = self.WCLIENT.basicAccurate(
                self.getFile('code.jpeg'), options=op)
            print("9" * 10)
            # 'words_result' is the array defined by the Baidu API.
            print(res['words_result'])
            print("10" * 10)
            parts = []
            for item in res['words_result']:
                parts.append(item['words'])
                if item['words'].endswith('。'):
                    parts.append('\r\n')
            words = ''.join(parts)
            print('小图中的文字: ' + words)
            print("小图文字识别完成")
            return words
        except Exception as e:
            print(e)
            return 'error'

    # recognize_granularity='small' makes the API return a 'chars' array with
    # the location of every single character.
    def getPos(self, words):
        """Locate each character of ``words`` inside ``checkCode.jpeg``.

        Args:
            words: the prompt text; iterated character by character.

        Returns:
            A list of Baidu ``chars`` entries (``{'char', 'location'}``), in
            the order of ``words``, one per matching character found in the
            large image; or ``None`` when the OCR call or parsing fails.
        """
        try:
            print("开始识别大图...")
            op = {'language_type': 'CHN_ENG', 'recognize_granularity': 'small'}
            res = self.WCLIENT.accurate(
                self.getFile('checkCode.jpeg'), options=op)

            # Position info of every recognised character.
            allPosInfo = []
            print("#1" * 10)
            # When the daily quota is used up the service answers with e.g.
            # {'error_code': 17, 'error_msg': 'Open api daily request limit reached'}
            print(res)
            print(res['words_result'])
            print("#2" * 10)
            print("11" * 10)
            for item in res['words_result']:
                allPosInfo.extend(item['chars'])
                print(item['chars'])
                print("12" * 10)

            # Keep only the characters asked for, in prompt order.
            needPosInfo = []
            for word in words:
                for item in allPosInfo:
                    if word == item['char']:
                        needPosInfo.append(item)
                        print('大图中的文字: ' + item['char'])

            print(needPosInfo)
            print("13" * 10)
            print("大图识别完成...")
            return needPosInfo
        except Exception as e:
            print(e)
            return None

    # ActionChains simulates mouse operations (click, drag, ...), see
    # https://blog.csdn.net/huilan_same/article/details/52305176
    # https://blog.csdn.net/ccggaag/article/details/75717186
    def clickWords(self, wordsPosInfo):
        """Click every located character on the large image, in order.

        Args:
            wordsPosInfo: list of ``{'char', 'location'}`` dicts as returned
                by ``getPos``.
        """
        imgElement = self.driver.find_element_by_xpath(
            '//*[@id="sliderddnormal-choose"]/div[2]/div[3]/img')
        for info in wordsPosInfo:
            # Click 20px inside the reported top-left corner of the character.
            # NOTE(review): the offset origin of move_to_element_with_offset
            # differs between Selenium versions - confirm for the one in use.
            ActionChains(self.driver).move_to_element_with_offset(
                to_element=imgElement,
                xoffset=info['location']['left'] + 20,
                yoffset=info['location']['top'] + 20).click().perform()
            time.sleep(1)

    @staticmethod
    def _matches(text, posInfo):
        """Return True when every prompt character was located exactly once."""
        # posInfo must be checked for None *before* len() is applied to it.
        return (bool(text) and text != 'error' and posInfo is not None
                and len(text) == len(posInfo))

    def _recognize(self, tag):
        """Download, brighten and OCR both images; return (text, posInfo)."""
        self.downloadImg()
        print(tag * 10)
        self.chageImgLight()
        text = self.iTow()
        posInfo = self.getPos(text)
        return text, posInfo

    def pic_main(self, max_retries=10):
        """Solve the click-the-characters captcha if it is shown.

        The prompt text and the character positions are recognised; when the
        counts do not match, the captcha is refreshed and recognition is
        repeated, at most ``max_retries`` times.

        Args:
            max_retries: maximum number of captcha refreshes before giving up.
        """
        try:
            time.sleep(1)
            text, posInfo = self._recognize("14-0")

            print(type(text))
            print(type(posInfo))
            print(len(text))
            print(len(posInfo) if posInfo is not None else None)
            print("14" * 10)

            retries = 0
            while not self._matches(text, posInfo):
                if retries >= max_retries:
                    # Stop instead of burning OCR quota forever.
                    print("点选文字识别失败,已达到最大重试次数")
                    return
                retries += 1
                # Refresh the captcha picture.
                self.driver.find_elements_by_xpath(
                    '//*[@id="sliderddnormal-choose"]/div[2]/div[4]/div/a'
                )[0].click()
                time.sleep(2)
                text, posInfo = self._recognize("14-1")

            print('匹配成功,开始点击')
            self.clickWords(posInfo)
            # Submit the selection.
            self.driver.find_elements_by_xpath(
                '//*[@id="sliderddnormal-choose"]/div[2]/div[4]/a')[0].click()

            print("模拟点击完成,已提交...点选图片破解成功...")
        except Exception:
            # Best effort: the captcha elements are absent when no
            # click-captcha is shown.
            print("无点选文字点击图片")







    # 破解滑动,点选文字图片
def unlock(name, pwd):
    """Log in to Ctrip with ``name``/``pwd``, solving captchas on the way.

    Opens Chrome on the Ctrip login page, fills in the credentials, runs the
    slider solver and the click-captcha solver, submits the form and, when
    the page title shows the login did not go through, tries the
    click-captcha and the submit once more.

    Args:
        name: account name typed into the login form.
        pwd: password typed into the login form.
    """
    login_button_xpath = '//*[@class="form_btn form_btn--block"]'

    # chromedriver must be reachable (current folder, next to Chrome, or PATH).
    browser = webdriver.Chrome()
    browser.get(
        "https://passport.ctrip.com/user/login?BackUrl=https%3A%2F%2Fhotels.ctrip.com%2Fhotel%2F6278770.html%23ctm_ref%3Dhod_hp_hot_dl_n_2_7")
    browser.maximize_window()
    time.sleep(3)

    # Fill in account name and password.
    for field_id, value in (('nloginname', name), ('npwd', pwd)):
        field = browser.find_element_by_id(field_id)
        field.clear()
        field.send_keys(value)
    time.sleep(3)

    def submit_login():
        # Press the login button (id "nsubmit") and give the page time to react.
        print("3" * 10)
        browser.find_element_by_xpath(login_button_xpath).click()
        time.sleep(19)

    # A slider and/or a click-the-characters captcha may be shown now.
    solver = unlockScrapy(browser)
    solver.unlockScroll()
    solver.pic_main()
    submit_login()

    # On success the page title changes away from the login page title.
    if solver.driver.title == '携程在手,说走就走':
        print('破解失败,再次破解')
        solver.pic_main()
        submit_login()
    else:
        print('破解成功')
    time.sleep(9)



if __name__ == '__main__':
    # Ask for the credentials once, then run the whole login flow twice.
    # input() already returns str on Python 3, so no conversion is needed.
    name = input("请输入账号:")
    pwd = input('请输入密码:')
    for _attempt in range(2):
        unlock(name, pwd)

result

D:\ProgramData\Anaconda3\python.exe D:/py1805/badou/share_folder/datawhale_spider/datawhale05_xiecheng.py
请输入账号:66***
请输入密码:88***
5555555555
1111111111
2222222222
滑块破解成功

6666666666
7777777777
['data:image/jpg;base64', '/9j/4AAQSkZJRgABAgAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAoAHgDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDowlLsqYLS7a+j5j57lINlLsqfbS7aOcOUg2Unl1PdSaZp7QLqWt6ZYvMgkRLq5WNimSM4PuD+VUoPEvg+9mvYbfxPZJJaSiMtdSJEkvX5o23HcOOvvWLxVNOzZssNUaukTbKNlXCthHpcmqyanbNpqW73BuoCZkKK20429ec9PSrEdnA1guprJPNpjWX21LiKEjcmzeMBsckdjjrVfWqfclYap2Mvy6PLpukazoniJpYNHnu3uYrdbspcW/leZCxxuTk52nr9eOhq/FFERLJPN5UMMTzSPtLYVVLHgdeBTjiISi5J6ImVCcZKLWrKXl0vl1YheyvdNs9R0+6NzaXcZkjkMZQkBivQ89QaXZVRqqSuiJUnF2ZW8ujy6s7KazwxOqyyBWfO1R1bHPApVK8aceaT0HToSqS5YrUhEWBliFXuTwKKfC7P+9eOMDIAiLM3lnByCRxycc44orw6ucT5vcWh7VPKafL7+5LspdlSbaULXrcx5nKRhadtp+KXFHMHKZXjfU57RdJkTUPKtZJBZ2sVmLmKUysqlhI0dxEGGRxnpk+9cTBaXXho6vq0t9qaR3UnnzsHnTb16mO9DP16tuPvXb+JfDV14ltNAhtniWO21dZ7l3aPMcW0AsFkyG+hBB9KoeJ/Az6h4durfRJxc3LsImW4ttPtlRTn59ywBiOOikHmvJqQak7Hq05pxVzR8TyXt38Hb1H1OISrpwnHk2zZktjGu1JDIztuJIJYNkkfXPmU2oNotwyaktxrVqfD1kkUP2jyBbtJGgTAXhsA455I617Dc6XZ6t4ZtfDep6ncwW0MUCTGykIW42RbHjLY+4SAenavP0+E99b6dqFxC0Iv5bpZ4NHguQ0bRRtiLM7gnABLEZBIx0PRSg10HGafUk+CV/aXFyix6ZD5ljpwEt6JJPM8x5mUIRu242kkccc+tdDoutalq+ifEFNQvZbhbS+voLdXPEcaxSYUe1Z/gnwd4j8HeKFmOp6XPol1un1NolAYybciMKeSA/3SOME9M4rUtvC1zp48Qw2HiiCO21u5nnlSbSDI8fm5BCt5w6BsZxTjCVtEEpx6s58azqOgfs7adqGlXT2t2kcSrKgBIBuJAetegaoC+oysTycH9BXEa14H1qT4a
jwvp/iLTbq2t2QJHcWX2V3G8t/rTKw4JPG2us1e+36pPb2qrIAAHkPQMFDHDDtjofXFONdUG3ImVD26SRFcSeRAXVQ5wcKO5xkAY78VGZPOMMJtyLh0zvZPkTkbst1x6euKq6fNFJdTB/P8wPtUHAjDRscFfc7vxHapo7qe4llZ5ESOQbVVSuVXPU8kY4rzK+Kq1n7z0PQo4anRXurUZJDL9lENpcSbonYmaRt5fnDFsfeP3hg+ue1FR2DTWGmOZZ/LgnZpAvyZyxOQAAeoornsbmpxS0UV9UfOWQmaWiiglDJpkt4jLIcKPzNcjrurajLZSpFMlqGOzMS7zjGTz24HtRRXDXrTUkkzvw9GDTbQ7w14rTUkS1vCI70AAEkYmPqPQ+1dUr+tFFdkJNrU5asVGWhKDmld9ig4zyOP6/h1oorOtOUYtoKUFJq5j3l7JcT+QEYfvQNvGUUZxJ0yQSKSK5SQG8tXVYQ8gkiAJErhdpUkDOVxjjriiivAqycpXZ7tOKiuVbCyXkNre2lksRWSc4hEcbBThcfNxxUf2i8fVLa1CRR2q8OxQqAcdB2JzkY9xRRWRYRZ2xymGZ1y8qKynO8gqQRu784A4ooooA//2Q==']
14-014-014-014-014-014-014-014-014-014-0
开始识别小图...
8读取图片8读取图片
9999999999
[{'words': '青城山'}]
10101010101010101010
小图中的文字: 青城山
小图文字识别完成
开始识别大图...
8读取图片8读取图片
#1#1#1#1#1#1#1#1#1#1
{'log_id': 2128196181742769400, 'words_result_num': 2, 'words_result': [{'chars': [{'char': '城', 'location': {'width': 18, 'top': 134, 'left': 62, 'height': 30}}], 'location': {'width': 28, 'top': 134, 'left': 62, 'height': 30}, 'words': '城'}, {'chars': [{'char': '“', 'location': {'width': 47, 'top': 72, 'left': 138, 'height': 42}}], 'location': {'width': 152, 'top': 0, 'left': 82, 'height': 184}, 'words': '“'}]}
[{'chars': [{'char': '城', 'location': {'width': 18, 'top': 134, 'left': 62, 'height': 30}}], 'location': {'width': 28, 'top': 134, 'left': 62, 'height': 30}, 'words': '城'}, {'chars': [{'char': '“', 'location': {'width': 47, 'top': 72, 'left': 138, 'height': 42}}], 'location': {'width': 152, 'top': 0, 'left': 82, 'height': 184}, 'words': '“'}]
#2#2#2#2#2#2#2#2#2#2
11111111111111111111
[{'char': '城', 'location': {'width': 18, 'top': 134, 'left': 62, 'height': 30}}]
12121212121212121212
[{'char': '“', 'location': {'width': 47, 'top': 72, 'left': 138, 'height': 42}}]
12121212121212121212
大图中的文字: 城
[{'char': '城', 'location': {'width': 18, 'top': 134, 'left': 62, 'height': 30}}]
13131313131313131313
大图识别完成...
<class 'str'>
<class 'list'>
3
1
14141414141414141414

6666666666
7777777777
['data:image/jpg;base64', '/9j/4AAQSkZJRgABAgAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAoAHgDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDU2UmyrWyk2V7tz53lKuyk2Va2Umyi4cpW2Umytex003Fw0colXajSeWiZeTAztQHA3HtkiqFldWOu6RFrGkrKtq7mKWCXmS2lXqjfzHsah1YqXL1LVCThz9Ctso2VqNZQW2mfb725kgi80RBUtZZnJIJHyopODg89OKr6VeaJrWn3FzZTXiW6D93qN9EttayOGAMasxyW69cdKmWIpxdmyo4WpJcyWhS2UFK2LHShfzAW93Z3SIy+aLS6jlZVLAZwpPrWXeeI/Ctho+tX0kOptLpt7JZCAsoMzRsiswYKQo+cH5sUpYmnHqOGEqS6EWym7Ktt4g8Iya1FpkTXyrNpv21btgzIjEkBCoj3HkdRxU99YtYX01q7BmibaSOhqqdeFT4SauGnT1kZvl0myrWymlK15jDlK2yirGyii4uU1NlJsqxspNtY3OrlK+yhVwwPoanKUqhQ6lgSueQDjIouFir4q1i10mwn1yfV/FCfarxIreytr9R5jMfmWNdhwFXJx64GeajmtNL8N+J7DQ/7U8UWC6wjzpdG8jjje5J5jYhD85J5J6Er1zxYudFtZ/Fsfia5vvtaWkYTStNMW1bNv4nbkhjnkHuT/sim6ppFr4n0W60XUp/KSc+bDdEbjbzj7rj68g+oNee6LackreR6arpNRbv5mjrOtSaH4LlnXW9bs30xHUyy+RNLdSsfkjYyIcnPHAGB9K8GtNembwHBoN5dX8Km8S4t7do4zBcDzSHOdgcMp7FiDz06V7+0eialNoqaylxeHSHWSG5mbHmyhdpkkReCeARwTke5rnrbwjo8fw8t/C+o3z3TLdNcNcWcW0gb9wX5x0P9KmVKV9IlRrQtrIt6Zbxx/GXxzbQwMsclhaRFLZjEwDqikqV5UjJORXn1rfwaXaeK9Gs7b7fqd7rF7bxR3+or9njjUp+9kSVwrPnGGYZJHU4wfSLHS/DOlatrN7YWt7Cmq2620sCuqJGgGCUx8wbvnJ5rI1Lwho2oeGDpWj2+m6PIl5DMJZUaR3Vcltz4LMeR14pOjO17DVem3a5g+C7e88H6yYtL0yDV7a5gEIZdQs/t8LkfP5JSRiUz823t+BJ7PV4ok1GRYrmW4IPzySdd3fnPP1plxof2jxnoGuNrl
g0OltPvjWyNu7b12gjbkH15IxTNldOFjKLdzlxc4yStqVilJsqzspNntXZc4bFbZRVjZRRcXKamyk2e1FFYnQJspNlFFMQmyk2UUUwEKe1JsoooEJspClFFMLCbKTZRRSJE2Um2iigBNtFFFMD/2Q==']
14-114-114-114-114-114-114-114-114-114-1
开始识别小图...
8读取图片8读取图片
9999999999
[{'words': '六和塔'}]
10101010101010101010
小图中的文字: 六和塔
小图文字识别完成
开始识别大图...
8读取图片8读取图片
#1#1#1#1#1#1#1#1#1#1
{'log_id': 3303545943648964856, 'words_result_num': 4, 'words_result': [{'chars': [{'char': '水', 'location': {'width': 28, 'top': 14, 'left': 134, 'height': 17}}, {'char': '六', 'location': {'width': 28, 'top': 53, 'left': 134, 'height': 17}}], 'location': {'width': 28, 'top': 14, 'left': 134, 'height': 69}, 'words': '水六'}, {'chars': [{'char': '手', 'location': {'width': 21, 'top': 18, 'left': 199, 'height': 37}}], 'location': {'width': 32, 'top': 18, 'left': 188, 'height': 37}, 'words': '手'}, {'chars': [{'char': '塔', 'location': {'width': 38, 'top': 69, 'left': 17, 'height': 20}}, {'char': '和', 'location': {'width': 38, 'top': 115, 'left': 17, 'height': 20}}], 'location': {'width': 38, 'top': 69, 'left': 17, 'height': 82}, 'words': '塔和'}, {'chars': [{'char': '追', 'location': {'width': 20, 'top': 114, 'left': 239, 'height': 33}}], 'location': {'width': 29, 'top': 114, 'left': 239, 'height': 33}, 'words': '追'}]}
[{'chars': [{'char': '水', 'location': {'width': 28, 'top': 14, 'left': 134, 'height': 17}}, {'char': '六', 'location': {'width': 28, 'top': 53, 'left': 134, 'height': 17}}], 'location': {'width': 28, 'top': 14, 'left': 134, 'height': 69}, 'words': '水六'}, {'chars': [{'char': '手', 'location': {'width': 21, 'top': 18, 'left': 199, 'height': 37}}], 'location': {'width': 32, 'top': 18, 'left': 188, 'height': 37}, 'words': '手'}, {'chars': [{'char': '塔', 'location': {'width': 38, 'top': 69, 'left': 17, 'height': 20}}, {'char': '和', 'location': {'width': 38, 'top': 115, 'left': 17, 'height': 20}}], 'location': {'width': 38, 'top': 69, 'left': 17, 'height': 82}, 'words': '塔和'}, {'chars': [{'char': '追', 'location': {'width': 20, 'top': 114, 'left': 239, 'height': 33}}], 'location': {'width': 29, 'top': 114, 'left': 239, 'height': 33}, 'words': '追'}]
#2#2#2#2#2#2#2#2#2#2
11111111111111111111
[{'char': '水', 'location': {'width': 28, 'top': 14, 'left': 134, 'height': 17}}, {'char': '六', 'location': {'width': 28, 'top': 53, 'left': 134, 'height': 17}}]
12121212121212121212
[{'char': '手', 'location': {'width': 21, 'top': 18, 'left': 199, 'height': 37}}]
12121212121212121212
[{'char': '塔', 'location': {'width': 38, 'top': 69, 'left': 17, 'height': 20}}, {'char': '和', 'location': {'width': 38, 'top': 115, 'left': 17, 'height': 20}}]
12121212121212121212
[{'char': '追', 'location': {'width': 20, 'top': 114, 'left': 239, 'height': 33}}]
12121212121212121212
大图中的文字: 六
大图中的文字: 和
大图中的文字: 塔
[{'char': '六', 'location': {'width': 28, 'top': 53, 'left': 134, 'height': 17}}, {'char': '和', 'location': {'width': 38, 'top': 115, 'left': 17, 'height': 20}}, {'char': '塔', 'location': {'width': 38, 'top': 69, 'left': 17, 'height': 20}}]
13131313131313131313
大图识别完成...
匹配成功,开始点击
模拟点击完成,已提交...点选图片破解成功...
3333333333

code_datawhale06_bzhan_login.py

# encoding:utf-8
import random

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from PIL import Image
import time



"""
https://www.cnblogs.com/xumBlog/p/10992721.html
Selenium常见元素定位方法和操作的学习介绍

https://zhidao.baidu.com/question/1952564842626224588.html
python+selenium使用location定位元素坐标偏差

https://www.cppentry.com/bencandy.php?fid=77&id=195925
selenium登录 京东滑动验证码(一)

"""

# 获得大,小缺口图片
def get_image(driver, n, canvas, scale=1.25):
    """Screenshot the page and crop out the area covered by ``canvas``.

    The full screenshot is written to ``<n>.png`` and the cropped region to
    ``photo<n>.png``.

    Args:
        driver: Selenium WebDriver used to take the screenshot.
        n: string used to build the two file names.
        canvas: WebElement whose ``location`` and ``size`` define the crop box.
        scale: factor applied to the element coordinates before cropping.
            The default 1.25 matches a display scaled to 125%; pass 1 for an
            unscaled display.

    Returns:
        The cropped PIL image.
    """
    print(canvas)
    print("2" * 10)
    # Read the element geometry once; location is relative to the whole page.
    location = canvas.location
    size = canvas.size
    left = location['x']
    top = location['y']
    print(left)
    print(top)
    print(type(canvas))
    print("3" * 10)
    # Right and bottom edges of the element.
    right = left + size['width']
    bottom = top + size['height']

    screenshot_path = n + '.png'
    driver.save_screenshot(screenshot_path)
    picture = Image.open(screenshot_path)

    # crop() takes (left, upper, right, lower) with (0, 0) at the top-left.
    # Screenshot pixels are `scale` times the element coordinates on a scaled
    # display, so the whole box is scaled.
    picture = picture.crop(
        (left * scale, top * scale, right * scale, bottom * scale))
    picture.save('photo' + n + '.png')
    return picture


"""
https://blog.csdn.net/tashanhongye/article/details/49869487
imagesize()函数获取图片信息
"""
# 比较两张图片的异同处,返回差异出x方向的坐标
def get_space(picture1, picture2, start=60, threshold=60, edge_margin=6):
    """Return the x coordinate of the first column where two images differ.

    Columns are scanned left to right from ``start``; a pixel counts as
    different when any of its first three channels differs by ``threshold``
    or more.

    Args:
        picture1: image with ``size`` and ``load()`` (e.g. a PIL image).
        picture2: image of at least the same dimensions.
        start: first column to inspect; columns left of it are skipped.
        threshold: minimum per-channel difference that counts as a mismatch.
        edge_margin: subtracted from the last column index when no
            difference is found (the slider does not reach the very edge).

    Returns:
        The x coordinate of the first differing column, or the last column
        index minus ``edge_margin`` (never below 0) when none is found.
    """
    print("# 比较两站图片的异同...")
    width = picture1.size[0]   # index 0 is the image width in pixels
    height = picture1.size[1]  # index 1 is the image height in pixels
    # Load the pixel accessors once instead of once per pixel.
    pixels1 = picture1.load()
    pixels2 = picture2.load()

    for x in range(start, width):
        for y in range(height):
            rgb1 = pixels1[x, y]
            rgb2 = pixels2[x, y]
            # Only x is returned because the slider moves horizontally.
            if any(abs(rgb1[c] - rgb2[c]) >= threshold for c in range(3)):
                return x
    # Nothing differed (or nothing was scanned because width <= start).
    return max(width - 1 - edge_margin, 0)

# 模拟人工滑动
def get_tracks(space):
    """Split the slide distance into forward and backward mouse moves.

    The slider is moved forward in three equal steps covering ``space`` and
    then nudged back by a fixed 3 + 3 + 1.

    An earlier accelerate/decelerate simulation was removed: its result was
    overwritten before being returned, and its loop was not guaranteed to end
    because a step could jump over the 1-unit stop window.

    Args:
        space: total horizontal distance to slide.

    Returns:
        ``{'forward_tracks': [...], 'back_tracks': [...]}`` with the
        per-step x offsets.
    """
    print("#3" * 10)
    print(space)
    print("#4" * 10)

    step = space / 3
    forward_tracks = [step, step, step]
    back_tracks = [-3, -3, -1]

    return {'forward_tracks': forward_tracks, 'back_tracks': back_tracks}


def main(name, pwd):
    """Log in to bilibili with ``name``/``pwd`` and solve the slider captcha.

    Opens Chrome on the login page, fills in the credentials, presses the
    login button and calls ``slider_button`` until the page title becomes
    '消息'.

    Args:
        name: account name typed into the login form.
        pwd: password typed into the login form.
    """
    driver = webdriver.Chrome()
    login_url = 'https://passport.bilibili.com/login'
    driver.get(login_url)
    driver.maximize_window()
    time.sleep(1)

    # Fill in account name and password.
    input_name = driver.find_element_by_id('login-username')
    input_name.clear()
    input_name.send_keys(name)
    input_pass = driver.find_element_by_id('login-passwd')
    input_pass.clear()
    input_pass.send_keys(pwd)

    # Press the login button; this brings up the slider captcha.
    time.sleep(1)
    driver.find_element_by_xpath('//*[@id="geetest-wrap"]/ul/li[5]/a[1]').click()

    # Drag the slider so the puzzle piece closes the gap.
    slider_button(driver)

    # On success the page title changes to '消息'.
    # NOTE(review): the title is read right after the drag; the page may need
    # longer to navigate - confirm the wait is sufficient.
    while driver.title != '消息':
        print('破解失败,再次破解')
        # Refresh the captcha picture ("刷新验证" link).
        # TODO: the link has href="javascript:;" and clicking it was observed
        # to do nothing - still unresolved.
        driver.find_elements_by_xpath(
            '//*[@class="geetest_refresh_1"]')[0].click()
        print("刷新图片")
        time.sleep(2)
        # Drag the slider again.
        slider_button(driver)

    print("3" * 10)
    print('破解成功')
    time.sleep(19)
    # The mouse button is released inside slider_button(), so nothing is
    # left to release here.
    time.sleep(9)


# Drag the slider so that the puzzle piece closes the gap.
def slider_button(driver):
    """Measure the gap of the slider captcha and drag the slider onto it.

    Args:
        driver: Selenium WebDriver showing the captcha.
    """
    time.sleep(1)

    # 1. Hide the small puzzle piece via JS so the screenshot only shows the
    #    background with the gap.
    driver.execute_script('document.querySelectorAll("canvas")[2].style="opacity: 0; display: block;"')
    canvas_1 = driver.find_element_by_xpath('//canvas[@class="geetest_canvas_bg geetest_absolute"]')
    picture1 = get_image(driver, '1', canvas_1)
    print("4" * 10)
    print(picture1)
    print("5" * 10)

    # 2. Clear the inline style of the full-background canvas so the complete
    #    picture becomes visible - this is the key step.
    driver.execute_script('document.querySelectorAll("canvas")[3].style=""')
    print("6" * 10)
    time.sleep(1)

    # 3. Screenshot the complete picture (no gap).
    canvas_2 = driver.find_element_by_xpath('//canvas[@class="geetest_canvas_fullbg geetest_fade geetest_absolute"]')
    picture2 = get_image(driver, '2', canvas_2)

    # Make the small puzzle piece visible again.
    driver.execute_script('document.querySelectorAll("canvas")[2].style="opacity: 1; display: block;"')

    # 4. Compare the two screenshots pixel by pixel to find the gap offset.
    # NOTE(review): the offset is measured in screenshot pixels, which
    # get_image() scales by 1.25, while move_by_offset() below is given the
    # value unchanged - this may be why the slider overshoots; confirm.
    space = get_space(picture1, picture2)
    print(space)
    print("#7" * 10)

    # 5. Build the movement plan:
    #    {'forward_tracks': [...], 'back_tracks': [...]}
    tracks = get_tracks(space)

    # 6. Press and hold the slider button.
    button = driver.find_element_by_class_name('geetest_slider_button')
    ActionChains(driver).click_and_hold(button).perform()

    # Forward moves.
    print("#1" * 10)
    print(tracks['forward_tracks'])
    print(tracks['back_tracks'])
    print("#2" * 10)
    for track in tracks['forward_tracks']:
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()
    time.sleep(0.5)

    # Backward moves to settle on the gap; multiplied by 1.25 because the
    # plain values were observed to fall slightly short.
    for back_track in tracks['back_tracks']:
        ActionChains(driver).move_by_offset(xoffset=back_track * 1.25, yoffset=0).perform()

    # Let go of the slider: the captcha is only submitted on mouse-up, so the
    # caller's title check can never succeed while the button is still held.
    ActionChains(driver).release().perform()
    time.sleep(1)


if __name__ == '__main__':
    name = input('请输入账号:')
    pwd = input('请输入密码:')
    main(name, pwd)

result

    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.JavascriptException: Message: javascript error: Cannot set property 'style' of undefined
  (Session info: chrome=76.0.3809.100)
  不知道为何,滑块移动的距离总是超出缺口。

END

https://blog.csdn.net/chenxiao17301/article/details/82911155

https://blog.csdn.net/u012914436/article/details/100031810

https://blog.csdn.net/qq_38612964/article/details/79756492

Selenium + PhantomJS + python图片全屏截取+定位坐标+抠图+图片识别

https://blog.csdn.net/qq_38949193/article/details/81395787

https://blog.csdn.net/qq_31032181/article/details/78799433

https://blog.csdn.net/qq_43614355/article/details/90372449

你可能感兴趣的:(22_爬虫)