携程滑块,点选文字识别
from selenium import webdriver
from selenium.webdriver import ActionChains
import time
import base64
from PIL import Image
from aip import AipOcr
"""
https://blog.csdn.net/weixin_41311624/article/details/86522119
利用Python3安装aip
https://www.jianshu.com/p/d59b47bc4812
利用Python3安装aip
https://blog.csdn.net/qq_38787214/article/details/87902291
Python3 super().__init__()测试及理解
https://www.cnblogs.com/duanwandao/p/9802795.html
https://blog.csdn.net/pythoncsdn111/article/details/96453839
https://blog.csdn.net/qq_42992919/article/details/98483845
https://blog.csdn.net/hhy1107786871/article/details/88342976
"""
# 破解携程反爬验证
class unlockScrapy(object):
    """Solve the Ctrip login captchas with Selenium and the Baidu OCR SDK.

    Two captchas are handled: the slider (``unlockScroll``) and the
    "click the characters in order" picture (``pic_main``).  For the latter,
    the small prompt image and the large picture are downloaded, brightened,
    sent to Baidu OCR, and the recognised characters are clicked in the
    order given by the prompt.
    """

    def __init__(self, driver,
                 app_id='17062614',
                 api_key='E15mYUgfBRVV3ohVVZZVcCCc',
                 secret_key='ClxgLmf2U0DwgX9mSvZG7v4zInrrCT92'):
        """Store the Selenium driver and build the Baidu OCR client.

        Args:
            driver: Selenium WebDriver already showing the login page.
            app_id: Baidu OCR application id.
            api_key: Baidu OCR API key.
            secret_key: Baidu OCR secret key.

        NOTE(review): the default credentials are committed to source.  They
        are kept only so existing callers continue to work; they should be
        rotated and supplied by the caller instead.
        """
        super(unlockScrapy, self).__init__()
        # Selenium driver
        self.driver = driver
        self.WAPPID = app_id
        self.WAPPKEY = api_key
        self.WSECRETKEY = secret_key
        # Baidu OCR SDK client
        self.WCLIENT = AipOcr(self.WAPPID, self.WAPPKEY, self.WSECRETKEY)
        print("5" * 10)

    # Author's notes on triggering the captchas: switching to QR-code login
    # and back makes the slider appear; after the slider, entering a wrong
    # password once and logging in again makes the click-the-characters
    # captcha appear.
    def unlockScroll(self):
        """Drag the slider captcha (class ``cpt-img-double-right-outer``).

        Any failure (typically: the slider element is absent) is reported as
        "no slider" and swallowed, so the caller can continue.
        """
        try:
            print("1" * 10)
            # Slider element
            scrollElement = self.driver.find_elements_by_class_name(
                'cpt-img-double-right-outer')[0]
            print("2" * 10)
            # NOTE(review): the mouse button is pressed here and never
            # explicitly released by this method.
            ActionChains(self.driver).click_and_hold(
                on_element=scrollElement).perform()
            for x_offset, y_offset in ((30, 10), (100, 20), (200, 50)):
                ActionChains(self.driver).move_to_element_with_offset(
                    to_element=scrollElement,
                    xoffset=x_offset, yoffset=y_offset).perform()
            print("滑块破解成功")
        except Exception:
            print("无滑块")

    @staticmethod
    def _save_data_uri(src, path):
        """Decode the base64 payload of a ``data:`` URI and write it to *path*."""
        with open(path, "wb") as fh:
            # urlsafe_b64decode also accepts the standard alphabet, so it
            # copes with either flavour of base64 in the src attribute.
            fh.write(base64.urlsafe_b64decode(src.split(',')[1]))

    def downloadImg(self):
        """Save the prompt image as ``code.jpeg`` and the picture as ``checkCode.jpeg``.

        Both ``<img>`` elements carry their content as base64 data URIs in
        the ``src`` attribute.
        """
        time.sleep(1)
        # src of the small prompt image
        codeSrc = self.driver.find_element_by_xpath(
            "//*[@id='sliderddnormal-choose']/div[2]/div[1]/img").get_attribute("src")
        print(codeSrc)
        print("6" * 10)
        # src of the large picture
        checkSrc = self.driver.find_element_by_xpath(
            "//*[@id='sliderddnormal-choose']/div[2]/div[3]/img").get_attribute("src")
        print("7" * 10)
        print(codeSrc.split(','))
        self._save_data_uri(codeSrc, "code.jpeg")
        self._save_data_uri(checkSrc, "checkCode.jpeg")

    def chageImgLight(self):
        """Multiply every pixel value of both saved images by 4, in place on disk.

        ``Image.point`` applies the function to each pixel value; the
        brightened images are meant to be easier for the OCR to read.
        """
        for path in ("code.jpeg", "checkCode.jpeg"):
            im = Image.open(path)
            im1 = im.point(lambda p: p * 4)
            im1.save(path)

    def getFile(self, filePath):
        """Return the raw bytes of the file at *filePath*."""
        with open(filePath, 'rb') as fp:
            print("8读取图片" * 2)
            return fp.read()

    def iTow(self):
        """Recognise the text of the small prompt image (``code.jpeg``).

        Returns:
            The concatenated ``words`` of every OCR result entry (entries
            ending in '。' get a trailing CRLF), or the string ``'error'``
            when the OCR call or the response parsing fails.
        """
        try:
            print("开始识别小图...")
            # language_type: recognition language; detect_direction: detect
            # the image orientation.
            op = {'language_type': 'CHN_ENG', 'detect_direction': 'true'}
            res = self.WCLIENT.basicAccurate(
                self.getFile('code.jpeg'), options=op)
            words = ''
            print("9" * 10)
            # 'words_result' is the result array defined by the Baidu API;
            # it is missing when the API returns an error payload.
            print(res['words_result'])
            print("10" * 10)
            for item in res['words_result']:
                if item['words'].endswith('。'):
                    words = words + item['words'] + '\r\n'
                else:
                    words = words + item['words']
            print('小图中的文字: ' + words)
            print("小图文字识别完成")
            return words
        except Exception:
            return 'error'

    def getPos(self, words):
        """Recognise the large picture and locate each character of *words*.

        Args:
            words: the prompt text; it is iterated character by character.

        Returns:
            A list of OCR ``chars`` entries (dicts with ``char`` and
            ``location``), ordered like *words*, or ``None`` when the OCR
            call or the response parsing fails (the exception is printed).
        """
        try:
            print("开始识别大图...")
            # recognize_granularity='small' makes the API return per-character
            # positions in item['chars'].
            op = {'language_type': 'CHN_ENG', 'recognize_granularity': 'small'}
            res = self.WCLIENT.accurate(
                self.getFile('checkCode.jpeg'), options=op)
            # Position info of every recognised character
            allPosInfo = []
            # Position info of the characters we need, in prompt order
            needPosInfo = []
            print("#1" * 10)
            # When the daily quota is exhausted the API answers with
            # {'error_code': 17, 'error_msg': 'Open api daily request limit reached'}
            print(res)
            print(res['words_result'])
            print("#2" * 10)
            print("11" * 10)
            for item in res['words_result']:
                allPosInfo.extend(item['chars'])
                print(item['chars'])
                print("12" * 10)
            # Keep only the characters asked for by the prompt
            for word in words:
                for item in allPosInfo:
                    if word == item['char']:
                        needPosInfo.append(item)
                        # NOTE(review): this delay does not guard any request;
                        # presumably pacing - confirm before removing.
                        time.sleep(1)
                        print('大图中的文字: ' + item['char'])
            print(needPosInfo)
            print("13" * 10)
            print("大图识别完成...")
            return needPosInfo
        except Exception as e:
            print(e)
            return None

    def clickWords(self, wordsPosInfo):
        """Click each located character on the large picture, in list order.

        Args:
            wordsPosInfo: list of OCR ``chars`` entries as returned by
                ``getPos``.
        """
        # Element of the large picture
        imgElement = self.driver.find_element_by_xpath(
            '//*[@id="sliderddnormal-choose"]/div[2]/div[3]/img')
        for info in wordsPosInfo:
            # Offsets are relative to the element; 20 px is added to the
            # character's left/top so the click lands inside its box.
            ActionChains(self.driver).move_to_element_with_offset(
                to_element=imgElement, xoffset=info['location']['left'] + 20,
                yoffset=info['location']['top'] + 20).click().perform()
            time.sleep(1)

    @staticmethod
    def _needs_retry(text, posInfo):
        """Return True when the OCR results cannot be used to solve the captcha."""
        if not text or text == 'error' or posInfo is None:
            return True
        return len(text) != len(posInfo)

    def _recognize(self, tag):
        """Download, brighten and OCR both images; return ``(text, posInfo)``."""
        self.downloadImg()
        print(tag * 10)
        self.chageImgLight()
        text = self.iTow()
        posInfo = self.getPos(text)
        return text, posInfo

    def pic_main(self, max_retries=10):
        """Solve the click-the-characters captcha if it is showing.

        The picture is refreshed and recognised again until every prompt
        character was located exactly once, at most *max_retries* times
        (each attempt costs two OCR requests).  Any exception, typically the
        captcha not being present, is reported and swallowed.

        Args:
            max_retries: maximum number of picture refreshes before giving up.
        """
        try:
            time.sleep(1)
            text, posInfo = self._recognize("14-0")
            print(type(text))
            print(type(posInfo))
            print(len(text))
            print(None if posInfo is None else len(posInfo))
            print("14" * 10)
            retries = 0
            # The None check must come before any len(posInfo): getPos
            # returns None when the OCR request fails.
            while self._needs_retry(text, posInfo):
                if retries >= max_retries:
                    print("点选文字识别失败,已达最大重试次数")
                    return
                retries += 1
                # Refresh the captcha picture
                self.driver.find_elements_by_xpath(
                    '//*[@id="sliderddnormal-choose"]/div[2]/div[4]/div/a')[0].click()
                time.sleep(2)
                text, posInfo = self._recognize("14-1")
            print('匹配成功,开始点击')
            # Click the characters in prompt order
            self.clickWords(posInfo)
            # Submit the selection
            self.driver.find_elements_by_xpath(
                '//*[@id="sliderddnormal-choose"]/div[2]/div[4]/a')[0].click()
            print("模拟点击完成,已提交...点选图片破解成功...")
        except Exception:
            print("无点选文字点击图片")
# 破解滑动,点选文字图片
def unlock(name, pwd):
    """Log in to Ctrip with *name*/*pwd*, solving any captcha that appears.

    Opens a Chrome window (chromedriver must be reachable, e.g. in the
    current folder or on PATH), fills in the login form, runs the slider and
    click-the-characters solvers, submits, and retries the picture captcha
    once if the page title shows the login was not accepted.  The browser is
    always closed when the function returns.

    Args:
        name: account name typed into the login form.
        pwd: password typed into the login form.
    """
    driver = webdriver.Chrome()
    try:
        # Login page
        login_url = "https://passport.ctrip.com/user/login?BackUrl=https%3A%2F%2Fhotels.ctrip.com%2Fhotel%2F6278770.html%23ctm_ref%3Dhod_hp_hot_dl_n_2_7"
        driver.get(login_url)
        driver.maximize_window()
        time.sleep(3)

        # Fill in account and password
        input_name = driver.find_element_by_id('nloginname')
        input_name.clear()
        input_name.send_keys(name)
        input_pass = driver.find_element_by_id('npwd')
        input_pass.clear()
        input_pass.send_keys(pwd)
        time.sleep(3)

        # A slider and/or a click-the-characters captcha may show up now
        solver = unlockScrapy(driver)
        solver.unlockScroll()
        solver.pic_main()

        def submit_and_wait():
            # Click the login button and give the page time to react
            print("3" * 10)
            driver.find_element_by_xpath(
                '//*[@class="form_btn form_btn--block"]').click()
            time.sleep(19)

        submit_and_wait()
        # The page title changes once the login went through
        if driver.title != '携程在手,说走就走':
            print('破解成功')
        else:
            # Try once more
            print('破解失败,再次破解')
            solver.pic_main()
            submit_and_wait()
        time.sleep(9)
    finally:
        # Always release the browser and its chromedriver process
        driver.quit()
if __name__ == '__main__':
    # Ask for the credentials once, then run the whole login flow twice
    name = input("请输入账号:")
    pwd = input('请输入密码:')
    for _ in (0, 1):
        unlock(name, pwd)
D:\ProgramData\Anaconda3\python.exe D:/py1805/badou/share_folder/datawhale_spider/datawhale05_xiecheng.py
请输入账号:66***
请输入密码:88***
5555555555
1111111111
2222222222
滑块破解成功

6666666666
7777777777
['data:image/jpg;base64', '/9j/4AAQSkZJRgABAgAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAoAHgDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDowlLsqYLS7a+j5j57lINlLsqfbS7aOcOUg2Unl1PdSaZp7QLqWt6ZYvMgkRLq5WNimSM4PuD+VUoPEvg+9mvYbfxPZJJaSiMtdSJEkvX5o23HcOOvvWLxVNOzZssNUaukTbKNlXCthHpcmqyanbNpqW73BuoCZkKK20429ec9PSrEdnA1guprJPNpjWX21LiKEjcmzeMBsckdjjrVfWqfclYap2Mvy6PLpukazoniJpYNHnu3uYrdbspcW/leZCxxuTk52nr9eOhq/FFERLJPN5UMMTzSPtLYVVLHgdeBTjiISi5J6ImVCcZKLWrKXl0vl1YheyvdNs9R0+6NzaXcZkjkMZQkBivQ89QaXZVRqqSuiJUnF2ZW8ujy6s7KazwxOqyyBWfO1R1bHPApVK8aceaT0HToSqS5YrUhEWBliFXuTwKKfC7P+9eOMDIAiLM3lnByCRxycc44orw6ucT5vcWh7VPKafL7+5LspdlSbaULXrcx5nKRhadtp+KXFHMHKZXjfU57RdJkTUPKtZJBZ2sVmLmKUysqlhI0dxEGGRxnpk+9cTBaXXho6vq0t9qaR3UnnzsHnTb16mO9DP16tuPvXb+JfDV14ltNAhtniWO21dZ7l3aPMcW0AsFkyG+hBB9KoeJ/Az6h4durfRJxc3LsImW4ttPtlRTn59ywBiOOikHmvJqQak7Hq05pxVzR8TyXt38Hb1H1OISrpwnHk2zZktjGu1JDIztuJIJYNkkfXPmU2oNotwyaktxrVqfD1kkUP2jyBbtJGgTAXhsA455I617Dc6XZ6t4ZtfDep6ncwW0MUCTGykIW42RbHjLY+4SAenavP0+E99b6dqFxC0Iv5bpZ4NHguQ0bRRtiLM7gnABLEZBIx0PRSg10HGafUk+CV/aXFyix6ZD5ljpwEt6JJPM8x5mUIRu242kkccc+tdDoutalq+ifEFNQvZbhbS+voLdXPEcaxSYUe1Z/gnwd4j8HeKFmOp6XPol1un1NolAYybciMKeSA/3SOME9M4rUtvC1zp48Qw2HiiCO21u5nnlSbSDI8fm5BCt5w6BsZxTjCVtEEpx6s58azqOgfs7adqGlXT2t2kcSrKgBIBuJAetegaoC+oysTycH9BXEa14H1qT4a
jwvp/iLTbq2t2QJHcWX2V3G8t/rTKw4JPG2us1e+36pPb2qrIAAHkPQMFDHDDtjofXFONdUG3ImVD26SRFcSeRAXVQ5wcKO5xkAY78VGZPOMMJtyLh0zvZPkTkbst1x6euKq6fNFJdTB/P8wPtUHAjDRscFfc7vxHapo7qe4llZ5ESOQbVVSuVXPU8kY4rzK+Kq1n7z0PQo4anRXurUZJDL9lENpcSbonYmaRt5fnDFsfeP3hg+ue1FR2DTWGmOZZ/LgnZpAvyZyxOQAAeoornsbmpxS0UV9UfOWQmaWiiglDJpkt4jLIcKPzNcjrurajLZSpFMlqGOzMS7zjGTz24HtRRXDXrTUkkzvw9GDTbQ7w14rTUkS1vCI70AAEkYmPqPQ+1dUr+tFFdkJNrU5asVGWhKDmld9ig4zyOP6/h1oorOtOUYtoKUFJq5j3l7JcT+QEYfvQNvGUUZxJ0yQSKSK5SQG8tXVYQ8gkiAJErhdpUkDOVxjjriiivAqycpXZ7tOKiuVbCyXkNre2lksRWSc4hEcbBThcfNxxUf2i8fVLa1CRR2q8OxQqAcdB2JzkY9xRRWRYRZ2xymGZ1y8qKynO8gqQRu784A4ooooA//2Q==']
14-014-014-014-014-014-014-014-014-014-0
开始识别小图...
8读取图片8读取图片
9999999999
[{'words': '青城山'}]
10101010101010101010
小图中的文字: 青城山
小图文字识别完成
开始识别大图...
8读取图片8读取图片
#1#1#1#1#1#1#1#1#1#1
{'log_id': 2128196181742769400, 'words_result_num': 2, 'words_result': [{'chars': [{'char': '城', 'location': {'width': 18, 'top': 134, 'left': 62, 'height': 30}}], 'location': {'width': 28, 'top': 134, 'left': 62, 'height': 30}, 'words': '城'}, {'chars': [{'char': '“', 'location': {'width': 47, 'top': 72, 'left': 138, 'height': 42}}], 'location': {'width': 152, 'top': 0, 'left': 82, 'height': 184}, 'words': '“'}]}
[{'chars': [{'char': '城', 'location': {'width': 18, 'top': 134, 'left': 62, 'height': 30}}], 'location': {'width': 28, 'top': 134, 'left': 62, 'height': 30}, 'words': '城'}, {'chars': [{'char': '“', 'location': {'width': 47, 'top': 72, 'left': 138, 'height': 42}}], 'location': {'width': 152, 'top': 0, 'left': 82, 'height': 184}, 'words': '“'}]
#2#2#2#2#2#2#2#2#2#2
11111111111111111111
[{'char': '城', 'location': {'width': 18, 'top': 134, 'left': 62, 'height': 30}}]
12121212121212121212
[{'char': '“', 'location': {'width': 47, 'top': 72, 'left': 138, 'height': 42}}]
12121212121212121212
大图中的文字: 城
[{'char': '城', 'location': {'width': 18, 'top': 134, 'left': 62, 'height': 30}}]
13131313131313131313
大图识别完成...
<class 'str'>
<class 'list'>
3
1
14141414141414141414

6666666666
7777777777
['data:image/jpg;base64', '/9j/4AAQSkZJRgABAgAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAoAHgDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDU2UmyrWyk2V7tz53lKuyk2Va2Umyi4cpW2Umytex003Fw0colXajSeWiZeTAztQHA3HtkiqFldWOu6RFrGkrKtq7mKWCXmS2lXqjfzHsah1YqXL1LVCThz9Ctso2VqNZQW2mfb725kgi80RBUtZZnJIJHyopODg89OKr6VeaJrWn3FzZTXiW6D93qN9EttayOGAMasxyW69cdKmWIpxdmyo4WpJcyWhS2UFK2LHShfzAW93Z3SIy+aLS6jlZVLAZwpPrWXeeI/Ctho+tX0kOptLpt7JZCAsoMzRsiswYKQo+cH5sUpYmnHqOGEqS6EWym7Ktt4g8Iya1FpkTXyrNpv21btgzIjEkBCoj3HkdRxU99YtYX01q7BmibaSOhqqdeFT4SauGnT1kZvl0myrWymlK15jDlK2yirGyii4uU1NlJsqxspNtY3OrlK+yhVwwPoanKUqhQ6lgSueQDjIouFir4q1i10mwn1yfV/FCfarxIreytr9R5jMfmWNdhwFXJx64GeajmtNL8N+J7DQ/7U8UWC6wjzpdG8jjje5J5jYhD85J5J6Er1zxYudFtZ/Fsfia5vvtaWkYTStNMW1bNv4nbkhjnkHuT/sim6ppFr4n0W60XUp/KSc+bDdEbjbzj7rj68g+oNee6LackreR6arpNRbv5mjrOtSaH4LlnXW9bs30xHUyy+RNLdSsfkjYyIcnPHAGB9K8GtNembwHBoN5dX8Km8S4t7do4zBcDzSHOdgcMp7FiDz06V7+0eialNoqaylxeHSHWSG5mbHmyhdpkkReCeARwTke5rnrbwjo8fw8t/C+o3z3TLdNcNcWcW0gb9wX5x0P9KmVKV9IlRrQtrIt6Zbxx/GXxzbQwMsclhaRFLZjEwDqikqV5UjJORXn1rfwaXaeK9Gs7b7fqd7rF7bxR3+or9njjUp+9kSVwrPnGGYZJHU4wfSLHS/DOlatrN7YWt7Cmq2620sCuqJGgGCUx8wbvnJ5rI1Lwho2oeGDpWj2+m6PIl5DMJZUaR3Vcltz4LMeR14pOjO17DVem3a5g+C7e88H6yYtL0yDV7a5gEIZdQs/t8LkfP5JSRiUz823t+BJ7PV4ok1GRYrmW4IPzySdd3fnPP1plxof2jxnoGuNrl
g0OltPvjWyNu7b12gjbkH15IxTNldOFjKLdzlxc4yStqVilJsqzspNntXZc4bFbZRVjZRRcXKamyk2e1FFYnQJspNlFFMQmyk2UUUwEKe1JsoooEJspClFFMLCbKTZRRSJE2Um2iigBNtFFFMD/2Q==']
14-114-114-114-114-114-114-114-114-114-1
开始识别小图...
8读取图片8读取图片
9999999999
[{'words': '六和塔'}]
10101010101010101010
小图中的文字: 六和塔
小图文字识别完成
开始识别大图...
8读取图片8读取图片
#1#1#1#1#1#1#1#1#1#1
{'log_id': 3303545943648964856, 'words_result_num': 4, 'words_result': [{'chars': [{'char': '水', 'location': {'width': 28, 'top': 14, 'left': 134, 'height': 17}}, {'char': '六', 'location': {'width': 28, 'top': 53, 'left': 134, 'height': 17}}], 'location': {'width': 28, 'top': 14, 'left': 134, 'height': 69}, 'words': '水六'}, {'chars': [{'char': '手', 'location': {'width': 21, 'top': 18, 'left': 199, 'height': 37}}], 'location': {'width': 32, 'top': 18, 'left': 188, 'height': 37}, 'words': '手'}, {'chars': [{'char': '塔', 'location': {'width': 38, 'top': 69, 'left': 17, 'height': 20}}, {'char': '和', 'location': {'width': 38, 'top': 115, 'left': 17, 'height': 20}}], 'location': {'width': 38, 'top': 69, 'left': 17, 'height': 82}, 'words': '塔和'}, {'chars': [{'char': '追', 'location': {'width': 20, 'top': 114, 'left': 239, 'height': 33}}], 'location': {'width': 29, 'top': 114, 'left': 239, 'height': 33}, 'words': '追'}]}
[{'chars': [{'char': '水', 'location': {'width': 28, 'top': 14, 'left': 134, 'height': 17}}, {'char': '六', 'location': {'width': 28, 'top': 53, 'left': 134, 'height': 17}}], 'location': {'width': 28, 'top': 14, 'left': 134, 'height': 69}, 'words': '水六'}, {'chars': [{'char': '手', 'location': {'width': 21, 'top': 18, 'left': 199, 'height': 37}}], 'location': {'width': 32, 'top': 18, 'left': 188, 'height': 37}, 'words': '手'}, {'chars': [{'char': '塔', 'location': {'width': 38, 'top': 69, 'left': 17, 'height': 20}}, {'char': '和', 'location': {'width': 38, 'top': 115, 'left': 17, 'height': 20}}], 'location': {'width': 38, 'top': 69, 'left': 17, 'height': 82}, 'words': '塔和'}, {'chars': [{'char': '追', 'location': {'width': 20, 'top': 114, 'left': 239, 'height': 33}}], 'location': {'width': 29, 'top': 114, 'left': 239, 'height': 33}, 'words': '追'}]
#2#2#2#2#2#2#2#2#2#2
11111111111111111111
[{'char': '水', 'location': {'width': 28, 'top': 14, 'left': 134, 'height': 17}}, {'char': '六', 'location': {'width': 28, 'top': 53, 'left': 134, 'height': 17}}]
12121212121212121212
[{'char': '手', 'location': {'width': 21, 'top': 18, 'left': 199, 'height': 37}}]
12121212121212121212
[{'char': '塔', 'location': {'width': 38, 'top': 69, 'left': 17, 'height': 20}}, {'char': '和', 'location': {'width': 38, 'top': 115, 'left': 17, 'height': 20}}]
12121212121212121212
[{'char': '追', 'location': {'width': 20, 'top': 114, 'left': 239, 'height': 33}}]
12121212121212121212
大图中的文字: 六
大图中的文字: 和
大图中的文字: 塔
[{'char': '六', 'location': {'width': 28, 'top': 53, 'left': 134, 'height': 17}}, {'char': '和', 'location': {'width': 38, 'top': 115, 'left': 17, 'height': 20}}, {'char': '塔', 'location': {'width': 38, 'top': 69, 'left': 17, 'height': 20}}]
13131313131313131313
大图识别完成...
匹配成功,开始点击
模拟点击完成,已提交...点选图片破解成功...
3333333333
# encoding:utf-8
import random
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from PIL import Image
import time
"""
https://www.cnblogs.com/xumBlog/p/10992721.html
Selenium常见元素定位方法和操作的学习介绍
https://zhidao.baidu.com/question/1952564842626224588.html
python+selenium使用location定位元素坐标偏差
https://www.cppentry.com/bencandy.php?fid=77&id=195925
selenium登录 京东滑动验证码(一)
"""
# 获得大,小缺口图片
def get_image(driver, n, canvas, scale=1.25):
    """Screenshot the page and crop it to *canvas*.

    The full screenshot is saved as ``<n>.png`` and the cropped element as
    ``photo<n>.png``.

    Args:
        driver: Selenium WebDriver showing the page.
        n: string used to build the two output file names.
        canvas: the WebElement to cut out of the screenshot.
        scale: ratio between screenshot pixels and the page coordinates
            reported by ``canvas.location``/``canvas.size``.  The default
            1.25 matches the display scaling of the original author's
            machine; pass 1 on an unscaled display.

    Returns:
        The cropped PIL image.
    """
    print(canvas)
    print("2" * 10)
    # Read the element geometry once; each access is a round trip to the
    # browser.  location is the element's top-left corner within the page.
    location = canvas.location
    size = canvas.size
    left = location['x']
    top = location['y']
    print(left)
    print(top)
    print(type(canvas))
    print("3" * 10)
    # Bottom-right corner of the element
    right = left + size['width']
    bottom = top + size['height']
    driver.save_screenshot(n + '.png')
    picture = Image.open(n + '.png')
    # crop takes (left, upper, right, lower) with (0, 0) at the top-left;
    # the page coordinates are converted to screenshot pixels first.
    picture = picture.crop(
        (left * scale, top * scale, right * scale, bottom * scale))
    picture.save('photo' + n + '.png')
    return picture
"""
https://blog.csdn.net/tashanhongye/article/details/49869487
imagesize()函数获取图片信息
"""
# 比较两张图片的异同处,返回差异处x方向的坐标
def get_space(picture1, picture2):
    """Return the x coordinate of the first column where the pictures differ.

    Columns are scanned left to right starting at x = 60, each column top to
    bottom.  Two pixels differ when any of their first three channels differ
    by 60 or more.

    Args:
        picture1: image exposing ``size`` (width, height) and ``load()``;
            its size bounds the scan.
        picture2: image of at least the same size.

    Returns:
        The x of the first differing column.  If no column differs, the last
        column index minus 6 (the original author's measured slider margin).
        If the picture is not wider than the start column, 0.
    """
    print("# 比较两站图片的异同...")
    start = 60
    threhold = 60  # per-channel difference that counts as "different"
    width, height = picture1.size
    # Fetch the pixel accessors once instead of once per pixel
    pixels1 = picture1.load()
    pixels2 = picture2.load()
    for i in range(start, width):
        for j in range(height):
            rgb1 = pixels1[i, j]
            rgb2 = pixels2[i, j]
            # Only x is returned because the slider moves horizontally
            if any(abs(rgb1[c] - rgb2[c]) >= threhold for c in range(3)):
                return i
    if width <= start:
        # Nothing was scanned; there is no last column to fall back on
        return 0
    return width - 1 - 6
# 模拟人工滑动
def get_tracks(space):
    """Build the list of horizontal moves used to drag the slider.

    The forward movement is split into three equal steps of ``space / 3``;
    the backward movement is the fixed correction ``[-3, -3, -1]``.

    An earlier accelerate/decelerate simulation used to run here, but its
    result was always overwritten by the three equal steps and its loop was
    not guaranteed to terminate, so it has been removed.

    Args:
        space: horizontal distance to the gap.

    Returns:
        ``{'forward_tracks': [...], 'back_tracks': [...]}``.
    """
    print("#3" * 10)
    print(space)
    print("#4" * 10)
    step = space / 3
    forward_tracks = [step, step, step]
    back_tracks = [-3, -3, -1]
    return {'forward_tracks': forward_tracks, 'back_tracks': back_tracks}
def main(name, pwd):
    """Log in to Bilibili with *name*/*pwd*, solving the slider captcha.

    Fills in the login form, clicks the login link, drags the slider, and
    keeps refreshing and retrying the captcha until the page title becomes
    '消息'.  The browser is always closed when the function returns.

    Args:
        name: account name typed into the login form.
        pwd: password typed into the login form.
    """
    driver = webdriver.Chrome()
    try:
        login_url = 'https://passport.bilibili.com/login'
        driver.get(login_url)
        driver.maximize_window()
        time.sleep(1)

        # Fill in account and password
        input_name = driver.find_element_by_id('login-username')
        input_name.clear()
        input_name.send_keys(name)
        input_pass = driver.find_element_by_id('login-passwd')
        input_pass.clear()
        input_pass.send_keys(pwd)

        # Click login
        time.sleep(1)
        driver.find_element_by_xpath(
            '//*[@id="geetest-wrap"]/ul/li[5]/a[1]').click()
        # Drag the slider to close the gap
        slider_button(driver)

        # The page title changes to '消息' once the login went through
        while driver.title != '消息':
            print('破解失败,再次破解')
            # Refresh the captcha picture.
            # NOTE(review): the original author noted this link is a
            # "javascript:;" href whose click had no effect - unresolved.
            driver.find_elements_by_xpath(
                '//*[@class="geetest_refresh_1"]')[0].click()
            print("刷新图片")
            time.sleep(2)
            slider_button(driver)
        print("3" * 10)
        print('破解成功')
        time.sleep(19)
        # Release any mouse button still held down by the drag
        ActionChains(driver).release().perform()
        time.sleep(9)
    finally:
        # Always release the browser and its chromedriver process
        driver.quit()
# 点击滑块使图形闭合
def slider_button(driver, timeout=10):
    """Drag the geetest slider so that the puzzle piece closes the gap.

    Steps: hide the third canvas and screenshot the background with the gap;
    reveal the fourth canvas and screenshot the full background; diff the two
    screenshots to get the gap position; drag the slider button forward and
    then slightly back.

    Args:
        driver: Selenium WebDriver showing the captcha.
        timeout: seconds to wait for the captcha canvases to be in the DOM.

    Raises:
        selenium.common.exceptions.TimeoutException: if fewer than four
            canvas elements exist after *timeout* seconds.
    """
    time.sleep(1)
    # The scripts below address canvases by index 2 and 3.  Running them
    # before the captcha has rendered fails with "Cannot set property
    # 'style' of undefined", so wait until at least four canvases exist.
    WebDriverWait(driver, timeout).until(
        lambda d: d.execute_script(
            'return document.querySelectorAll("canvas").length') > 3)

    # 1. Hide the small piece via JS so it does not appear in the screenshot
    driver.execute_script('document.querySelectorAll("canvas")[2].style="opacity: 0; display: block;"')
    # Screenshot of the background with the gap
    canvas_1 = driver.find_element_by_xpath('//canvas[@class="geetest_canvas_bg geetest_absolute"]')
    picture1 = get_image(driver, '1', canvas_1)
    print("4" * 10)
    print(picture1)
    print("5" * 10)

    # 2. Clear the inline style so the complete background becomes visible.
    #    This is the key step.
    driver.execute_script('document.querySelectorAll("canvas")[3].style=""')
    print("6" * 10)
    time.sleep(1)

    # 3. Screenshot of the background without the gap
    canvas_2 = driver.find_element_by_xpath('//canvas[@class="geetest_canvas_fullbg geetest_fade geetest_absolute"]')
    picture2 = get_image(driver, '2', canvas_2)
    # Make the small piece visible again
    driver.execute_script('document.querySelectorAll("canvas")[2].style="opacity: 1; display: block;"')

    # 4. Compare the two screenshots to find the gap position.
    # NOTE(review): get_space measures in screenshot pixels while
    # move_by_offset moves in page pixels; with a scaled screenshot this may
    # explain the overshoot the original author observed - confirm.
    space = get_space(picture1, picture2)
    print(space)
    print("#7" * 10)

    # 5. Build the movement lists
    tracks = get_tracks(space)

    # 6. Press and hold the slider button
    button = driver.find_element_by_class_name('geetest_slider_button')
    ActionChains(driver).click_and_hold(button).perform()
    print("#1" * 10)
    print(tracks['forward_tracks'])
    print(tracks['back_tracks'])
    print("#2" * 10)
    # Forward movement
    for track in tracks['forward_tracks']:
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()
    time.sleep(0.5)
    # Backward correction; the 1.25 factor is the author's empirical
    # adjustment for the display scaling.
    for back_track in tracks['back_tracks']:
        ActionChains(driver).move_by_offset(xoffset=back_track * 1.25, yoffset=0).perform()
    time.sleep(1)
if __name__ == '__main__':
    # Ask for the credentials, then run the login flow once
    name = input('请输入账号:')
    pwd = input('请输入密码:')
    main(name, pwd)
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.JavascriptException: Message: javascript error: Cannot set property 'style' of undefined
(Session info: chrome=76.0.3809.100)
不知道为何,滑块移动的距离总是超出缺口。
https://blog.csdn.net/chenxiao17301/article/details/82911155
https://blog.csdn.net/u012914436/article/details/100031810
https://blog.csdn.net/qq_38612964/article/details/79756492
Selenium + PhantomJS + python图片全屏截取+定位坐标+抠图+图片识别
https://blog.csdn.net/qq_38949193/article/details/81395787
https://blog.csdn.net/qq_31032181/article/details/78799433
https://blog.csdn.net/qq_43614355/article/details/90372449