PS:本来想上传 pytesseract 包,但实在不知道怎么添加附件……
识别验证码时,由于每次请求登录页面验证码都会随机刷新,无法单独下载验证码图片,
因此使用 selenium 库对登录页面截图,再从截图中裁剪并识别验证码,最后填入登录表单。
上代码:
import pytesseract
from PIL import Image
from selenium import webdriver
import selenium
import time
def _extract_session_id(cookies):
    """Return the session cookie value from a list of Selenium cookie dicts."""
    # Prefer an explicit lookup by name: cookie order is not guaranteed.
    for cookie in cookies:
        if cookie.get('name') == 'JSESSIONID':
            return cookie.get('value')
    # Backward-compatible fallback: historically the second cookie was used.
    if len(cookies) > 1:
        return cookies[1].get('value')
    return None


def check_login(url, username, passwd, img_dir=r'D:\img', max_retries=3,
                code_box=(636, 403, 739, 437)):
    """Log in through Chrome, reading the captcha via OCR, and return the session id.

    Each attempt opens a fresh browser, screenshots the login page, crops the
    captcha region, recognises it with pytesseract, fills in the form and
    checks whether the post-login page element is present.

    :param url: initial login url
    :param username: user login name
    :param passwd: user login password
    :param img_dir: directory used for the temporary screenshot and crop
    :param max_retries: number of login attempts (OCR sometimes misreads)
    :param code_box: (left, upper, right, lower) pixel box of the captcha
        inside the screenshot; tied to the 1200x900 window size set below
    :return: value of the JSESSIONID cookie (or of the second cookie when no
        cookie with that name exists)
    :raises RuntimeError: if every attempt fails or no session cookie is found
    :raises OSError: if the screenshot cannot be written to ``img_dir``
    """
    import os  # local import: not part of the file's top-level imports

    os.makedirs(img_dir, exist_ok=True)
    screenshot_path = os.path.join(img_dir, 'checkcode.png')
    code_path = os.path.join(img_dir, 'code.png')

    for attempt in range(1, max_retries + 1):
        driver = webdriver.Chrome()
        try:
            driver.set_window_size(1200, 900)
            time.sleep(1)
            driver.get(url)
            time.sleep(1)

            # Save a screenshot of the whole page; the captcha changes on every
            # request, so it must be read from the page actually being shown.
            if not driver.get_screenshot_as_file(screenshot_path):
                raise OSError('could not save screenshot to %s' % screenshot_path)

            # Crop the captcha region and run OCR on it.
            with Image.open(screenshot_path) as img:
                codeshot = img.crop(code_box)
                codeshot.save(code_path)
                raw_code = pytesseract.image_to_string(codeshot)
            # Drop ALL whitespace: a trailing newline from the OCR output would
            # be typed as Enter and could submit the form prematurely.
            checkcode = ''.join(raw_code.split())
            print('The checkcode is:', checkcode)

            # Fill in the login form and submit it.
            driver.find_element('xpath', '//*[@id="userName"]').send_keys(username)
            driver.find_element('xpath', '//*[@id="password"]').send_keys(passwd)
            driver.find_element('xpath', '//*[@id="randCode"]').send_keys(checkcode)
            driver.find_element('xpath', '//*[@id="but_login"]').click()
            time.sleep(2)

            # The element below only exists after a successful login; if it is
            # missing, retry with a fresh browser and a fresh captcha.
            try:
                driver.find_element(
                    'xpath', '//*[@id="page-wrapper"]/div[2]/nav/div[1]/a')
            except selenium.common.exceptions.NoSuchElementException:
                print('failed to catch checkcode %s' % attempt)
                continue

            # Logged in: read the authentication cookie before closing.
            session = _extract_session_id(driver.get_cookies())
            if session is None:
                raise RuntimeError('login succeeded but no session cookie was found')
            print(session)
            return session
        finally:
            # Always release the browser, on success, retry or error.
            driver.quit()

    raise RuntimeError('login failed after %d attempts' % max_retries)
#cookie like this:JEECGINDEXSTYLE=hplus; JSESSIONID=B36E645AF1D0347B2B93FABB36145578; ZINDEXNUMBER=1990
if __name__ == '__main__':
    # Example invocation: placeholder URL and sample credentials; the image
    # directory is left at the function's default.
    check_login(
        url=r'http://xxxxxxxxx',
        username='admin',
        passwd='123456',
    )