我们可以借助插件来做
打开插件,找到自己需要的验证码
筛选有用的路径
把对应的视图函数也拿过来,注意还需要一个geetest.py的文件
具体实现
urls
# Slide captcha (Geetest) routes: the first serves the captcha initialisation
# data, the second performs the secondary validation together with the login.
# These entries belong inside the project's `urlpatterns` list.
url(r'^pc-geetest/register', pcgetcaptcha, name='pcgetcaptcha'),
url(r'^pc-geetest/ajax_validate', pcajax_validate, name='pcajax_validate'),
views
import json

from app01.geetest import GeetestLib

# NOTE(review): HttpResponse, render and auth are used below but are not
# imported in this snippet; presumably views.py already imports them from
# django.http, django.shortcuts and django.contrib respectively -- confirm.

# Geetest id/key pairs for the PC and the mobile captcha.
# NOTE(review): credentials are hard-coded here; presumably they should be
# loaded from settings or the environment in a real deployment -- confirm.
pc_geetest_id = "b46d1900d0a894591916ea94ea91bd2c"
pc_geetest_key = "36fc3fe98530eea08dfc6ce76e3d24c4"
mobile_geetest_id = "7c25da6fe21944cfe507d2f9876775a9"
mobile_geetest_key = "f5883f4ee3bd4fa8caec67941de1b903"


def pcgetcaptcha(request):
    """Initialise the slide captcha and return its registration payload.

    Runs ``GeetestLib.pre_process`` for a fixed user id, stores the resulting
    status and the user id in the session (they are read back by
    ``pcajax_validate``) and returns ``gt.get_response_str()`` as the body.
    """
    user_id = 'test'
    gt = GeetestLib(pc_geetest_id, pc_geetest_key)
    status = gt.pre_process(user_id)
    request.session[gt.GT_STATUS_SESSION_KEY] = status
    request.session["user_id"] = user_id
    response_str = gt.get_response_str()
    return HttpResponse(response_str)


def pcajax_validate(request):
    """Validate the slide captcha and, if it passes, log the user in.

    GET (or any non-POST) renders ``login.html``.  POST returns a JSON object
    ``{"flag": bool, "error_msg": str | None}``: ``flag`` is True only when
    both the captcha and the username/password check succeed.
    """
    if request.method != "POST":
        return render(request, "login.html")

    ret = {"flag": False, "error_msg": None}
    gt = GeetestLib(pc_geetest_id, pc_geetest_key)
    challenge = request.POST.get(gt.FN_CHALLENGE, '')
    validate = request.POST.get(gt.FN_VALIDATE, '')
    seccode = request.POST.get(gt.FN_SECCODE, '')

    # Use .get(): a POST that arrives without a prior call to pcgetcaptcha has
    # no captcha state in the session and must be rejected, not crash with a
    # KeyError (HTTP 500).
    status = request.session.get(gt.GT_STATUS_SESSION_KEY)
    user_id = request.session.get("user_id")
    print("status", status)

    if status is None or user_id is None:
        # No captcha was registered for this session: treat as a failed
        # captcha rather than falling through to the failback check.
        result = False
    elif status:
        result = gt.success_validate(challenge, validate, seccode, user_id)
    else:
        result = gt.failback_validate(challenge, validate, seccode)

    if result:
        # Captcha passed: now check the username and password.
        username = request.POST.get("username")
        password = request.POST.get("password")
        user = auth.authenticate(username=username, password=password)
        if user:
            # Credentials are valid: log the user in.
            ret["flag"] = True
            auth.login(request, user)
        else:
            ret["error_msg"] = "用户名和密码错误"
    else:
        ret["error_msg"] = "验证码错误"
    return HttpResponse(json.dumps(ret))
login.html
Title滑动验证码的时候导入{% csrf_token %}用户名密码{# 文字部分#}验证码{# 图片部分#}{##}下次自动登录
登录
注册
{#滑动验证码#} var handlerPopup = function (captchaObj) {$("#submit").click(function () {
captchaObj.show();
});
//定时函数
$(".login").click(function () {
function foo() {
$(".error").html("")
}
// 成功的回调
captchaObj.onSuccess(function () {
var validate = captchaObj.getValidate();
$.ajax({
url: "/pc-geetest/ajax_validate", // 进行二次验证
type: "post",
dataType: "json",
headers: {"X-CSRFToken": $.cookie('csrftoken')},
data: {
username: $('#username').val(),
password: $('#password').val(),
geetest_challenge: validate.geetest_challenge,
geetest_validate: validate.geetest_validate,
geetest_seccode: validate.geetest_seccode
},
success: function (data) {
console.log(data);
if (data["flag"]) {
{# alert(location.search);#}
{# alert(location.search.slice(6));#}
{# 方式一#}
{# if (location.search.slice(6)) {#}
{# 如果用户没有登录点赞的时候,当用户后来又登录了,就直接让跳转到当前点赞的那个路径#}
{# location.href = location.search.slice(6)#}
{# }#}
{# else {#}
{# window.location.href = '/index/'#}
{# }#}
{# 方式二:#}
alert($.cookie("next_path"));
if ($.cookie("next_path")){
location.href = $.cookie("next_path")
}
else{
location.href = "/index/"
}
}
else {
$(".error").html(data["error_msg"]);
setTimeout(foo, 3000)
}
}
});
});
});
// 将验证码加到id为captcha的元素里
captchaObj.appendTo("#popup-captcha");
// 更多接口参考:http://www.geetest.com/install/sections/idx-client-sdk.html
};
// 验证开始需要向网站主后台获取id,challenge,success(是否启用failback)
$.ajax({
url: "/pc-geetest/register?t=" + (new Date()).getTime(), // 加随机数防止缓存
type: "get",
dataType: "json",
success: function (data) {
// 使用initGeetest接口
// 参数1:配置参数
// 参数2:回调,回调的第一个参数验证码对象,之后可以使用它做appendTo之类的事件
initGeetest({
gt: data.gt,
challenge: data.challenge,
product: "popup", // 产品形式,包括:float,embed,popup。注意只对PC版验证码有效
offline: !data.success // 表示用户后台检测极验服务器是否宕机,一般不需要关注
// 更多配置参数请参见:http://www.geetest.com/install/sections/idx-client-sdk.html#config
}, handlerPopup);
}
});login.html
爬虫
破解极验滑动验证码
一些网站会在正常的账号密码认证之外再加上一些验证码,以此来明确地区分人与机器的行为,从一定程度上达到反爬的效果。对于简单的图形验证码,用tesserocr就可以搞定,如下:
图片.png
但一些网站加入了滑动验证码,
图片.png
对于这类验证,如果我们直接模拟表单请求,繁琐的认证参数与认证流程会特别麻烦。我们可以用selenium驱动浏览器来解决这个问题,大致分为以下几步:
#1、输入账号、密码,然后点击登陆
#2、点击按钮,弹出没有缺口的图
#3、针对没有缺口的图片进行截图
#4、点击滑动按钮,弹出有缺口的图
#5、针对有缺口的图片进行截图
#6、对比两张图片,找出缺口,即滑动的位移
#7、按照人的行为习惯,把总位移切成一段段小的位移
#8、按照位移移动
#9、完成登录
实现
安装:selenium + chrome/phantomjs
安装:Pillow
Pillow:基于PIL,处理python3.x的图形图像库。因为PIL只能处理到python2.x,而这个模块能处理Python3.x,目前用它做图形处理的很多。
http://www.cnblogs.com/apexchu/p/4231041.html

C:\Users\Administrator>pip3 install pillow
C:\Users\Administrator>python3
Python 3.6.1 (v3.6.1:69c0db5, Mar 21 2017, 18:41:36) [MSC v.1900 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> from PIL import Image
>>>
view.code
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from PIL import Image
import time


def get_snap():
    """Screenshot the whole page and open the screenshot with PIL.

    Uses the module-level ``driver`` created in the script section below and
    writes the screenshot to ``snap.png``.

    :return: PIL image object of the full-page screenshot
    """
    driver.save_screenshot('snap.png')
    page_snap_obj = Image.open('snap.png')
    return page_snap_obj


def get_image():
    """Crop the captcha picture out of a full-page screenshot.

    Uses the module-level ``wait`` created in the script section below.

    :return: PIL image object containing only the captcha picture
    """
    img = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_canvas_img')))
    time.sleep(2)  # make sure the picture has finished refreshing
    location = img.location
    size = img.size
    top = location['y']
    bottom = location['y'] + size['height']
    left = location['x']
    right = location['x'] + size['width']
    page_snap_obj = get_snap()
    crop_imag_obj = page_snap_obj.crop((left, top, right, bottom))
    return crop_imag_obj


def get_distance(image1, image2):
    """Return the distance the slider has to travel.

    Scans column by column, starting at x = 57, and stops at the first pixel
    whose R, G or B value differs by 60 or more between the two pictures.

    :param image1: picture without the gap
    :param image2: picture with the gap
    :return: x of the first differing column minus 7; if no pixel differs,
        the last column minus 7
    """
    threshold = 60
    left = 57  # NOTE(review): presumably skips the slider piece on the left edge -- confirm
    error = 7  # per the original author: measured error is roughly 7 px
    # Load the pixel-access objects once instead of once per pixel.
    pixels1 = image1.load()
    pixels2 = image2.load()
    width, height = image1.size
    for i in range(left, width):
        for j in range(height):
            rgb1 = pixels1[i, j]
            rgb2 = pixels2[i, j]
            res1 = abs(rgb1[0] - rgb2[0])
            res2 = abs(rgb1[1] - rgb2[1])
            res3 = abs(rgb1[2] - rgb2[2])
            if not (res1 < threshold and res2 < threshold and res3 < threshold):
                return i - error
    return width - 1 - error


def get_tracks(distance):
    """Split the total distance into per-step offsets.

    The movement accelerates uniformly, then decelerates uniformly.
    Uniformly accelerated motion:
        (1) v = v0 + a*t
        (2) s = v0*t + 1/2*a*t^2
        (3) v^2 - v0^2 = 2*a*s

    :param distance: total distance to move
    :return: list of rounded offsets, one per 0.3 s time step
    """
    # current velocity (starts at rest)
    v = 0
    # length of one time step; each track entry is the offset covered in it
    t = 0.3
    # list of offsets, one element per time step
    tracks = []
    # distance covered so far
    current = 0
    # start decelerating once this point is reached
    mid = distance * 4 / 5
    while current < distance:
        if current < mid:
            # the smaller the acceleration, the smaller each step and the
            # more (finer-grained) steps are produced
            a = 2
        else:
            a = -3
        # velocity at the start of this step
        v0 = v
        # offset covered during this step
        s = v0 * t + 0.5 * a * (t ** 2)
        current += s
        tracks.append(round(s))
        # velocity reached at the end of the step, used as v0 next time
        v = v0 + a * t
    return tracks


# Create the driver before the try block so that `finally` never refers to an
# undefined name when starting the browser itself fails.
driver = webdriver.Chrome()
try:
    driver.get('https://account.geetest.com/login')
    wait = WebDriverWait(driver, 10)

    # Step 1: click the button so the picture without the gap pops up
    button = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_radar_tip')))
    button.click()

    # Step 2: grab the picture without the gap
    image1 = get_image()

    # Step 3: click the slider button so the picture with the gap is shown
    button = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_slider_button')))
    button.click()

    # Step 4: grab the picture with the gap
    image2 = get_image()

    # Step 5: compare the RGB pixels of both pictures; the x of the first
    # differing pixel gives the distance to move
    distance = get_distance(image1, image2)

    # Step 6: split the total distance into small steps (accelerate first,
    # then decelerate)
    tracks = get_tracks(distance)
    print(tracks)
    print(image1.size)
    print(distance, sum(tracks))

    # Step 7: drag along the tracks to complete the captcha
    button = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_slider_button')))
    ActionChains(driver).click_and_hold(button).perform()
    for track in tracks:
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()
    # The original used `for ... else` without any `break`, so these steps
    # always ran after the loop; they are written sequentially here.
    ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()   # overshoot slightly
    ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()  # then move back
    time.sleep(0.5)  # release the mouse after 0.5 s
    ActionChains(driver).release().perform()

    # Step 8: complete the login
    # find_element(By.ID, ...) replaces find_element_by_id, which was removed
    # in Selenium 4.3; this form works on both Selenium 3 and 4.
    input_email = driver.find_element(By.ID, 'email')
    input_password = driver.find_element(By.ID, 'password')
    button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'login-btn')))

    input_email.send_keys('[email protected]')
    input_password.send_keys('linhaifeng123')
    # Alternative: button.send_keys(Keys.ENTER)
    button.click()

    time.sleep(200)
finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window.
    driver.quit()
案例:
破解博客园后台登录
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from PIL import Image
import time


def get_snap():
    """Screenshot the whole page to ``full_snap.png`` and open it with PIL.

    Uses the module-level ``driver`` created in the script section below.
    """
    driver.save_screenshot('full_snap.png')
    page_snap_obj = Image.open('full_snap.png')
    return page_snap_obj


def get_image():
    """Return the captcha picture cropped out of a full-page screenshot.

    Uses the module-level ``driver`` created in the script section below.
    """
    # find_element(By.CLASS_NAME, ...) replaces find_element_by_class_name,
    # which was removed in Selenium 4.3; this form works on Selenium 3 and 4.
    img = driver.find_element(By.CLASS_NAME, 'geetest_canvas_img')
    time.sleep(2)  # wait before reading the element's position and size
    location = img.location
    size = img.size

    left = location['x']
    top = location['y']
    right = left + size['width']
    bottom = top + size['height']

    page_snap_obj = get_snap()
    image_obj = page_snap_obj.crop((left, top, right, bottom))
    return image_obj


def get_distance(image1, image2):
    """Return the x offset of the gap, minus a fixed correction of 7.

    Scans column by column, starting at x = 57, and stops at the first pixel
    whose R, G or B value differs by 60 or more between the two pictures.
    If no pixel differs, the last column minus 7 is returned.

    :param image1: picture without the gap
    :param image2: picture with the gap
    """
    start = 57  # NOTE(review): presumably skips the slider piece on the left edge -- confirm
    threshold = 60
    correction = 7
    # Load the pixel-access objects once instead of once per pixel.
    pixels1 = image1.load()
    pixels2 = image2.load()
    width, height = image1.size
    for i in range(start, width):
        for j in range(height):
            rgb1 = pixels1[i, j]
            rgb2 = pixels2[i, j]
            res1 = abs(rgb1[0] - rgb2[0])
            res2 = abs(rgb1[1] - rgb2[1])
            res3 = abs(rgb1[2] - rgb2[2])
            if not (res1 < threshold and res2 < threshold and res3 < threshold):
                return i - correction
    return width - 1 - correction


def get_tracks(distance):
    """Build the forward and backward drag offsets for ``distance``.

    The forward movement deliberately targets ``distance + 20``: it
    accelerates (a = 2) for the first 3/5 of the way and then decelerates
    (a = -3), one entry per 0.2 s time step.  The backward offsets are fixed.

    :param distance: distance reported by ``get_distance``
    :return: ``{'forward_tracks': [...], 'back_tracks': [...]}``
    """
    distance += 20  # overshoot first, slide back afterwards
    v = 0
    t = 0.2
    forward_tracks = []

    current = 0
    mid = distance * 3 / 5
    while current < distance:
        if current < mid:
            a = 2
        else:
            a = -3

        s = v * t + 0.5 * a * (t ** 2)
        if s <= 0:
            # With mid = 3/5 and these accelerations the braking distance can
            # equal the remaining distance, so the velocity may hit zero just
            # short of the target.  Further steps would then move backwards
            # and the loop might never reach `distance`; cover the remainder
            # in one final step instead.
            forward_tracks.append(round(distance - current))
            break
        v = v + a * t
        current += s
        forward_tracks.append(round(s))

    # Slide back towards the exact position.
    # NOTE(review): the original comment says these total -20, but they sum to
    # -19; the values are left unchanged because the correction in
    # get_distance was presumably tuned against them -- confirm.
    back_tracks = [-3, -3, -2, -2, -2, -2, -2, -1, -1, -1]
    return {'forward_tracks': forward_tracks, 'back_tracks': back_tracks}


# Create the driver before the try block so that `finally` never refers to an
# undefined name when starting the browser itself fails.
driver = webdriver.Chrome()
try:
    # 1. Enter username and password, then submit
    driver.implicitly_wait(3)
    driver.get('https://passport.cnblogs.com/user/signin')

    username = driver.find_element(By.ID, 'input1')
    pwd = driver.find_element(By.ID, 'input2')
    signin = driver.find_element(By.ID, 'signin')

    username.send_keys('linhaifeng')
    pwd.send_keys('xxxxx')
    signin.click()

    # 2. Click the button to show the picture without the gap
    button = driver.find_element(By.CLASS_NAME, 'geetest_radar_tip')
    button.click()

    # 3. Grab the picture without the gap
    image1 = get_image()

    # 4. Click the slider button to show the picture with the gap
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    button.click()

    # 5. Grab the picture with the gap
    image2 = get_image()

    # 6. Compare the pixels of both pictures to find the distance
    distance = get_distance(image1, image2)

    # 7. Turn the total distance into a movement track
    tracks = get_tracks(distance)
    print(tracks)

    # 8. Slide forward along the track, then backward
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    ActionChains(driver).click_and_hold(button).perform()

    # Forward phase: accelerate towards (and past) the gap
    for track in tracks['forward_tracks']:
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()

    # Pause, as if noticing the overshoot, then slide back
    time.sleep(0.5)
    for back_track in tracks['back_tracks']:
        ActionChains(driver).move_by_offset(xoffset=back_track, yoffset=0).perform()

    # Small back-and-forth wiggle (kept from the original script)
    ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()
    ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()

    # Hold briefly before releasing the mouse button
    time.sleep(0.5)
    ActionChains(driver).release().perform()

    time.sleep(10)  # wait a while so the login can complete
finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window.
    driver.quit()
修订版本
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from PIL import Image
import time


def get_snap(driver):
    """Screenshot the whole page to ``full_snap.png`` and open it with PIL.

    :param driver: WebDriver instance showing the page
    :return: PIL image object of the full-page screenshot
    """
    driver.save_screenshot('full_snap.png')
    page_snap_obj = Image.open('full_snap.png')
    return page_snap_obj


def get_image(driver):
    """Return the captcha picture cropped out of a full-page screenshot.

    :param driver: WebDriver instance showing the captcha
    :return: PIL image object containing only the captcha picture
    """
    # find_element(By.CLASS_NAME, ...) replaces find_element_by_class_name,
    # which was removed in Selenium 4.3; this form works on Selenium 3 and 4.
    img = driver.find_element(By.CLASS_NAME, 'geetest_canvas_img')
    time.sleep(2)  # wait before reading the element's position and size
    location = img.location
    size = img.size

    left = location['x']
    top = location['y']
    right = left + size['width']
    bottom = top + size['height']

    page_snap_obj = get_snap(driver)
    image_obj = page_snap_obj.crop((left, top, right, bottom))
    return image_obj


def get_distance(image1, image2):
    """Return the x offset of the gap, minus a fixed correction of 7.

    Scans column by column, starting at x = 57, and stops at the first pixel
    whose R, G or B value differs by 60 or more between the two pictures.
    If no pixel differs, the last column minus 7 is returned.

    :param image1: picture without the gap
    :param image2: picture with the gap
    """
    start = 57  # NOTE(review): presumably skips the slider piece on the left edge -- confirm
    threshold = 60
    correction = 7
    # Load the pixel-access objects once instead of once per pixel.
    pixels1 = image1.load()
    pixels2 = image2.load()
    width, height = image1.size
    for i in range(start, width):
        for j in range(height):
            rgb1 = pixels1[i, j]
            rgb2 = pixels2[i, j]
            res1 = abs(rgb1[0] - rgb2[0])
            res2 = abs(rgb1[1] - rgb2[1])
            res3 = abs(rgb1[2] - rgb2[2])
            if not (res1 < threshold and res2 < threshold and res3 < threshold):
                return i - correction
    return width - 1 - correction


def get_tracks(distance):
    """Build the forward and backward drag offsets for ``distance``.

    The forward movement deliberately targets ``distance + 20``: it
    accelerates (a = 2) for the first 3/5 of the way and then decelerates
    (a = -3), one entry per 0.2 s time step.  The backward offsets are fixed.

    :param distance: distance reported by ``get_distance``
    :return: ``{'forward_tracks': [...], 'back_tracks': [...]}``
    """
    distance += 20  # overshoot first, slide back afterwards
    v = 0
    t = 0.2
    forward_tracks = []

    current = 0
    mid = distance * 3 / 5
    while current < distance:
        if current < mid:
            a = 2
        else:
            a = -3

        s = v * t + 0.5 * a * (t ** 2)
        if s <= 0:
            # With mid = 3/5 and these accelerations the braking distance can
            # equal the remaining distance, so the velocity may hit zero just
            # short of the target.  Further steps would then move backwards
            # and the loop might never reach `distance`; cover the remainder
            # in one final step instead.
            forward_tracks.append(round(distance - current))
            break
        v = v + a * t
        current += s
        forward_tracks.append(round(s))

    # Slide back towards the exact position.
    # NOTE(review): the original comment says these total -20, but they sum to
    # -19; the values are left unchanged because the correction in
    # get_distance was presumably tuned against them -- confirm.
    back_tracks = [-3, -3, -2, -2, -2, -2, -2, -1, -1, -1]
    return {'forward_tracks': forward_tracks, 'back_tracks': back_tracks}


def crack(driver):
    """Solve the slide captcha currently shown in ``driver``.

    :param driver: WebDriver instance on a page showing the Geetest widget
    """
    # 1. Click the button to show the picture without the gap
    button = driver.find_element(By.CLASS_NAME, 'geetest_radar_tip')
    button.click()

    # 2. Grab the picture without the gap
    image1 = get_image(driver)

    # 3. Click the slider button to show the picture with the gap
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    button.click()

    # 4. Grab the picture with the gap
    image2 = get_image(driver)

    # 5. Compare the pixels of both pictures to find the distance
    distance = get_distance(image1, image2)

    # 6. Turn the total distance into a movement track
    tracks = get_tracks(distance)
    print(tracks)

    # 7. Slide forward along the track, then backward
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    ActionChains(driver).click_and_hold(button).perform()

    # Forward phase: accelerate towards (and past) the gap
    for track in tracks['forward_tracks']:
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()

    # Pause, as if noticing the overshoot, then slide back
    time.sleep(0.5)
    for back_track in tracks['back_tracks']:
        ActionChains(driver).move_by_offset(xoffset=back_track, yoffset=0).perform()

    # Small back-and-forth wiggle (kept from the original script)
    ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()
    ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()

    # Hold briefly before releasing the mouse button
    time.sleep(0.5)
    ActionChains(driver).release().perform()


def login_cnblogs(username, password):
    """Open the cnblogs sign-in page, submit the credentials and run ``crack``.

    :param username: account name typed into the ``input1`` field
    :param password: password typed into the ``input2`` field
    """
    driver = webdriver.Chrome()
    try:
        # 1. Enter username and password, then submit
        driver.implicitly_wait(3)
        driver.get('https://passport.cnblogs.com/user/signin')

        input_username = driver.find_element(By.ID, 'input1')
        input_pwd = driver.find_element(By.ID, 'input2')
        signin = driver.find_element(By.ID, 'signin')

        input_username.send_keys(username)
        input_pwd.send_keys(password)
        signin.click()

        # 2. Solve the slide captcha
        crack(driver)

        time.sleep(10)  # wait a while so the login can complete
    finally:
        # quit() ends the whole WebDriver session (browser and driver
        # process); close() would only close the current window.
        driver.quit()


if __name__ == '__main__':
    login_cnblogs(username='linhaifeng', password='xxxx')
作者:酷酷的小明
链接:https://www.jianshu.com/p/2c726ff42029
來源:
著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。