天猫的部分数据需要登录后才可查看,而自动登录时会出现滑块验证。因此需要先从 HTML 文档中找出需要填写内容的页面元素,再使用自动化工具(Selenium)通过滑块验证码。
# coding:utf-8
import random
import time
from fake_useragent import UserAgent
from selenium import webdriver
from selenium.webdriver import ActionChains
# Tmall login page that hosts the login iframe.
url = "https://login.tmall.com/"
# Local path to the chromedriver executable; adjust for your machine.
chrome_driver = "C:/Users/Lenovo/PycharmProjects/data_science/京东/chromedriver.exe"
options = webdriver.ChromeOptions()
# Use a random user agent. Chrome expects the switch as "user-agent=<value>";
# without the "=" the value is glued onto the switch name and never applied.
options.add_argument("user-agent=" + UserAgent().random)
chrome = webdriver.Chrome(executable_path=chrome_driver, options=options)
def login(username="用户名", password="密码", slide_distance=300 - 40,
          slider_timeout=10.0):
    """
    Log in to Tmall automatically through Selenium.

    Opens the login page, switches into the login iframe, types the
    credentials, drags the slider captcha handle to the right in a single
    move and clicks the submit button.

    :param username: account name typed into the login form
    :param password: password typed into the login form
    :param slide_distance: horizontal drag distance in pixels for the slider
        handle (default ``300 - 40``; presumably track width minus handle
        width -- confirm against the live page)
    :param slider_timeout: maximum number of seconds to poll for the slider
        handle before giving up
    :return: None
    :raises NoSuchElementException: if an expected page element is missing,
        including the slider handle after ``slider_timeout`` seconds
    """
    # Local imports keep this function self-contained; both modules ship
    # with the selenium package the file already depends on.
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.common.by import By

    chrome.get(url)
    chrome.maximize_window()  # maximize the browser window
    # Give the page time to load; the random fraction avoids a fixed delay.
    time.sleep(3 + random.random())

    # The login form lives inside an iframe, so switch into it first.
    # ``switch_to.frame`` replaces the deprecated ``switch_to_frame``.
    chrome.switch_to.frame(chrome.find_element(By.ID, "J_loginIframe"))

    username_box = chrome.find_element(By.XPATH, '//*[@id="fm-login-id"]')
    password_box = chrome.find_element(By.XPATH, '//*[@id="fm-login-password"]')
    username_box.send_keys(username)
    password_box.send_keys(password)

    # ``find_element`` raises NoSuchElementException instead of returning
    # None, so wait for the slider by retrying until the deadline passes.
    deadline = time.monotonic() + slider_timeout
    while True:
        try:
            slicer = chrome.find_element(By.XPATH, '//*[@id="nc_1_n1z"]')
            break
        except NoSuchElementException:
            if time.monotonic() >= deadline:
                raise
            time.sleep(0.5)

    # Drag the handle in one straight move. The three steps are performed as
    # separate action chains, exactly as in the original sequence.
    ActionChains(chrome).click_and_hold(slicer).perform()
    ActionChains(chrome).move_by_offset(xoffset=slide_distance, yoffset=0).perform()
    ActionChains(chrome).release(slicer).perform()

    submit_button = chrome.find_element(
        By.XPATH, '//*[@class="fm-button fm-submit password-login"]'
    )
    submit_button.click()
# Run the automated login only when this file is executed as a script.
if __name__ == "__main__":
    login()
截至 2020-05-30,天猫的滑块验证不需要模拟任何人类的拖动行为(例如变速或抖动),模拟了反而会导致验证失败。直接将滑块一次性拖动到位即可,核心代码如下:
ActionChains(chrome).move_by_offset(xoffset=remain, yoffset=0).perform()