2019-06-18 python day-06

今日内容:

1.selenium相关操作

2.selenium登录破解

3.爬取京东商品信息

4.破解滑动验证码的逻辑



1.selenium相关操作

    (1)搭配使用xpath

from selenium import webdriver

import time

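'''
<html>
 <head>
  <base href='http://example.com/' />
  <title>Example website</title>
 </head>
 <body>
  <div id='images'>
   <a href='image1.html'>Name: My image 1 <br /><img src='image1_thumb.jpg' /></a>
   <a href='image2.html'>Name: My image 2 <br /><img src='image2_thumb.jpg' /></a>
   <a href='image3.html'>Name: My image 3 <br /><img src='image3_thumb.jpg' /></a>
   <a href='image4.html'>Name: My image 4 <br /><img src='image4_thumb.jpg' /></a>
   <a href='image5.html'>Name: My image 5 <br /><img src='image5_thumb.jpg' /></a>
  </div>
 </body>
</html>
'''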

# Demo: locating elements with XPath on the Scrapy selector sample page.
driver = webdriver.Chrome(r'/Users/nadia/Downloads/chromedriver')

try:
    # The implicit wait is configured before the get() request; later
    # find_element* calls then wait up to 5 seconds for a node to appear.
    driver.implicitly_wait(5)

    driver.get('https://doc.scrapy.org/en/latest/_static/selectors-sample1.html')

    # (An explicit wait, by contrast, would be written after the get() request;
    # none is needed for this static page.)

    # '/html' is an absolute path that selects the root element.
    html = driver.find_element_by_xpath('/html')
    print(html.tag_name)

    # '//div' searches the whole document; find_element returns the first match.
    div = driver.find_element_by_xpath('//div')
    print(div.tag_name)

    # Find the node whose id is "images". The leading '//' is required: a bare
    # 'div[@id="images"]' is evaluated relative to the document node, whose only
    # child is <html>, so it matches nothing.
    images_div = driver.find_element_by_xpath('//div[@id="images"]')
    print(images_div.tag_name)
    print(images_div.text)

    # First <a> element in the document.
    a = driver.find_element_by_xpath('//a')
    print(a)

    # Every <a> element in the document (a list).
    a_s = driver.find_elements_by_xpath('//a')
    print(a_s)

    # href attribute of the first <a> element.
    href = driver.find_element_by_xpath('//a').get_attribute('href')
    print(href)

    time.sleep(5)

finally:
    # quit() ends the whole session (all windows plus the chromedriver process),
    # whereas close() only closes the current window.
    driver.quit()

 (2)元素交互操作

-1点击,清除,搜索

from selenium import webdriver

from selenium.webdriver.common.keys import Keys

import time

# Demo: typing into, submitting, clearing and clicking elements on jd.com.
driver = webdriver.Chrome(r'/Users/nadia/Downloads/chromedriver')

try:
    driver.implicitly_wait(5)
    driver.get('https://www.jd.com/')

    # First search: type the keyword into the search box and submit with ENTER.
    search_box = driver.find_element_by_id('key')
    search_box.send_keys('围城')
    search_box.send_keys(Keys.ENTER)
    time.sleep(2)

    # Second search: look the search box up again (presumably because the page
    # changed after the first search), empty it, type a new keyword and submit
    # by clicking the search button instead of pressing ENTER.
    search_box = driver.find_element_by_id('key')
    search_box.clear()
    search_box.send_keys('航海王')
    driver.find_element_by_class_name('button').click()

    # Keep the result page visible for a while before closing.
    time.sleep(10)

finally:
    driver.close()

-2获取cookies

from selenium import webdriver

import time

# Demo: reading the cookies the browser holds for the current page.
driver = webdriver.Chrome(r'/Users/nadia/Downloads/chromedriver')

try:
    driver.implicitly_wait(10)
    driver.get('https://www.zhihu.com/explore')

    # get_cookies() returns the cookies visible to the current page.
    page_cookies = driver.get_cookies()
    print(page_cookies)

    time.sleep(10)

finally:
    driver.close()

-3 选项卡

import time

from selenium import webdriver

# Demo: opening a second tab and switching between tabs.
browser = webdriver.Chrome(r'/Users/nadia/Downloads/chromedriver')

try:
    browser.get('https://www.baidu.com')

    # Open a new, blank tab through JavaScript.
    browser.execute_script('window.open()')

    # window_handles holds one handle per open tab/window.
    print(browser.window_handles)

    # Switch to the second tab and load a page there. switch_to.window() is the
    # supported spelling; switch_to_window() is deprecated.
    # NOTE(review): assumes the handles are listed in opening order — confirm.
    browser.switch_to.window(browser.window_handles[1])
    browser.get('https://www.taobao.com')

    time.sleep(10)

    # Switch back to the first tab and navigate it elsewhere.
    browser.switch_to.window(browser.window_handles[0])
    browser.get('https://www.sina.com.cn')

finally:
    # close() would only close the tab that currently has focus and leave the
    # other tab and the driver running; quit() shuts the whole session down.
    browser.quit()

-4动作链

from selenium import webdriver

from selenium.webdriver import ActionChains

import time

# Demo: dragging an element with ActionChains in small, human-like steps.
driver = webdriver.Chrome()

try:
    # Setup lives inside the try block so the browser is shut down even when
    # the page fails to load.
    driver.implicitly_wait(10)
    driver.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')

    # Switch into the frame identified by 'iframeResult'
    # (switch_to.frame replaces the deprecated switch_to_frame).
    driver.switch_to.frame('iframeResult')

    # Element to drag.
    draggable = driver.find_element_by_id('draggable')
    # Element to drop onto.
    droppable = driver.find_element_by_id('droppable')

    # Approach 1 (robot-like, not used here): jump to the target in one go with
    #     ActionChains(driver).drag_and_drop(draggable, droppable).perform()
    # An ActionChains object must always be given the driver, and nothing
    # happens until perform() is called.

    # Approach 2: imitate a human by moving a little at a time.
    source = draggable.location['x']
    target = droppable.location['x']
    print(source, target)

    # Horizontal distance between the two elements' x positions.
    distance = target - source
    print(distance)

    # Each action below is performed immediately via its own perform() call.
    # Press and hold the mouse button on the source element.
    ActionChains(driver).click_and_hold(draggable).perform()

    step = 2
    moved = 0
    while moved < distance:
        # The last step is shortened so the pointer lands exactly on
        # `distance` instead of overshooting when it is not a multiple of step.
        offset = min(step, distance - moved)
        ActionChains(driver).move_by_offset(xoffset=offset, yoffset=0).perform()
        moved += offset

    # Release the mouse button to drop the element.
    ActionChains(driver).release().perform()

    time.sleep(10)

finally:
    # quit() ends the whole session, not just the current window.
    driver.quit()

-5前进,后退

from selenium import webdriver

import time

# Demo: moving backward and forward through the browser history.
driver = webdriver.Chrome(r'/Users/nadia/Downloads/chromedriver')

try:
    driver.implicitly_wait(10)

    # Visit three pages in a row to build up some history.
    for url in (
        'https://www.jd.com/',
        'https://www.baidu.com/',
        'https://www.cnblogs.com/',
    ):
        driver.get(url)

    time.sleep(2)

    # Go back, go forward, go back again, pausing after each step.
    for navigate, pause in (
        (driver.back, 1),
        (driver.forward, 1),
        (driver.back, 10),
    ):
        navigate()
        time.sleep(pause)

finally:
    driver.close()

2.selenium登录破解

# 由于 selenium 驱动的浏览器默认不带缓存(cookie),每次打开网站都处于未登录状态。

# 下面演示如何给 selenium 驱动的浏览器加上缓存(cookie),从而避免重复登录。

from selenium import webdriver

from selenium.webdriver import ChromeOptions

import time

# Demo: skipping the login step by reusing a Chrome profile and adding a cookie.
options = ChromeOptions()

# Placeholder: replace the parenthesised text with the directory in which
# Chrome stores its user data (cookies included).
profile_directory = r'--user-data-dir=(填写chrome存放cookie的位置)'
options.add_argument(profile_directory)

# Placeholder: replace with the BDUSS cookie value taken from the response
# headers of a logged-in session in your own browser.
bduss_value = '<BDUSS cookie value>'

# `options=` is the supported keyword; `chrome_options=` is deprecated.
driver = webdriver.Chrome(options=options)

try:
    driver.implicitly_wait(10)

    # A cookie can only be added once the browser is on the matching site.
    driver.get('https://www.baidu.com/')

    # Add the user's cookie. The keys "name" and "value" must be lowercase.
    driver.add_cookie({"name": "BDUSS", "value": bduss_value})

    # Reload so the page is requested again with the cookie attached.
    driver.refresh()

    time.sleep(10)

finally:
    # quit() ends the whole session, not just the current window.
    driver.quit()

3.爬取京东商品信息

from selenium import webdriver

from selenium.webdriver.common.keys import Keys

import time

def _describe_good(good):
    """Build the printable summary for one product element (a ``gl-item`` node)."""
    # Every lookup is scoped to `good`; searching from the driver instead would
    # return the first match on the page for every product.
    # NOTE(review): assumes the product link is the <a> inside `.p-img`
    # (the container itself has no href) — confirm against the live page.
    good_link = good.find_element_by_css_selector('.p-img a').get_attribute('href')
    good_name = good.find_element_by_css_selector('.p-name em').text.replace("\n", '--')
    good_price = good.find_element_by_class_name('p-price').text.replace('\n', ':')
    comment_num = good.find_element_by_class_name('p-commit').text.replace('\n', ' ')

    return f'''

                            商品链接:{good_link}

                            商品名称:{good_name}

                            商品价格:{good_price}

                            评价人数:{comment_num}

                            '''


def get_good(driver, pause=2):
    """Print link, name, price and comment count of every product, page by page.

    Starting from the search-result page the driver currently shows, each page
    is scrolled to the bottom, every ``gl-item`` element is printed, and the
    ``pn-next`` button is clicked until there is no further page.

    Args:
        driver: A Selenium WebDriver already positioned on a result page.
        pause: Seconds to wait after scrolling and after clicking "next" so the
            page can finish loading. Defaults to 2.

    The driver is shut down exactly once when the function exits, whether it
    finished normally or an error occurred.
    """
    # Scroll far down via JavaScript so that all products on the page are loaded.
    js_code = '''

            window.scrollTo(0,50000);

        '''

    try:
        # A loop replaces the former self-recursion, which closed the driver
        # once per recursion level and had no stopping condition.
        while True:
            driver.execute_script(js_code)
            time.sleep(pause)

            for good in driver.find_elements_by_class_name('gl-item'):
                print(_describe_good(good))

            # find_elements returns an empty list (instead of raising) when the
            # page has no "next" button.
            next_tags = driver.find_elements_by_class_name('pn-next')
            if not next_tags:
                break

            next_tag = next_tags[0]
            # NOTE(review): presumably the last page marks the button with a
            # `disabled` class — confirm against the live page.
            if 'disabled' in (next_tag.get_attribute('class') or '').split():
                break

            next_tag.click()
            time.sleep(pause)

    finally:
        driver.quit()


if __name__ == "__main__":
    # Ask the user which product to search for.
    good_tag = input("请输入想查找的商品:")

    driver = webdriver.Chrome(r'/Users/nadia/Downloads/chromedriver')

    try:
        driver.implicitly_wait(10)

        # 1. Open the JD home page.
        driver.get("https://www.jd.com/")

        # 2. Type the product name into the search box and press ENTER to search.
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys(good_tag)
        input_tag.send_keys(Keys.ENTER)

        # Give the result page a moment to load.
        time.sleep(2)
    except Exception:
        # get_good() has not taken over the browser yet, so a failure during
        # setup would otherwise leave it running.
        driver.quit()
        raise

    # 3. Scrape the result pages; get_good() closes the driver itself when done.
    get_good(driver)

4.破解滑动验证码的逻辑


2019-06-18 python day-06_第1张图片
滑动验证码


2019-06-18 python day-06_第2张图片
破解逻辑


2019-06-18 python day-06_第3张图片
破解示意图

你可能感兴趣的:(2019-06-18 python day-06)