selenium

selenium

from selenium.webdriver import Chrome

# Create the browser (WebDriver) object.
b = Chrome()
try:
    # Open the web page.
    b.get('https://cd.zu.ke.com/zufang/pg2/#contentList')
    # Print the source of the page as currently loaded in the browser.
    print(b.page_source)
finally:
    # Always end the session so the browser and driver processes do not
    # linger, even when loading the page raised.
    b.quit()

控制翻页

  1. 找到多页规律利用循环获取多页内容
from selenium.webdriver import Chrome

# Create the browser (WebDriver) object.
b = Chrome()
try:
    # The page number is part of the URL (pg1, pg2, ...), so several pages
    # can be fetched by looping over the page numbers. Numbering starts at
    # 1, hence range(1, 101) for pages 1..100 rather than range(0, 100).
    for page in range(1, 101):
        b.get(f'https://cd.zu.ke.com/zufang/pg{page}/#contentList')
        print(b.page_source)
finally:
    # Always end the session so the browser and driver processes do not linger.
    b.quit()
# dian
  2. 点击翻页按钮,再刷新后获取网页源代码
# Paginate by clicking the "next page" button, then read the page source of
# the newly loaded page.
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By

b = Chrome()
try:
    b.get(f'https://cd.zu.ke.com/zufang/#contentList')
    for _ in range(5):
        print(b.page_source)
        # Click the "next page" button, located by its class attribute.
        c = b.find_element(By.CLASS_NAME, 'next')
        c.click()
        # The same element located through a CSS selector.
        b.find_element(By.CSS_SELECTOR, '.next')
        # Locate the <a> tag whose full text is exactly '新闻'.
        # NOTE(review): find_element raises NoSuchElementException when the
        # page has no such link -- confirm these two lookups suit this page.
        b.find_element(By.LINK_TEXT, '新闻')
        # Locate an <a> tag whose text contains '闻'.
        b.find_element(By.PARTIAL_LINK_TEXT, '闻')
finally:
    # Always end the session so the browser and driver processes do not linger.
    b.quit()
  1. selenium获取标签

    -关键字 -含义 说明
    By.ID 通过ID属性值获取标签
    By.CLASS_NAME 通过class属性名获取标签
    By.CSS_SELECTOR 通过css选择器获取标签
    By.LINK_TEXT 通过a标签的标签内容获取标签 只有a标签有用,必须全部匹配
    By.NAME 通过name属性值获取标签
    By.TAG_NAME 通过标签名获取标签
    By.PARTIAL_LINK_TEXT 获取包含str的a标签 部分匹配
    By.XPATH 通过xpath路径获取标签
  2. 输入框输入内容

    输入框对应的标签.send_keys(输入内容)

    from selenium.webdriver import Chrome
    from selenium.webdriver.common.by import By
    import time

    # Search CNKI for a keyword and append the abstract of each result on the
    # first 9 result pages to a local file.
    b = Chrome()
    try:
        b.get('https://www.cnki.net/')
        # send_keys types into the search box; the trailing '\n' acts as
        # pressing Enter, which submits the search.
        input_ = b.find_element(By.ID, 'txt_SearchText')
        input_.send_keys('数据分析\n')
        time.sleep(2)
        # utf-8 is given explicitly so writing Chinese text does not depend
        # on the platform's default encoding.
        # NOTE(review): plain text is written although the name ends in .docx.
        with open('files/数据分析摘要.docx', 'a', encoding='utf-8') as f:
            for page in range(9):
                f.write(f'第{page + 1}页\n')
                # Collect the result links on the current page.
                title = b.find_elements(By.CLASS_NAME, 'fz14')
                # At most 20 results per page; slicing avoids an IndexError
                # when a page lists fewer than 20.
                for x, link in enumerate(title[:20]):
                    link.click()
                    time.sleep(1)
                    # The click opened a new tab; it is the last handle.
                    b.switch_to.window(b.window_handles[-1])
                    time.sleep(1)
                    zhaiyao = b.find_element(By.ID, 'ChDivSummary').text
                    # Close the detail tab before going back to the results.
                    b.close()
                    f.write(f'打印第{page + 1}页第{x + 1}个摘要\n')
                    # End the abstract with a newline so the next header
                    # starts on its own line.
                    f.write(f'{zhaiyao}\n')
                    b.switch_to.window(b.window_handles[0])
                next_button = b.find_element(By.ID, 'PageNext')
                next_button.click()
                time.sleep(3)
    finally:
        # Always end the session so the browser and driver do not linger.
        b.quit()
    
    
  3. 点击标签

    标签.click()

    from selenium.webdriver import Chrome
    from selenium.webdriver.common.by import By
    import time

    # Search CNKI for a keyword, click every result link on the first 9
    # result pages and append each abstract to a local file.
    b = Chrome()
    try:
        b.get('https://www.cnki.net/')
        input_ = b.find_element(By.ID, 'txt_SearchText')
        # The trailing '\n' acts as pressing Enter, which submits the search.
        input_.send_keys('数据分析\n')
        time.sleep(2)
        # utf-8 is given explicitly so writing Chinese text does not depend
        # on the platform's default encoding.
        # NOTE(review): plain text is written although the name ends in .docx.
        with open('files/数据分析摘要.docx', 'a', encoding='utf-8') as f:
            for page in range(9):
                f.write(f'第{page + 1}页\n')
                # Collect the result links on the current page.
                title = b.find_elements(By.CLASS_NAME, 'fz14')
                # At most 20 results per page; slicing avoids an IndexError
                # when a page lists fewer than 20.
                for x, link in enumerate(title[:20]):
                    # click() on the element opens the article detail page.
                    link.click()
                    time.sleep(1)
                    # The click opened a new tab; it is the last handle.
                    b.switch_to.window(b.window_handles[-1])
                    time.sleep(1)
                    zhaiyao = b.find_element(By.ID, 'ChDivSummary').text
                    # Close the detail tab before going back to the results.
                    b.close()
                    f.write(f'打印第{page + 1}页第{x + 1}个摘要\n')
                    # End the abstract with a newline so the next header
                    # starts on its own line.
                    f.write(f'{zhaiyao}\n')
                    b.switch_to.window(b.window_handles[0])
                # click() on the pager button loads the next result page.
                next_button = b.find_element(By.ID, 'PageNext')
                next_button.click()
                time.sleep(3)
    finally:
        # Always end the session so the browser and driver do not linger.
        b.quit()
    
    
  4. 切换标签页

    from selenium.webdriver import Chrome
    from selenium.webdriver.common.by import By
    import time

    # Search CNKI for a keyword and, for every result on the first 9 result
    # pages, switch to the tab it opens, save the abstract and switch back.
    b = Chrome()
    try:
        b.get('https://www.cnki.net/')
        input_ = b.find_element(By.ID, 'txt_SearchText')
        # The trailing '\n' acts as pressing Enter, which submits the search.
        input_.send_keys('数据分析\n')
        time.sleep(2)
        # utf-8 is given explicitly so writing Chinese text does not depend
        # on the platform's default encoding.
        # NOTE(review): plain text is written although the name ends in .docx.
        with open('files/数据分析摘要.docx', 'a', encoding='utf-8') as f:
            for page in range(9):
                f.write(f'第{page + 1}页\n')
                # Collect the result links on the current page.
                title = b.find_elements(By.CLASS_NAME, 'fz14')
                # At most 20 results per page; slicing avoids an IndexError
                # when a page lists fewer than 20.
                for x, link in enumerate(title[:20]):
                    link.click()
                    time.sleep(1)
                    # window_handles lists the open tabs; the newly opened
                    # detail tab is last, so make it the active one.
                    b.switch_to.window(b.window_handles[-1])
                    time.sleep(1)
                    zhaiyao = b.find_element(By.ID, 'ChDivSummary').text
                    # close() only closes the active (detail) tab.
                    b.close()
                    f.write(f'打印第{page + 1}页第{x + 1}个摘要\n')
                    # End the abstract with a newline so the next header
                    # starts on its own line.
                    f.write(f'{zhaiyao}\n')
                    # Hand control back to the first tab (the result list);
                    # the driver does not do this by itself after close().
                    b.switch_to.window(b.window_handles[0])
                next_button = b.find_element(By.ID, 'PageNext')
                next_button.click()
                time.sleep(3)
    finally:
        # Always end the session so the browser and driver do not linger.
        b.quit()
    
    
  5. 鼠标滚动

    from selenium.webdriver import Chrome
    import time

    # Scroll a page by running JavaScript in the browser.
    b = Chrome()
    try:
        b.get('https://www.cnki.net/')
        # Single scroll: move the viewport down by 200 pixels.
        b.execute_script('window.scrollBy(0,200)')
        # Repeated scroll: 8 more steps of 200 pixels, pausing between
        # steps so each one is visible.
        for _ in range(8):
            b.execute_script('window.scrollBy(0,200)')
            time.sleep(2)
    finally:
        # Always end the session so the browser and driver do not linger.
        b.quit()
    

你可能感兴趣的:(selenium,chrome,python)