【selenium爬虫测试】对网页中帖子标题进行爬取

import re
from time import sleep

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
# Scrape thread titles from a Baidu Tieba board listing, page by page, and
# print each title as it is found. Crawling stops at the first page that yields
# no title elements, or when the browser/driver reports an error.

# Listing URL without the trailing "pn" value.
# NOTE(review): "pn" is presumably the offset of the first thread on the page;
# confirm against the site.
LIST_URL = "https://tieba.baidu.com/f?kw=%E6%8A%97%E5%8E%8B%E8%83%8C%E9%94%85&ie=utf-8&pn="
PAGE_STEP = 50       # amount added to "pn" after every page
PAGE_LOAD_WAIT = 2   # seconds to wait after navigation before reading the DOM

options = webdriver.ChromeOptions()
# ChromeDriver "detach" option: leave the browser running once the script ends.
options.add_experimental_option('detach', True)
driver = webdriver.Chrome(options=options)

titles = []  # every title printed so far, across all pages
offset = 0
while True:
    try:
        driver.get(LIST_URL + str(offset))
        sleep(PAGE_LOAD_WAIT)
        # Read the text inside the try: an element can go stale between
        # lookup and access, which also surfaces as a WebDriverException.
        page_titles = [
            element.text
            for element in driver.find_elements(By.CLASS_NAME, 'j_th_tit')
        ]
    except WebDriverException as exc:
        # Best-effort crawl: report the failure and stop instead of hiding it.
        print(f"Stopping at pn={offset}: {exc}")
        break

    if not page_titles:
        # No matching elements on this page: nothing more to crawl.
        break

    # Skip a title that is identical to the one immediately before it
    # (only consecutive repeats are dropped, exactly as before).
    previous = None
    for title in page_titles:
        if title != previous:
            titles.append(title)
            print(title)
            previous = title

    offset += PAGE_STEP

【selenium爬虫测试】对网页中帖子标题进行爬取_第1张图片

 

你可能感兴趣的:(python,爬虫,selenium)