from selenium import webdriver
import time
# amount=要爬多少数量
def douYuDanMu(amount):
    """Print live chat messages ("danmu") from the Douyu room at /134000.

    Starts Chrome through chromedriver, opens the room page, clicks the
    element with class "normallevel-close" if it exists (presumably the
    close button of an overlay), waits until chat items (class
    "jschartli") show up, and then prints the text of every new item.

    Args:
        amount: Number of messages to print before stopping. Nothing is
            printed when it is zero or negative.

    Returns:
        None. Messages are written with ``print``. The function stops
        after ``amount`` messages, when Ctrl+C is pressed while streaming,
        or when no chat item appears within 30 attempts. The browser is
        closed in every case.
    """
    # Chromedriver location is hard-coded; adjust it for the local machine.
    # Raw string so the Windows backslashes are never treated as escapes.
    path = r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver\chromedriver.exe"
    # NOTE(review): `executable_path` and `find_element(s)_by_class_name` are
    # Selenium 3-style calls -- confirm the installed Selenium provides them.
    driver = webdriver.Chrome(executable_path=path)
    try:
        driver.get("https://www.douyu.com/134000")

        # Best effort: the element may simply not be on the page.
        try:
            driver.find_element_by_class_name("normallevel-close").click()
        except Exception:
            pass
        driver.maximize_window()

        # Wait for the chat list to be populated, giving up after a bounded
        # number of attempts instead of retrying forever.
        max_attempts = 30
        elems = []
        for attempt in range(1, max_attempts + 1):
            print("\r第%d次尝试中" % attempt)
            try:
                elems = driver.find_elements_by_class_name("jschartli")
            except Exception:
                print("\r第%d次尝试失败" % attempt)
            if elems:
                print("第%d次尝试成功" % attempt)
                break
            time.sleep(2)
        else:
            print("主人,这网站真的爬不动......")
            return

        # `seen` is the index of the next unprocessed chat item; `count` is
        # the number of messages actually printed. They are tracked
        # separately so that an item whose text cannot be read does not
        # cause later items to be printed twice after the list is refreshed.
        seen = 0
        count = 0
        try:
            while count < amount:
                fresh = elems[seen:]
                if not fresh:
                    # Nothing new yet; pause instead of hammering the DOM.
                    time.sleep(1)
                for chat in fresh:
                    seen += 1
                    try:
                        saying = chat.find_element_by_class_name("text-cont").text
                    except Exception:
                        # No readable text in this item; skip it.
                        continue
                    count += 1
                    print("【弹幕 {0}】{1}".format(count, saying))
                    if count >= amount:
                        break
                    time.sleep(1)
                else:
                    # Target not reached yet: re-read the list to pick up
                    # items appended since the last query.
                    elems = driver.find_elements_by_class_name("jschartli")
        except KeyboardInterrupt:
            # Ctrl+C is the intended way to stop early.
            pass
    finally:
        # Always release the browser and the chromedriver process.
        driver.quit()
一点解释:
这是测试代码。可以看到driver中的页面内容会自动更新:弹幕元素的数量会一直增加,而原来的弹幕元素也不会消失(虽然页面上已经看不到了...)。所以count不仅用于计数,还有另一个作用:再次提取所有弹幕时,通过列表切片的形式过滤掉已经爬取过的弹幕。