selenium+BeautifulSoup 爬虫

爬取斗鱼(douyu)直播平台"全部直播"列表中各主播的名字和在线观众人数

from selenium import webdriver
from bs4 import BeautifulSoup as bs
import time

# Walk through the paginated Douyu "all live rooms" directory and print, for
# every room listed, the viewer count followed by the streamer name.
driver = webdriver.PhantomJS()
try:
    driver.get("https://www.douyu.com/directory/all")

    # The directory opens on page 1, so the first jump has to target page 2.
    # Starting at 1 would reload the first page and print it twice.
    next_page = 2
    while True:
        # Parse the DOM of the page that is currently rendered.
        soup = bs(driver.page_source, "lxml")
        # Elements holding the streamer names.
        names = soup.find_all("span", {"class": "dy-name ellipsis fl"})
        # Elements holding the viewer counts.
        nums = soup.find_all("span", {"class": "dy-num fr"})

        # Both lists are in page order; pair them up entry by entry.
        for name, number in zip(names, nums):
            print("\t观众人数:", number.get_text().strip(), end="")
            print("\t主播名字:", name.get_text().strip())

        # Stop once the page source contains the disabled-"next" marker
        # (presumably only present on the last page — verify against site).
        if "shark-pager-disable-next" in driver.page_source:
            break

        # Jump to the next page via the pager's "go to page" input box.
        jump_box = driver.find_element_by_class_name("jumptxt")
        # Clear first so a leftover value cannot be concatenated with the
        # new page number.
        jump_box.clear()
        jump_box.send_keys(str(next_page))
        driver.find_element_by_class_name("shark-pager-submit").click()
        # Fixed pause to let the new page content load before re-parsing.
        time.sleep(5)
        next_page += 1
finally:
    # Always shut the browser down so the PhantomJS process is not leaked,
    # even if scraping fails part-way through.
    driver.quit()

你可能感兴趣的:(爬虫)