爬取斗鱼弹幕数据

测试版，可用。

#!/usr/bin/env python3
# _*_ coding: utf-8 _*_
# Date: 2019/12/9 0009 18:59
# Author: Mijiu
# Version: 1.0
import time
from selenium import webdriver



# Module-level browser setup: one Chrome instance is created at import time
# and shared by every function in this script.
chrome_options = webdriver.ChromeOptions()
# Uncomment the next two lines to run Chrome in headless (no window) mode.
# chrome_options.add_argument('--headless')
# chrome_options.add_argument('--disable-gpu')
# NOTE(review): value 2 for this Chrome preference presumably blocks image
# loading to make pages lighter -- confirm against the Chrome prefs docs.
prefs = {"profile.managed_default_content_settings.images": 2}
chrome_options.add_experimental_option("prefs", prefs)
# `options=` replaces the `chrome_options=` keyword, which has been deprecated
# since Selenium 3.8 and is rejected by later Selenium 4 releases.
browser = webdriver.Chrome(options=chrome_options)
# Site root; a room id is appended to it to build the room URL.
url = 'https://www.douyu.com/'



# XPath selecting the individual danmu (bullet comment) nodes on a room page.
_DANMU_XPATH = './/div[@class=" danmu-6e95c1"]/div/div'


def _dump_visible_danmu():
    """Print and persist every non-empty danmu currently present on the page."""
    # 'xpath' is the locator-strategy string behind selenium's By.XPATH; the
    # generic find_elements(by, value) call exists in Selenium 3 and 4 alike,
    # unlike find_elements_by_xpath, which Selenium 4.3 removed.
    for node in browser.find_elements('xpath', _DANMU_XPATH):
        # Read the text once: every `.text` access is a WebDriver round trip
        # and the node may disappear between two reads.
        text = node.text
        if not text:
            continue
        try:
            print(text)
        except (UnicodeEncodeError, OSError):
            # Echoing is best-effort (e.g. a console that cannot encode the
            # message); the message is still written to the file below.
            pass
        saveDanmu(text)


def getDanmu(homeId):
    """Open a room page and record its danmu until interrupted.

    The page is polled every 2 seconds; each non-empty danmu found is printed
    and passed to ``saveDanmu``. There is no de-duplication between polls, so
    a message that is still on the page at the next poll is recorded again.

    Args:
        homeId: Room id; converted with ``str`` and appended to ``url``.

    Raises:
        Exception: Whatever a polling pass raised, once two consecutive
            passes have failed. A single failed pass is retried on the next
            poll.

    This function only returns by raising; ``KeyboardInterrupt`` is not
    intercepted, so Ctrl+C stops the loop immediately.
    """
    browser.get(url + str(homeId))

    previous_pass_failed = False
    while True:
        time.sleep(2)
        try:
            _dump_visible_danmu()
        except Exception:
            # One failed pass is tolerated (presumably nodes going stale while
            # the page re-renders); a second failure in a row is treated as
            # fatal and propagated to the caller.
            if previous_pass_failed:
                raise
            previous_pass_failed = True
        else:
            previous_pass_failed = False


def saveDanmu(danmu):
    """Append one danmu message, followed by a newline, to ``danmu.txt``.

    The file lives in the current working directory, is encoded as UTF-8 and
    is created on first use.

    Args:
        danmu: Message text to record.
    """
    with open('danmu.txt', mode='a+', encoding='utf-8') as log_file:
        log_file.writelines((danmu, '\n'))

if __name__ == '__main__':
    # Script entry point: ask for a room id (e.g. 138243) and record its danmu.
    # Stray whitespace around the typed id would otherwise end up in the URL.
    num = input('请输入需要查询的房间号').strip()  # 138243
    try:
        getDanmu(num)
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the endless polling loop.
        pass
    finally:
        # Shut down Chrome and its driver process instead of leaving them
        # running after the script exits.
        browser.quit()

爬取斗鱼弹幕数据_第1张图片

你可能感兴趣的:(爬虫)