python3爬虫selenium爬取今日头条财经新闻

**python3爬虫selenium爬取今日头条财经新闻**
使用selenium爬取今日头条财经版块新闻标题,并保存至本地文档。
参考文章:https://blog.csdn.net/qq_42689278/article/details/84590798

from selenium import webdriver
import time #导入时间库
#给txt文件命名
def txt_name():
    """Build the path of the output text file for the current local date.

    The file name embeds the current local month and day (not zero-padded)
    and lives in a fixed, hard-coded output directory.

    Returns:
        The full output path as a string.
    """
    now = time.localtime()
    out_dir = 'D:/python/workspace/toutiaonews/'
    file_name = 'newstitle_{}.{}.txt'.format(now.tm_mon, now.tm_mday)
    return out_dir + file_name

#获取当前时间
def get_time():
    """Return the current local time as a human-readable Chinese sentence.

    Returns:
        A string containing the current year, month, day, hour and minute
        (none of them zero-padded).
    """
    # The first five struct_time fields are year, month, day, hour, minute.
    year, month, day, hour, minute = time.localtime()[:5]
    return "现在是{}年{}月{}日{}时{}分".format(year, month, day, hour, minute)
    
#将爬取的当前时间写入文本
def write_time(t_n):
    """Append the current time, as formatted by ``get_time``, to a text file.

    Args:
        t_n: Path of the text file; it is opened in append mode as UTF-8.
    """
    stamp = get_time()
    with open(t_n, mode='a', encoding='utf-8') as out:
        # One line per call, terminated by a newline.
        out.write(stamp + '\n')

#保存标题
def write_info(t_n):
    """Append the text of every matched title link on the page to a file.

    Reads the module-level ``brower`` WebDriver (it is not created in this
    function), collects all ``<a>`` elements directly inside a
    ``<div class="title-box">`` and writes each element's text on its own
    line.

    Args:
        t_n: Path of the text file; it is opened in append mode as UTF-8.
    """
    # find_elements(by, value) is used instead of find_elements_by_xpath,
    # which was removed in Selenium 4.3; the generic form also exists in
    # Selenium 3, so this works on both.
    titles = brower.find_elements("xpath", '//div[@class="title-box"]/a')
    if not titles:
        # Nothing matched: leave the file untouched (do not even create it).
        return

    # Open the file once for the whole batch rather than once per title.
    with open(t_n, 'a', encoding='utf-8') as data:
        for title in titles:
            print(title.text, file=data)

#下拉界面
def get_manyinfo(t_n, rounds=10, scrolls=30, pause=3):
    """Scroll the page repeatedly, saving titles and refreshing each round.

    Uses the module-level ``brower`` WebDriver (it is not created in this
    function). Each round scrolls to the bottom of the page ``scrolls``
    times, waiting ``pause`` seconds after every scroll, then calls
    ``write_info`` and refreshes the page. The browser window is closed when
    the function finishes, whether it succeeds or raises.

    Args:
        t_n: Path of the text file passed on to ``write_info``.
        rounds: Number of scroll/save/refresh rounds (default 10).
        scrolls: Number of scroll-to-bottom steps per round (default 30).
        pause: Seconds to sleep after each scroll-to-bottom step (default 3).
    """
    try:
        # Initial nudge down the page before the main loop.
        brower.execute_script("window.scrollTo(0,1000);")
        time.sleep(1)

        for _ in range(rounds):
            for _ in range(scrolls):
                # Presumably each jump to the bottom makes the page load more
                # entries; the pause gives it time to do so (not verifiable
                # from this file).
                brower.execute_script(
                    "window.scrollTo(0,document.body.scrollHeight);"
                )
                time.sleep(pause)
            write_info(t_n)
            brower.refresh()
    finally:
        # Close the window even if scrolling or saving failed, so an error
        # does not leave the browser open.
        brower.close()

你可能感兴趣的:(爬虫)