正则爬虫 & 进度条

正则爬虫

import requests
import re
# URL of the page to scrape (presumably the book's chapter index -- confirm).
target = "https://www.vodtw.com/Html/Book/59/59089/"

# Send a desktop-Chrome User-Agent string with the request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36'
}

# Fetch the page.  Without a timeout, requests waits indefinitely on an
# unresponsive server, so bound the wait explicitly.
req_text = requests.get(url=target, headers=headers, timeout=10)
# Stop here on an HTTP error (4xx/5xx) instead of running the regexes below
# against an error page.
req_text.raise_for_status()
# Decode the body as GBK whenever `.text` is read
# (presumably the site serves GBK-encoded pages -- confirm).
req_text.encoding = 'gbk'
# NOTE(review): everything below is not valid Python as it stands:
#   * `chapter`, `name` and `count` are used but never defined in the visible
#     code;
#   * the second line is indented although no enclosing statement (e.g. a
#     `for` loop header) is visible;
#   * the regex / replace string literals are split across physical lines;
#   * several statements are fused onto the last line.
# Presumably the snippet was damaged when the page was extracted (HTML tags
# inside the patterns and the loop header look to have been stripped) --
# TODO restore from the original post before running.
# Visible intent: take the first regex match for the chapter title and text,
# strip some substrings from the text, print a percentage progress line that
# overwrites itself via '\r', append title + text to `name + '.txt'` (UTF-8),
# and finally print a completion message.
li_list = re.findall('i>(.*?)',chapter)[0]
    chapter_title = re.findall('id="htmltimu"> (.*?) ',chapter)[0]
    chapter_text=(re.findall('3px;">
([\W\w]*?)
',chapter)[0]).replace('
','').replace('
'
,'').replace(' ','') percent = count / len(li_list) * 100 print('%s 下载进度 %0.1f %%'%(name,percent),end='\r') count = count + 1 with open(name+'.txt', 'a',encoding='utf-8') as f: f.write(chapter_title+'\n'+chapter_text+'\n') print('\n'+'下载完成...')

你可能感兴趣的:(Python)