requests:用于get请求
bs4:用于解析网页(从HTML中提取所需标签)
codecs:用于读写文件
<div id="list">下的<dd>内的<a>标签内
4.以下为获取URL的代码
// A highlighted block
import requests
import codecs
from bs4 import BeautifulSoup
# Fetch the book's index page and parse it.
re = requests.get('https://www.zwdu.com/book/7586/')
html = re.content
soup = BeautifulSoup(html, 'html.parser')
body = soup.body
# Chapter links are looked up as <div id="list"> -> <dd> -> <a>.
data = body.find('div', {'id': 'list'})
dd = data.find_all('dd')
# NOTE: download() must already be defined by the time this loop runs.
for n in dd:
    link = n.find('a')
    if link is None:
        # A <dd> without a link has no chapter to fetch.
        continue
    # The href is prefixed with the site root (assumes it is a
    # site-relative path such as "/book/..." -- TODO confirm).
    url = 'https://www.zwdu.com' + link['href']
    download(url)  # user-defined download function
def download(url, output_path='F:\\PycharmProjects\\untitled\\venv\\output.txt'):
    """Fetch one chapter page and append its title and text to a file.

    Args:
        url: URL of the chapter page to fetch.
        output_path: File to append to, written as UTF-8. Defaults to the
            path that used to be hard-coded here; change it to a path
            that exists on your machine.

    Returns:
        Always 0.
    """
    req = requests.get(url)
    soup2 = BeautifulSoup(req.content, 'html.parser')
    body2 = soup2.body
    # Chapter text: <div id="content"> inside <div class="box_con">.
    data2 = body2.find('div', {'class': 'box_con'})
    data3 = data2.find('div', {'id': 'content'})
    # Chapter title: the <h1> inside <div class="bookname">.
    name = body2.find('div', {'class': 'bookname'})
    name2 = name.find('h1').string
    # ``with`` closes the file even if a write fails part-way through.
    with codecs.open(output_path, 'a', 'utf-8') as fo:
        fo.write(name2)
        # Iterate the direct children of the content <div>; each child is
        # written followed by CRLF, with any '<br/>' text removed.
        for d in data3:
            fo.write(str(d).replace('<br/>', '') + '\r\n')
    return 0
import requests
import codecs
from bs4 import BeautifulSoup
# Download the book's index page and keep the raw response body.
re = requests.get('https://www.zwdu.com/book/7586/')
html = re.content
# Parse the page with the standard-library HTML parser backend.
soup = BeautifulSoup(html, features='html.parser')
body = soup.find('body')
# Collect every <dd> under <div id="list">; the loop further down
# reads one chapter link from each of them.
data = body.find('div', attrs={'id': 'list'})
dd = data.find_all('dd')
def download(url, output_path='F:\\PycharmProjects\\untitled\\venv\\output.txt'):
    """Fetch one chapter page and append its title and text to a file.

    Args:
        url: URL of the chapter page to fetch.
        output_path: File to append to, written as UTF-8. Defaults to the
            path that used to be hard-coded here; change it to a path
            that exists on your machine.

    Returns:
        Always 0.
    """
    req = requests.get(url)
    soup2 = BeautifulSoup(req.content, 'html.parser')
    body2 = soup2.body
    # Chapter text: <div id="content"> inside <div class="box_con">.
    data2 = body2.find('div', {'class': 'box_con'})
    data3 = data2.find('div', {'id': 'content'})
    # Chapter title: the <h1> inside <div class="bookname">.
    name = body2.find('div', {'class': 'bookname'})
    name2 = name.find('h1').string
    # ``with`` closes the file even if a write fails part-way through.
    with codecs.open(output_path, 'a', 'utf-8') as fo:
        fo.write(name2)
        # Iterate the direct children of the content <div>; each child is
        # written followed by CRLF, with any '<br/>' text removed.
        for d in data3:
            fo.write(str(d).replace('<br/>', '') + '\r\n')
    return 0
# Walk every <dd> entry of the index and download the chapter it links to.
for n in dd:
    link = n.find('a')
    if link is None:
        # A <dd> without a link has no chapter to fetch.
        continue
    # The href is prefixed with the site root (assumes it is a
    # site-relative path such as "/book/..." -- TODO confirm).
    url = 'https://www.zwdu.com' + link['href']
    download(url)