"""Scrape http://www.qiushibaike.com/text/ and dump the posts to ``dst.csv``.

The page is fetched once, five CSS selectors pull out the per-post pieces
(name, age, content, vote count, comment count), and the combined records
are written as one CSV row each.
"""
import csv

import requests
from bs4 import BeautifulSoup

url = "http://www.qiushibaike.com/text/"
# A desktop-browser User-Agent is sent with the request.
# NOTE(review): presumably the site rejects the default client UA - confirm.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
    ),
}

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from scraping an HTTP error page.
res = requests.get(url, headers=headers, timeout=10)
res.raise_for_status()
soup = BeautifulSoup(res.text, 'lxml')

names = soup.select('h2')
ages = soup.select('div.author.clearfix > div')
# TODO: the author's gender should be collected here as well; a working
# selector for it has not been found yet.
contents = soup.select('a.contentHerf > div > span')
stats_votes = soup.select('div.stats > span.stats-vote')
comments = soup.select("span a i")

# zip() walks the five result lists in lockstep and stops at the shortest.
# NOTE(review): each selector is matched independently of the others, so a
# post that lacks one element would shift the columns of later rows - verify
# against the live markup.
rows = [
    {
        'name': name.get_text(),
        'age': age.get_text(),
        'content': content.get_text(),
        # Drop the "好笑" label text so only the remainder is stored.
        'stats_vote': stats_vote.get_text().replace('好笑', ''),
        'comment': comment.get_text(),
    }
    for name, age, content, stats_vote, comment in zip(
        names, ages, contents, stats_votes, comments
    )
]
print(len(rows))

# Column order of the CSV header.
fieldnames = ['name', 'age', 'content', 'stats_vote', 'comment']

# newline='' lets the csv module control line endings (otherwise blank lines
# appear between rows on Windows). An explicit UTF-8 encoding with BOM avoids
# UnicodeEncodeError under a non-UTF-8 default locale and lets spreadsheet
# tools detect the encoding.
with open('dst.csv', 'w', newline='', encoding='utf-8-sig') as dstfile:
    writer = csv.DictWriter(dstfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)
# No explicit close(): the with-statement already closed the file.
这是我写好上述代码后运行的结果……
道生一,一生二,二生三,三生万物!