豆瓣250

from bs4 import BeautifulSoup
import requests
import re
# Base URL of the Douban "Top 250 books" listing; pages are addressed with
# a ``?start=<offset>`` query parameter.
link = "https://book.douban.com/top250"
# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'
}

PAGE_COUNT = 10        # number of listing pages to fetch
PAGE_SIZE = 25         # step of the ``start`` offset between pages
REQUEST_TIMEOUT = 10   # seconds; without a timeout requests.get may block forever
OUTPUT_PATH = r"C:\Users\Administrator\Desktop\1.txt"

# (label, record key) pairs in output order.  The labels are runtime output
# and are kept exactly as the script has always emitted them.
# NOTE(review): the second label reads "book info link" but the value is the
# cover image's ``src``; the title link's ``href`` may have been intended --
# confirm before changing either side.
RECORD_FIELDS = (
    ("书名:", "title"),
    ("图书信息链接:", "cover_src"),
    ("作者:", "author"),
    ("出版社:", "publisher"),
    ("出版日期:", "pub_date"),
    ("价格", "price"),
    ("评分:", "rating"),
    ("评分人数:", "votes"),
)


def parse_publication_info(text: str) -> tuple:
    """Split a "/"-separated publication line into its four fields.

    The last three fields are taken as publisher, publication date and
    price; everything before them is re-joined with "/" as the author part
    (so translators etc. stay together).  Fields are returned exactly as
    split, i.e. inner whitespace around the separators is kept.

    Lines with fewer than three fields are padded on the left with empty
    strings instead of raising ``IndexError``.

    Returns:
        ``(author, publisher, pub_date, price)`` as strings.
    """
    parts = text.strip().split("/")
    if len(parts) < 3:
        # Pad on the left so the trailing fields keep their meaning.
        parts = [""] * (3 - len(parts)) + parts
    author = "/".join(parts[:-3])
    publisher, pub_date, price = parts[-3], parts[-2], parts[-1]
    return author, publisher, pub_date, price


def format_book_lines(book: dict) -> list:
    """Return the labelled output lines (without newlines) for one record."""
    return ["{}{}".format(label, book[key]) for label, key in RECORD_FIELDS]


def node_text(tag) -> str:
    """Return ``tag.text``, or an empty string when the element is missing."""
    return tag.text if tag is not None else ""


def extract_book(row) -> dict:
    """Collect the fields of one ``tr.item`` row into a dict.

    Every element is looked up inside ``row`` itself, so a row that lacks an
    element (for example a rating) yields an empty value for that field
    rather than shifting the fields of all following rows.
    """
    title_tag = row.select_one("td[valign='top'] div.pl2 a")
    cover_tag = row.select_one("td[valign='top'] a img")
    info_tag = row.select_one("td[valign='top'] p.pl")
    rating_tag = row.select_one("td[valign='top'] div.star span.rating_nums")
    votes_tag = row.select_one("td[valign='top'] div.star span.pl")

    author, publisher, pub_date, price = parse_publication_info(node_text(info_tag))
    return {
        "title": title_tag.get("title", "").strip() if title_tag is not None else "",
        "cover_src": cover_tag.get("src", "") if cover_tag is not None else "",
        "author": author,
        "publisher": publisher,
        "pub_date": pub_date,
        "price": price,
        "rating": node_text(rating_tag).strip(),
        # Drop the first and last character of the vote-count text before
        # trimming (presumably the surrounding parentheses -- TODO confirm).
        "votes": node_text(votes_tag)[1:-1].strip(),
    }


def main() -> None:
    """Fetch every listing page, print each record and append it to OUTPUT_PATH.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-success HTTP status (instead of silently producing no output).
        OSError: if OUTPUT_PATH cannot be opened for appending.
    """
    separator = "---" * 20
    # Open the output once for the whole run instead of once per record.
    with open(OUTPUT_PATH, "a", encoding="utf-8") as out:
        for page in range(PAGE_COUNT):
            page_url = "{}?start={}".format(link, page * PAGE_SIZE)
            resp = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
            resp.raise_for_status()
            resp.encoding = "utf-8"
            soup = BeautifulSoup(resp.text, "lxml")

            for row in soup.select("tr.item"):
                lines = format_book_lines(extract_book(row))
                print(separator)
                for line in lines:
                    print(line)
                print(separator)
                out.writelines(line + "\n" for line in lines)


if __name__ == "__main__":
    main()

你可能感兴趣的:(豆瓣250)