Python 爬取豆瓣读书任意标签下的书籍信息

import sys

import requests
from bs4 import BeautifulSoup
# Crawl Douban Books tag listing pages and print every book's title and link.
#
# For each tag, the listing is requested at offsets 0, 20, ..., 980 via the
# ``start`` query parameter, and one line "<title text> <href>" is printed to
# stdout for every matching book anchor found on the page.
BASE_URL = 'https://book.douban.com/tag/'
START_STEP = 20       # distance between consecutive ``start`` offsets
START_LIMIT = 1000    # exclusive upper bound, so the last offset is 980
REQUEST_TIMEOUT = 10  # seconds; requests has no default timeout and could hang forever
BOOK_LINK_SELECTOR = '#subject_list > ul > li > div.info > h2 > a'

# Tag names are kept percent-encoded because they are concatenated into the
# request URL unchanged.
for tag in [
    '%E8%AE%A1%E7%AE%97%E6%9C%BA', '%e5%b0%8f%e8%af%b4', '%e5%a4%96%e5%9b%bd%e6%96%87%e5%ad%a6',
    '%e6%96%87%e5%ad%a6', '%e9%9a%8f%e7%ac%94', '%e4%b8%ad%e5%9b%bd%e6%96%87%e5%ad%a6',
    '%e7%bb%8f%e5%85%b8', '%e6%97%a5%e6%9c%ac%e6%96%87%e5%ad%a6',
    '%e6%95%a3%e6%96%87', '%e6%9d%91%e4%b8%8a%e6%98%a5%e6%a0%91',
    '%e8%af%97%e6%ad%8c', '%e7%ab%a5%e8%af%9d', '%e5%84%bf%e7%ab%a5%e6%96%87%e5%ad%a6',
    '%e5%8f%a4%e5%85%b8%e6%96%87%e5%ad%a6', '%e7%8e%8b%e5%b0%8f%e6%b3%a2',
    '%e6%9d%82%e6%96%87', '%e5%90%8d%e8%91%97', '%e4%bd%99%e5%8d%8e',
    '%e5%bc%a0%e7%88%b1%e7%8e%b2', '%e5%bd%93%e4%bb%a3%e6%96%87%e5%ad%a6',
    '%e9%92%b1%e9%92%9f%e4%b9%a6', '%e5%a4%96%e5%9b%bd%e5%90%8d%e8%91%97',
    '%e9%b2%81%e8%bf%85', '%e8%af%97%e8%af%8d', '%e8%8c%a8%e5%a8%81%e6%a0%bc',
    '%e6%b8%af%e5%8f%b0', '%e6%9d%9c%e6%8b%89%e6%96%af',
    '%e7%b1%b3%e5%85%b0%c2%b7%e6%98%86%e5%be%b7%e6%8b%89', '%e6%bc%ab%e7%94%bb',
    '%e6%8e%a8%e7%90%86',
    # add any other tags here
]:
    for start in range(0, START_LIMIT, START_STEP):
        url = BASE_URL + tag + '?start=' + str(start)
        try:
            web_data = requests.get(url, timeout=REQUEST_TIMEOUT)
            # Treat 4xx/5xx responses as failures instead of silently
            # parsing an error page that contains no book entries.
            web_data.raise_for_status()
        except requests.RequestException as exc:
            # One failed page should not abort the remaining requests;
            # report it on stderr so stdout stays limited to results.
            print('skipping ' + url + ': ' + str(exc), file=sys.stderr)
            continue
        soup = BeautifulSoup(web_data.text, 'lxml')
        for title in soup.select(BOOK_LINK_SELECTOR):
            print(title.get_text(), title.get('href'))

你可能感兴趣的:(python爬取豆瓣读书任意标签下的书籍信息)