# Python爬虫实现豆瓣图书搜索 (Douban book search implemented with a Python web crawler)

import requests
import time
from bs4 import BeautifulSoup
import re

# Module-level accumulator shared across pages: parse_page appends one
# [title, info] list per book, so its length is the running total of books.
book=[]
def search(url, retries=3, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        retries: Number of extra attempts made after a failed request.
            Negative values are treated as 0 (a single attempt).
        timeout: Seconds to wait on the server before an attempt is
            considered failed.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.RequestException: When every attempt fails; the error of
            the final attempt is re-raised.
    """
    attempts = max(retries, 0) + 1
    # A bounded loop instead of self-recursion: every successful attempt
    # returns its text to the caller, and a permanently failing URL ends
    # with the real network error rather than a RecursionError.
    for attempt in range(attempts):
        try:
            # Without a timeout a stalled connection would block forever.
            return requests.get(url, timeout=timeout).text
        except requests.RequestException:
            # Only network-level failures are retried; KeyboardInterrupt
            # and programming errors propagate immediately.
            if attempt == attempts - 1:
                raise

# Info text attached to each search result.
# NOTE(review): the markup inside this pattern was lost (HTML tags were
# stripped from the listing, leaving only '\n(.*?)\n'); '<div class="pub">'
# is a reconstruction -- confirm against a live Douban tag page.
_INFO_PATTERN = re.compile(r'<div class="pub">(.*?)</div>', re.S)
# Book titles are taken from title="..." attributes.
_TITLE_PATTERN = re.compile(r'title="(.*?)"', re.S)


def parse_page(url, k):
    """Fetch one result page, store its books in ``book`` and print them.

    Args:
        url: Address of the result page to download via ``search``.
        k: Zero-based page index. Kept for interface compatibility; the
            print offset is taken from the accumulated list itself, so a
            page with fewer entries than expected cannot misalign output.

    Raises:
        SystemExit: When the page contains no titles. The end-of-crawl
            message and the total number of collected books are printed
            first.
    """
    # A missing page (None) is handled like an empty one so the crawl ends
    # cleanly instead of failing inside the regex engine.
    html = search(url) or ''
    book_info = _INFO_PATTERN.findall(html)
    book_name = _TITLE_PATTERN.findall(html)

    if not book_name:
        print('爬取结束')
        print('共' + str(len(book)) + '本')
        raise SystemExit

    first_new = len(book)
    # zip pairs every title with its info text and stops at the shorter
    # list, so the last book is kept and a length mismatch cannot raise
    # IndexError.
    for name, info in zip(book_name, book_info):
        book.append([name, info.strip()])

    # Print exactly the books added by this page, numbered globally.
    for i in range(first_new, len(book)):
        print('第' + str(i + 1) + '本')
        print('书名:《' + book[i][0] + '》' + ' 图书信息:' + book[i][1])
        print('-----------------------------------------------------')


def main(k, sname, start):
    """Build the Douban tag URL for one result page and process it.

    Args:
        k: Zero-based page index, forwarded to ``parse_page``.
        sname: Tag / search text entered by the user.
        start: Offset of the first result on the requested page.
    """
    from urllib.parse import quote

    # Percent-encode the user's text so characters such as '/', '?' or '#'
    # cannot change the structure of the URL.
    url = ('https://book.douban.com/tag/' + quote(sname, safe='')
           + '?start=' + str(start) + '&type=T')
    parse_page(url, k)


if __name__ == '__main__':
    sname = input("请输入图书信息:")
    print('-------------------------开始爬取-------------------------')
    time.sleep(1)
    # At most 10 pages are requested, with offsets stepping by 20;
    # parse_page exits the program earlier once a page has no titles.
    for i in range(10):
        main(i, sname, start=i * 20)

 

# 你可能感兴趣的:(python,网络爬虫) -- blog footer: "You may also be interested in" (tags: python, web crawler)