# Python 当当图书信息抓取

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import time
#当当图书信息抓取
def getdangdang(isbn):
    """Search Dangdang for a keyword and collect the listed books' details.

    Sends a GET request to ``http://search.dangdang.com/`` with ``isbn`` as
    the search key, then reads every ``<li>`` inside the first
    ``<ul class="bigimg">`` of the returned page.

    Args:
        isbn: Search keyword (normally an ISBN string).

    Returns:
        A tuple ``(titles, imgsrcs, prices)`` of three lists. The lists are
        index-aligned: entry ``i`` of each list describes the same result.
        ``titles`` holds each image's ``alt`` text, ``imgsrcs`` the image
        URL (``data-original`` if present, otherwise ``src`` when it
        contains ``http``), and ``prices`` the text of the
        ``search_now_price`` span with the yen sign removed. An empty string
        is used as a placeholder when an image URL or a price is missing.
        All three lists are empty when the page has no result list.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    headers = {'User-Agent': UserAgent().random}
    # Let requests build the query string so the keyword is URL-encoded.
    response = requests.get(
        'http://search.dangdang.com/',
        params={'act': 'input', 'key': isbn},
        headers=headers,
        timeout=10,  # never block forever on an unresponsive server
    )
    # Fail loudly on 4xx/5xx instead of reporting "no results" for an
    # error page.
    response.raise_for_status()

    titles = []
    imgsrcs = []
    prices = []

    soup = BeautifulSoup(response.text, 'lxml')
    result_list = soup.find('ul', {'class': 'bigimg'})
    if result_list is None:
        # No result list on the page: nothing matched the keyword.
        return titles, imgsrcs, prices

    for item in result_list.find_all('li'):
        img = item.find('img')
        if img is None:
            # Without an image tag there is neither a title nor a cover.
            continue

        # Prefer 'data-original'; fall back to 'src' only when it looks
        # like a real URL.
        cover = img.get('data-original')
        if cover is None:
            src = img.get('src')
            cover = src if src and 'http' in src else ''

        price_box = item.find('p', {'class': 'price'})
        price_tag = None
        if price_box is not None:
            price_tag = price_box.find('span', {'class': 'search_now_price'})
        price = ''
        if price_tag is not None:
            price = price_tag.get_text().replace('¥', '')

        # Append to all three lists together so they stay index-aligned.
        titles.append(img.get('alt'))
        imgsrcs.append(cover)
        prices.append(price)

    return titles, imgsrcs, prices

if __name__ == '__main__':
    # Demo run: look up one hard-coded ISBN and print the three result lists.
    print('当当图书信息抓取开始。。。')
    book_titles, cover_urls, book_prices = getdangdang('9787533735609')
    labelled_results = (
        ('titles=', book_titles),
        ('img=', cover_urls),
        ('prices=', book_prices),
    )
    for label, values in labelled_results:
        print(label, values)
    print('当当图书信息抓取结束。。。')

#以上代码仅提供参考交流学习

# 你可能感兴趣的:(Python,爬虫)