所用工具:requests库,BeautifulSoup库
编程语言:Python
软件版本:Python 3
本次爬取的页面是在京东以“书包”为关键词搜索得到的结果页,网址为https://search.jd.com/Search?keyword=%E4%B9%A6%E5%8C%85&enc=utf-8&wq=%E4%B9%A6%E5%8C%85&pvid=cacd21b19bb746d4927aae1ea15028e2
下图为搜索出来的结果:
分析网页源代码,找到商品标题(class为"p-name p-name-type-2"的div)和价格(class为"p-price"的div)所在的部分,然后据此写出代码
代码如下:
import requests
from bs4 import BeautifulSoup
def gettext(url):
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text on success, or the sentinel string "访问失败" when
        the request fails (connection problem, timeout, or an HTTP error
        status).
    """
    try:
        # A timeout keeps the script from hanging forever on a stalled
        # connection; requests has no default timeout.
        r = requests.get(url, timeout=30)
        # Must be *called*: the bare attribute reference never raised, so
        # 4xx/5xx error pages were returned as if they were valid content.
        r.raise_for_status()
        r.encoding = 'utf-8'
        return r.text
    except requests.RequestException:
        # RequestException is the base class of every error requests raises.
        return "访问失败"
def getinfo(text, title, price):
    """Extract product titles and prices from a search-result page.

    Titles are read from ``div`` elements with class
    ``"p-name p-name-type-2"`` (the text of ``a > em``) and prices from
    ``div`` elements with class ``"p-price"`` (the text of ``strong > i``).
    The two result sets are paired positionally.

    Args:
        text: HTML source of the page.
        title: List that receives the product titles (mutated in place).
        price: List that receives the product prices (mutated in place).

    Returns:
        None. Prints "数据获取失败" and stops at the first entry whose
        expected tags are missing; entries collected before that are kept.
    """
    try:
        soup = BeautifulSoup(text, "html.parser")
        names = soup.find_all("div", class_="p-name p-name-type-2")
        prices = soup.find_all("div", class_="p-price")
        # zip() stops at the shorter result set, so a missing price block
        # can no longer trigger an IndexError half-way through an entry.
        for name_div, price_div in zip(names, prices):
            # Resolve both values before appending either one, so that
            # `title` and `price` always stay the same length.
            name_text = name_div.a.em.text
            price_text = price_div.strong.i.text
            title.append(name_text)
            price.append(price_text)
    except (AttributeError, TypeError):
        # AttributeError: an expected child tag is absent (lookup gave None).
        # TypeError: `text` was not something BeautifulSoup can parse.
        print("数据获取失败")
def printinfo(namess, pricess):
    """Print a numbered table of prices and product names.

    Each row holds the 1-based position, the price, and the name,
    separated by tabs and centred in columns of width 3, 8 and 50.

    Args:
        namess: Sequence of product names; one row is printed per name.
        pricess: Sequence of prices, indexed in parallel with ``namess``.
    """
    row_template = "{0:^3}\t{1:^8}\t{2:^50}"
    for position, product_name in enumerate(namess, start=1):
        # Index the prices explicitly so that a too-short price list still
        # raises IndexError instead of silently truncating the table.
        print(row_template.format(position, pricess[position - 1], product_name))
def main():
    """Fetch the search page, extract titles and prices, and print them."""
    search_url = "https://search.jd.com/Search?keyword=%E4%B9%A6%E5%8C%85&enc=utf-8&wq=%E4%B9%A6%E5%8C%85&pvid=f868df094a8e4b5aaeb08d8ab0d906b2"
    # getinfo() fills these two lists in place.
    product_names, product_prices = [], []
    page_source = gettext(search_url)
    getinfo(page_source, product_names, product_prices)
    printinfo(product_names, product_prices)
# Script entry point: runs the scraper unconditionally when this module is loaded.
main()