import json
import re

import requests
def gethtmltext(url):
    """Fetch the HTML source of a (Taobao search) page.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded response body, or the sentinel string "产生异常" when the
        request fails (connection error, timeout, non-2xx status, bad URL).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # The declared charset is not trusted; use the encoding requests
        # detects from the body so the text decodes correctly.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-level failures are swallowed, so KeyboardInterrupt and
        # programming errors are no longer hidden as a bare except would.
        return "产生异常"
def resolveGoodsList(urlstr, glist):
    """Extract item names and prices from page source and append them to glist.

    The page is scanned for ``"raw_title":"..."`` and ``"view_price":"..."``
    fragments; the n-th title is paired with the n-th price.

    Args:
        urlstr: Page source text to scan.
        glist: List that receives one ``[name, price]`` entry per item, in
            page order. Both values are strings.

    Returns:
        None. ``glist`` is modified in place. If ``urlstr`` is not text, an
        error message is printed and ``glist`` is left untouched.
    """
    # The title is captured together with its quotes so it can be decoded as
    # a JSON string literal; the alternation steps over backslash escapes so
    # an escaped quote inside a title does not end the match early.
    name_pattern = re.compile(r'"raw_title":("(?:[^"\\]|\\.)*")')
    # Digits and dots only, so integer prices ("30") match as well as "12.50".
    price_pattern = re.compile(r'"view_price":"([\d.]+)"')
    try:
        raw_names = name_pattern.findall(urlstr)
        prices = price_pattern.findall(urlstr)
    except TypeError:
        print("resolveGoodsList产生异常")
        return
    # zip stops at the shorter list, so a title without a price (or the
    # reverse) cannot raise IndexError and abort the items already parsed.
    for raw_name, price in zip(raw_names, prices):
        try:
            # json.loads replaces eval: it decodes escapes such as \uXXXX
            # without executing text that came from a remote page.
            name = json.loads(raw_name)
        except ValueError:
            # Not a valid JSON string; keep the raw text between the quotes.
            name = raw_name[1:-1]
        glist.append([name, price])
def printGoodsList(glist):
    """Print the collected items as a tab-separated table.

    A header row is printed first, then one row per item holding a 1-based
    sequence number, the price and the item name.

    Args:
        glist: Sequence of ``[name, price]`` entries.
    """
    row_format = "{:4}\t{:8}\t{:16}"
    print(row_format.format("序号", "价格", "商品名称"))
    # enumerate supplies the 1-based row number instead of a manual counter.
    for index, goods in enumerate(glist, start=1):
        print(row_format.format(index, goods[1], goods[0]))
def main(good="书包", deep=2):
    """Search Taobao for a keyword and print the names and prices found.

    Args:
        good: Search keyword. Defaults to "书包", the value that was
            previously hard-coded.
        deep: Number of result pages to fetch. Defaults to 2.
    """
    # Step of the `s` offset parameter between consecutive pages
    # (presumably the number of results per page -- TODO confirm).
    page_step = 44
    base_url = "https://s.taobao.com/search?q=" + good
    glist = []
    for page in range(deep):
        url = base_url + '&s=' + str(page_step * page)
        # On a failed request gethtmltext returns a short sentinel string,
        # which contains no items, so that page simply contributes nothing.
        htext = gethtmltext(url)
        resolveGoodsList(htext, glist)
    printGoodsList(glist)
# Run the scraper when the module is loaded. There is no __main__ guard, so
# this also runs when the file is imported.
main()