import json
import re
from urllib.parse import urlencode

import requests
def get_html_text(url):
    """Fetch ``url`` with HTTP GET and return the response body as text.

    Before decoding, the response encoding is replaced by the encoding that
    ``requests`` detects from the content (``apparent_encoding``).

    Args:
        url: Address to request.

    Returns:
        The decoded response body, or an empty string when the request
        fails (connection error, timeout, or an HTTP error status raised by
        ``raise_for_status``).
    """
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException:
        # requests signals failures with RequestException subclasses
        # (ConnectionError, Timeout, HTTPError, ...). They do not derive from
        # RuntimeError, so catching RuntimeError here never returned "".
        return ""
    response.encoding = response.apparent_encoding
    return response.text
# Both patterns capture only the value between the quotes, so no further
# splitting or evaluation of the matched text is needed.
_PRICE_RE = re.compile(r'"view_price":"(-?\d+(?:\.\d+)?)"')
# The title body may contain backslash escapes (including an escaped quote).
_TITLE_RE = re.compile(r'"raw_title":"((?:[^"\\]|\\.)*)"')


def _decode_json_string(raw):
    """Decode the escapes of a JSON string body; return it unchanged on failure."""
    try:
        return json.loads('"' + raw + '"')
    except ValueError:
        return raw


def parse_page(self, html):
    """Extract ``[price, title]`` pairs from page text and append them.

    The page text is searched for ``"view_price":"..."`` and
    ``"raw_title":"..."`` fields. The n-th price is paired with the n-th
    title; surplus entries on either side are ignored.

    Args:
        self: The list that receives the results (a plain list despite the
            name; this is a module-level function, not a method). It is
            modified in place.
        html: Page text to scan. Empty or ``None`` input adds nothing.

    Returns:
        None. Each appended entry is ``[price, title]`` with both values as
        strings.
    """
    if not html:
        return
    prices = _PRICE_RE.findall(html)
    titles = _TITLE_RE.findall(html)
    # zip stops at the shorter list, so a page with unequal numbers of prices
    # and titles cannot raise IndexError.
    for price, raw_title in zip(prices, titles):
        # The scraped text is untrusted: decode it as a JSON string instead of
        # passing it to eval().
        self.append([price, _decode_json_string(raw_title)])
def print_goods_list(data):
    """Print the collected goods as a tab-separated table on stdout.

    A header row is printed first, followed by one row per entry of
    ``data`` numbered from 1.

    Args:
        data: Iterable of indexable records; element 0 is printed in the
            price column and element 1 in the name column.
    """
    row_template = "{:4}\t{:8}\t{:16}"
    header = row_template.format("序号", "价格", "商品名称")
    print(header)
    for number, record in enumerate(data, start=1):
        row = row_template.format(number, record[0], record[1])
        print(row)
def main(search_text='鞋', depth=3):
    """Fetch several search result pages and print the goods found.

    Args:
        search_text: Search keyword sent as the ``q`` query parameter.
        depth: Number of result pages to fetch.

    For each page the ``s`` query parameter is set to ``44 * page_index``
    (presumably 44 results per page; confirm against the site). Pages that
    fail to download or parse are skipped, and whatever was collected is
    printed at the end.
    """
    information_list = []
    for page_index in range(depth):
        # urlencode joins the parameters with '&' and percent-encodes the
        # keyword. The previous '$s=' separator made the offset part of the
        # search text, so every iteration requested the same first page.
        query = urlencode({'q': search_text, 's': 44 * page_index})
        url = 'https://s.taobao.com/search?' + query
        try:
            html = get_html_text(url)
            parse_page(information_list, html)
        except (requests.RequestException, RuntimeError):
            # Best effort: one bad page must not abort the remaining ones.
            continue
    print_goods_list(information_list)
# Script entry point: runs the scraper whenever this module is executed.
# NOTE(review): this call also runs on import; consider guarding it with
# `if __name__ == "__main__":` if the module is ever imported elsewhere.
main()