Python学习之爬取淘宝商品信息

import re
import requests
def gethtmltext(url):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch (here: a Taobao search page).

    Returns:
        The decoded response text, or the sentinel string ``"产生异常"``
        when the request fails (connection error, timeout, or an HTTP
        error status).
    """
    try:
        r = requests.get(url, timeout=30)
        # Turn 4xx/5xx responses into an exception handled below.
        r.raise_for_status()
        # Decode with the encoding guessed from the body content.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures are mapped to the sentinel; unrelated
        # errors (e.g. KeyboardInterrupt) are no longer swallowed.
        return "产生异常"
def resolveGoodsList(urlstr, glist):
    """Extract product names and prices from page source into ``glist``.

    The page source is scanned for ``"raw_title":"..."`` and
    ``"view_price":"..."`` fields. The n-th title is paired with the n-th
    price and appended to ``glist`` as ``[name, price]`` (both strings).

    Args:
        urlstr: Page source text to scan.
        glist: List that receives the ``[name, price]`` pairs; mutated
            in place.

    Returns:
        None. If ``urlstr`` is not a string, a message is printed and
        ``glist`` is left unchanged.
    """
    import json  # Local import: the module header does not import json.

    if not isinstance(urlstr, str):
        # Keep the best-effort contract: report the problem, do not raise.
        print("resolveGoodsList产生异常")
        return

    # Capture only the string bodies. The title pattern tolerates escaped
    # quotes; the price pattern requires a literal decimal point.
    namepat = re.compile(r'"raw_title":"((?:[^"\\]|\\.)*)"')
    pricepat = re.compile(r'"view_price":"(\d+(?:\.\d+)?)"')
    names = namepat.findall(urlstr)
    prices = pricepat.findall(urlstr)

    # zip stops at the shorter list, so a missing title or price can no
    # longer trigger an IndexError.
    for raw_name, price in zip(names, prices):
        try:
            # Decode JSON escapes (\uXXXX, \" ...) without evaluating
            # remote text as Python code.
            name = json.loads('"' + raw_name + '"')
        except ValueError:
            # Not a valid JSON string body: keep the text as found.
            name = raw_name
        glist.append([name, price])


def printGoodsList(glist):
    """Print a numbered table of prices and product names.

    Args:
        glist: Sequence of entries where index 0 is the product name and
            index 1 is the price.
    """
    row_template = "{:4}\t{:8}\t{:16}"
    header = row_template.format("序号", "价格", "商品名称")
    print(header)
    # Rows are numbered starting at 1.
    for position, entry in enumerate(glist, start=1):
        row = row_template.format(position, entry[1], entry[0])
        print(row)


def main(good="书包", deep=2):
    """Search Taobao for ``good``, scan ``deep`` result pages and print them.

    Args:
        good: Search keyword placed in the ``q`` query parameter.
        deep: Number of result pages to request.
    """
    base_url = "https://s.taobao.com/search?q=" + good
    glist = []
    for page in range(deep):
        # The ``s`` query parameter advances by 44 per page — presumably
        # the number of items shown per page; TODO confirm.
        url = base_url + '&s=' + str(44 * page)
        htext = gethtmltext(url)
        # Appends the [name, price] pairs found on this page to glist.
        resolveGoodsList(htext, glist)
    printGoodsList(glist)


# Run the scraper. NOTE: this call is at module level, so it also executes
# whenever this file is imported, not only when it is run as a script.
main()

你可能感兴趣的:(Python)