python爬虫——淘宝书包

import re
import requests
def getHTMLText(url):
    try:
        headers={
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
              'cookie': 'cna=NQPPFYICwyoCAW8OdtofQqLm; tracknick=tb31551803; tg=0; enc=rmQMsTLREKmqi0wCPiaesTYWq1FQEkZvJR9RxYQe31E%2B8H%2Bgsg29O3QjlWBYNsk5B4hWdVnoyl9FmqezxIlA0A%3D%3D; thw=cn; hng=CN%7Czh-CN%7CCNY%7C156; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; t=8b87e0b54eedb63ec79bf6e58120539b; uc3=lg2=V32FPkk%2Fw0dUvg%3D%3D&nk2=F5RGNw0oDsOWmg%3D%3D&vt3=F8dBxdsbDKQbUlBPQ8E%3D&id2=Vy0Qmj0GGZVBZA%3D%3D; lgc=tb31551803; uc4=id4=0%40VXqe5nOGv2p3QUXK2yfB2w2AQ2Rw&nk4=0%40FY4NAqkeavMG4lO%2Fk5N%2Fb5R2Zw6o; _cc_=URm48syIZQ%3D%3D; mt=ci=118_1; JSESSIONID=790849CCFE2C514ABA531615EB3CA8E8; l=cBOcsO94QbHsxX8kBOCNquI8LPbOSIRAkuPRwCcXi_5dc6L_6w_OoSyK1Fp62jWdtfTB4JuaUM29-etkiKy06Pt-g3fP.; isg=BJ2dqUHwn35VGntR9LG1laqSrHmXutEM-IpwbF9i2fQjFr1IJwrh3GuEQQoQmOnE',
}
        r=requests.get(url,timeout=30,headers=headers)
        r.raise_for_status()
        r.encoding=r.apparent_encoding
        return r.text
    except:
        return""
       
def parsePage(ilt,html):
    try:
        plt=re.findall(r'\"view_price\":\"\d+\.\d*\"',html)
        tlt=re.findall(r'\"raw_title\":\".*?\"',html)
        for i in range(len(plt)):
            price=eval(plt[i].split('\"')[3])
            title=tlt[i].split('\"')[3]
            ilt.append([title,price])
    except:
        print("解析出错")

def printGoodsList(ilt):
    tplt="{:^4}\t{:^8}\t{:^16}"
    
    print(tplt.format("序号","价格","商品名称"))
    count=0
    for g in ilt:
        count=count+1
        print(tplt.format(count,g[0],g[1]))
if __name__ == '__main__' :
    goods="书包"
    depth=2
    start_url="https://s.taobao.com/search?q="+goods
    infoList=[]
    for i in range(depth):
        try:
            url=start_url+"$S="+str(44*i)
            html=getHTMLText(url)
            parsePage(infoList,html)
        except:
            continue
    printGoodsList(infoList)

你可能感兴趣的:(python爬虫)