Python3 爬取淘宝某店铺内所有商品的收藏数、名称、评论数、交易成功数和价格

最近找工作时遇到一家公司给的面试题:写一个脚本,爬取一个店铺内所有商品的淘宝价、累计评论数、交易成功数和宝贝收藏人气。
python3爬淘宝一个店铺内的所有商品'收藏','名字','评论数','交易成功数','价格'_第1张图片
考虑到这只是一个简单的 demo,对下载速度并没有要求,所以也没有使用 IP 池。
脚本执行时会把爬取到的数据处理成一个列表并打印出来。

爬到的数据存入了 CSV 文件。
python3爬淘宝一个店铺内的所有商品'收藏','名字','评论数','交易成功数','价格'_第2张图片
下面把请求函数的代码贴出来(运行前需要先导入 requests、re 和 json)。
因为只用了半天时间,以前也没爬过,所以代码有些潦草。

# Open the shop's search listing and collect the item ids it advertises.
def get_goodsId(page):
    """Fetch one page of the shop's async search listing and extract item ids.

    The request is sent with TLS verification disabled and with urllib3's
    warnings silenced.

    Args:
        page: Page number, passed as the ``pageNo`` query parameter.

    Returns:
        A list with every substring found between ``itemIds=`` and
        ``&source`` in the response text; empty when nothing matches.
        Presumably each entry is a comma-separated run of ids - verify
        against a live response.
    """
    endpoint = "https://lp930428.taobao.com/i/asynSearch.htm"
    # Browser page this request was captured from:
    # https://lp930428.taobao.com/search.htm?spm=a1z10.1-c-s.w5002-21751133133.1.6757669drR9RHI&search=y
    query = {
        "_ksTS": "1571966776769_186",
        "mid": "w-21751133153-0",
        "wid": "21751133153",
        "path": "/search.htm",
        "search": "y",
        "spm": "a1z10.1-c-s.w5002-21751133133.1.6757669drR9RHI",
        "orderType": "newOn_desc",
        "pageNo": page,
    }

    # NOTE(review): the Cookie value is a hard-coded browser session; it
    # presumably expires and has to be refreshed by hand - confirm.
    request_headers = {
        "Host": "lp930428.taobao.com",
        "Cookie": "t=e3ced4867bbbb33ad8e43a6664f1b440; thw=cn; cookie2=1ae89e1b7eb4f38050c2a8484ca7f9e3; _tb_token_=fe3fdd7e7787e; enc=FCtEbwsd4d4qFaGEEU1%2Bh6zQqkT31zH7ZjTng%2ByjgFV1IKn98igFAnRQ80yP1MGcCWXDBuTJBB%2Fv2BN0HrnsFw%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; cna=cfrcFZLIz0UCAXxBtKpIjGyJ; v=0; unb=3342625761; uc3=vt3=F8dByucj%2BPu98CEsPjg%3D&lg2=VFC%2FuZ9ayeYq2g%3D%3D&id2=UNN5E%2BCNxCTmeQ%3D%3D&nk2=symzrp14SFvM8w%3D%3D; csg=e158c6a6; lgc=%5Cu5C0F%5Cu9F99cxcgao; cookie17=UNN5E%2BCNxCTmeQ%3D%3D; dnk=%5Cu5C0F%5Cu9F99cxcgao; skt=5acee91bdd655b99; existShop=MTU3MTk4NzgwNA%3D%3D; uc4=id4=0%40UgQxlJz5L5Tdls4BK%2BsdqbH4MLk8&nk4=0%40sVYTNakktI2J71HcZK%2B0xxDC%2BDng; tracknick=%5Cu5C0F%5Cu9F99cxcgao; _cc_=W5iHLLyFfA%3D%3D; tg=0; _l_g_=Ug%3D%3D; sg=o1f; _nk_=%5Cu5C0F%5Cu9F99cxcgao; cookie1=B0OsmmuZ3XRBlfT4RmnPR0sbhg2pIYLBZ6M6p3hMTnE%3D; uc1=cookie16=U%2BGCWk%2F74Mx5tgzv3dWpnhjPaQ%3D%3D&cookie21=UtASsssmeW6lpyd%2BB%2B3t&cookie15=Vq8l%2BKCLz3%2F65A%3D%3D&existShop=false&pas=0&cookie14=UoTbnKo8UDBHXA%3D%3D&tag=8&lng=zh_CN; mt=ci=0_1; l=dBgSXBauqGXlvNsDBOCwVuI8YdQOAIRAguPRwhApi_5B9_T6fvbOkZkD2E96cjWftzTB4DlUQKv9-etXiDt6Qtk8sxAJvxDc.; isg=BNLSgblL29-ZPicjbaEizx2QI550S9VMW8fzYZwrzgVwr3KphHIYjcTNG0s2304V",
        "accept": "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01",
        "x-requested-with": "XMLHttpRequest",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.2 Safari/605.1.15",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "referer": "https://lp930428.taobao.com/search.htm?spm=a1z10.3-c-s.w4002-21751133153.93.1d6d1c25zjRRJk&_ksTS=1571991134154_186&callback=jsonp187&mid=w-21751133153-0&wid=21751133153&path=%2Fsearch.htm&search=y&pageNo=2",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
    }

    # verify=False would otherwise emit an InsecureRequestWarning per call.
    requests.packages.urllib3.disable_warnings()
    reply = requests.get(endpoint, headers=request_headers, params=query, verify=False)

    # re.S lets the lazy capture run across line breaks in the response.
    return re.findall('itemIds=(.*?)&source', reply.text, re.S)
# Visit the detail page of an item, using an id obtained from the listing.
def get_goodsDetails(i):
    """Download an item's detail page and pull out its title and sign token.

    The request is sent with TLS verification disabled and with urllib3's
    warnings silenced.

    Args:
        i: Item id, substituted into the ``id`` query parameter of the URL.

    Returns:
        A ``(titles, signs)`` tuple of lists: ``titles`` holds each
        ``data-title`` value that follows ``class="tb-main-title"``, and
        ``signs`` holds each substring found between ``sign=`` and ``&``.
        The sign is what the favourites-counter request needs.
    """
    page_url = "https://item.taobao.com/item.htm?spm=a1z10.3-c-s.w4002-21751133153.37.66ab1c25jaTgVP&id={}".format(i)

    # NOTE(review): the Cookie value is a hard-coded browser session; it
    # presumably expires and has to be refreshed by hand - confirm.
    request_headers = {
        "Host": "item.taobao.com",
        "Cookie": "t=e3ced4867bbbb33ad8e43a6664f1b440; thw=cn; cookie2=1ae89e1b7eb4f38050c2a8484ca7f9e3; _tb_token_=fe3fdd7e7787e; enc=FCtEbwsd4d4qFaGEEU1%2Bh6zQqkT31zH7ZjTng%2ByjgFV1IKn98igFAnRQ80yP1MGcCWXDBuTJBB%2Fv2BN0HrnsFw%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; cna=cfrcFZLIz0UCAXxBtKpIjGyJ; v=0; unb=3342625761; uc3=vt3=F8dByucj%2BPu98CEsPjg%3D&lg2=VFC%2FuZ9ayeYq2g%3D%3D&id2=UNN5E%2BCNxCTmeQ%3D%3D&nk2=symzrp14SFvM8w%3D%3D; csg=e158c6a6; lgc=%5Cu5C0F%5Cu9F99cxcgao; cookie17=UNN5E%2BCNxCTmeQ%3D%3D; dnk=%5Cu5C0F%5Cu9F99cxcgao; skt=5acee91bdd655b99; existShop=MTU3MTk4NzgwNA%3D%3D; uc4=id4=0%40UgQxlJz5L5Tdls4BK%2BsdqbH4MLk8&nk4=0%40sVYTNakktI2J71HcZK%2B0xxDC%2BDng; tracknick=%5Cu5C0F%5Cu9F99cxcgao; _cc_=W5iHLLyFfA%3D%3D; tg=0; _l_g_=Ug%3D%3D; sg=o1f; _nk_=%5Cu5C0F%5Cu9F99cxcgao; cookie1=B0OsmmuZ3XRBlfT4RmnPR0sbhg2pIYLBZ6M6p3hMTnE%3D; uc1=cookie16=U%2BGCWk%2F74Mx5tgzv3dWpnhjPaQ%3D%3D&cookie21=UtASsssmeW6lpyd%2BB%2B3t&cookie15=Vq8l%2BKCLz3%2F65A%3D%3D&existShop=false&pas=0&cookie14=UoTbnKo8UDBHXA%3D%3D&tag=8&lng=zh_CN; mt=ci=0_1; l=dBgSXBauqGXlvNsDBOCwVuI8YdQOAIRAguPRwhApi_5B9_T6fvbOkZkD2E96cjWftzTB4DlUQKv9-etXiDt6Qtk8sxAJvxDc.; isg=BNLSgblL29-ZPicjbaEizx2QI550S9VMW8fzYZwrzgVwr3KphHIYjcTNG0s2304V",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36",
        "sec-fetch-mode": "navigate",
        "sec-fetch-user": "?1",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
        "sec-fetch-site": "same-site",
        "referer": "https://lp930428.taobao.com/search.htm?spm=a1z10.1-c-s.0.0.21c2669dXxI29C&search=y",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "Postman-Token": "7c7dc56c-6a76-425d-8f60-98609da11052",
    }

    # verify=False would otherwise emit an InsecureRequestWarning per call.
    requests.packages.urllib3.disable_warnings()
    reply = requests.get(page_url, headers=request_headers, verify=False)
    html = reply.text

    # Item name(s) first, then the sign token(s) used to query popularity.
    titles = re.findall('class="tb-main-title".*?data-title="(.*?)"', html)
    signs = re.findall("sign=(.*?)&", html)
    return titles, signs

# Fetch the cumulative review count of an item.
def get_comments(i):
    """Query the rate service for an item's review count.

    The request is sent with TLS verification disabled and with urllib3's
    warnings silenced.

    Args:
        i: Item id, substituted into the ``itemId`` query parameter and into
            the ``referer`` header.

    Returns:
        A list of digit strings: for every occurrence of ``count`` in the
        response text, the first run of digits that follows it. Empty when
        nothing matches.
    """
    url = "https://rate.taobao.com/detailCount.do?_ksTS=1571993971073_117&callback=jsonp118&itemId={}".format(i)

    # NOTE(review): the Cookie value is a hard-coded browser session; it
    # presumably expires and has to be refreshed by hand - confirm.
    headers = {
        'Host': "rate.taobao.com",
        'Cookie': "t=e3ced4867bbbb33ad8e43a6664f1b440; thw=cn; cookie2=1ae89e1b7eb4f38050c2a8484ca7f9e3; _tb_token_=fe3fdd7e7787e; enc=FCtEbwsd4d4qFaGEEU1%2Bh6zQqkT31zH7ZjTng%2ByjgFV1IKn98igFAnRQ80yP1MGcCWXDBuTJBB%2Fv2BN0HrnsFw%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; cna=cfrcFZLIz0UCAXxBtKpIjGyJ; v=0; unb=3342625761; uc3=vt3=F8dByucj%2BPu98CEsPjg%3D&lg2=VFC%2FuZ9ayeYq2g%3D%3D&id2=UNN5E%2BCNxCTmeQ%3D%3D&nk2=symzrp14SFvM8w%3D%3D; csg=e158c6a6; lgc=%5Cu5C0F%5Cu9F99cxcgao; cookie17=UNN5E%2BCNxCTmeQ%3D%3D; dnk=%5Cu5C0F%5Cu9F99cxcgao; skt=5acee91bdd655b99; existShop=MTU3MTk4NzgwNA%3D%3D; uc4=id4=0%40UgQxlJz5L5Tdls4BK%2BsdqbH4MLk8&nk4=0%40sVYTNakktI2J71HcZK%2B0xxDC%2BDng; tracknick=%5Cu5C0F%5Cu9F99cxcgao; _cc_=W5iHLLyFfA%3D%3D; tg=0; _l_g_=Ug%3D%3D; sg=o1f; _nk_=%5Cu5C0F%5Cu9F99cxcgao; cookie1=B0OsmmuZ3XRBlfT4RmnPR0sbhg2pIYLBZ6M6p3hMTnE%3D; uc1=cookie16=U%2BGCWk%2F74Mx5tgzv3dWpnhjPaQ%3D%3D&cookie21=UtASsssmeW6lpyd%2BB%2B3t&cookie15=Vq8l%2BKCLz3%2F65A%3D%3D&existShop=false&pas=0&cookie14=UoTbnKo8UDBHXA%3D%3D&tag=8&lng=zh_CN; mt=ci=0_1; l=dBgSXBauqGXlvNsDBOCwVuI8YdQOAIRAguPRwhApi_5B9_T6fvbOkZkD2E96cjWftzTB4DlUQKv9-etXiDt6Qtk8sxAJvxDc.; isg=BNLSgblL29-ZPicjbaEizx2QI550S9VMW8fzYZwrzgVwr3KphHIYjcTNG0s2304V",
        'sec-fetch-mode': "no-cors",
        'user-agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36",
        'accept': "*/*",
        'sec-fetch-site': "same-site",
        'referer': "https://item.taobao.com/item.htm?spm=a1z10.3-c-s.w4002-21751133153.48.10971c25BMetQJ&id={}".format(i),
        'accept-language': "zh-CN,zh;q=0.9",
        'cache-control': "no-cache",
    }

    # verify=False would otherwise emit an InsecureRequestWarning per call.
    requests.packages.urllib3.disable_warnings()
    response = requests.request("GET", url, headers=headers, verify=False)

    # Raw string: "\d" in a plain literal is an invalid escape sequence
    # (SyntaxWarning on Python 3.12+). The pattern itself is unchanged.
    return re.findall(r'count.*?(\d+)', response.text)
# Fetch the Taobao price and the number of completed transactions.
def get_pice(i):
    """Read an item's sold count and price from the detail-data endpoint.

    The request is sent with TLS verification disabled and with urllib3's
    warnings silenced. The response body is parsed as JSON.

    Args:
        i: Item id, sent as the ``itemId`` query parameter and substituted
            into the ``Referer`` header.

    Returns:
        A ``(trading, pice)`` tuple: ``data.soldQuantity.confirmGoodsCount``
        and the ``price`` of the first entry in
        ``data.promotion.promoData.def`` from the decoded response.

    Raises:
        KeyError, IndexError: if the decoded response lacks one of the
            fields above.
    """
    endpoint = "https://detailskip.taobao.com/service/getData/1/p1/item/detail/sib.htm"

    # NOTE(review): sellerId is fixed, so this presumably only works for
    # items of that one seller - confirm.
    query = {
        "itemId": i,
        "sellerId": "1091756444",
        "modules": "dynStock,qrcode,viewer,price,duty,xmpPromotion,delivery,activity,fqg,zjys,couponActivity,soldQuantity,page,originalPrice,tradeContract",
    }

    # NOTE(review): the Cookie value is a hard-coded browser session; it
    # presumably expires and has to be refreshed by hand - confirm.
    request_headers = {
        "Sec-Fetch-Mode": "no-cors",
        "Cookie": "t=e3ced4867bbbb33ad8e43a6664f1b440; thw=cn; cookie2=1ae89e1b7eb4f38050c2a8484ca7f9e3; _tb_token_=fe3fdd7e7787e; enc=FCtEbwsd4d4qFaGEEU1%2Bh6zQqkT31zH7ZjTng%2ByjgFV1IKn98igFAnRQ80yP1MGcCWXDBuTJBB%2Fv2BN0HrnsFw%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; cna=cfrcFZLIz0UCAXxBtKpIjGyJ; v=0; unb=3342625761; uc3=vt3=F8dByucj%2BPu98CEsPjg%3D&lg2=VFC%2FuZ9ayeYq2g%3D%3D&id2=UNN5E%2BCNxCTmeQ%3D%3D&nk2=symzrp14SFvM8w%3D%3D; csg=e158c6a6; lgc=%5Cu5C0F%5Cu9F99cxcgao; cookie17=UNN5E%2BCNxCTmeQ%3D%3D; dnk=%5Cu5C0F%5Cu9F99cxcgao; skt=5acee91bdd655b99; existShop=MTU3MTk4NzgwNA%3D%3D; uc4=id4=0%40UgQxlJz5L5Tdls4BK%2BsdqbH4MLk8&nk4=0%40sVYTNakktI2J71HcZK%2B0xxDC%2BDng; tracknick=%5Cu5C0F%5Cu9F99cxcgao; _cc_=W5iHLLyFfA%3D%3D; tg=0; _l_g_=Ug%3D%3D; sg=o1f; _nk_=%5Cu5C0F%5Cu9F99cxcgao; cookie1=B0OsmmuZ3XRBlfT4RmnPR0sbhg2pIYLBZ6M6p3hMTnE%3D; uc1=cookie16=U%2BGCWk%2F74Mx5tgzv3dWpnhjPaQ%3D%3D&cookie21=UtASsssmeW6lpyd%2BB%2B3t&cookie15=Vq8l%2BKCLz3%2F65A%3D%3D&existShop=false&pas=0&cookie14=UoTbnKo8UDBHXA%3D%3D&tag=8&lng=zh_CN; mt=ci=0_1; l=dBgSXBauqGXlvNsDBOCwVuI8YdQOAIRAguPRwhApi_5B9_T6fvbOkZkD2E96cjWftzTB4DlUQKv9-etXiDt6Qtk8sxAJvxDc.; isg=BNLSgblL29-ZPicjbaEizx2QI550S9VMW8fzYZwrzgVwr3KphHIYjcTNG0s2304V",
        "accept": "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01",
        "Referer": "https://item.taobao.com/item.htm?spm=a1z10.3-c-s.w4002-21751133153.33.7ffe1c25zHNgcL&id={}".format(i),
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36",
        "cache-control": "no-cache",
    }

    # verify=False would otherwise emit an InsecureRequestWarning per call.
    requests.packages.urllib3.disable_warnings()
    reply = requests.get(endpoint, headers=request_headers, params=query, verify=False)

    item_data = json.loads(reply.text)['data']
    # Completed-transaction count first, then the price.
    sold_count = item_data['soldQuantity']['confirmGoodsCount']
    price = item_data['promotion']['promoData']['def'][0]['price']
    return sold_count, price
# Fetch how many users have added the item to their favourites.
def get_collection(i,sing):
    """Query the counter service for an item's favourites count.

    The request is sent with TLS verification disabled and with urllib3's
    warnings silenced. The response is expected to wrap a dict literal in
    parentheses (JSONP style, ``callback=jsonp104``).

    Args:
        i: Item id, substituted into the counter keys and the ``referer``
            header.
        sing: Value for the ``sign`` query parameter, as scraped from the
            item's detail page.

    Returns:
        The value stored under ``ICCP_1_<i>`` in the decoded payload.

    Raises:
        IndexError: if the response contains no parenthesised payload.
        KeyError: if the payload has no ``ICCP_1_<i>`` entry.
        ValueError, SyntaxError: if the payload is not a plain literal.
    """
    # Function-scope import so the block is self-contained.
    import ast

    url = "https://count.taobao.com/counter3?_ksTS=1571993971066_103&callback=jsonp104&inc=ICVT_7_{}&sign={}&keys=DFX_200_1_{},ICVT_7_{},ICCP_1_{},SCCP_2_101463431".format(i, sing, i, i, i)

    # NOTE(review): the Cookie value is a hard-coded browser session; it
    # presumably expires and has to be refreshed by hand - confirm.
    headers = {
        'Host': "count.taobao.com",
        'Cookie': "t=e3ced4867bbbb33ad8e43a6664f1b440; thw=cn; cookie2=1ae89e1b7eb4f38050c2a8484ca7f9e3; _tb_token_=fe3fdd7e7787e; enc=FCtEbwsd4d4qFaGEEU1%2Bh6zQqkT31zH7ZjTng%2ByjgFV1IKn98igFAnRQ80yP1MGcCWXDBuTJBB%2Fv2BN0HrnsFw%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; cna=cfrcFZLIz0UCAXxBtKpIjGyJ; v=0; unb=3342625761; uc3=vt3=F8dByucj%2BPu98CEsPjg%3D&lg2=VFC%2FuZ9ayeYq2g%3D%3D&id2=UNN5E%2BCNxCTmeQ%3D%3D&nk2=symzrp14SFvM8w%3D%3D; csg=e158c6a6; lgc=%5Cu5C0F%5Cu9F99cxcgao; cookie17=UNN5E%2BCNxCTmeQ%3D%3D; dnk=%5Cu5C0F%5Cu9F99cxcgao; skt=5acee91bdd655b99; existShop=MTU3MTk4NzgwNA%3D%3D; uc4=id4=0%40UgQxlJz5L5Tdls4BK%2BsdqbH4MLk8&nk4=0%40sVYTNakktI2J71HcZK%2B0xxDC%2BDng; tracknick=%5Cu5C0F%5Cu9F99cxcgao; _cc_=W5iHLLyFfA%3D%3D; tg=0; _l_g_=Ug%3D%3D; sg=o1f; _nk_=%5Cu5C0F%5Cu9F99cxcgao; cookie1=B0OsmmuZ3XRBlfT4RmnPR0sbhg2pIYLBZ6M6p3hMTnE%3D; uc1=cookie16=U%2BGCWk%2F74Mx5tgzv3dWpnhjPaQ%3D%3D&cookie21=UtASsssmeW6lpyd%2BB%2B3t&cookie15=Vq8l%2BKCLz3%2F65A%3D%3D&existShop=false&pas=0&cookie14=UoTbnKo8UDBHXA%3D%3D&tag=8&lng=zh_CN; mt=ci=0_1; _m_h5_tk=f1425a910c31b9877a2b467419fa4bfc_1572002857779; _m_h5_tk_enc=25325b15bf3ab20673732ed53229f569; l=dBx1kOlIq1-Yna4UBOCidweULX7OSIRAguWK8C09i_5CW6Ls_U7OkZlM8Fp6VjWfGeTB4DlUQKv9-etXmxeT62k8sxAJwxDc.; isg=BJiYNvaHccm3pl2NmLBHZTCVacCDfP38DcXpT9KJ5FOGbThXepHMm65LoeV4_bTj",
        'sec-fetch-mode': "no-cors",
        'user-agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36",
        'accept': "*/*",
        'sec-fetch-site': "same-site",
        'referer': "https://item.taobao.com/item.htm?spm=a1z10.3-c-s.w4002-21751133153.39.56a81c25HPoZfb&id={}".format(i),
        'accept-language': "zh-CN,zh;q=0.9",
        'cache-control': "no-cache",
        'Postman-Token': "1420a7da-b2b9-4249-829b-edddeb105432"
    }

    # verify=False would otherwise emit an InsecureRequestWarning per call.
    requests.packages.urllib3.disable_warnings()
    response = requests.request("GET", url, headers=headers, verify=False)

    # Raw string: "\(" in a plain literal is an invalid escape sequence
    # (SyntaxWarning on Python 3.12+). The pattern itself is unchanged.
    payload = re.findall(r'\((.*?)\)', response.text)[0]

    # SECURITY: the payload comes from the network, so it must never be run
    # through eval(). literal_eval accepts the same dict/str/number literals
    # but refuses to execute arbitrary expressions.
    counters = ast.literal_eval(payload)
    return counters["ICCP_1_{}".format(i)]

潦草是潦草了些,希望对大家有所帮助

你可能感兴趣的:(爬虫)