淘宝爬虫之搜索商品

本篇使用 Python 爬虫来获取淘宝商品的搜索结果。

import hashlib
import json
import time
from urllib import parse

import requests

# Settings shared by the signing request and the search request below.
# The demo fetches Taobao search results for "iphone".

# Session id; must be obtained after logging in to Taobao
# (normally a 32-character string).
sid = "73dc"

# Application key; a fixed value, always '21646297'.
appKey = "21646297"

# Version identifier of the mobile Taobao client.
ttid = "701186@taobao_android_9.1.0"

# Feature flag matching the API functionality.
x_features = "27"

# Signature protocol version. The original author offers pv 6.3 support
# on request (QQ 951263019).
pv = "6.2"

# Device id; may be any random 44-character string.
deviceId = "AiN8kvbdEkyD1P3CqFhYct1a7PmMab5dj804e192TcrV"

# Taobao uuid; may be any random 24-character string.
utdid = "XcZJFF61gMADAep76BgfX2AD"


# Fetch the signature information for a request payload.
def get_sign(data, timeout=10):
    """Ask the remote signing service to sign a search request.

    Only the MD5 hex digest of ``data`` is transmitted (flagged with
    ``dataMd5='true'``), which keeps the signing request small.

    Args:
        data: The request payload as a string (the JSON text that is later
            sent to the search API).
        timeout: Seconds to wait for the signing service before
            ``requests`` raises a timeout error. Without a timeout,
            ``requests`` would wait indefinitely on an unresponsive server.

    Returns:
        The signing service's reply decoded from JSON. The script in this
        file reads the ``'x-t'`` and ``'x-sign'`` entries from it.
    """
    url = 'http://api.xsign.com/api/sign'  # address of the signing service
    params = {
        'sid': sid,
        # The payload is MD5-hashed before signing to ease data transfer.
        'data': hashlib.md5(data.encode(encoding='UTF-8')).hexdigest(),
        'api': 'mtop.taobao.wsearch.appsearch',  # API being signed: the search endpoint
        'v': '1.0',  # version of that API
        't': str(int(time.time())),  # timestamp, in whole seconds
        'ttid': ttid,
        'utdid': utdid,
        'deviceId': deviceId,
        'x-features': x_features,
        'appKey': appKey,
        'pv': pv,
        'dataMd5': 'true',  # must be 'true' because 'data' above is an MD5 digest
        # Token required by the signing service (32 characters long);
        # per the original author it is issued on request (QQ 951263019).
        'token': '5f40',
    }
    header = {'Content-Type': 'application/json'}  # the body is sent as JSON text
    response = requests.post(
        url,
        data=json.dumps(params),
        headers=header,
        timeout=timeout,
    )
    return response.json()


def fetch_rate(x_t, x_sign, data, timeout=10):
    """Call the ``mtop.taobao.wsearch.appsearch`` gateway and return its JSON reply.

    NOTE(review): the function name suggests ratings/comments, but the
    endpoint requested here is the search API; the name is kept so existing
    callers keep working.

    Args:
        x_t: Value sent in the ``x-t`` header (the script passes the
            ``'x-t'`` entry of the signing result).
        x_sign: Value sent in the ``x-sign`` header (the script passes the
            ``'x-sign'`` entry of the signing result).
        data: Request payload string; it is URL-quoted and appended as the
            ``data`` query parameter. The script passes the same string it
            had signed.
        timeout: Seconds to wait for the gateway before ``requests`` raises
            a timeout error. Without a timeout, ``requests`` would wait
            indefinitely on an unresponsive server.

    Returns:
        The gateway response decoded from JSON.
    """
    header = {
        'x-sid': sid,
        'user-Agent': 'MTOPSDK%2F3.1.1.7+%28Android%3B4.4.2%3BXiaomi%3BMI+6%29',
        'x-appkey': appKey,
        'x-ttid': parse.quote(ttid),
        'x-devid': deviceId,
        'x-features': x_features,
        'x-utdid': utdid,
        'x-pv': pv,
        # If the signed parameters carried lat and lng, this would be
        # 'lng%2Clat'; both are empty in this example, hence a bare '%2C'.
        'x-location': '%2C',
        'x-t': x_t,
        'x-sign': x_sign,
    }
    url = "http://guide-acs.m.taobao.com/gw/mtop.taobao.wsearch.appsearch/1.0?data=" + parse.quote(data)
    response = requests.get(url, headers=header, timeout=timeout)
    return response.json()


if __name__ == '__main__':
    # Demo run: sign a search request for one keyword, then fetch page 1.
    search_text = "iphone"
    request_time = str(int(time.time()))

    # The search API takes so many parameters that hand-building the string
    # is error-prone; assemble them as a dict and serialise it in one go.
    # Key order is kept as-is because the serialised text is what gets signed.
    search_params = {
        # Nested JSON documents are passed as strings.
        "LBS": (
            '{"SG_TMCS_1H_DS": "{\\"stores\\":[]}", '
            '"SG_TMCS_FRESH_MARKET": "{\\"stores\\":[]}", '
            '"TB": "{\\"stores\\":[]}", '
            '"TMALL_MARKET_B2C": "{\\"stores\\":[]}", '
            '"TMALL_MARKET_O2O": "{\\"stores\\":[]}"}'
        ),
        "URL_REFERER_ORIGIN": (
            "https://s.m.taobao.com/h5entry"
            "?utparam=%7B%22ranger_buckets_native%22%3A%22tsp2189_21582_normaluser01%22%7D"
            "&spm=a2141.1.searchbar.searchbox"
            "&scm=1007.home_topbar.searchbox.d"
            "&_navigation_params=%7B%22needdismiss%22%3A%220%22%2C%22animated%22%3A%220%22%2C%22needpoptoroot%22%3A%220%22%7D"
        ),
        "_navigation_params": '{"needdismiss":"0","animated":"0","needpoptoroot":"0"}',
        "ad_type": "1.0",
        "apptimestamp": request_time,
        "areaCode": "CN",
        "brand": "HUAWEI",
        "cityCode": "513300",
        "countryNum": "156",
        "device": "HUAWEI+MLA-AL10",
        "editionCode": "CN",
        "filterEmpty": "true",
        "filterUnused": "true",
        "from": "input",
        "homePageVersion": "v6",
        "imei": "863064646921168",
        "imsi": "460076404218192",
        "info": "wifi",
        "isEnterSrpSearch": "true",
        "itemfields": "commentCount,newDsr",
        "jarvisDisable": "true",
        "latitude": "",
        "layeredSrp": "true",
        "longitude": "",
        "n": "10",
        "needTabs": "true",
        "network": "wifi",
        "new_shopstar": "true",
        "page": "1",
        "q": search_text,
        "rainbow": "13407,11833,13321,14070,13885",
        "referrer": "http://m.taobao.com/index.htm",
        "schemaType": "auction",
        "scm": "1007.home_topbar.searchbox.d",
        "searchFramework": "true",
        "search_action": "initiative",
        "search_wap_mall": "false",
        "setting_on": "imgBanners,userdoc,tbcode,pricerange,localshop,smartTips,firstCat,dropbox,realsale,insertTexts,tabs",
        "showspu": "true",
        "spm": "a2141.1.searchbar.searchbox",
        "sputips": "on",
        "style": "list",
        "sversion": "8.0",
        "ttid": ttid,
        "utd_id": utdid,
        "utparam": '{"ranger_buckets_native":"tsp2189_21582_normaluser01"}',
        "vm": "nw",
    }
    payload = json.dumps(search_params)
    print(payload)

    # Sign the serialised payload, then send that same text to the search API.
    signature = get_sign(payload)
    print(signature)

    x_t, x_sign = signature['x-t'], signature['x-sign']
    print(fetch_rate(x_t, x_sign, payload))

图1:淘宝爬虫之搜索商品

你可能感兴趣的:(python,爬虫)