# 抓取淘宝直通车转化看板的数据

import datetime
import requests
import json
import pandas as pd
import time
from sqlalchemy import create_engine, Column, Integer, String, BIGINT, CHAR, Date, DECIMAL
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# date = datetime.date.today() - datetime.timedelta(days=1)
# print(date)
# Shared SQLAlchemy engine for the local MySQL database "lcsy" (pymysql driver).
# The ``encoding`` keyword is deliberately not passed: it was a legacy option
# whose default already was 'utf-8', it is deprecated in SQLAlchemy 1.4 and
# rejected with a TypeError by SQLAlchemy 2.0.
# NOTE(review): the database user and password are hard-coded in this URL.
engine = create_engine("mysql+pymysql://root:123456@localhost/lcsy?charset=utf8",
                       echo=False)
# HTTP request headers passed to requests.get() by the scraper functions.
# NOTE(review): 'authority', 'method' and 'scheme' look like HTTP/2
# pseudo-headers copied from browser dev tools; here they are sent as ordinary
# header fields -- confirm they are needed.
# NOTE(review): 'accept-encoding' advertises 'br'; requests only decodes Brotli
# bodies when a brotli package is installed -- confirm.
# NOTE(review): the cookie is a hard-coded session value and presumably expires.
headers = {
    'authority': 'sycm.taobao.com',
    'method': 'GET',
    'scheme': 'https',
    'accept': '*/*',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cookie': 'thw=cn; t=a208e8363cf808d6d2e1a7bf33fb25e3; cna=3kNYFPNvukkCAWVRcNTXUgr1; hng=CN%7Czh-CN%7CCNY%7C156; tg=0; miid=1045397493995789440; l=Av//gcppIzWff4kp8aRQtKMiD9iJ/FOG; _cc_=WqG3DMC9EA%3D%3D; mt=ci%3D-1_0; uc3=id2=&nk2=&lg2=; tracknick=; cookie2=1b97d7783272db103dcbbfcc6a784cd1; _tb_token_=6bedb5b39857; x=106852162; sn=%E7%83%AD%E9%A3%8E%E6%97%97%E8%88%B0%E5%BA%97%3A%E8%AE%A2%E5%8D%95; unb=2363082805; enc=%2FgLgaiZq5VLekUnATUWW8m4jeNV87QCJtmuL1JFVo9Ch6oq526SWlqzS6DGXgJbNnn9LS1RBMufQjFknQmQQ1w%3D%3D; _m_h5_tk=eb8a4574c423853a44310455cbad9f67_1543551827519; _m_h5_tk_enc=d7ff26ad44fe571e8274b8e096a11052; linezing_session=sd9K5EeOY5pmJPIXwR0e1goU_1543542177270fwoT_16; JSESSIONID=097A5E48432B8466961771F575C8ABBB; uc1=cookie14=UoTYNc5xYR2AlQ%3D%3D&lng=zh_CN; skt=b6ac10dcf3aa1eb7; csg=ecc5e790; _euacm_ac_l_uid_=2363082805; 2363082805_euacm_ac_c_uid_=106852162; 2363082805_euacm_ac_rs_uid_=106852162; _euacm_ac_rs_sid_=57300507; _portal_version_=new; cc_gray=1; v=0; apush6f99429e2209e469e1bb2d917c674c8e=%7B%22ts%22%3A1543556527599%2C%22parentId%22%3A1543556517480%7D; isg=BAAA5-RkSMP78jMYyIL8WyTw0Y4SIeZLyG5ID3qRu5s_9aEfIp0S4_jLCR2QxZwr',
    'referer': 'https://sycm.taobao.com/portal/home.htm',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
}

# Endpoint queried by all four ranking scrapers. The query string (day range
# and token) is fixed, exactly as it was in each function's own copy.
_ITEM_TOP_URL = (
    "https://sycm.taobao.com/portal/conversion/listItemTop.json"
    "?dateType=day&dateRange=2018-11-29%7C2018-11-29&token=5dae8b3a1"
)

# Seconds to wait for the HTTP response; without a timeout requests.get()
# can block forever on a stalled connection.
_REQUEST_TIMEOUT = 30


def _fetch_item_top():
    '''
    Download and decode the listItemTop.json document.

    :return: the parsed JSON document, or None when the response body is empty
    :raises requests.HTTPError: when the server answers with a 4xx/5xx status
    :raises ValueError: when the body is not valid JSON
    '''
    reply = requests.get(_ITEM_TOP_URL, headers=headers, timeout=_REQUEST_TIMEOUT)
    reply.raise_for_status()
    body = reply.text
    if not body:
        return None
    return json.loads(body)


def _rank_frame(payload, rank_key, metric_key):
    '''
    Flatten one ranking list of the payload into a DataFrame.

    Only ``payload['content']['data'][rank_key]`` is read, so a missing
    unrelated ranking no longer aborts the scrape.

    :param payload: parsed listItemTop.json document
    :param rank_key: name of the ranking list, e.g. 'uvTop'
    :param metric_key: per-entry metric field, also used as the column name
    :return: DataFrame with columns item_id, pictUrl, detailUrl, title and
             ``metric_key``, one row per ranking entry
    :raises KeyError: when the ranking or an expected field is missing
    '''
    entries = payload['content']['data'][rank_key]
    return pd.DataFrame({
        'item_id': [entry['item']['itemId'] for entry in entries],
        'pictUrl': [entry['item']['pictUrl'] for entry in entries],
        'detailUrl': [entry['item']['detailUrl'] for entry in entries],
        'title': [entry['item']['title'] for entry in entries],
        metric_key: [entry[metric_key] for entry in entries],
    })


def _store_rank(rank_key, metric_key, table):
    '''
    Fetch one ranking and append its rows to ``lcsy.<table>``.

    Nothing is written when the response body is empty or the ranking has
    no entries.

    :param rank_key: name of the ranking list in the response
    :param metric_key: per-entry metric field to store
    :param table: destination table name inside the ``lcsy`` schema
    '''
    payload = _fetch_item_top()
    if payload is None:
        return
    frame = _rank_frame(payload, rank_key, metric_key)
    if frame.empty:
        # An empty frame would insert nothing but could still create the
        # table with inferred column types, so skip the write entirely.
        return
    # NOTE(review): date_time is the day the script runs, while the URL's
    # dateRange is fixed to 2018-11-29 -- confirm which date the tables need.
    frame['date_time'] = int(time.strftime("%Y%m%d"))
    # index=True keeps writing the DataFrame index column, as before.
    frame.to_sql(table, engine, schema="lcsy", if_exists='append', index=True)


def fangkebang():
    '''
    Scrape Business Advisor (sycm) > Home > Conversion board > Visitor ranking > Items.

    Stores the 'uvTop' entries with their 'uv' value in table ``visitor_list``.
    :return: None
    '''
    _store_rank('uvTop', 'uv', "visitor_list")


def zhifubang():
    '''
    Scrape Business Advisor (sycm) > Home > Conversion board > Payment ranking > Items.

    Stores the 'payTop' entries with their 'payItmCnt' value in table
    ``payment_list``.
    :return: None
    '''
    _store_rank('payTop', 'payItmCnt', "payment_list")


def shoucangbang():
    '''
    Scrape Business Advisor (sycm) > Home > Conversion board > Favorites ranking > Items.

    Stores the 'cltTop' entries with their 'cltByrCnt' value in table
    ``collect_list``.
    :return: None
    '''
    _store_rank('cltTop', 'cltByrCnt', "collect_list")


def jiagoubang():
    '''
    Scrape Business Advisor (sycm) > Home > Conversion board > Add-to-cart ranking > Items.

    Stores the 'cartTop' entries with their 'cartCnt' value in table
    ``purchase_list``.
    :return: None
    '''
    _store_rank('cartTop', 'cartCnt', "purchase_list")

# Run the four ranking scrapers one after another, in the original order:
# visitors, payments, favorites, add-to-cart.
for _scrape in (fangkebang, zhifubang, shoucangbang, jiagoubang):
    _scrape()

# 你可能感兴趣的:(爬虫)