# 京东 Python卫龙辣条  (JD.com + Python: Weilong latiao product page scraper)

from bs4 import BeautifulSoup
import requests
import datetime
import time
import os
import random
import re
import json
import pickle


# Module-level state shared by the functions below.
# Cookie name -> value; filled from each response in login_by_QR() and
# pickled to the file 'cookie' after a successful login.
cookies = {}
# Extra request headers; login_by_QR() sets 'Host', 'Referer' and 'P3P' here.
headers = {}
# Single HTTP session reused for every request in this file.
sess = requests.Session()

def get_strtime_now():
    """Return the current local time as a 'YYYY-MM-DD HH:MM:SS' string.

    Used as the timestamp prefix of the progress messages printed by this
    script.
    """
    current = datetime.datetime.now()
    # format() on a datetime delegates to strftime() with the given spec.
    return format(current, '%Y-%m-%d %H:%M:%S')
def login_by_QR():
    """Log in to JD.com by scanning a QR code with the JD mobile app.

    The flow has four steps, each a GET through the module-level ``sess``:

    1. Open the login page and collect its cookies.
    2. Download the QR image to ``qr.png`` and open it for the user.
    3. Poll the check endpoint (up to 100 times, 3 seconds apart) until the
       response carries ``code == 200`` and a ticket.
    4. Validate the ticket; a ``P3P`` response header is treated as success.

    Side effects: updates the module-level ``cookies`` and ``headers``
    dicts, writes ``qr.png``, and on success pickles ``cookies`` to the
    file ``cookie``.

    Returns:
        bool: True when login succeeded, False when any step reported a
        failure (a message is printed in that case).

    Raises:
        Exception: any unexpected error (network failure, missing
        ``wlfstk_smdl`` cookie, malformed JSON, ...) is printed with a
        timestamp and then re-raised unchanged.
    """
    try:
        print('%s 请打开京东手机客户端,准备扫码登录' % get_strtime_now())
        url_login = 'https://passport.jd.com/new/login.aspx'

        # Step 1: open the login page to obtain the initial cookies.
        resp = sess.get(url_login)
        if resp.status_code != requests.codes.OK:
            # Fixed: the original called the misspelled get_strtimr_now().
            print("%s 获取登录界面失败" % get_strtime_now())
            return False
        cookies.update(resp.cookies.items())

        # Step 2: fetch the QR image.
        url_show_qr = 'https://qr.m.jd.com/show'
        resp = sess.get(
            url_show_qr,
            cookies=cookies,
            params={
                'appid': 133,
                'size': 147,
                't': (time.time() * 1000),
            },
        )
        if resp.status_code != requests.codes.OK:
            print('%s 获取二维码失败' % get_strtime_now())
            return False
        cookies.update(resp.cookies.items())

        # Save the QR image to disk, then open it for the user.
        image_file = 'qr.png'
        with open(image_file, 'wb') as f:
            for chunk in resp.iter_content(chunk_size=1024):
                f.write(chunk)
        # NOTE: `start` is a Windows shell command; on other platforms the
        # image has to be opened manually.
        os.system('start ' + image_file)

        # Step 3: poll until the QR code has been scanned and confirmed.
        url_check = 'https://qr.m.jd.com/check'
        qr_ticket = None
        headers['Host'] = 'qr.m.jd.com'
        headers['Referer'] = 'https://passport.jd.com/new/login.aspx'
        # Loop-invariant; raises KeyError (re-raised below) if the cookie
        # was never set by the previous steps.
        token = cookies['wlfstk_smdl']
        for _ in range(100):
            resp = sess.get(
                url_check,
                headers=headers,
                cookies=cookies,
                params={
                    'callback': 'jQuery%u' % random.randint(1000000, 9999999),
                    'appid': 133,
                    'token': token,
                    '_': int(time.time() * 1000),
                },
            )
            if resp.status_code == requests.codes.OK:
                # The body is JSONP: jQuery<digits>(<json>).
                matched = re.match(r'jQuery\d+\(([\s\S]+)\)', resp.text)
                # A body that is not JSONP is treated like a failed poll
                # instead of crashing on a None match.
                if matched:
                    js_data = json.loads(matched[1])
                    if js_data['code'] == 200:
                        qr_ticket = js_data['ticket']
                        break
                    print('%s %s' % (get_strtime_now(), js_data['msg']))
            # Always wait between polls; the original skipped this delay
            # on a non-200 HTTP status and retried immediately.
            time.sleep(3)
        if not qr_ticket:
            print('%s 二维码登录失败' % get_strtime_now())
            return False

        # Step 4: validate the ticket obtained from the scan.
        headers['Host'] = 'passport.jd.com'
        headers['Referer'] = 'https://passport.jd.com/uc/login?ltype=logout'
        url_ticket = 'https://passport.jd.com/uc/qrCodeTicketValidation'
        resp = sess.get(
            url_ticket,
            headers=headers,
            cookies=cookies,
            params={'t': qr_ticket},
        )
        if resp.status_code != requests.codes.OK:
            print('%s 二维码登录校验失败' % get_strtime_now())
            return False
        if not resp.headers.get('P3P'):
            # Fixed: dict.has_key() does not exist in Python 3.
            if 'url' in json.loads(resp.text):
                print('%s 需要手动安全验证' % get_strtime_now())
            else:
                print('%s %s' % (get_strtime_now(), resp.text))
            return False

        # Login succeeded: remember the P3P header and persist the cookies.
        headers['P3P'] = resp.headers['P3P']
        cookies.update(resp.cookies.items())
        # NOTE: this file holds session credentials; only unpickle a
        # 'cookie' file that this script wrote itself.
        with open('cookie', 'wb') as f:
            pickle.dump(cookies, f)
        print('%s 登录成功' % get_strtime_now())
        return True
    except Exception as e:
        print('%s 异常 %s' % (get_strtime_now(), e))
        raise

def good_detail(stock_id, area_id=None):
    """Fetch a JD.com item page and extract the product name.

    The page ``https://item.jd.com/<stock_id>.html`` is downloaded through
    the module-level ``sess`` and parsed with BeautifulSoup. The name is
    taken from ``div#name h1`` or, when that selector matches nothing,
    from ``div.sku-name``. The name is printed when found.

    Args:
        stock_id: Item id inserted into the item URL.
        area_id: Not used by the current implementation.

    Returns:
        dict: Record with keys 'id', 'name', 'link', 'price', 'stock' and
        'stockname'. Only 'id' and (on success) 'name' are filled in; the
        rest stay empty strings.

    Any exception (network error, name element missing, ...) is caught and
    reported with a printed message; it is not propagated.
    """
    good_data = {
        'id': stock_id,
        'name': '',
        'link': '',
        'price': '',
        'stock': '',
        'stockname': '',
    }
    try:
        stock_link = 'https://item.jd.com/{0}.html'.format(stock_id)
        resp = sess.get(stock_link)
        soup = BeautifulSoup(resp.text, 'html.parser')
        tags = soup.select('div#name h1')
        if not tags:
            tags = soup.select('div.sku-name')
        # The original regex over str(tags[0]) was split by a raw newline
        # and is not recoverable; read the element's text directly instead.
        # tags[0] raises IndexError (reported below) when nothing matched.
        good_data['name'] = tags[0].get_text(strip=True)
        print(good_data['name'])
    except Exception as e:
        print('Exp {0} : good_detail()'.format(e))
    return good_data


if __name__ == '__main__':
    stock_id = '29954742610'
    # Login is left disabled, as in the original script.
    # login_by_QR()
    good_detail(stock_id)

# 你可能感兴趣的:(笔记)  (blog footer: "You may also be interested in: notes")