# Scrapy simulated login (original title: "scrapy 模拟登陆")

# -*- coding: utf-8 -*-

import json
import re

import requests
import scrapy
from lxml import etree
from scrapy import FormRequest, Request
from scrapy.http.cookies import CookieJar

# Request 1 URL: https://passport.aliexpress.com/mini_login.htm?appName=aebuyer
# Request 2 URL: https://passport.aliexpress.com/newlogin/login.do?fromSite=13&appName=aebuyer
# Request 3 URL: https://sellercenter.aliexpress.com
# Request 4 URL: https://login.aliexpress.com/validateST.htm?st=<st>  (st token taken from the response to request 2)
# Request 5 URL: https://fund.aliexpress.com/fundIndex.htm?spm=a2g0s.9042311.0.0.60b14c4d3LOcEV

class Gin3Spider(scrapy.Spider):
    """Log in to AliExpress through a fixed request chain and print fund balances.

    Each callback schedules the next request:

    1. ``start_requests`` -> GET ``start_urls``               -> ``parse``
    2. ``parse``          -> POST credentials to ``start_urls1`` -> ``login``
    3. ``login``          -> GET ``start_urls2``              -> ``login_after``
    4. ``login_after``    -> GET ``start_urls3`` + st token   -> ``login_agter1``
    5. ``login_agter1``   -> GET ``start_urls4``              -> ``login_agter2``

    Cookies seen along the way are merged into the JSON file ``cookie_file``
    and re-sent explicitly on the following requests.
    """

    name = 'gin3'

    # NOTE: ``start_urls`` is a single URL string here, not the list Scrapy's
    # default ``start_requests`` expects; that is fine only because
    # ``start_requests`` is overridden below.
    start_urls = "https://passport.aliexpress.com/mini_login.htm?appName=aebuyer"

    start_urls1 = 'https://passport.aliexpress.com/newlogin/login.do?fromSite=13&appName=aebuyer'

    start_urls4 = 'https://fund.aliexpress.com/fundIndex.htm?spm=a2g0s.9042311.0.0.60b14c4d3LOcEV'

    start_urls2 = 'https://sellercenter.aliexpress.com'

    # Prefix only; the ``st`` token from the login response is appended to it.
    start_urls3 = 'https://login.aliexpress.com/validateST.htm?st='

    cookie_file = './wish_newmorehot.json'

    # Class-level objects: shared by every instance of this spider.
    cookie_jar = CookieJar()

    # Kept for backward compatibility; the validateST URL is now also carried
    # between callbacks through ``Request.meta`` so callbacks do not depend on
    # list position.
    a_list = []

    # Placeholder credentials. Override on the command line with
    # ``scrapy crawl gin3 -a login_id=... -a password=...``.
    login_id = 'xxxxxxxx'
    password = 'xxxxxxx'

    def get_cookies(self):
        """Return the cookies stored in ``cookie_file`` as a dict.

        If the file does not exist it is created containing ``{}``. An empty,
        unparsable, or non-object file yields an empty dict instead of raising.
        """
        try:
            with open(self.cookie_file, 'r') as f:
                data = f.read()
        except FileNotFoundError:
            # First run: create the file so later reads succeed.
            with open(self.cookie_file, 'w') as f:
                f.write(json.dumps({}))
            return {}

        if not data:
            return {}
        try:
            cookies = json.loads(data)
        except ValueError:
            # json.JSONDecodeError subclasses ValueError.
            self.logger.warning("Ignoring unparsable cookie file %s", self.cookie_file)
            return {}
        return cookies if isinstance(cookies, dict) else {}

    def merge_cookies(self, response):
        """Merge cookies set by ``response`` into ``cookie_file`` and return them.

        The response's cookies are first extracted into the shared
        ``cookie_jar``; every cookie currently in the jar then overrides the
        same-named entry loaded from disk, and the result is written back.

        Returns:
            dict mapping cookie name to cookie value.
        """
        cookie_jar = self.cookie_jar
        cookie_jar.extract_cookies(response, response.request)

        cookies_final = self.get_cookies()
        # Read name/value directly from the cookie objects rather than parsing
        # their string representation with a regex.
        for item in cookie_jar:
            if item.value is not None:
                cookies_final[item.name] = item.value

        with open(self.cookie_file, 'w') as f:
            f.write(json.dumps(cookies_final))
        return cookies_final

    def start_requests(self):
        """Start the chain by fetching the mini login page."""
        yield scrapy.Request(self.start_urls, callback=self.parse)

    def parse(self, response):
        """Save the login page to ``./login.html`` and POST the credentials."""
        with open('./login.html', 'wb') as f:
            f.write(response.body)

        yield scrapy.FormRequest(
            self.start_urls1,
            formdata={'loginId': self.login_id, 'password': self.password},
            cookies=self.get_cookies(),
            callback=self.login,
        )

    def login(self, response):
        """Read the ``st`` token from the login JSON and request the seller center.

        The token is expected at ``content.data.st``. If the body is not JSON
        or the token is absent, an error is logged and the chain stops here
        rather than requesting a URL built from ``None``.
        """
        try:
            result = json.loads(response.text)
        except ValueError:
            self.logger.error("Login response is not valid JSON")
            return
        print("结果", result)

        try:
            st = result['content']['data']['st']
        except (KeyError, TypeError):
            st = None
        print("st为:", st)
        if not st:
            self.logger.error("Login response carries no 'st' token; aborting login chain")
            return

        st_url = self.start_urls3 + str(st)
        self.a_list.append(st_url)
        print(self.a_list)

        yield scrapy.Request(
            self.start_urls2,
            cookies=self.merge_cookies(response),
            callback=self.login_after,
            meta={'st_url': st_url},
        )

    def login_after(self, response):
        """Save the seller-center page to ``./houtai.html`` and validate the st token."""
        with open('./houtai.html', 'wb') as f:
            f.write(response.body)

        st_url = response.meta.get('st_url')
        if st_url is None:
            # Fallback for requests that did not carry the URL in meta.
            if not self.a_list:
                self.logger.error("No validateST URL available; aborting login chain")
                return
            st_url = self.a_list[-1]

        yield scrapy.Request(st_url, cookies=self.get_cookies(), callback=self.login_agter1)

    def login_agter1(self, response):
        """Merge cookies from the validateST response and request the fund page."""
        yield scrapy.Request(
            self.start_urls4,
            cookies=self.merge_cookies(response),
            callback=self.login_agter2,
        )

    def login_agter2(self, response):
        """Save the fund page to ``./houtai1.html`` and print three balances.

        The balances are the first three text nodes matching
        ``//table//tr[2]/td[2]``; if fewer than three are found an error is
        logged and nothing is printed.
        """
        with open('./houtai1.html', 'wb') as f:
            f.write(response.body)

        balances = response.xpath("//table//tr[2]/td[2]/text()").extract()
        if len(balances) < 3:
            self.logger.error(
                "Expected at least 3 balance cells on the fund page, found %d",
                len(balances),
            )
            return
        doc1, doc2, doc3 = balances[:3]

        print('CNH人民币账户---可用', doc1)
        print('CNY 人民币账户---可用', doc2)
        print('美元账户---可用', doc3)

# You may also be interested in: (scrapy simulated login)