# -*- coding: utf-8 -*-
import json
import re

import requests
import scrapy
from lxml import etree
from scrapy import FormRequest, Request
from scrapy.http.cookies import CookieJar
# Request 1 URL: https://passport.aliexpress.com/mini_login.htm?appName=aebuyer
# Request 2 URL: https://passport.aliexpress.com/newlogin/login.do?fromSite=13&appName=aebuyer
# Request 3 URL: https://sellercenter.aliexpress.com
# Request 4 URL: https://login.aliexpress.com/validateST.htm?st= + str(st)
# Request 5 URL: https://fund.aliexpress.com/fundIndex.htm?spm=a2g0s.9042311.0.0.60b14c4d3LOcEV
class Gin3Spider(scrapy.Spider):
    """Log in to AliExpress and print three balances from the fund page.

    The spider issues five requests in sequence, each callback yielding
    the next one:

    1. ``start_urls``       -- the mini login page (``parse``).
    2. ``start_urls1``      -- login form POST; the JSON reply is read for
       ``content.data.st`` (``login``).
    3. ``start_urls2``      -- the seller center (``login_after``).
    4. ``start_urls3`` + st -- the ``validateST`` URL (``login_agter1``).
    5. ``start_urls4``      -- the fund index page (``login_agter2``).

    Cookies gathered from responses are persisted as a JSON object in
    ``cookie_file`` and sent along with later requests.
    """

    name = 'gin3'
    # NOTE: a plain string, not the list Scrapy's default start_requests()
    # iterates over; this works only because start_requests() is overridden.
    start_urls = "https://passport.aliexpress.com/mini_login.htm?appName=aebuyer"
    start_urls1 = 'https://passport.aliexpress.com/newlogin/login.do?fromSite=13&appName=aebuyer'
    # start_urls4 = 'https://fund.aliexpress.com/fundIndex.htm'
    start_urls4 = 'https://fund.aliexpress.com/fundIndex.htm?spm=a2g0s.9042311.0.0.60b14c4d3LOcEV'
    start_urls2 = 'https://sellercenter.aliexpress.com'
    start_urls3 = 'https://login.aliexpress.com/validateST.htm?st='
    cookie_file = './wish_newmorehot.json'
    # Class-level objects: shared by every instance of this spider.
    cookie_jar = CookieJar()
    a_list = []

    def get_cookies(self):
        """Return the cookies stored in ``cookie_file`` as a dict.

        If the file does not exist it is created containing an empty JSON
        object and an empty dict is returned. An empty file also yields an
        empty dict.
        """
        try:
            # Read-only access is enough here; writing happens elsewhere.
            with open(self.cookie_file, 'r') as f:
                data = f.read()
        except FileNotFoundError:
            with open(self.cookie_file, 'w') as f:
                f.write(json.dumps({}))
            return {}
        return json.loads(data) if data else {}

    def merge_cookies(self, response):
        """Merge the cookies set by ``response`` into the persisted cookies.

        The response's cookies are extracted into the shared ``cookie_jar``;
        every cookie in the jar is then written over the stored dict, which
        is saved back to ``cookie_file``.

        Returns:
            The merged ``{name: value}`` dict.
        """
        jar = self.cookie_jar
        jar.extract_cookies(response, response.request)
        cookies_final = self.get_cookies()
        # Read name/value straight from the cookie objects. The previous
        # approach parsed str(cookie) with an empty regex, which produced
        # one-element items and made dict() raise ValueError.
        for cookie in jar:
            cookies_final[cookie.name] = cookie.value if cookie.value is not None else ''
        with open(self.cookie_file, 'w') as f:
            f.write(json.dumps(cookies_final))
        return cookies_final

    def start_requests(self):
        """Start the chain by fetching the mini login page."""
        yield scrapy.Request(self.start_urls, callback=self.parse)

    def parse(self, response):
        """Save the login page to ``./login.html`` and submit the login form."""
        with open('./login.html', 'wb') as f:
            f.write(response.body)
        yield scrapy.FormRequest(
            self.start_urls1,
            # Placeholder credentials exactly as found in the source.
            formdata={'loginId': 'xxxxxxxx', 'password': 'xxxxxxx'},
            cookies=self.get_cookies(),
            callback=self.login,
        )

    def login(self, response):
        """Read ``st`` from the login JSON reply and request the seller center.

        The ``validateST`` URL built from ``st`` is appended to ``a_list``
        for ``login_after`` to pick up. If the reply carries no ``st`` the
        chain stops with an error log instead of requesting ``...?st=None``.
        """
        result = json.loads(response.text)
        print("结果", result)
        # Tolerate a missing or null 'content' / 'data' level.
        content = result.get('content') or {}
        data = content.get('data') or {}
        st = data.get('st')
        print("st为:", st)
        if not st:
            self.logger.error("login response contains no 'st' value; aborting")
            return
        self.a_list.append(self.start_urls3 + str(st))
        print(self.a_list)
        yield scrapy.Request(
            self.start_urls2,
            cookies=self.merge_cookies(response),
            callback=self.login_after,
        )

    def login_after(self, response):
        """Save the seller-center page and request the ``validateST`` URL."""
        with open('./houtai.html', 'wb') as f:
            f.write(response.body)
        # a_list is shared at class level, so take the newest entry rather
        # than a possibly stale first one.
        yield scrapy.Request(
            self.a_list[-1],
            cookies=self.get_cookies(),
            callback=self.login_agter1,
        )

    def login_agter1(self, response):
        """Merge the validation response's cookies and request the fund page."""
        yield scrapy.Request(
            self.start_urls4,
            cookies=self.merge_cookies(response),
            callback=self.login_agter2,
        )

    def login_agter2(self, response):
        """Save the fund page to ``./houtai1.html`` and print three balances.

        The values are the first three text nodes matched by
        ``//table//tr[2]/td[2]``; if fewer than three are found an error is
        logged and nothing is printed.
        """
        with open('./houtai1.html', 'wb') as f:
            f.write(response.body)
        # Evaluate the XPath once instead of once per value.
        balances = response.xpath("//table//tr[2]/td[2]/text()").extract()
        if len(balances) < 3:
            self.logger.error(
                "expected at least 3 balance cells, found %d", len(balances)
            )
            return
        doc1, doc2, doc3 = balances[:3]
        print('CNH人民币账户---可用', doc1)
        print('CNY 人民币账户---可用', doc2)
        print('美元账户---可用', doc3)