1、使用cookie模拟登录
# -*- coding: utf-8 -*-
import scrapy
# 实在没办法了,可以用这种方法模拟登录,麻烦一点,成功率100%
class RenrenSpider(scrapy.Spider):
    """Fetch renren.com pages by attaching a pre-set session cookie.

    No login request is sent. Every URL in ``start_urls`` is requested with
    the ``cookies`` dict attached, and ``parse_page`` saves each response
    body to disk.
    """

    name = "renren"
    allowed_domains = ["renren.com"]
    start_urls = (
        'http://www.renren.com/xxxxx',
        'http://www.renren.com/11111',
        'http://www.renren.com/xx',
    )
    # Placeholder: fill in the cookie name/value pairs of a session that is
    # already logged in (e.g. copied from a browser).
    cookies = {
    }

    def start_requests(self):
        """Yield one request per start URL with the session cookies attached."""
        for url in self.start_urls:
            # No form data is posted here, so a plain Request is enough;
            # the cookies alone carry the logged-in session.
            yield scrapy.Request(
                url,
                cookies=self.cookies,
                callback=self.parse_page,
            )

    def parse_page(self, response):
        """Print the fetched URL and dump the raw page body to ``deng.html``."""
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print("===========" + response.url)
        # response.body is raw bytes, so the file must be opened in binary
        # mode; text mode fails on Python 3 and alters newlines on Windows.
        # NOTE(review): every response overwrites the same file, so only the
        # last page fetched is kept -- confirm that is intended.
        with open("deng.html", "wb") as page_file:
            page_file.write(response.body)
2、只要是需要提供post数据的,就可以用这种方法
# -*- coding: utf-8 -*-
import scrapy
# 只要是需要提供post数据的,就可以用这种方法,
# 下面示例:post数据是账户密码
class Renren1Spider(scrapy.Spider):
    """Log in to renren.com by POSTing the credentials directly.

    This approach fits any endpoint that only needs POST data; here the
    POST data is the account e-mail and password.
    """

    name = "renren1"
    allowed_domains = ["renren.com"]

    def start_requests(self):
        """Yield a single form POST carrying the account credentials."""
        url = 'http://www.renren.com/PLogin.do'
        # Passing formdata makes FormRequest send the fields as a
        # url-encoded POST body.
        yield scrapy.FormRequest(
            url=url,
            formdata={"email": "xxx", "password": "xxx"},
            callback=self.parse_page,
        )

    def parse_page(self, response):
        """Dump the raw body of the login response to ``mao2.html``."""
        # response.body is raw bytes, so the file must be opened in binary
        # mode; text mode fails on Python 3 and alters newlines on Windows.
        with open("mao2.html", "wb") as page_file:
            page_file.write(response.body)
3、首先发送登录页面的get请求,获取页面里登录必需的参数(比如zhihu的 _xsrf),然后和账户密码一起post到服务器,完成登录
# -*- coding: utf-8 -*-
import scrapy
# 正统模拟登录方法:
# 首先发送登录页面的get请求,获取到页面里的登录必须的参数,比如说zhihu的 _xsrf
# 然后和账户密码一起post到服务器,登录成功
class Renren2Spider(scrapy.Spider):
    """Log in through the login page's own form, then fetch a profile page.

    Flow: GET the start URL -> ``parse`` submits the form found in that
    response together with the credentials -> ``parse_page`` requests a
    profile page -> ``parse_newpage`` saves that page to disk.
    """

    name = "renren2"
    allowed_domains = ["renren.com"]
    start_urls = (
        "http://www.renren.com/PLogin.do",
    )

    def parse(self, response):
        """Submit the form contained in ``response`` with the credentials."""
        # from_response() pre-fills the request with the fields already
        # present in the page's <form>, which is how hidden tokens such as
        # an "_xsrf" value get sent back. If a required token lives outside
        # the form, extract it from the response and add it to formdata.
        yield scrapy.FormRequest.from_response(
            response,
            formdata={"email": "xxx", "password": "xxx"},
            callback=self.parse_page,
        )

    def parse_page(self, response):
        """Print the post-login URL and request a profile page."""
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print("=========1===" + response.url)
        url = "http://www.renren.com/422167102/profile"
        yield scrapy.Request(url, callback=self.parse_newpage)

    def parse_newpage(self, response):
        """Print the profile URL and dump the raw page body to ``xiao.html``."""
        print("===========2====" + response.url)
        # response.body is raw bytes, so the file must be opened in binary
        # mode; text mode fails on Python 3 and alters newlines on Windows.
        with open("xiao.html", "wb") as page_file:
            page_file.write(response.body)