方法一通过python的requests包:
import requests

url = "https://fanyi.baidu.com"
# Bound the request: without a timeout, requests can block forever on an
# unresponsive server.
res = requests.get(url, timeout=10)
ck = res.cookies  # cookies set by the response, exposed as a cookie jar
print(ck)
print(type(ck))
print(ck.keys())  # every cookie name in the jar, as a list
print(ck.items())  # (name, value) pairs, as a list
# 输出
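<RequestsCookieJar[<Cookie BAIDUID=3A6AD66348038CBCB3BB6927F4A2CD77:FG=1 for .baidu.com/>, <Cookie locale=zh for .baidu.com/>]>
<class 'requests.cookies.RequestsCookieJar'>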
['BAIDUID', 'locale']
[('BAIDUID', '3A6AD66348038CBCB3BB6927F4A2CD77:FG=1'), ('locale', 'zh')]
0
1
2
3
4
5
6
7
8
9
10
11
12
13
importrequests
url="https://fanyi.baidu.com"
res=requests.get(url)
ck=res.cookies
print(ck)
print(type(ck))
print(ck.keys())# 获取cookie中所有键名,以list格式输出
print(ck.items())
# 输出
,]>
['BAIDUID','locale']
[('BAIDUID','3A6AD66348038CBCB3BB6927F4A2CD77:FG=1'),('locale','zh')]
获取cookies是通过response的cookies 属性,可以通过res.cookies["cookie_name"]的方式获取。
并且需要注意的是,这个是一个RequestsCookieJar的实例,也就是说,在requests的操作里的cookie一般都是包装到了RequestsCookieJar这个类里。
看源码:
# Excerpt quoted from the requests library source; only part of the class is
# shown here.
# NOTE: the `collections.MutableMapping` alias was removed in Python 3.10;
# on modern Python the ABC lives at `collections.abc.MutableMapping`.
class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping):
    """Cookie jar that additionally offers a dict-like interface."""

    def get(self, name, default=None, domain=None, path=None):
        """Return the value found for ``name``, or ``default`` if lookup fails.

        The lookup is delegated to ``self._find_no_duplicates`` together with
        the optional ``domain`` and ``path``; a ``KeyError`` raised by it is
        turned into ``default``.
        """
        try:
            return self._find_no_duplicates(name, domain, path)
        except KeyError:
            return default

    def set(self, name, value, **kwargs):
        """Store a cookie named ``name`` and return the cookie object.

        Passing ``value=None`` removes the cookie instead (restricted by the
        optional ``domain`` / ``path`` keyword arguments) and returns ``None``.
        A ``Morsel`` value is converted with ``morsel_to_cookie``; any other
        value is built with ``create_cookie(name, value, **kwargs)``.
        """
        # support client code that unsets cookies by assignment of a None value:
        if value is None:
            remove_cookie_by_name(self, name, domain=kwargs.get('domain'), path=kwargs.get('path'))
            return

        if isinstance(value, Morsel):
            c = morsel_to_cookie(value)
        else:
            c = create_cookie(name, value, **kwargs)
        self.set_cookie(c)
        return c

    def iterkeys(self):
        """Yield the name of each cookie in the jar."""
        for cookie in iter(self):
            yield cookie.name

    def keys(self):
        """Return the cookie names as a list."""
        return list(self.iterkeys())

    def itervalues(self):
        """Yield the value of each cookie in the jar."""
        for cookie in iter(self):
            yield cookie.value

    def values(self):
        """Return the cookie values as a list."""
        return list(self.itervalues())

    def iteritems(self):
        """Yield a ``(name, value)`` tuple for each cookie in the jar."""
        for cookie in iter(self):
            yield cookie.name, cookie.value

    def items(self):
        """Return the ``(name, value)`` tuples as a list."""
        return list(self.iteritems())
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
classRequestsCookieJar(cookielib.CookieJar,collections.MutableMapping):
defget(self,name,default=None,domain=None,path=None):
try:
returnself._find_no_duplicates(name,domain,path)
exceptKeyError:
returndefault
defset(self,name,value,**kwargs):
# support client code that unsets cookies by assignment of a None value:
ifvalueisNone:
remove_cookie_by_name(self,name,domain=kwargs.get('domain'),path=kwargs.get('path'))
return
ifisinstance(value,Morsel):
c=morsel_to_cookie(value)
else:
c=create_cookie(name,value,**kwargs)
self.set_cookie(c)
returnc
defiterkeys(self):
forcookieiniter(self):
yieldcookie.name
defkeys(self):
returnlist(self.iterkeys())
defitervalues(self):
forcookieiniter(self):
yieldcookie.value
defvalues(self):
returnlist(self.itervalues())
defiteritems(self):
forcookieiniter(self):
yieldcookie.name,cookie.value
defitems(self):
returnlist(self.iteritems())
里面包括了很多的方法,可以根据需要进行使用,通常就是get、set、keys、items等,和字典很像!
重点
如果需要在请求中添加cookie,可以实例化一个RequestsCookieJar类的对象,然后把值set进去,最后在get、post方法里面指定cookies参数就行了,如下:
import requests
from requests.cookies import RequestsCookieJar

url = "http://fanyi.baidu.com/v2transapi"
# Build a jar by hand and attach it to the outgoing request.
cookie_jar = RequestsCookieJar()
cookie_jar.set("BAIDUID", "B1CCDD4B4BC886BF99364C72C8AE1C01:FG=1", domain="baidu.com")
# Bound the request so an unresponsive server cannot hang the script.
res = requests.get(url, cookies=cookie_jar, timeout=10)
# print() with a single argument behaves the same on Python 2 and Python 3.
print(res.status_code)
# Output: 200 means the request succeeded
0
1
2
3
4
5
6
7
8
9
10
11
importrequests
fromrequests.cookiesimportRequestsCookieJar
url="http://fanyi.baidu.com/v2transapi"
cookie_jar=RequestsCookieJar()
cookie_jar.set("BAIDUID","B1CCDD4B4BC886BF99364C72C8AE1C01:FG=1",domain="baidu.com")
res=requests.get(url,cookies=cookie_jar)
printres.status_code
# 输出 200 则为正确
方法二使用python的cookielib包:
try:  # Python 3 names for the same modules
    import http.cookiejar as cookielib
    import urllib.request as urllib2
except ImportError:  # Python 2
    import cookielib
    import urllib2

loginUrl = "https://fanyi.baidu.com"
cj = cookielib.CookieJar()
# HTTPCookieProcessor stores cookies from responses into `cj`.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)
res = urllib2.urlopen(loginUrl, timeout=10)
# Close the response itself to release the connection; OpenerDirector.close()
# is a no-op kept only for backwards compatibility.
res.close()
for index, cookie in enumerate(cj):
    # %-formatting gives the same "[ 0 ] <Cookie ...>" line on Python 2 and 3.
    print('[ %d ] %s' % (index, cookie))
输出:
[ 0 ] <Cookie BAIDUID=...:FG=1 for .baidu.com/>
[ 1 ] <Cookie locale=zh for .baidu.com/>
0
1
2
3
4
5
6
7
8
9
10
11
12
13
importcookielib,urllib2
loginUrl="https://fanyi.baidu.com"
cj=cookielib.CookieJar()
opener=urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)
res=urllib2.urlopen(loginUrl)
opener.close()
forindex,cookieinenumerate(cj):
print'[',index,']',cookie
输出:
[0]
[1]
方法三使用python的httplib包:
try:  # Python 3 name for the same module
    import http.client as httplib
except ImportError:  # Python 2
    import httplib

cj = ''
header = {
    'Host': 'fanyi.baidu.com',
    'Accept-Language': 'zh-CN',
    'Connection': 'Keep-Alive',
    'Accept-Encoding': 'gzip,deflate',
    'Accept': 'text/html, application/xhtml+xml, */*',
    'User-Agent': 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/28.0.1500.71 Chrome/28.0.1500.71 Safari/537.36',
}
# The site is served over HTTPS; a plain HTTPConnection would only get a
# 301 redirect as res.status.
con = httplib.HTTPSConnection('fanyi.baidu.com', timeout=10)
# Request fanyi.baidu.com/v2transapi
con.request(method='GET', url='/v2transapi', headers=header)
res = con.getresponse()
res.read()  # read the body before the connection is closed
con.close()
print(res.status)  # HTTP status code

# Fetch the header once instead of calling getheader() three times.
set_cookie = res.getheader('Set-Cookie')
if set_cookie is not None:
    print(res.getheaders())  # all response headers
    print(set_cookie)  # the complete Set-Cookie value sent by the server
    # Keep only the first "name=value" pair, i.e. the text before the first ';'.
    cj = set_cookie.split(';')[0]
    print(cj)
else:
    print('got no cookie')
    # Same effect as exit(), without depending on the site-provided builtin.
    raise SystemExit
# 输出
200
[('content-length', '57'), ('content-encoding', 'gzip'), ('set-cookie', 'locale=zh; expires=Sat, 01-Feb-2020 03:26:10 GMT; path=/; domain=.baidu.com, BAIDUID=F3B1D486AEEF5CB69BCDBF801064CBEE:FG=1; expires=Mon, 06-Apr-20 03:26:10 GMT; max-age=31536000; path=/; domain=.baidu.com; version=1'), ('vary', 'Accept-Encoding'), ('server', 'Apache'), ('date', 'Sun, 07 Apr 2019 03:26:10 GMT'), ('p3p', 'CP=" OTI DSP COR IVA OUR IND COM "'), ('content-type', 'application/json')]
locale=zh; expires=Sat, 01-Feb-2020 03:26:10 GMT; path=/; domain=.baidu.com, BAIDUID=F3B1D486AEEF5CB69BCDBF801064CBEE:FG=1; expires=Mon, 06-Apr-20 03:26:10 GMT; max-age=31536000; path=/; domain=.baidu.com; version=1
locale=zh
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
importhttplib
cj=''
header={'Host':'fanyi.baidu.com',
'Accept-Language':'zh-CN',
'Connection':'Keep-Alive',
'Accept-Encoding':'gzip,deflate',
'Accept':'text/html, application/xhtml+xml, */*',
'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/28.0.1500.71 Chrome/28.0.1500.71 Safari/537.36'
}
# con = httplib.HTTPConnection('fanyi.baidu.com') # 因为网站使用的是https协议,所以res.status会返回301重定向
con=httplib.HTTPSConnection('fanyi.baidu.com')# 建立请求:fanyi.baidu.com/v2transapi
con.request(method='GET',url='/v2transapi',headers=header)
res=con.getresponse()
res.read()# 调用read函数以后,才能获取content。
con.close()
print(res.status)# 返回状态码
ifres.getheader('Set-Cookie')!=None:# 判断是否存在Set-Cookie,有的话,将cookie保存起来
print(res.getheaders())# 获取所有头部信息
print(res.getheader('Set-Cookie'))# 获取远程服务器响应后设置的全部Cookie信息
cj=res.getheader('Set-Cookie').split(';')[0]
print(cj)
else:
print('got no cookie')
exit()
# 输出
200
[('content-length','57'),('content-encoding','gzip'),('set-cookie','locale=zh; expires=Sat, 01-Feb-2020 03:26:10 GMT; path=/; domain=.baidu.com, BAIDUID=F3B1D486AEEF5CB69BCDBF801064CBEE:FG=1; expires=Mon, 06-Apr-20 03:26:10 GMT; max-age=31536000; path=/; domain=.baidu.com; version=1'),('vary','Accept-Encoding'),('server','Apache'),('date','Sun, 07 Apr 2019 03:26:10 GMT'),('p3p','CP=" OTI DSP COR IVA OUR IND COM "'),('content-type','application/json')]
locale=zh;expires=Sat,01-Feb-202003:26:10GMT;path=/;domain=.baidu.com,BAIDUID=F3B1D486AEEF5CB69BCDBF801064CBEE:FG=1;expires=Mon,06-Apr-2003:26:10GMT;max-age=31536000;path=/;domain=.baidu.com;version=1
locale=zh
方法四使用python的selenium包:
用的比较少的selenium包,用于模拟登陆并获取cookie。
import random
import time
from urllib import request

import requests
from lxml import etree
from selenium import webdriver
from selenium.common.exceptions import WebDriverException

driver = webdriver.Chrome(executable_path=r'/Applications/Google Chrome.app/chromedriver')
try:
    driver.get('http://www.renren.com/PLogin.do')
    time.sleep(2)
    driver.find_element_by_id('email').clear()
    driver.find_element_by_id('email').send_keys('myusername')  # user name
    driver.find_element_by_id('password').clear()
    driver.find_element_by_id('password').send_keys('mypassword')  # password

    # Save the captcha image locally so it can be read by a human (or sent to
    # a captcha-solving service).
    img_url = 'http://icode.renren.com/getcode.do?t=web_login&rnd=' + str(random.random())
    request.urlretrieve(img_url, 'renren_yzm.jpg')
    try:
        driver.find_element_by_id('icode').clear()
        img_res = input('输入验证码:')  # typed by hand, or obtained from a solving service
        driver.find_element_by_id('icode').send_keys(img_res)
    except WebDriverException:
        # Best effort, as in the original: presumably the captcha field is not
        # always present. Only WebDriver errors are ignored, so Ctrl-C and
        # programming errors still surface.
        pass

    driver.find_element_by_id('autoLogin').click()  # "remember me" checkbox
    driver.find_element_by_id('login').click()  # submit the login form
    time.sleep(3)
    cookie_items = driver.get_cookies()  # cookies held by the browser session
finally:
    # Always shut the browser down, even when a step above raised.
    driver.quit()

# Map cookie name -> cookie value.
post = {cookie['name']: cookie['value'] for cookie in cookie_items}
print(post['t'])  # the cookie renren.com needs to keep the session logged in
# ------------------------------------------------------------
url = 'http://www.renren.com/265025131/profile'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Cookie': 't=' + post['t'],
}
# Bound the request so an unresponsive server cannot hang the script.
response = requests.get(url, headers=headers, timeout=10)
print('-' * 50)
html = etree.HTML(response.text)
title = html.xpath('//title/text()')
print('目前得到的页面信息', title)
print(response.url)
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
importtime,random
fromseleniumimportwebdriver
importrequests
fromurllibimportrequest
fromlxmlimportetree
driver=webdriver.Chrome(executable_path=r'/Applications/Google Chrome.app/chromedriver')
driver.get('http://www.renren.com/PLogin.do')
time.sleep(2)
driver.find_element_by_id('email').clear()
driver.find_element_by_id('email').send_keys('myusername')# 输入用户名
driver.find_element_by_id('password').clear()
driver.find_element_by_id('password').send_keys('mypassword')# 输入密码
img_url='http://icode.renren.com/getcode.do?t=web_login&rnd='+str(random.random())
request.urlretrieve(img_url,'renren_yzm.jpg')
try:
driver.find_element_by_id('icode').clear()
img_res=input('输入验证码:')# 如果需要输入验证码,可以手工,或者接口给打码平台
driver.find_element_by_id('icode').send_keys(img_res)
except:
pass
driver.find_element_by_id('autoLogin').click()# 自动登陆
driver.find_element_by_id('login').click()# 登陆
time.sleep(3)
cookie_items=driver.get_cookies()# 获取cookie值
post={}# 保存cookie值
forcookieincookie_items:
post[cookie['name']]=cookie['value']
print(post['t'])# 人人网登陆后需要保持登陆的cookie信息
driver.quit()# 退出selenium
# ------------------------------------------------------------
url='http://www.renren.com/265025131/profile'
headers={
'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
'Cookie':'t='+post['t'],
}
response=requests.get(url,headers=headers)
print('-'*50)
html=etree.HTML(response.text)
title=html.xpath('//title/text()')
print('目前得到的页面信息',title)
print(response.url)
还有很多方法可以实现获取cookie和设置cookie,有待学习研究。