mechanize模拟浏览器登录人人网

mechanize是非常合适的模拟浏览器的模块,它的特点主要有:

(参考:mechanize模拟浏览器行为使用总结 http://blog.chinaunix.net/uid-26722078-id-3507409.html)


1 支持http、https等协议。
2 简单的HTML表单填写。
3 浏览器历史记录和重载。
4 Referer的HTTP头的正确添加(可选)。
5 自动遵守robots.txt。
6 自动处理HTTP-EQUIV和刷新。

所以你可以用mechanize来完成一些自动化浏览器想要做的事情,比如自动登录表单,自动填写表单等。


#!/usr/bin/env python
import mechanize
import cookielib
from bs4 import BeautifulSoup


# Log in to renren.com by driving a mechanize browser through the login form.

USER_AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.11) Gecko/20100701 Firefox/3.5.11'
LOGIN_URL = 'http://www.renren.com/PLogin.do'

browser = mechanize.Browser()

# Attach a cookie jar to the browser so cookies are kept between requests.
cookie_jar = cookielib.LWPCookieJar()
browser.set_cookiejar(cookie_jar)

# Client-side features a real browser would offer. gzip handling is
# switched off and robots.txt handling is disabled for this session.
browser.set_handle_equiv(True)
browser.set_handle_gzip(False)
browser.set_handle_redirect(True)
browser.set_handle_referer(True)
browser.set_handle_robots(False)

# Handle refresh directives, with max_time=1.
refresh_processor = mechanize._http.HTTPRefreshProcessor()
browser.set_handle_refresh(refresh_processor, max_time=1)

# Debug output: shows the HTTP exchange while the script runs, which helps
# when troubleshooting. The two commented toggles enable further tracing.
browser.set_debug_http(True)
#browser.set_debug_redirects(True)
#browser.set_debug_responses(True)

# Present ourselves as a desktop Firefox.
browser.addheaders = [('User-agent', USER_AGENT)]

# Open the login page (replace LOGIN_URL with the page you want to drive).
response = browser.open(LOGIN_URL)

# A page may contain several forms; list them to find the one to fill in.
for form in browser.forms():
    print(form)

# Select the first form on the page.
browser.select_form(nr=0)

# Fill in the credentials (placeholders).
browser['email'] = 'xxx'
browser['password'] = 'xxx'

# Submit the form.
browser.submit()

print('success login')



登录163邮箱:

#!/usr/bin/env python
#coding=utf-8
# Log in to a 163.com mailbox by driving a mechanize browser through the
# login form. (The shebang previously read "/usr/bin/env/ python"; the stray
# slash after "env" made the script impossible to execute directly.)
import mechanize
import cookielib

# Cookie jar that keeps cookies between requests.
cj = cookielib.LWPCookieJar()

# Browser
br = mechanize.Browser()
br.set_cookiejar(cj)

# Browser options
br.set_handle_equiv(True)
#br.set_handle_gzip(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(False)

# Follows refresh 0 but does not hang on refresh > 0
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

# Want debugging messages? Uncomment any of these.
#br.set_debug_http(True)
#br.set_debug_redirects(True)
#br.set_debug_responses(True)

# User-Agent (http header): present ourselves as a desktop Firefox.
br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.11) Gecko/20100701 Firefox/3.5.11')]

# Open the login page and keep the response for optional inspection.
r = br.open("http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1")
# Uncomment to inspect the response:
#html = r.read()
#print br.response().read().decode("utf-8")
#print r.info()
#print r.geturl()

# List every form on the page, then fill in and submit the first one.
for f in br.forms():
    print(f)
br.select_form(nr=0)
br['username'] = 'xxx'
br['password'] = 'xxx'
br.submit()
# NOTE: printed unconditionally; the server's reply is not checked here.
print('sucess login')
#print br.response().read()

输出结果:

<fLogin POST https://reg.163.com/logins.jsp application/x-www-form-urlencoded
  <HiddenControl(url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight=1&verifycookie=1&language=-1&style=1) (readonly)>
  <HiddenControl(product=mail163) (readonly)>
  <HiddenControl(savelogin=) (readonly)>
  <HiddenControl(outfoxer=) (readonly)>
  <HiddenControl(domains=) (readonly)>
  <HiddenControl(syscheckcode=b8bad95b10ac02219480122ee11b1c4cdc291e3c) (readonly)>
  <TextControl(username=)>
  <PasswordControl(password=)>
  <SubmitControl(Submit=) (readonly)>>
sucess login


自动搜索"computer"

# -*- coding: utf-8 -*-

import ie

# Drive a browser window through the external `ie` helper module to search
# for "computer". The `ie` module is not part of this file, so the notes
# below describe its calls only as far as their names and usage suggest.

start_url = "http://ieeexplore.ieee.org/xpl/periodicals.jsp"

# Open the page and make the window visible.
browser = ie.NewIE(start_url)
ie.Visible(browser)

# Collect the <input> nodes under the document body.
page_body = ie.GetBody(browser)
all_inputs = ie.GetNodes(page_body, "input")
print(all_inputs)

# Pick the input whose id is "browse_keyword" and type the query into it.
keyword_box = ie.NodeByAttr(all_inputs, "id", "browse_keyword")
print(keyword_box)
keyword_box.value = "computer"

# Locate the <div id="browse-title-hdr"> that wraps the search controls.
all_divs = ie.GetNodes(page_body, "div")
title_header = ie.NodeByAttr(all_divs, "id", "browse-title-hdr")
print(title_header)

# Inside that div, the input of type "image" is presumably the search
# button; click it to submit the query.
header_inputs = ie.GetNodes(title_header, "input")
search_button = ie.NodeByAttr(header_inputs, "type", "image")
print(search_button)
search_button.click()

输出结果:

wKioL1O_-tuB2M24AAIJpDWCzZ8491.jpg




python模拟登陆163邮箱并获取通讯录

#-*- coding:UTF-8 -*-
import urllib,urllib2,cookielib
import xml.etree.ElementTree as etree #xml解析类

class Login163(object):
    """Log in to a 163.com mailbox and read the e-mail addresses in its address book.

    Typical use: construct with the credentials, call login(), and, if it
    returned True, call address_list().
    """

    # Request header that imitates a desktop browser.
    header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; rv:30.0) Gecko/20100101 Firefox/30.0'}
    # Cookie jar; replaced by a per-instance LWPCookieJar in __init__.
    cookie = None
    # File the cookies are saved to. Raw string: the backslash is a literal
    # path separator, not the start of an escape sequence.
    cookiefile = r'D:\cookies.dat'
    # Part of the username before '@'; filled in by login().
    user = ''

    def __init__(self, username, passwd):
        """Store the credentials and install a cookie-aware urllib2 opener.

        Note that urllib2.install_opener() replaces the process-wide default
        opener, so every later urllib2.urlopen() call shares this cookie jar.
        """
        self.username = username
        self.passwd = passwd
        self.cookie = cookielib.LWPCookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
        urllib2.install_opener(opener)

    def login(self):
        """Post the credentials to the login endpoint.

        Returns True when the response contains the success marker,
        otherwise a failure message string. The cookies received are saved
        to `cookiefile` in either case.
        """
        postdata = urllib.urlencode({
            'username':self.username,
            'password':self.passwd,
            'type':1
            })

        req = urllib2.Request(
                url='http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1',
                data=postdata,
                headers=self.header
            )
        result = urllib2.urlopen(req).read()

        self.user = self.username.split('@')[0]
        self.cookie.save(self.cookiefile)

        if '登录成功,正在跳转...' in result:
            return True
        return '%s 登陆163邮箱失败。'%(self.user)

    def address_list(self):
        """Fetch the address book and return it as a list of {'email': ...} dicts.

        Must be called after a successful login().

        Raises:
            LookupError: no cookie carrying a sid was received.
            ValueError: the address-book response lacks the expected structure.
        """
        # Hit the mail entry point; the response body is not used, the
        # request is made for the cookies it sets.
        auth = urllib2.Request(
                url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1',
                headers=self.header
            )
        urllib2.urlopen(auth).read()
        self.cookie.save(self.cookiefile)

        sid = self._find_sid(self.cookie)

        url = 'http://twebmail.mail.163.com/js6/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username
        # The 'var' parameter is required; without it the service answers
        # only with something like <code>S_OK</code><messages/>.
        # These parameters were captured from the browser with Firebug.
        postdata = urllib.urlencode({
            'func':'global:sequential',
            'showAd':'false',
            'sid':sid,
            'uid':self.username,
            'userType':'browser',
            'var':'<?xml version="1.0"?><object><array name="items"><object><string name="func">pab:searchContacts</string><object name="var"><array name="order"><object><string name="field">FN</string><boolean name="desc">false</boolean><boolean name="ignoreCase">true</boolean></object></array></object></object><object><string name="func">pab:getAllGroups</string></object></array></object>'
            })

        req = urllib2.Request(
            url=url,
            data=postdata,
            headers=self.header
            )
        res = urllib2.urlopen(req).read()

        # The service answers in XML; convert it to plain dicts for callers.
        return self._parse_contacts(res)

    @staticmethod
    def _find_sid(cookies):
        """Return the value of the first cookie whose text form mentions 'sid'.

        Uses the cookie's `value` attribute rather than splitting its
        printed form, so values containing '=' are not truncated. Raises
        LookupError instead of silently using an unrelated cookie when
        nothing matches.
        """
        for cookie in cookies:
            if 'sid' in str(cookie):
                return cookie.value
        raise LookupError('no cookie containing "sid" was found; log in first')

    @staticmethod
    def _parse_contacts(xml_text):
        """Extract the preferred e-mail addresses from an address-book response.

        Expected layout: the root's first <array> child holds one object per
        requested function; the last child of the first object is the
        contact list, and each contact's fields carry a `name` attribute.
        """
        tree = etree.fromstring(xml_text)
        results = tree.find('array')  # first direct child tagged 'array'
        if results is None or len(results) == 0 or len(results[0]) == 0:
            raise ValueError('unexpected address-book response: result array missing or empty')

        contact_list = results[0][-1]
        contacts = []
        for contact in contact_list:
            for field in contact:
                # .get() tolerates fields without a name attribute.
                if field.get('name') == 'EMAIL;PREF':
                    contacts.append({'email': field.text})
        return contacts
        
# Demo: log in with placeholder credentials and print every contact e-mail.
print("Requesting......\n\n")
session = Login163('xxx','xxx')
outcome = session.login()
if not isinstance(outcome, bool):
    # login() hands back a failure message string instead of True.
    print(outcome)
else:
    print("Successful landing,Resolved contacts......\n\n")
    contacts = session.address_list()
    for entry in contacts:
        print(entry['email'])

附结果图:

你可能感兴趣的:(shell)