
# Python: export the contacts stored in a mailbox.
# Supported providers: Gmail, 126, NetEase (163), Sohu, Hotmail, Sina, Yahoo, MSN.

#!/usr/bin/env python
from BeautifulSoup import BeautifulSoup
import os , urllib , urllib2 , pdb
import cookielib
import httplib
import csv , re

GDATA_URL = '/accounts/ClientLogin'

class MailContactError(Exception):
    """Raised when a provider login or a contact-page request fails."""

class MailContact:
    """Base class for the per-provider contact exporters.

    NOTE(review): the method bodies of this class were missing from the
    source as received (only the ``def`` lines survived).  The bodies below
    are the minimal reconstruction: the constructor stores the credentials
    and the network methods must be overridden by a provider subclass.
    """

    def __init__(self, username, password):
        """Remember the credentials and start with an empty contact list."""
        self.username = username
        self.password = password
        self.contacts = []

    def login(self):
        """Authenticate against the provider; subclasses must override."""
        raise NotImplementedError("login() is provider specific")

    def get_contacts(self):
        """Populate ``self.contacts``; subclasses must override."""
        raise NotImplementedError("get_contacts() is provider specific")

    def get_contact_page(self):
        """Return the raw contact page; subclasses must override."""
        raise NotImplementedError("get_contact_page() is provider specific")
class GMailContact(MailContact):
    """Retrieve a user's contacts from their Google account.

    Dependencies:

    * BeautifulSoup.
    * That's it. :-)

    Usage (performs real network requests)::

        >>> g = GMailContact('user@example.com', 'password')
        >>> g.login()
        (200, 'OK')
        >>> g.get_contacts()
        >>> g.contacts
        [(u'Persons Name', 'user@example.com'), ...]
    """

    # NOTE(review): the host literal was an empty string in the source as
    # received.  Both the ClientLogin endpoint and the contacts feed were
    # served from www.google.com -- confirm before relying on it.
    GOOGLE_HOST = 'www.google.com'

    # NOTE(review): the default username below is kept byte-for-byte; it looks
    # like residue of an e-mail obfuscation filter rather than a real address.
    def __init__(self, username='[email protected]', password='test', service='cp'):
        """Store the credentials and the ClientLogin request parameters."""
        self.mail_type = ""
        self.username = username + self.mail_type
        self.password = password
        self.account_type = 'HOSTED_OR_GOOGLE'  # Allow both Google Domain and Gmail accounts
        self.service = service                  # Defaults to cp (contacts)
        self.source = 'google-data-import'      # Our application name
        self.code = ''                          # Empty by default, populated by self.login()
        self.contacts = []                      # Empty list by default, populated by self.get_contacts()

    def login(self):
        """Log in to Google and store the auth token in ``self.code``.

        Returns:
            ``(status, reason)`` of the HTTP response.

        Raises:
            MailContactError: on a non-200 response or when the response
                carries no ``Auth=`` line.
        """
        data = urllib.urlencode({
            'accountType': self.account_type,
            'Email': self.username,
            'Passwd': self.password,
            'service': self.service,
            'source': self.source,
        })
        headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/plain',
        }
        conn = httplib.HTTPSConnection(self.GOOGLE_HOST)
        try:
            conn.request('POST', GDATA_URL, data, headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't log in. HTTP Code: %s , %s " % (response.status, response.reason))
            body = response.read()
        finally:
            conn.close()
        # The response is a list of KEY=value lines; look the token up by its
        # key instead of relying on it being the third line.
        for line in body.splitlines():
            if line.startswith('Auth='):
                self.code = line[len('Auth='):]
                break
        else:
            raise MailContactError("Couldn't log in. No Auth token in the response")
        return response.status, response.reason

    def _request(self, max_results=200):
        """Fetch the raw contacts feed.

        Base function for requesting the contacts. We'll allow other methods
        eventually.

        Args:
            max_results: value sent as the ``max-results`` query parameter.

        Returns:
            The response body.

        Raises:
            MailContactError: on a non-200 response.
        """
        url = '/m8/feeds/contacts/%s/base/?max-results=%d' % (self.username, max_results)
        headers = {'Authorization': 'GoogleLogin auth=%s' % self.code}
        conn = httplib.HTTPConnection(self.GOOGLE_HOST)
        try:
            conn.request('GET', url, headers=headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't get contacts. HTTP Code: %s , %s " % (response.status, response.reason))
            return response.read()
        finally:
            conn.close()

    def get_contacts(self, max_results=200):
        """Parse the feed from ``self._request`` (using BeautifulSoup) and
        populate ``self.contacts`` with ``(title, address)`` tuples.
        """
        soup = BeautifulSoup(self._request(max_results))
        found = []
        for title in soup.findAll('title'):
            # Only parents holding exactly one title and one gd:email are
            # treated as contact entries.
            fields = title.parent.findAll(['gd:email', 'title'])
            if len(fields) == 2:
                found.append((fields[0].string, fields[1].get('address')))
        self.contacts = found

class M126Contact(MailContact):
    """Export the address book of a 126 mailbox.

    NOTE(review): ``login_host``, ``contact_host``, the ``domain`` form field
    and the ``Refer`` header value were empty strings in the source as
    received (they look stripped); they are kept as-is and must be filled in
    before this class can talk to a server.  ``Refer`` is also probably meant
    to be ``Referer`` -- confirm.
    """

    def __init__(self, username, password):
        """Store the credentials and build the login request."""
        self.mail_type = ""
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = ''
        self.login_url = '/cgi/login?redirTempName=https.htm&hid=10010102&lightweight=1&verifycookie=1&language=0&style=-1'
        self.login_data = urllib.urlencode({
            'domain': '',
            'language': 0,
            'bCookie': '',
            'user': self.username,
            'pass': self.password,
            'style': -1,
            'remUser': '',
            'secure': '',
            'enter.x': '%B5%C7+%C2%BC',
        })
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
            'Refer': '',
        }
        self.contact_host = ''
        # The template is kept separately so that a second login() does not
        # try to %-format an already formatted URL (which contains "%2f").
        self._contact_url_template = (
            '/coremail/fcg/ldvcapp?funcid=prtsearchres&sid=%(sid)s'
            '&listnum=200&tempname=address%%2faddress.htm')
        self.contact_url = self._contact_url_template

    @staticmethod
    def _cookie_value(token):
        """Return the text between the first '=' and the following ';'."""
        return token.partition('=')[2].split(';', 1)[0]

    def login(self):
        """Post the credentials and derive the contact URL and cookie header.

        Raises:
            MailContactError: on a non-200 response or when the response sets
                no ``Coremail`` cookie.
        """
        conn = httplib.HTTPSConnection(self.login_host)
        try:
            conn.request('POST', self.login_url, self.login_data, self.login_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't log in. HTTP Code: %s , %s " % (response.status, response.reason))
            # sc="Coremail=aaYgsaQsvSmKa%MBgzxnddkKzjPJUTbMddRUIgVwfeiBUd; path=/;"
            sc = response.getheader('Set-Cookie')
        finally:
            conn.close()
        if not sc or 'Coremail' not in sc:
            # The password is deliberately left out of the message so that it
            # does not end up in logs or tracebacks.
            raise MailContactError("Email user %s%s password not correct!" % (self.username, self.mail_type))
        coremail = self._cookie_value(sc.split()[0])
        # The session id is whatever follows the first '%' of the cookie value.
        sid = coremail[coremail.find('%') + 1:]
        self.contact_url = self._contact_url_template % {'sid': sid}
        cookie_pairs = [
            'MAIL126_SSN=%s' % self.username,
            'NETEASE_SSN=%s' % self.username,
            'nts_mail_user=%s' % self.username,
            'logType=df',
            'ntes_mail_firstpage=normal',
            'Coremail=%s' % coremail,
            'mail_sid=%s' % sid,
            'mail_uid=%s' % self.username,
            'mail_style=dm3',
            'oulink_h=520',
            'ntes_mail_noremember=true',
        ]
        self.contact_headers = {'Cookie': '; '.join(cookie_pairs)}

    def get_contact_page(self):
        """Return the raw address-book page; requires a prior ``login()``.

        Raises:
            MailContactError: on a non-200 response.
        """
        conn = httplib.HTTPConnection(self.contact_host)
        try:
            conn.request('GET', self.contact_url, headers=self.contact_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't getc contact page. HTTP Code: %s , %s " % (response.status, response.reason))
            return response.read()
        finally:
            conn.close()

    def get_contacts(self):
        """Fill ``self.contacts`` from the ``<xmp>`` elements whose id starts with 't'."""
        soup = BeautifulSoup(self.get_contact_page())
        self.contacts = []
        for xmp in soup.findAll('xmp'):
            # .get() tolerates <xmp> elements that carry no id attribute.
            if (xmp.get('id') or '').startswith('t'):
                self.contacts.append((xmp.contents[0], xmp.space.string))

class M163Contact(MailContact):
    """Export the address book of a 163 mailbox.

    NOTE(review): ``login_host``, ``contact_host``, the host of the second
    login request and the ``Refer`` header value were empty strings in the
    source as received (they look stripped); they are kept as-is and must be
    filled in before this class can talk to a server.
    """

    def __init__(self, username, password):
        """Store the credentials and build the login request."""
        self.mail_type = ""
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = ''
        self.login_url = '/logins.jsp?type=1&url='
        self.login_data = urllib.urlencode({
            'verifycookie': 1,
            'style': -1,
            'product': 'mail163',
            'username': self.username,
            'password': self.password,
            'selType': -1,
            'remUser': '',
            'secure': 'on',
        })
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
            'Refer': '',
        }
        self.contact_host = ''

    @staticmethod
    def _cookie_value(token):
        """Return the text between the first '=' and the following ';'."""
        return token.partition('=')[2].split(';', 1)[0]

    def login(self):
        """Run the two-step login and prepare the contact URL and cookies.

        Returns:
            True on success.

        Raises:
            MailContactError: on a non-200 login response, when the session
                cookies are missing, or when the second step sets no cookie.
        """
        conn = httplib.HTTPSConnection(self.login_host)
        try:
            conn.request('POST', self.login_url, self.login_data, self.login_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't log in. HTTP Code: %s , %s " % (response.status, response.reason))
            sc1 = response.getheader('Set-Cookie') or ''
        finally:
            conn.close()
        # Example of the cookies set by the first step:
        #   Set-Cookie: NTES_SESS=ohAWkiyj.OCjHdh1BK4ToxPcUvFX2fSLaN3FaU0cRInzLoieELdifjyqnBdk4C8qWIZkirZ7.JF.IPFDuR7BcAtKL;; path=/
        #   Set-Cookie: NETEASE_SSN=weafriend;; path=/; expires=Mon, 08-Jun-2009 10:42:26 GMT
        #   Set-Cookie: NETEASE_ADV=11&24&1212921746999;; path=/; expires=Mon, 08-Jun-2009 10:42:26 GMT
        ntes_sess, ntes_adv = None, None
        for token in sc1.split():
            if token.startswith('NTES_SESS'):
                ntes_sess = self._cookie_value(token)
            elif token.startswith('NETEASE_ADV'):
                ntes_adv = self._cookie_value(token)
        if not ntes_sess or not ntes_adv:
            # The password is deliberately left out of the message so that it
            # does not end up in logs or tracebacks.
            raise MailContactError("Email user %s%s password not correct!" % (self.username, self.mail_type))

        # The original hard-coded the author's own account name here; use the
        # account that is actually logging in.
        url = ('/coremail/fcg/ntesdoor2?lightweight=1&verifycookie=1'
               '&language=-1&style=-1&username=' + urllib.quote(self.username))
        conn = httplib.HTTPConnection('')
        try:
            conn.request('GET', url, headers={'cookie': sc1})
            response = conn.getresponse()
            sc2 = response.getheader('Set-Cookie')
        finally:
            conn.close()
        if not sc2:
            raise MailContactError("Couldn't log in. No session cookie in the second login step")
        coremail = self._cookie_value(sc2)
        # The session id is whatever follows the first '%' of the cookie value.
        sid = coremail[coremail.find('%') + 1:]
        self.contact_url = ('/coremail/fcg/ldvcapp?funcid=prtsearchres&sid=' + sid +
                            '&listnum=200&tempname=address%2faddress.htm')
        cookie_pairs = [
            'MAIL163_SSN=%s' % self.username,
            'vjlast=1212911118',
            'vjuids=-99d7a91f6.1156a6ea3cd.0.9e6d0e6f029e78',
            '_ntes_nuid=7118c6a1c9d16ee59a045a2e66186af8',
            'NTES_adMenuNum=3',
            '_ntes_nnid=7118c6a1c9d16ee59a045a2e66186af8,0|www|urs|163mail|news|ent|sports|digi|lady|tech|stock|travel|music|2008|',
            'NTES_UFC=9110001100010000000000000000000000100000000000000002331026300000',
            'logType=-1',
            'nts_mail_user=%s:-1:1' % self.username,
            'Province=010',
            '_ntes_nvst=1212911122953,|www|urs|',
            'Coremail=%s' % coremail,
            'ntes_mail_truename=',
            'ntes_mail_province=',
            'ntes_mail_sex=',
            'mail_style=js3',
            'mail_sid=%s' % sid,
            'USERTRACK=',
            'ntes_mail_firstpage=normal',
            'NTES_SESS=%s' % ntes_sess,
            'NETEASE_SSN=%s' % self.username,
            'NETEASE_ADV=%s' % ntes_adv,
        ]
        self.contact_headers = {'Cookie': '; '.join(cookie_pairs)}
        return True

    def get_contact_page(self):
        """Return the raw address-book page; requires a prior ``login()``.

        Raises:
            MailContactError: on a non-200 response.
        """
        conn = httplib.HTTPConnection(self.contact_host)
        try:
            conn.request('GET', self.contact_url, headers=self.contact_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't getc contact page. HTTP Code: %s , %s " % (response.status, response.reason))
            return response.read()
        finally:
            conn.close()

    def get_contacts(self):
        """Fill ``self.contacts`` from the ``<xmp>`` elements whose id starts with 't'."""
        soup = BeautifulSoup(self.get_contact_page())
        # Start from an empty list so repeated calls do not duplicate entries.
        self.contacts = []
        for xmp in soup.findAll('xmp'):
            # .get() tolerates <xmp> elements that carry no id attribute.
            if (xmp.get('id') or '').startswith('t'):
                self.contacts.append((xmp.contents[0], xmp.space.string))

class SohuContact(MailContact):
    """Export the address book of a Sohu mailbox through urllib2.

    NOTE(review): ``login_host``, ``login_url``, ``contact_host`` and the
    ``eru``/``ru`` form fields were empty strings in the source as received
    (they look stripped); ``login()`` cannot work until ``login_url`` is
    filled in.  ``login_headers`` is built but not sent by ``login()``.
    """

    def __init__(self, username, password):
        """Store the credentials, build the login form and install a
        process-wide cookie-aware urllib2 opener.
        """
        self.mail_type = ""
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = ''
        self.login_url = ''
        self.login_data = urllib.urlencode({
            'loginid': self.username + self.mail_type,
            'passwd': self.password,
            'sg': '5175b065623bb194e85903f5e8c43386',
            'eru': '',
            'ru': '',
            'appid': 1000,
            'fl': '1',
            'ct': 1126084880,
            'vr': '1|1',
        })
        self.login_headers = {
            'User-agent': 'Opera/9.23',
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
        }
        # The session lives in cookies, so every urllib2.urlopen() call made
        # afterwards has to share one cookie jar.
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar()))
        urllib2.install_opener(opener)
        self.contact_host = ''
        self.contact_url = '/webapp/contact'

    def login(self):
        """Post the login form and derive the contact URL from the final URL."""
        conn = urllib2.urlopen(urllib2.Request(self.login_url, self.login_data))
        try:
            landing_url = conn.geturl()
        finally:
            conn.close()
        self.contact_url = os.path.dirname(landing_url) + '/contact'

    def get_contacts(self):
        """Fetch the contact JSON and append ``(name, email)`` tuples to
        ``self.contacts``.

        Raises:
            MailContactError: when the response body is empty.
        """
        # Prefer the standard-library parser; fall back to simplejson on
        # interpreters that predate the json module.
        try:
            import json
        except ImportError:
            import simplejson as json
        conn = urllib2.urlopen(urllib2.Request(self.contact_url))
        try:
            lines = conn.readlines()
        finally:
            conn.close()
        if not lines:
            raise MailContactError("Couldn't get contact page. Empty response")
        # Only the first line of the response is parsed.
        info = json.loads(lines[0])
        for entry in info['listString']:
            self.contacts.append((entry['name'], entry['email']))

class HotmailContact(MailContact):
    """Work-in-progress exporter for Hotmail.

    ``login()`` and ``get_contacts()`` only print the pages they fetch;
    ``self.contacts`` is never populated.

    NOTE(review): ``login_host``, ``contact_host`` and the ``Referer`` value
    were empty strings in the source as received (they look stripped).  The
    ``PPFT`` token and the ``Cookie`` header are hard-coded captures and are
    reproduced byte-for-byte, including the spaces inside ``PPFT`` that may
    be formatting residue.
    """

    def __init__(self, username, password):
        """Store the credentials and build the login form."""
        self.mail_type = ""
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = ''
        self.login_url = '/ppsecure/post.srf?id=2'
        pad = 'IfYouAreReadingThisYouHaveTooMuchFreeTime'
        self.login_data = urllib.urlencode({
            'login': self.username + self.mail_type,
            'passwd': self.password,
            'PPSX': 'Pass',
            'LoginOption': 2,
            # Pad so that password + pad has the length of the pad string;
            # an explicit length avoids the [0:-0] == '' trap for an empty
            # password.
            'PwdPad': pad[:max(0, len(pad) - len(self.password))],
            'PPFT': 'B1S2dWnsGTFLpX9h8fxfE*ym5OABStpt0fjo%21YICXQOy1b %21x P4dRx8F1h1w6tR8ZyLP4h3TYGS8gSZGku3j7CxQ4poqr',
        })
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
            'Cookie': 'CkTst=G1213457870062; MobileProf=2AV3mTOwJEE8smIfIyq69wbCn08y6UX7910BtLhqTto2MYrNSBW5hhlEuGlMJdMwwGq1WcxtENCAI1JSyTNfrS23ArFLxDjBNk!xtbIj0iglbu8DQVg9TnSTPtHj975deR; MUID=C2DC0F9324AA47DCB05CE14B989D89C2; ANON=A=E81AEA51F927860B07BBA712FFFFFFFF&E=69f&W=2; s_lastvisit=1213455335875; MH=MSFT; wlidperf=throughput=2087.201125175809&latency=1.422; MSPRequ=lt=1213455763&co=1&id=2; MSPOK=uuid-d75c4c53-1b6e-433c-af95-c3c0175a48cd; CkTst=G1213455761093; [email protected]; MSPCID=0f45e10de2ad38c9; NAP=V=1.7&E=6b4&C=bKkGf4IbC96JLFhsoKyccKm1Kf7jjhX5I3C1ofjvyMoY3iI9j0b6gg&W=2; MSPSoftVis=@:@; BrowserSense=Win=1&Downlevel=0&WinIEOnly=0&Firefox=1&FirefoxVersion=2.0; mktstate=U=&E=en-us; mkt1=norm=en-us; s_cc=true; s_sq=%5B%5BB%5D%5D; MSPP3RD=3688532421',
            'Referer': '',
        }

        self.contact_host = ''
        self.contact_url = '/mail/GetContacts.aspx'

    def getInputValue(self, name, content):
        """Return the ``value`` attribute of the ``<input>`` called *name*.

        NOTE(review): the original body was missing from the source as
        received; this implementation is reconstructed from the method name
        and signature only -- confirm it matches the intended behaviour.

        Args:
            name: the ``name`` attribute to look for.
            content: HTML text to search.

        Returns:
            The attribute value, or None when no such input (or no value
            attribute) exists.
        """
        for tag in re.findall(r'<input\b[^>]*>', content, re.I | re.S):
            attrs = {}
            for key, _quote, value in re.findall(r'([\w:-]+)\s*=\s*(["\'])(.*?)\2', tag, re.S):
                attrs[key.lower()] = value
            if attrs.get('name') == name:
                return attrs.get('value')
        return None

    def login(self):
        """Fetch the login page, post the login form and print the result.

        Raises:
            MailContactError: when the POST does not return 200.
        """
        # For the login procedure see %8f%96hotmail%e8%81%94%e7%b3%bb%e4%ba%ba%e5%88%97%e8%a1%a8/
        conn = httplib.HTTPSConnection(self.login_host)
        try:
            # A request target has to start with '/'.
            conn.request('GET', '/login.srf?id=2')
            conn.getresponse().read()
        finally:
            conn.close()
        conn = httplib.HTTPSConnection(self.login_host)
        try:
            conn.request('POST', self.login_url, self.login_data, self.login_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't getc contact page. HTTP Code: %s , %s " % (response.status, response.reason))
            page = response.read()
        finally:
            conn.close()
        print(page)

    def get_contacts(self):
        """Fetch the contact page and print it.

        Raises:
            MailContactError: on a non-200 response.
        """
        conn = httplib.HTTPConnection(self.contact_host)
        try:
            conn.request('GET', self.contact_url)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't getc contact page. HTTP Code: %s , %s " % (response.status, response.reason))
            page = response.read()
        finally:
            conn.close()
        print(page)

class SinaContact(MailContact):
    """Exporter for Sina mail; no provider-specific code is defined here yet."""

class YahooContact(MailContact):
    """Exporter for Yahoo mail; no provider-specific code is defined here yet."""

class MsnContact(MailContact):
    """Exporter for MSN; no provider-specific code is defined here yet."""

def get_mailcontact(user, password, mailtype):
    """Log in to the provider selected by *mailtype* and return its contacts.

    This is a best-effort helper: any failure yields an empty list instead
    of an exception.

    Args:
        user: account name passed to the provider class.
        password: account password.
        mailtype: provider domain, with or without a leading '@'
            (case-insensitive), e.g. ``"@163.com"`` or ``"gmail.com"``.
            The empty string selects 126, as it did before.

    Returns:
        The provider object's ``contacts`` list, or ``[]`` when the provider
        is unknown or login / retrieval fails.
    """
    # NOTE(review): in the source as received all six comparisons were
    # against "" (the literals look stripped), so only the first branch was
    # reachable.  The domains below are reconstructed from the class names --
    # confirm them against the callers.
    providers = {
        '126.com': M126Contact,
        '163.com': M163Contact,
        'sohu.com': SohuContact,
        'hotmail.com': HotmailContact,
        'sina.com': SinaContact,
        'gmail.com': GMailContact,
    }
    if mailtype == "":
        provider = M126Contact
    else:
        try:
            provider = providers.get(mailtype.lower().lstrip('@'))
        except AttributeError:
            # Not a string at all.
            provider = None
    if provider is None:
        return []
    g = provider(user, password)
    try:
        g.login()
        g.get_contacts()
        return g.contacts
    except Exception:
        # Deliberate best effort, but KeyboardInterrupt / SystemExit are no
        # longer swallowed as they were by the bare "except:".
        return []

def get_csvcontact(iter):
    """Extract ``(name, address)`` pairs from CSV data.

    Cells are scanned left to right, row after row.  A non-blank cell that
    contains an e-mail address yields a contact whose name is the most
    recent non-blank cell that did not contain one (``None`` if there has
    been none yet); the remembered name carries over between rows.  Every
    contact found is also printed.

    Args:
        iter: any iterable of CSV lines accepted by ``csv.reader`` (for
            example an open file).

    Returns:
        A list of ``(name, address)`` tuples in order of appearance.
    """
    contact, name = [], None
    for row in csv.reader(iter):
        for cell in row:
            # Blank cells neither carry an address nor replace the name.
            if not cell or not cell.strip():
                continue
            m = re.search(r'\w+@\w+(?:\.\w+)+', cell)
            if m:
                print('%s %s' % (name, m.group(0)))
                contact.append((name, m.group(0)))
            else:
                name = cell
    return contact

def get_imcontact(iter):
    """Collect the e-mail addresses found in CSV data.

    Every cell is searched for an address; each hit is printed and appended
    to the result.

    NOTE(review): the result holds the regular-expression match objects, not
    strings -- use ``m.group(0)`` to get the address.  That is what the
    original code appended, so it is preserved for existing callers.

    Args:
        iter: any iterable of CSV lines accepted by ``csv.reader``.

    Returns:
        A list of match objects, one per cell that contains an address.
    """
    contact = []
    for row in csv.reader(iter):
        for cell in row:
            m = re.search(r'\w+@\w+(?:\.\w+)+', cell)
            if m:
                print(m)
                contact.append(m)
    return contact

if __name__ == '__main__':
    # Manual smoke test: drops into the debugger, turns on HTTP wire logging
    # and dumps the contacts of a Gmail and a 163 account.  Replace the
    # '***' placeholders with real credentials before running.
    pdb.set_trace()
    httplib.HTTPSConnection.debuglevel = 1
    httplib.HTTPConnection.debuglevel = 1
    # The second class was misspelled "M163ContactContact", a NameError.
    for provider in (GMailContact, M163Contact):
        g = provider('***', '***')
        g.login()
        g.get_contacts()
        print(g.contacts)
