# Export the contacts of a mailbox with Python (supports Gmail and others).

# Python: export mailbox contacts. Targets: Gmail, 126, NetEase (163), Sohu, Hotmail, Sina, Yahoo, MSN.

#!/usr/bin/env python
#coding=utf-8
from BeautifulSoup import BeautifulSoup
import os , urllib , urllib2 , pdb
import cookielib
import httplib
import csv , re

# Request path that GMailContact.login() POSTs the account credentials to.
GDATA_URL = '/accounts/ClientLogin'

class MailContactError (Exception ):
    """Raised by the exporter classes when a login or contact request fails."""
    pass

class MailContact:
    """Common interface of every provider-specific contact exporter.

    Subclasses override ``login`` and ``get_contacts`` (and, where useful,
    ``get_contact_page``).  After a successful ``get_contacts`` call the
    result is available as ``self.contacts``, a list of ``(name, email)``
    tuples.

    The base implementation performs no network access; it only records the
    credentials and starts with an empty contact list, so that placeholder
    subclasses which override nothing still expose a usable ``contacts``
    attribute instead of raising AttributeError.
    """

    def __init__(self, username, password):
        """Remember the credentials and start with no contacts.

        username -- account name, normally without the "@domain" part.
        password -- account password.
        """
        self.username = username
        self.password = password
        self.contacts = []

    def login(self):
        """Authenticate against the provider.  No-op in the base class."""
        pass

    def get_contacts(self):
        """Populate ``self.contacts``.  No-op in the base class."""
        pass

    def get_contact_page(self):
        """Return the raw address-book page.  No-op in the base class."""
        pass
   
class GMailContact(MailContact):
    """
    Retrieve a user's contacts from their Google account.

    Dependencies:
    -------------
    * BeautifulSoup.
    * That's it. :-)

    Usage:
    ------
    >>> g = GMailContact('user@gmail.com', 'password')
    >>> g.login()
    (200, 'OK')
    >>> g.get_contacts()
    >>> g.contacts
    [(u'Persons Name', 'person@example.com'), ...]
    """

    # NOTE(review): the default username is a placeholder that was mangled by
    # e-mail obfuscation when this file was published.  It is kept as-is only
    # so the signature does not change; callers should always pass a username.
    def __init__(self, username='[email protected]', password='test', service='cp'):
        """Store the credentials and the ClientLogin parameters.

        username -- either a bare account name ("user") or a full address
                    ("user@gmail.com" / "user@hosted-domain").
        password -- account password.
        service  -- Google service code; defaults to 'cp' (contacts).
        """
        self.mail_type = "@gmail.com"
        # Only append the domain to bare account names.  Blindly appending it
        # turned a full address into "user@gmail.com@gmail.com".
        if '@' in username:
            self.username = username
        else:
            self.username = username + self.mail_type
        self.password = password
        self.account_type = 'HOSTED_OR_GOOGLE'   # Allow both Google Domain and Gmail accounts
        self.service = service                   # Defaults to cp (contacts)
        self.source = 'google-data-import'       # Our application name
        self.code = ''                           # Empty by default, populated by self.login()
        self.contacts = []                       # Empty list by default, populated by self.get_contacts()

    def login(self):
        """
        Log in to Google. No arguments.

        Stores the returned auth token in ``self.code`` and returns the
        ``(status, reason)`` pair of the HTTP response.
        Raises MailContactError on a non-200 response or when the response
        carries no ``Auth=`` line.
        """
        data = urllib.urlencode({
            'accountType': self.account_type,
            'Email': self.username,
            'Passwd': self.password,
            'service': self.service,
            'source': self.source,
        })
        headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/plain',
        }

        conn = httplib.HTTPSConnection('google.com')
        try:
            conn.request('POST', GDATA_URL, data, headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't log in. HTTP Code: %s , %s " % (response.status, response.reason))
            body = response.read()
        finally:
            # Close the socket on the error path as well.
            conn.close()

        # The body is a list of "Key=value" lines; pick the Auth one by name
        # rather than by position.
        for line in body.splitlines():
            if line.startswith('Auth='):
                self.code = line[len('Auth='):]
                break
        else:
            raise MailContactError("Couldn't log in. No Auth token in the response.")
        return response.status, response.reason

    def _request(self, max_results=200):
        """
        Base function for requesting the contacts. We'll allow other methods eventually.

        Returns the raw feed body; raises MailContactError on a non-200
        response.
        """
        url = '/m8/feeds/contacts/%s/base/?max-results=%d' % (self.username, max_results)

        headers = {'Authorization': 'GoogleLogin auth=%s' % self.code}

        conn = httplib.HTTPConnection('www.google.com')
        try:
            conn.request('GET', url, headers=headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't get contacts. HTTP Code: %s , %s " % (response.status, response.reason))
            return response.read()
        finally:
            conn.close()

    def get_contacts(self, max_results=200):
        """Parse the feed returned by self._request (using BeautifulSoup) and populate self.contacts.

        Each entry that has exactly one title and one gd:email element
        contributes a ``(title text, address attribute)`` tuple.
        """
        soup = BeautifulSoup(self._request(max_results))
        self.contacts = []
        for entry in soup.findAll('title'):
            fields = entry.parent.findAll(['gd:email', 'title'])
            if len(fields) == 2:
                # Assumes the title element precedes gd:email inside an entry.
                self.contacts.append((fields[0].string, fields[1].get('address')))

        return

class M126Contact(MailContact):
    """Export the address book of a 126.com mailbox.

    ``login()`` obtains the Coremail session cookie, ``get_contacts()`` then
    downloads the address-book page and fills ``self.contacts`` with
    ``(name, email)`` tuples.
    """

    # Template of the address-book URL; the session id is filled in by login().
    _CONTACT_URL_TEMPLATE = ('/coremail/fcg/ldvcapp?funcid=prtsearchres'
                             '&sid=%(sid)s&listnum=200&tempname=address%%2faddress.htm')

    def __init__(self, username, password):
        """Prepare the login request for *username* (without "@126.com")."""
        self.mail_type = "@126.com"
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = 'entry.mail.126.com'
        self.login_url = '/cgi/login?redirTempName=https.htm&hid=10010102&lightweight=1&verifycookie=1&language=0&style=-1'
        self.login_data = urllib.urlencode({
            'domain': '126.com',
            'language': 0,
            'bCookie': '',
            'user': self.username,
            'pass': self.password,
            'style': -1,
            'remUser': '',
            'secure': '',
            # NOTE(review): this value is already percent-encoded and
            # urlencode() will encode the '%' signs again -- confirm whether
            # the server cares.
            'enter.x': '%B5%C7+%C2%BC',
        })
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
            'Referer': 'http://www.126.com/',   # was misspelled 'Refer'
        }
        self.contact_host = 'g2a10.mail.126.com'
        # Holds the unformatted template until login() fills in the session id.
        self.contact_url = self._CONTACT_URL_TEMPLATE

    def _cookie_value(self, header, name):
        """Return the value of cookie *name* inside a Set-Cookie header, or None."""
        if not header:
            return None
        found = re.search(r'(?:^|[\s,;])' + re.escape(name) + r'=([^;,\s]*)', header)
        if found:
            return found.group(1)
        return None

    def login(self):
        """Log in and prepare ``contact_url`` / ``contact_headers``.

        Raises MailContactError on a non-200 response or when the server does
        not hand out a Coremail cookie (wrong user name or password).
        """
        conn = httplib.HTTPSConnection(self.login_host)
        try:
            conn.request('POST', self.login_url, self.login_data, self.login_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't log in. HTTP Code: %s , %s " % (response.status, response.reason))
            # Example:
            #   sc  = "Coremail=aaYgsaQsvSmKa%MBgzxnddkKzjPJUTbMddRUIgVwfeiBUd; path=/; domain=.126.com"
            #   sid = "MBgzxnddkKzjPJUTbMddRUIgVwfeiBUd"
            sc = response.getheader('Set-Cookie')
        finally:
            conn.close()

        coremail = self._cookie_value(sc, 'Coremail')
        if not coremail:
            # Wrong user name or password.  The password is deliberately left
            # out of the message so it cannot end up in logs or tracebacks.
            raise MailContactError("Email user %s%s: user name or password not correct!" % (self.username, self.mail_type))
        # The session id is the part of the cookie value after the '%'.
        sid = coremail[coremail.find('%') + 1:]
        # Always format from the template so login() can be called repeatedly.
        self.contact_url = self._CONTACT_URL_TEMPLATE % {'sid': sid}
        self.contact_headers = {
            'Cookie': ('MAIL126_SSN=%(user)s; NETEASE_SSN=%(user)s; nts_mail_user=%(user)s; '
                       'logType=df; ntes_mail_firstpage=normal; '
                       'Coremail=%(coremail)s;mail_host=g2a14.mail.126.com; '
                       'mail_sid=%(sid)s; mail_uid=%(user)s@126.com; '
                       'mail_style=dm3; oulink_h=520; ntes_mail_noremember=true'
                       % {'user': self.username, 'coremail': coremail, 'sid': sid})
        }

    def get_contact_page(self):
        """Return the raw address-book page; login() must have been called first."""
        conn = httplib.HTTPConnection(self.contact_host)
        try:
            conn.request('GET', self.contact_url, headers=self.contact_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't getc contact page. HTTP Code: %s , %s " % (response.status, response.reason))
            return response.read()
        finally:
            conn.close()

    def get_contacts(self):
        """Parse the address-book page and replace ``self.contacts``."""
        page = self.get_contact_page()
        self.contacts = []
        soup = BeautifulSoup(page)
        for x in soup.findAll('xmp'):
            # Skip <xmp> elements without an id instead of raising KeyError.
            if (x.get('id') or '').startswith('t'):
                # NOTE(review): relies on each matching <xmp> having a first
                # child and a nested <space> element -- confirm against a
                # real page.
                self.contacts.append((x.contents[0], x.space.string))

class M163Contact(MailContact):
    """Export the address book of a 163.com mailbox.

    ``login()`` performs the two-step sign-in (passport login, then the
    Coremail entry point), ``get_contacts()`` downloads the address-book page
    and fills ``self.contacts`` with ``(name, email)`` tuples.
    """

    def __init__(self, username, password):
        """Prepare the login request for *username* (without "@163.com")."""
        self.mail_type = "@163.com"
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = 'reg.163.com'
        self.login_url = '/logins.jsp?type=1&url=http://fm163.163.com/coremail/fcg/ntesdoor2?lightweight=1&verifycookie=1&language=-1&style=-1'

        self.login_data = urllib.urlencode({
            'verifycookie': 1,
            'style': -1,
            'product': 'mail163',
            'username': self.username,
            'password': self.password,
            'selType': -1,
            'remUser': '',
            'secure': 'on',
        })
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
            'Referer': 'http://mail.163.com/',   # was misspelled 'Refer'
        }
        self.contact_host = 'g2a10.mail.163.com'

    def _cookie_value(self, header, name):
        """Return the value of cookie *name* inside a Set-Cookie header, or None."""
        if not header:
            return None
        found = re.search(r'(?:^|[\s,;])' + re.escape(name) + r'=([^;,\s]*)', header)
        if found:
            return found.group(1)
        return None

    def login(self):
        """Log in and prepare ``contact_url`` / ``contact_headers``.

        Returns True on success.  Raises MailContactError when a request
        fails or the expected session cookies are not handed out.
        """
        conn = httplib.HTTPSConnection(self.login_host)
        try:
            conn.request('POST', self.login_url, self.login_data, self.login_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't log in. HTTP Code: %s , %s " % (response.status, response.reason))
            sc1 = response.getheader('Set-Cookie')
        finally:
            conn.close()

        # Expected cookies, e.g.:
        #   Set-Cookie: NTES_SESS=ohAWkiyj...; domain=.163.com; path=/
        #   Set-Cookie: NETEASE_SSN=<user>; domain=.163.com; path=/; expires=...
        #   Set-Cookie: NETEASE_ADV=11&24&1212921746999; domain=.163.com; path=/; expires=...
        ntes_sess = self._cookie_value(sc1, 'NTES_SESS')
        ntes_adv = self._cookie_value(sc1, 'NETEASE_ADV')
        if not ntes_sess or not ntes_adv:
            # Wrong user name or password.  The password is deliberately left
            # out of the message so it cannot end up in logs or tracebacks.
            raise MailContactError("Email user %s%s: user name or password not correct!" % (self.username, self.mail_type))

        # Second step: the Coremail entry point hands out the mail session.
        # The account name used to be hard-coded to the author's own account.
        url = ('/coremail/fcg/ntesdoor2?lightweight=1&verifycookie=1&language=-1&style=-1&username='
               + urllib.quote(self.username))
        headers = {'cookie': sc1}
        conn = httplib.HTTPConnection('fm163.163.com')
        try:
            # A GET carries no body (the original passed an empty dict).
            conn.request('GET', url, headers=headers)
            response = conn.getresponse()
            sc2 = response.getheader('Set-Cookie')
        finally:
            conn.close()
        if not sc2:
            raise MailContactError("Couldn't log in. No session cookie returned. HTTP Code: %s , %s " % (response.status, response.reason))

        # Value of the first cookie; the session id is the part after '%'.
        coremail = sc2[sc2.find('=') + 1:sc2.find(';')]
        sid = coremail[coremail.find('%') + 1:]
        self.contact_url = '/coremail/fcg/ldvcapp?funcid=prtsearchres&sid=' + sid + '&listnum=200&tempname=address%2faddress.htm'

        # Most values below were captured from a recorded browser session;
        # only the user name and the session cookies are substituted.
        user = self.username
        cookie_parts = [
            'MAIL163_SSN=%s' % user,
            'vjlast=1212911118',
            'vjuids=-99d7a91f6.1156a6ea3cd.0.9e6d0e6f029e78',
            '_ntes_nuid=7118c6a1c9d16ee59a045a2e66186af8',
            'NTES_adMenuNum=3',
            '_ntes_nnid=7118c6a1c9d16ee59a045a2e66186af8,0|www|urs|163mail|news|ent|sports|digi|lady|tech|stock|travel|music|2008|',
            'NTES_UFC=9110001100010000000000000000000000100000000000000002331026300000',
            'logType=-1',
            'nts_mail_user=%s:-1:1' % user,
            'Province=010',
            '_ntes_nvst=1212911122953,|www|urs|',
            'Coremail=%s' % coremail,
            'wmsvr_domain=g1a109.mail.163.com',
            'ntes_mail_truename=',
            'ntes_mail_province=',
            'ntes_mail_sex=',
            'mail_style=js3',
            'mail_host=g1a109.mail.163.com',
            'mail_sid=%s' % sid,
            'USERTRACK=58.31.69.214.1212911333143304',
            'ntes_mail_firstpage=normal',
            'NTES_SESS=%s' % ntes_sess,
            'NETEASE_SSN=%s' % user,
            'NETEASE_ADV=%s' % ntes_adv,
        ]
        self.contact_headers = {'Cookie': '; '.join(cookie_parts)}
        return True

    def get_contact_page(self):
        """Return the raw address-book page; login() must have been called first."""
        conn = httplib.HTTPConnection(self.contact_host)
        try:
            conn.request('GET', self.contact_url, headers=self.contact_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't getc contact page. HTTP Code: %s , %s " % (response.status, response.reason))
            return response.read()
        finally:
            conn.close()

    def get_contacts(self):
        """Parse the address-book page and replace ``self.contacts``."""
        page = self.get_contact_page()
        # Start from scratch so repeated calls do not duplicate entries.
        self.contacts = []
        soup = BeautifulSoup(page)
        for x in soup.findAll('xmp'):
            # Skip <xmp> elements without an id instead of raising KeyError.
            if (x.get('id') or '').startswith('t'):
                # NOTE(review): relies on each matching <xmp> having a first
                # child and a nested <space> element -- confirm against a
                # real page.
                self.contacts.append((x.contents[0], x.space.string))




class SohuContact(MailContact):
    """Export the address book of a sohu.com mailbox.

    Uses a cookie-aware urllib2 opener: ``login()`` posts the credentials and
    derives the contact URL from the page the login redirects to;
    ``get_contacts()`` fetches that URL and decodes the JSON it returns.
    """

    def __init__(self, username, password):
        """Prepare the login request for *username* (without "@sohu.com")."""
        self.mail_type = "@sohu.com"
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = 'passport.sohu.com'
        self.login_url = 'http://passport.sohu.com/login.jsp'
        self.login_data = urllib.urlencode({
            'loginid': self.username + self.mail_type,
            'passwd': self.password,
            'sg': '5175b065623bb194e85903f5e8c43386',
            'eru': 'http://login.mail.sohu.com/login.php',
            'ru': 'http://login.mail.sohu.com/login_comm.php',
            'appid': 1000,
            'fl': '1',
            'ct': 1126084880,
            'vr': '1|1',
        })
        self.login_headers = {
            'User-agent': 'Opera/9.23',
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
        }
        # Installs a process-wide opener that keeps cookies between requests;
        # the session cookie set by login() is what authorises get_contacts().
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar()))
        urllib2.install_opener(opener)
        self.contact_host = 'www50.mail.sohu.com'
        self.contact_url = '/webapp/contact'

    def login(self):
        """Post the credentials and point ``contact_url`` next to the landing page."""
        # The prepared headers were built but never sent by the original code.
        req = urllib2.Request(self.login_url, self.login_data, self.login_headers)
        conn = urllib2.urlopen(req)
        try:
            landing_url = conn.geturl()
        finally:
            conn.close()
        # "Directory" of the landing URL (everything before its last '/').
        self.contact_url = landing_url.rsplit('/', 1)[0] + '/contact'

    def get_contacts(self):
        """Fetch the contact list and replace ``self.contacts``.

        Raises MailContactError when the server returns an empty body.
        """
        # Prefer the standard library decoder; fall back to simplejson on
        # interpreters that predate the json module.
        try:
            import json
        except ImportError:
            import simplejson as json

        req = urllib2.Request(self.contact_url)
        conn = urllib2.urlopen(req)
        try:
            buf = conn.readlines()
        finally:
            conn.close()
        if not buf:
            raise MailContactError("Couldn't get contacts. Empty response from %s" % self.contact_url)

        # The JSON document is expected on the first line of the response.
        info = json.loads(buf[0])
        # Start from scratch so repeated calls do not duplicate entries.
        self.contacts = []
        for i in info['listString']:
            self.contacts.append((i['name'], i['email']))

class HotmailContact(MailContact):
    """Work-in-progress exporter for hotmail.com mailboxes.

    Neither ``login()`` nor ``get_contacts()`` parses anything yet: both only
    print the page they receive, and ``self.contacts`` stays empty.
    """

    # Padding alphabet used by the login form for the PwdPad field.
    _PWD_PAD = 'IfYouAreReadingThisYouHaveTooMuchFreeTime'

    def __init__(self, username, password):
        """Prepare the login request for *username* (without "@hotmail.com")."""
        self.mail_type = "@hotmail.com"
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = 'login.live.com'
        self.login_url = '/ppsecure/post.srf?id=2'
        # Pad length is len(pad) - len(password).  Computed explicitly because
        # the slice [0:-len(password)] yields '' for an empty password.
        pad_len = max(len(self._PWD_PAD) - len(self.password), 0)
        self.login_data = urllib.urlencode({
            'login': self.username + self.mail_type,
            'passwd': self.password,
            'PPSX': 'Pass',
            'LoginOption': 2,
            'PwdPad': self._PWD_PAD[:pad_len],
            # NOTE(review): captured token; it is already percent-encoded and
            # urlencode() will encode the '%' signs again -- confirm.
            'PPFT': 'B1S2dWnsGTFLpX9h8fxfE*ym5OABStpt0fjo%21YICXQOy1b%21xP4dRx8F1h1w6tR8ZyLP4h3TYGS8gSZGku3j7CxQ4poqr',
        })
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
            # NOTE(review): cookie captured from a browser session; the
            # MSPPre value was replaced by an e-mail obfuscation placeholder
            # when this file was published and cannot be recovered from here.
            'Cookie': 'CkTst=G1213457870062; MobileProf=2AV3mTOwJEE8smIfIyq69wbCn08y6UX7910BtLhqTto2MYrNSBW5hhlEuGlMJdMwwGq1WcxtENCAI1JSyTNfrS23ArFLxDjBNk!xtbIj0iglbu8DQVg9TnSTPtHj975deR; MUID=C2DC0F9324AA47DCB05CE14B989D89C2; ANON=A=E81AEA51F927860B07BBA712FFFFFFFF&E=69f&W=2; s_lastvisit=1213455335875; MH=MSFT; wlidperf=throughput=2087.201125175809&latency=1.422; MSPRequ=lt=1213455763&co=1&id=2; MSPOK=uuid-d75c4c53-1b6e-433c-af95-c3c0175a48cd; CkTst=G1213455761093; [email protected]; MSPCID=0f45e10de2ad38c9; NAP=V=1.7&E=6b4&C=bKkGf4IbC96JLFhsoKyccKm1Kf7jjhX5I3C1ofjvyMoY3iI9j0b6gg&W=2; MSPSoftVis=@:@; BrowserSense=Win=1&Downlevel=0&WinIEOnly=0&Firefox=1&FirefoxVersion=2.0; mktstate=U=&E=en-us; mkt1=norm=en-us; s_cc=true; s_sq=%5B%5BB%5D%5D; MSPP3RD=3688532421',
            'Referer': 'https://login.live.com/ppsecure/post.srf?id=2&bk=1213455763',
        }

        self.contact_host = 'by120w.bay120.mail.live.com'
        self.contact_url = '/mail/GetContacts.aspx'

    def getInputValue(self, name, content):
        """Not implemented yet."""
        pass

    def login(self):
        """Request the login form, then post the credentials and print the reply.

        Raises MailContactError when the POST does not return 200.
        """
        # For the login flow see
        # http://blog.jiexoo.com/2008/05/21/%e7%94%a8httpclient%e8%8e%b7%e5%8f%96hotmail%e8%81%94%e7%b3%bb%e4%ba%ba%e5%88%97%e8%a1%a8/
        conn = httplib.HTTPSConnection(self.login_host)
        try:
            # The request path must start with '/'.
            conn.request('GET', '/login.srf?id=2')
            conn.getresponse().read()
        finally:
            conn.close()

        conn = httplib.HTTPSConnection(self.login_host)
        try:
            conn.request('POST', self.login_url, self.login_data, self.login_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't log in. HTTP Code: %s , %s " % (response.status, response.reason))
            page = response.read()
        finally:
            conn.close()
        # TODO: extract the session data from the page instead of printing it.
        print (page)

    def get_contacts(self):
        """Fetch the contact page and print it; ``self.contacts`` is not filled yet.

        Raises MailContactError when the request does not return 200.
        """
        conn = httplib.HTTPConnection(self.contact_host)
        try:
            conn.request('GET', self.contact_url)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't getc contact page. HTTP Code: %s , %s " % (response.status, response.reason))
            page = response.read()
        finally:
            conn.close()
        # TODO: parse the page into self.contacts instead of printing it.
        print (page)

class SinaContact (MailContact ):
    """Placeholder for a Sina exporter; it adds nothing to the MailContact stubs yet."""
    pass



class YahooContact (MailContact ):
    """Placeholder for a Yahoo exporter; it adds nothing to the MailContact stubs yet."""
    pass

class MsnContact (MailContact ):
    """Placeholder for an MSN exporter; it adds nothing to the MailContact stubs yet."""
    pass

def get_mailcontact(user, password, mailtype):
    """Log in to the given provider and return the account's contacts.

    user     -- account name.
    password -- account password.
    mailtype -- provider domain, e.g. "126.com", "163.com", "sohu.com",
                "hotmail.com", "sina.com", "yahoo.com", "msn.com" or
                "gmail.com".

    Returns the exporter's ``contacts`` list.  This is a best-effort helper:
    an unknown provider, a failed login or any other error while fetching
    yields an empty list rather than an exception.
    """
    if mailtype == "126.com":
        g = M126Contact(user, password)
    elif mailtype == "163.com":
        g = M163Contact(user, password)
    elif mailtype == "sohu.com":
        g = SohuContact(user, password)
    elif mailtype == "hotmail.com":
        g = HotmailContact(user, password)
    elif mailtype == "sina.com":
        g = SinaContact(user, password)
    elif mailtype == "yahoo.com":
        g = YahooContact(user, password)
    elif mailtype == "msn.com":
        g = MsnContact(user, password)
    elif mailtype == "gmail.com":
        g = GMailContact(user, password)
    else:
        # Unknown provider: previously `g` stayed unbound and the call below
        # crashed outside the try block.
        return []

    try:
        g.login()
        g.get_contacts()
        return g.contacts
    except Exception:
        # Deliberately swallow provider errors, but let KeyboardInterrupt and
        # SystemExit propagate (a bare `except:` caught those too).
        return []
       
       



def get_csvcontact(iter):
    """Extract ``(name, email)`` pairs from CSV data.

    iter -- any iterable of CSV lines accepted by ``csv.reader`` (an open
            file, a list of strings, ...).

    For every row the cells are scanned left to right: the last non-blank
    cell seen before the first cell containing an e-mail address is taken as
    the name, and scanning of that row stops at the address.  Rows without an
    address contribute nothing; a row without a name cell yields ``None`` as
    the name.
    """
    # The backslashes of the original pattern had been mangled into '/', so
    # it never matched.  The local part also accepts '.', '+' and '-'.
    address = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')
    contact = []
    for row in csv.reader(iter):
        # Reset per row so a name never leaks into the next row.
        name = None
        for cell in row:
            if not cell or not cell.strip():
                continue
            found = address.search(cell)
            if found:
                contact.append((name, found.group(0)))
                break
            name = cell
    return contact

def get_imcontact(iter):
    """Collect every e-mail address found in CSV data.

    iter -- any iterable of CSV lines accepted by ``csv.reader``.

    Returns a list with the first address found in each cell, in reading
    order (the original appended the regex match object instead of the
    address itself).
    """
    # The backslashes of the original pattern had been mangled into '/', so
    # it never matched.  The local part also accepts '.', '+' and '-'.
    address = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')
    contact = []
    for row in csv.reader(iter):
        for cell in row:
            found = address.search(cell)
            if found:
                contact.append(found.group(0))
    return contact

if __name__ == '__main__':
    # Manual smoke test: replace the '***' placeholders with real credentials.
    # The leftover pdb.set_trace() was dropped so the script runs unattended.
    # Echo the HTTP traffic of every connection to ease debugging.
    httplib.HTTPSConnection.debuglevel = 1
    httplib.HTTPConnection.debuglevel = 1

    g = GMailContact('***', '***')
    g.login()
    g.get_contacts()
    print (g.contacts)

    # Was `M163ContactContact`, which does not exist (NameError).
    g = M163Contact('***', '***')
    g.login()
    g.get_contacts()
    print (g.contacts)

# You may also be interested in: (python)