批量QQ资料采集

#coding=utf8
import httplib2,os,sys,time
from bs4 import BeautifulSoup
os.chdir(sys.path[0])
try:
    from urlparse import parse_qsl,urlparse
    from urllib import urlencode,quote
except:
    from urllib.parse import parse_qsl,urlparse,urlencode,quote

# One shared HTTP client, reused for every request this script makes.
conn = httplib2.Http()

# Session id -- the crucial value: it is embedded in every request URL below.
sid = 'AdAlwUskI5zWGvAKG22JRbQ'


# Headers sent along with every request.
heads = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11',
    'Content-Type': 'application/x-www-form-urlencoded',
}
# Disabled username/password login flow. It lives inside a bare string literal,
# so none of it executes. As written it would fetch the nLogin page, scrape the
# form's action URL, POST `data` to it and return the `sid` found in the query
# string of the redirect's `location` header.
# NOTE(review): before re-enabling, confirm two things -- `re` is not imported
# by the visible import block, and `.replace(r'&','&')` is a no-op that was
# presumably `&amp;` -> `&` before the HTML entity got unescaped.
'''
uid=''
pwd=''


data={
'login_url':r'http://pt.3g.qq.com/s?aid=nLogin',
'sidtype':'1',
'q_from':'',
'loginTitle':'手机腾讯网',
'bid':'0',
'qq':uid,
'pwd':pwd,
'loginType':'3',
'loginsubmit':'登录'
}

def getsid(conn=conn):
    h,c=conn.request(r'http://pt.3g.qq.com/s?aid=nLogin',headers=heads)
    cont=re.search(r'action="(.*?)"', c).group(1).replace(r'&','&')
    print cont
    h,c=conn.request(cont,method='POST',body=urlencode(data),headers=heads)
    r=urlparse(h.get('location')).query
    return dict(parse_qsl(r)).get('sid')

'''
def get(con):
    """Pull the profile fields out of a QQ user-info page.

    Args:
        con: Markup of the page, handed straight to ``BeautifulSoup``.

    Returns:
        A list of ten gb18030-encoded byte strings: the second
        whitespace-separated token of module 0, the second token of module 1
        minus its last five bytes, then the text following the first
        full-width colon in each of modules 4 through 11.

    Raises:
        IndexError: If the page has fewer than 12 ``main-module bm-gray``
            divs, or a module lacks the expected token or separator.
    """
    # NOTE(review): no parser is named, so bs4 picks whichever one is
    # installed; pin one once the output has been verified against it.
    soup = BeautifulSoup(con)
    modules = soup.findAll('div', attrs={'class': 'main-module bm-gray'})
    if len(modules) < 12:
        # Same exception type the bare indexing would raise, but with a
        # message that says what was wrong with the page.
        raise IndexError(
            'expected at least 12 "main-module bm-gray" divs, found %d'
            % len(modules))

    def encoded(index):
        # Text of one module as stripped gb18030 bytes.
        return modules[index].text.encode('gb18030').strip()

    # b'\xa3\xba' is the gb18030 encoding of the full-width colon. A bytes
    # literal keeps the split working on Python 3 as well as Python 2.
    colon = b'\xa3\xba'

    # The [:-5] drops the last five bytes of the token -- presumably a fixed
    # suffix on the page; verify against a live response.
    lst = [encoded(0).split()[1], encoded(1).split()[1][:-5]]
    lst.extend(encoded(i).split(colon)[1] for i in range(4, 12))
    return lst
    
    
# Read the whole list of QQ numbers (one per line) before touching the network;
# the ``with`` block closes the file instead of leaking the handle.
with open('no.txt') as numbers_file:
    f = numbers_file.readlines()

# get() returns gb18030-encoded byte strings, so rows are written as raw bytes:
# Python 3 exposes the byte stream as ``sys.stdout.buffer``, while Python 2's
# ``sys.stdout`` accepts them directly.
out = getattr(sys.stdout, 'buffer', sys.stdout)

# Call the login-by-sid endpoint once up front; its response is not inspected.
# NOTE(review): presumably this activates the session for the lookups below.
h, c = conn.request(
    r'http://pt.3g.qq.com/s?aid=nLogin3gqqbysid&3gqqsid={s}'.format(s=sid),
    headers=heads)
for i in f:
    uin = i.strip()
    if not uin:
        # A blank line would otherwise trigger a lookup with an empty ``u``.
        continue
    # quote() keeps characters such as '&' in the input file from altering
    # the query string; plain digits pass through unchanged.
    h, c = conn.request(
        r'http://q32.3g.qq.com/g/s?sid={s}&aid=nqqUserInfo&u={u}'.format(
            s=sid, u=quote(uin)),
        headers=heads)
    out.write(b'\t'.join(get(c)) + b'\n')
    time.sleep(1)  # throttle: one lookup per second

你可能感兴趣的:(批量QQ资料采集)