python人人语音爬虫(登录功能尚未完成,需要先在浏览器中登录,再查看cookie中的t)

import urllib, urllib2, cookielib,re,json

def LoginRenren(url,t_cookie):
    """Fetch ``url`` with the ``t`` cookie attached and return the response body.

    Automatic login is not implemented: the caller must supply the value of
    the ``t`` cookie, looked up by hand in a browser.

    Args:
        url: Address to request.
        t_cookie: Value of the ``t`` cookie (a string).

    Returns:
        The raw response body as returned by ``read()``.

    Raises:
        urllib2.URLError: Propagated from ``urllib2.urlopen`` on failure.
    """
    # Same header value the original built from a one-entry dict: "t=<value>;"
    cookieHeader = "t=" + t_cookie + ";"
    # A fresh cookie-aware opener is installed globally on every call, as before.
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar()))
    urllib2.install_opener(opener)
    req = urllib2.Request(url)
    req.add_header('Cookie', cookieHeader)
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Python 2 responses are not context managers; close explicitly so the
        # socket is released even if read() raises.
        response.close()

def searchMp3(content):
    """Return every Renren audio (.mp3) URL found in ``content``.

    Args:
        content: Text to scan (here, the raw JSON of one album page).

    Returns:
        A list of matching URL strings in order of appearance; empty when
        nothing matches.
    """
    # Dots are escaped so they match a literal '.' only; unescaped, the
    # pattern also accepted strings such as ".../nameXmp3".
    reMp3 = r'http://fmn\.rrimg\.com/fmn\d{3}/audio/\d{8}/\d{4}/\w+\.mp3'
    return re.findall(reMp3, content)

def downloadMp3(mp3Url,num,saveDir=r"C:\Users\john\Desktop\renrenMp3"):
    """Download one mp3 and save it as ``<num>.mp3`` inside ``saveDir``.

    Args:
        mp3Url: URL of the audio file to fetch.
        num: Integer used as the local file name.
        saveDir: Destination directory; defaults to the folder that was
            previously hard-coded. It is created if it does not exist.
    """
    import os  # local import: the file's import line does not provide os

    # urlretrieve fails when the target directory is missing, so create it.
    if not os.path.isdir(saveDir):
        os.makedirs(saveDir)
    localPath = os.path.join(saveDir, "%d.mp3" % num)
    urllib.urlretrieve(mp3Url,localPath)

def main(albumUrl,t_cookie):#firstpage
    """Collect every mp3 URL of an album page by page, then download them.

    Pages are requested starting from page 0 until one comes back with an
    empty ``photoList``. The collected URLs are printed and then saved as
    1.mp3, 2.mp3, ... in the order they were found.

    Args:
        albumUrl: Base URL of the album.
        t_cookie: Value of the ``t`` cookie passed on to ``LoginRenren``.
    """
    collected = []
    page = 0
    while True:
        pageUrl = albumUrl + '/bypage/ajax?curPage=%d&pagenum=40' % page
        rawJson = LoginRenren(pageUrl, t_cookie)
        # An empty photoList marks the end of the album.
        if not json.loads(rawJson)["photoList"]:
            break
        # The URLs are extracted from the raw text, not the parsed object.
        collected.extend(searchMp3(rawJson))
        page += 1
    # Single parenthesised argument: prints the same as the print statement.
    print(collected)
    for index, link in enumerate(collected, 1):
        downloadMp3(link, index)

if __name__=='__main__':
    # Usage: python <script> [albumUrl [t_cookie]]
    # Both arguments are optional; the sample values below are used when they
    # are omitted. t_cookie is the value of the "t" cookie, which has to be
    # looked up in a browser after logging in.
    import sys

    cliArgs = sys.argv[1:]
    albumUrl = cliArgs[0] if len(cliArgs) > 0 else 'http://photo.renren.com/photo/465457202/album-868663788'
    t_cookie = cliArgs[1] if len(cliArgs) > 1 else '134cc936f2785fa03902fe3185e517f64'
    main(albumUrl,t_cookie)


你可能感兴趣的:(python,爬虫人人语音)