下载二更的视频

下载二更的视频_第1张图片
image.png
#coding=utf-8
import re,urllib2,os,urllib,requests,json,cPickle,time

def getHtmlCode(url):
    """Fetch *url* and return the raw response body.

    The response object is closed explicitly, even when reading fails, so
    that a long crawl does not depend on garbage collection to release the
    underlying connection.

    Any exception raised by ``urllib2.urlopen`` or ``read`` propagates to
    the caller.
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()

def getUrls(htmlString):
    """Return the path fragments of all video links found in *htmlString*.

    A link is recognised as ``href="//www.ergengtv.com/video/<fragment>.html``
    and only ``<fragment>`` is returned. Results keep document order and may
    contain duplicates.
    """
    # Raw string with escaped dots: an unescaped "." matches any character,
    # which would accept look-alike hosts and endings such as "xhtml".
    regUrl = re.compile(r'href="//www\.ergengtv\.com/video/(.+?)\.html')
    return regUrl.findall(htmlString)

def getTitle(htmlString):
    """Return every value captured after ``title": "`` in *htmlString*.

    Each capture runs (non-greedily) up to the next ``",`` sequence.
    """
    return re.findall('title": "(.+?)",', htmlString)

def getMediaId(htmlString):
    """Return every value that follows ``media_id": `` in *htmlString*.

    Each value is the text up to (not including) the next comma, returned
    as a string.
    """
    pattern = 'media_id": (.+?),'
    return [match.group(1) for match in re.finditer(pattern, htmlString)]

def getCreateTime(htmlString):
    """Return every value that follows ``create_at": `` in *htmlString*.

    Each value is the text up to (not including) the next comma, returned
    as a string; converting it to a number is left to the caller.
    """
    return re.compile('create_at": (.+?),').findall(htmlString)

def _loadSavedMediaIds(path):
    """Return the list of already-downloaded media ids stored in *path*.

    A missing file means nothing has been downloaded yet, so an empty list
    is returned instead of crashing on the first run.
    """
    if not os.path.exists(path):
        return []
    # NOTE: pickle executes arbitrary code on load; only ever point this at
    # a file written by this script.
    with open(path, 'rb') as savedFile:
        return cPickle.load(savedFile)


def _saveMediaIds(path, mediaIds):
    """Write *mediaIds* to *path* with pickle protocol 1."""
    with open(path, 'wb') as savedFile:
        cPickle.dump(mediaIds, savedFile, protocol=1)


def _forceHttps(url):
    """Return *url* with a leading ``http://`` upgraded to ``https://``.

    Only the scheme prefix is touched: URLs that are already https, and any
    later occurrence of "http" inside the URL, are left unchanged.
    """
    if url.startswith('http://'):
        return 'https://' + url[len('http://'):]
    return url


if __name__ == '__main__':
    savedIdsPath = 'mediaIds.pkl'
    videoDir = 'videos'

    mediaIdSaved = _loadSavedMediaIds(savedIdsPath)
    print(len(mediaIdSaved))

    # urlretrieve does not create missing directories, so make sure the
    # download target exists before the crawl starts.
    if not os.path.isdir(videoDir):
        os.makedirs(videoDir)

    # Walk the paginated video listing (pages 27..118).
    for i in range(27, 119):
        url = 'https://www.ergengtv.com/video/list/0_' + str(i) + '.html'
        htmlCode = getHtmlCode(url)
        # The same video can be linked several times on one page.
        urls = list(set(getUrls(htmlCode)))
        for urlId in urls:
            url2 = 'https://www.ergengtv.com/video/' + urlId + '.html'
            htmlCode2 = getHtmlCode(url2)
            createTimes = getCreateTime(htmlCode2)
            titles = getTitle(htmlCode2)
            mediaIds = getMediaId(htmlCode2)
            if not (createTimes and titles and mediaIds):
                # A page without the expected metadata must not abort the
                # whole crawl with an IndexError.
                print('skipped (missing metadata): ' + url2)
                continue

            timeString = time.localtime(float(createTimes[0]))
            createTime = time.strftime('%Y-%m-%d', timeString)
            mediaId = mediaIds[0]
            fileName = createTime + '--' + titles[0]
            print(fileName)

            if mediaId in mediaIdSaved:
                print('exsied-------------->  ')
                continue

            try:
                apiUrl = 'https://member.ergengtv.com/api/video/vod/?id=' + mediaId
                htmlCode3 = getHtmlCode(apiUrl)
                decodeJson = json.loads(htmlCode3)
                downloadUrl = decodeJson["msg"]["segs"]["1080p"][0]["url"]
                downloadUrl = _forceHttps(downloadUrl)

                urllib.urlretrieve(downloadUrl, os.path.join(videoDir, fileName + '.mp4'))
                print('done')

                # Persist after every successful download so an interrupted
                # run can resume without fetching the same video again.
                mediaIdSaved.append(mediaId)
                _saveMediaIds(savedIdsPath, mediaIdSaved)
            except Exception as err:
                # Best effort: report the failure and move on to the next
                # video. KeyboardInterrupt is deliberately not caught.
                print("error")
                print(repr(err))

你可能感兴趣的:(下载二更的视频)