爬取小视频

本来是准备在爱奇艺上抓取的,但作为小白的我被登录验证难住了,所以只好找了一个不那么正规的网站,抓了些小视频。


代码如下:

在这里插入代码片
import re
import requests
import time
import os
#from lxml import etree
#一堆库
def gethtmls(url, timeout=10):
    """Download ``url`` and return the response body as text.

    A fixed two-second pause is taken before the request is sent.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.exceptions.RequestException: If the connection fails or
            the server does not answer within ``timeout`` seconds.
    """
    headers = {
        'Cookie': 'UM_distinctid=1677d7c96e00-0aba0d8ce198d8-37664109-144000-1677d7c96e3837; CNZZDATA5056480=cnzz_eid%3D325905205-1543995314-https%253A%252F%252Fwww.baidu.com%252F%26ntime%3D1543995314',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Mobile Safari/537.36',
    }
    # The site is not very stable and its host sometimes fails to respond,
    # so requests are deliberately slowed down.
    time.sleep(2)
    # An explicit timeout keeps an unresponsive host from blocking the
    # script forever; requests applies no timeout by default.
    return requests.get(url, headers=headers, timeout=timeout).text
# Captures the value of the first ``src`` attribute ending in ".mp4" that
# follows a ``class="con"`` marker on the same line. The optional quote is
# kept outside the group so the captured URL never carries it.
_MP4_SRC_RE = re.compile(r'''class="con".*?src=["']?([^"'\s>]*?\.mp4)''')


def getmap4video(html, infolist):
    """Collect the .mp4 URLs found in ``html`` into ``infolist``.

    Args:
        html: Page source to scan.
        infolist: List that the discovered URLs are appended to (mutated in
            place, in document order).

    Returns:
        None. Results are delivered through ``infolist``.
    """
    # A capture group is used instead of splitting the match on '=': the
    # split kept the opening quote of quoted attributes and cut off URLs
    # that themselves contain '=' (e.g. query strings).
    infolist.extend(_MP4_SRC_RE.findall(html))

def savevideo(infolist, root='C://testvideo//', timeout=30):
    """Download every URL in ``infolist`` into the ``root`` directory.

    Each file is named after the last path segment of its URL. Files that
    already exist are left untouched and are not downloaded again. A
    download that fails is reported and skipped so the remaining URLs are
    still processed.

    Args:
        infolist: Iterable of video URLs.
        root: Directory the videos are written to; created if missing.
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        None.
    """
    headers = {
        'Cookie': 'UM_distinctid=1677d7c96e00-0aba0d8ce198d8-37664109-144000-1677d7c96e3837; CNZZDATA5056480=cnzz_eid%3D325905205-1543995314-https%253A%252F%252Fwww.baidu.com%252F%26ntime%3D1543995314',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Mobile Safari/537.36',
    }
    # open() does not create missing directories, so make sure it exists.
    os.makedirs(root, exist_ok=True)
    for video_url in infolist:
        path = os.path.join(root, video_url.split('/')[-1])
        print(path)
        # Check before touching the network: an existing file needs no download.
        if os.path.exists(path):
            continue
        # Write to a temporary name first so an interrupted transfer never
        # leaves a truncated file that later runs would mistake for complete.
        partial_path = path + '.part'
        try:
            with requests.get(video_url, headers=headers, stream=True,
                              timeout=timeout) as response:
                # Do not store an HTTP error page under a .mp4 name.
                response.raise_for_status()
                with open(partial_path, 'wb') as f:
                    # Stream in chunks instead of holding the whole video in memory.
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        f.write(chunk)
        except requests.RequestException as exc:
            print('failed: {}'.format(exc))
            if os.path.exists(partial_path):
                os.remove(partial_path)
            continue
        os.replace(partial_path, path)
        print('sucess')

def main():
    """Fetch the listing page, extract its video URLs and save them to disk."""
    infolist = []
    url = 'http://www.jiefuku.com/xsp/'
    htmls = gethtmls(url)
    getmap4video(htmls, infolist)
    savevideo(infolist)


# Run the scraper only when executed as a script, so that importing this
# module does not start network requests and downloads.
if __name__ == '__main__':
    main()

欢迎各位指出不足之处。

你可能感兴趣的:(爬取小视频)