爬取视频案例

1. 在 Python 中使用 requests 模块发送请求,完成视频的爬取与下载操作。

# NOTE(review): the head of get_details() was lost when this article was
# extracted: the `def` line and the code that defines `detail_resp`, `title`,
# `subdate`, `f_count` and `author` are missing, and the HTML inside the
# `content` regex was stripped. The surviving tail is preserved verbatim below
# (as comments, so the rest of the script parses). Restore the function from
# the original article before running; get_page_data() calls it.
#
#     ', detail_resp.text).group(1)
#         # details
#         content = re.search('
#     (.*?)
#     ', detail_resp.text).group(1)
#         # video URL
#         video_url = re.search('srcUrl="(.*?)"',detail_resp.text).group(1)
#         dic = {"title": title, "subdate": subdate, "f_count": f_count, "author": author, "content": content,"video_url":video_url}
#         # start downloading the video file
#         # download_video(video_url,title)
#         pool.submit(download_video,video_url,title)  # submit the task to the thread pool asynchronously
#         datas.append(dic)

# Seconds to wait on a stalled connection before giving up instead of hanging.
REQUEST_TIMEOUT = 30

# Characters removed from video titles before they are used as file names
# (they are rejected by common file systems or act as path separators).
_INVALID_FILENAME_CHARS = str.maketrans("", "", '\\/:*?"<>|')


# Request the category listing pages.
def get_page_data(categoryId):
    """Fetch the listing pages of one category and pass each to get_details().

    Args:
        categoryId: Category identifier interpolated into the listing URL.

    Reads the module-level ``page_num`` (number of listing pages to fetch),
    which is defined outside the visible part of this file.
    """
    url = "https://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=%s&start=" % categoryId
    for i in range(page_num):
        # One page shows 12 entries, so page i starts at offset i * 12.
        url1 = url + str(i * 12)
        resp = requests.get(url1, timeout=REQUEST_TIMEOUT)
        if resp.status_code == 200:
            print("请求成功返回!")
            get_details(resp)
        else:
            # Report the failure instead of silently skipping the page.
            print("请求失败", url1, resp.status_code)


def download_video(video_url, video_name):
    """Download one video into the ``videos`` directory next to this script.

    Args:
        video_url: Direct URL of the video file.
        video_name: Video title; used (after sanitising) as the file name.

    Returns:
        None. The function returns early, without any network request, when
        the target file already exists, and it does not write a file when the
        server answers with a non-200 status.
    """
    # Strip characters that are not allowed in file names.
    video_name = video_name.translate(_INVALID_FILENAME_CHARS)
    videos_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "videos")
    file_path = os.path.join(videos_dir, video_name + ".mp4")

    # Check before downloading so an existing file costs no bandwidth.
    if os.path.exists(file_path):
        print(video_name, "+++++++++++++++++++++已经下载过了!")
        return

    print("开始下载", video_name)
    resp = requests.get(video_url, timeout=REQUEST_TIMEOUT)
    if resp.status_code != 200:
        # Do not save an error page under a .mp4 name.
        print("下载失败", video_name, resp.status_code)
        return

    # The target directory may not exist on the first run.
    os.makedirs(videos_dir, exist_ok=True)
    with open(file_path, "wb") as f:
        # resp.content is the raw bytes (use it for images and videos);
        # resp.text would be the body decoded as a string.
        f.write(resp.content)


# Write the collected data to a JSON file.
def write_json():
    """Dump the module-level ``datas`` list to ``datas.json`` as UTF-8 JSON."""
    with open("datas.json", "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII text (e.g. Chinese titles) readable.
        json.dump(datas, f, ensure_ascii=False)


if __name__ == '__main__':
    # The executor is bound to the module-level name `pool`, which the
    # get_details() code submits download tasks to. Leaving the `with` block
    # waits for all submitted downloads and shuts the pool down.
    with ThreadPoolExecutor() as pool:
        get_page_data(31)
        # Write the metadata.
        write_json()
爬取存储过程

 

转载于:https://www.cnblogs.com/gukai/p/10687414.html

你可能感兴趣的:(爬取视频案例)