打开 www.youtube.com,随便搜索一个主题(我这里选的是 lion king),再用过滤器筛选出时长小于 4 分钟的视频。
在搜索结果页面上点击右键,查看网页源代码。
源代码中形如 a href="/watch?v=..." 的链接(即原文截图中红色框内的部分)就是视频的地址。利用正则表达式,很容易地就可以把这些视频地址提取并输出。
# coding:utf-8
import re
import urllib
def getHtml(url):
    """Fetch *url* and return the raw response body.

    The body is returned undecoded: a byte string (``str`` on Python 2,
    ``bytes`` on Python 3).

    Args:
        url: Address to open; anything the platform's ``urlopen`` accepts.

    Returns:
        The complete response body as read from the connection.

    Raises:
        IOError: (``OSError``/``URLError`` on Python 3) if the URL cannot be
            opened.
    """
    from contextlib import closing

    try:
        # Python 2: keep the exact opener the script was written against.
        from urllib import urlopen
    except ImportError:
        # Python 3 moved urlopen into urllib.request.
        from urllib.request import urlopen

    # closing() guarantees the connection is released even if read() raises;
    # the Python 2 response object is not a context manager on its own.
    with closing(urlopen(url)) as page:
        return page.read()
# Matches the text that follows `a href="/watch` up to (not including) the
# closing double quote, i.e. the query-string tail of each watch link.
_WATCH_LINK_RE = re.compile(r"(?<=a\shref=\"/watch).+?(?=\")")


def getUrl(html, output_path=r"E:\output.txt"):
    """Extract YouTube watch links from *html* and append them to a file.

    Every ``a href="/watch..."`` occurrence in the page is turned into an
    absolute ``https://www.youtube.com/watch...`` URL. Links are written in
    page order, one per line, and are not de-duplicated.

    Args:
        html: Page source. Text is used as-is; a byte string on Python 3 is
            decoded as UTF-8 (undecodable bytes are replaced) so that the
            text pattern can be applied to it.
        output_path: File the URLs are appended to. It is opened in append
            mode, so it is created if missing and earlier content is kept.

    Returns:
        The list of absolute URLs found (empty when the page has none).

    Raises:
        IOError: if *output_path* cannot be opened or written.
    """
    # On Python 2 `bytes` is `str`, so this branch only fires for Python 3
    # bytes, where matching a text pattern would otherwise raise TypeError.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode("utf-8", "replace")

    urls = [
        "https://www.youtube.com/watch%s" % tail
        for tail in _WATCH_LINK_RE.findall(html)
    ]

    # `with` closes the file even when a write fails part-way through.
    with open(output_path, "a") as out:
        out.writelines(link + "\n" for link in urls)

    return urls
# Number of search-result pages to scrape.
pages = 10

# Search for "lion king"; the lclk/filters parameters are presumably the
# "short" duration filter selected in the browser -- confirm against the
# site. The trailing %s is the page number.
SEARCH_URL = "https://www.youtube.com/results?search_query=lion+king&lclk=short&filters=short&page=%s"

if __name__ == "__main__":
    # Page numbers start at 1, so the upper bound is pages + 1; the previous
    # range(1, pages) stopped one page short of `pages`.
    for i in range(1, pages + 1):
        html = getHtml(SEARCH_URL % i)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(getUrl(html))