直播课爬虫代码

'''爬虫阿铭linux教程,保存为本地的pdf文件'''

import re

import os

import pdfkit
import requests


# Download every tutorial page linked from the index and save each one as a
# PDF inside ./aminglinux (created on demand; the script then works from it).
# makedirs(exist_ok=True) replaces the race-prone exists() + mkdir() pair.
os.makedirs("aminglinux", exist_ok=True)
os.chdir("aminglinux")

url = "http://www.apelearn.com/study_v2/"
s = requests.session()
# A timeout keeps the script from hanging forever on an unresponsive server.
text = s.get(url, timeout=30).text
print(text)

# NOTE(review): the original pattern was destroyed when this script was
# copied out of a web page (its HTML tags were rendered as list bullets).
# This reconstruction captures the relative href of every <li><a ...> entry
# that points at an .html page -- confirm it against the live index markup.
reg = re.compile(
    r'<li[^>]*>\s*<a[^>]*\bhref="([^"#]+\.html)"[^>]*>.*?</a>\s*</li>',
    re.DOTALL,
)
result = reg.findall(text)
# De-duplicate the links; sorting only makes the download order reproducible.
res = sorted(set(result))
pdfUrl = url
for i in res:
    # Use a separate name so the index URL above is not clobbered.
    page_url = "{0}{1}".format(pdfUrl, i)
    # Swap only the extension; str.replace("html", "pdf") would also rewrite
    # any other "html" occurring inside the page name.
    pdfFileName = os.path.splitext(i)[0] + ".pdf"
    print(pdfFileName)
    try:
        pdfkit.from_url(page_url, pdfFileName)
    except Exception as exc:
        # Best effort: report the failure and move on to the next page.
        # Unlike a bare ``except:``, this still lets Ctrl-C stop the run.
        print("failed to convert {0}: {1}".format(page_url, exc))
        continue

    你可能感兴趣的:(python)