import requests
from bs4 import BeautifulSoup
import threading
import time
import urllib.request
# Index page that is scraped for links; it is also the base that the
# per-file download URLs are built from.
url = 'http://www.mee.gov.cn/hjzl/dqhj/cskqzlzkyb/'

# Request headers sent with the index-page request (a desktop Chrome
# User-Agent string).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/55.0.2883.87 Safari/537.36'
    ),
}
class DouYinMusic:
    """Scrape the index page at the module-level ``url`` and download its links.

    Instantiating the class immediately runs :meth:`download_path`, which
    starts one download thread per link found on the page.
    """

    # Prefix the output file names are appended to. The code does not create
    # this directory, so it has to exist before a download starts.
    DOWNLOAD_DIR = 'C://Users//m//Desktop//down//'
    # Number of bytes requested from the HTTP response per read.
    BLOCK_SIZE = 8192
    # Seconds to wait before starting each download thread (simple throttle).
    START_DELAY = 0.5

    def __init__(self):
        self.download_path()

    @staticmethod  # static method
    def download_path(self=None):
        """Fetch the index page and start a download thread for every link.

        Every ``<a>`` inside an element with class ``main_rt_list`` that
        carries an ``href`` is handed to :meth:`download_pdf` on its own
        thread. Anchors without an ``href`` are skipped.

        Args:
            self: Unused. Kept (with its ``None`` default) only so existing
                calls such as ``instance.download_path()`` and
                ``DouYinMusic.download_path()`` keep working.
        """
        r = requests.get(url, headers=headers)
        r.encoding = 'UTF-8'
        soup = BeautifulSoup(r.text, 'html.parser')
        for tag_div in soup.find_all(True, {'class': 'main_rt_list'}):
            for tag_a in tag_div.find_all('a'):
                href = tag_a.get('href')
                if not href:
                    # Nothing to download for an anchor without a target.
                    continue
                print('tag_a', href[2:])
                print('tag_a', tag_a.text)
                # This is a static method, so there is no instance to go
                # through: reference the worker via the class. ``args`` must
                # be a tuple -- ``(tag_a)`` is just ``tag_a`` and Thread would
                # unpack the tag's children as separate arguments.
                t = threading.Thread(
                    target=DouYinMusic.download_pdf, args=(tag_a,))
                time.sleep(DouYinMusic.START_DELAY)
                t.start()

    @staticmethod
    def download_pdf(tag_a):
        """Download the file an anchor tag points to into ``DOWNLOAD_DIR``.

        The file is stored as ``DOWNLOAD_DIR + tag_a.text + '.pdf'``.

        Args:
            tag_a: Anchor element providing ``get('href')`` and ``text``.

        Raises:
            ValueError: If the anchor has no ``href``.
            OSError: If the download or the local file write fails
                (``urllib.error.URLError`` is a subclass of ``OSError``).
        """
        from urllib.parse import urljoin

        href = tag_a.get('href')
        if not href:
            raise ValueError('anchor tag has no href to download')

        # urljoin resolves './x' exactly like the former ``url + href[2:]``
        # and additionally copes with absolute or '../' links.
        target = urljoin(url, href)
        path = DouYinMusic.DOWNLOAD_DIR + tag_a.text + '.pdf'

        # Context managers close both handles even when a read/write fails.
        with urllib.request.urlopen(target) as u, open(path, 'wb') as f:
            while True:
                buffer = u.read(DouYinMusic.BLOCK_SIZE)
                if not buffer:
                    break
                f.write(buffer)
        # Report completion once the whole file has been written.
        print('=====结束==========')
if __name__ == "__main__":
    # Constructing the object is all that is needed: __init__ calls
    # download_path(), which performs the scrape.
    main = DouYinMusic()