from selenium import webdriver
from selenium.webdriver.common.by import By
import time,os,re
from requests_html import HTMLSession
# Shared HTTP session, reused for the playlist page request and for every
# individual song download below.
session = HTMLSession()
class Spider():
    """Scrape a song list from music.163.com and save every track as an MP3.

    Workflow: ``__init__`` starts Chrome, ``start_url`` locates the list page
    and extracts the song links, ``parse_data`` downloads each song and
    ``save_data`` writes it into ``os_path``.
    """

    # Output directory (kept as a string with a trailing slash, as before).
    os_path = os.getcwd()+'/深夜网易云/'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(os_path, exist_ok=True)

    # Characters that are not allowed in Windows file names (a superset of
    # what POSIX forbids).  Raw string: the old non-raw pattern relied on the
    # invalid escape "\/" and did not cover "\" or "|".
    _ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')

    def __init__(self):
        """Step 1: prepare the data (start URL and a configured Chrome driver)."""
        self.url = 'https://music.163.com/'
        self.opt = webdriver.ChromeOptions()
        # Drop the "enable-automation" switch from the Chrome command line.
        self.opt.add_experimental_option('excludeSwitches', ['enable-automation'])
        self.browser = webdriver.Chrome(options=self.opt)
        self.browser.maximize_window()

    def close(self):
        """Quit the browser started by ``__init__`` and release its process."""
        self.browser.quit()

    def start_url(self):
        """Step 2: send the requests.

        Opens the home page in the browser, reads the link of the first entry
        under the ``top-flag`` element inside the ``contentFrame`` iframe,
        fetches that page over plain HTTP and hands the song hrefs and titles
        found in its hidden ``ul.f-hide`` list to ``parse_data``.
        """
        self.browser.get(self.url)
        # Give the page a moment to build the iframe before looking it up.
        time.sleep(1.5)
        iframe = self.browser.find_element(By.NAME,'contentFrame')
        self.browser.switch_to.frame(iframe)
        list_url = self.browser.find_element(
            By.XPATH, '//*[@id="top-flag"]/dl[1]/dd/div/a'
        ).get_attribute('href')
        # NOTE(review): hard-coded session cookie captured from a browser;
        # it is presumably expired by now - confirm whether it is still needed.
        headers = {
            'cookie':'NMTID=00OReIe5LbjUauGR0BCscJMqTECeaQAAAGNvK3XVg; JSESSIONID-WYYY=guJquI7U6hAEKTkyyym5ppz2vdDq3%5Cp%2BdZVygmYOZ8YPAXsabSI7VSC53Cm2RA5fce89lFgJYPKJW%2BoZ%5CcZYz3lsfD88xE4BY1W5Kot9%2FqyCRpK%5CTHA%2F1gxf05zIn5c3cia%2BQQz%2By8UVb6eBso7cao%2FrTNXqtmlmIoT%2F%5CfoBfIljoVyz%3A1708269327449; _iuqxldmzr_=32; _ntes_nnid=9e5ee18c19e28f36ccb58dfd8ab6762b,1708267527482; _ntes_nuid=9e5ee18c19e28f36ccb58dfd8ab6762b; WEVNSM=1.0.0; WNMCID=kwzsgs.1708267527681.01.0; ntes_utid=tid._.PIG1WIXSIOBEEgRERULA44u1jfOyvTQI._.0; sDeviceId=YD-I0tfPeHFkd1BAxRUQBKQs9%2F0yOej%2BCbK; WM_NI=ZQYDmzoD8uTPA4VtS6OOFQRPV5%2FhEYavb282%2BKCTwX0o1NAPX%2FMNr6WzdL8TIeC4XKftxnHe%2F6mNRXN%2Bz4p8G%2BUuP%2BJVnyV6iDB1HSIdp5oqsRX%2F2WL1IEPDEqZOw9TcVHk%3D; WM_NIKE=9ca17ae2e6ffcda170e2e6ee8ecb6fb1ecfdaff97af5928bb6d44f938f8a87c834f8e9a88fe86ead9ff9a6e42af0fea7c3b92a9a8f97a5f072ad9cf896d75c83ec9a95d23e8d918ab2d15c96ebe189c853a3aaaed3bb3ae9958db5f654f4b1ffafc45c9cf183aff13e95a6acadbb608bbb8dd5db41a6acb88fcb6fedeaa3afd1548ef5b6d5f33485f59b96d3218a8aa29ad53eab9c8ed2cb598bbcc0d6ee7bb18e98d0b15d90ef87a6d35087af96b0f63398bd9aa9ea37e2a3; WM_TID=kbi865lee79ARAQUBFeA55uwzab8UvEP',
            'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
        }
        response = session.get(list_url,headers=headers).html
        music_id = response.xpath('//ul[@class="f-hide"]/li/a/@href')
        music_name = response.xpath('//ul[@class="f-hide"]/li/a/text()')
        self.parse_data(music_id,music_name)

    def parse_data(self,music_id,music_name):
        """Step 3: parse the song links and download each track.

        Args:
            music_id: song hrefs taken from the list page
                (presumably of the form ``/song?id=<number>`` - confirm).
            music_name: song titles, in the same order as ``music_id``.

        Songs whose download does not return usable data are reported and
        skipped instead of being written to disk.
        """
        for href, title in zip(music_id,music_name):
            # Keep only the query string ("id=..."); fall back to the original
            # fixed-width slice if the href unexpectedly has no "?".
            query = href.partition('?')[2] or href[6:]
            url = 'http://music.163.com/song/media/outer/url?'+query
            print(url)
            print(title)
            print('=======')
            response = session.get(url)
            content_type = response.headers.get('Content-Type', '')
            # An error status, an empty body or an HTML page cannot be audio;
            # writing it out would only produce a broken .mp3 file.
            if not response.ok or not response.content or content_type.startswith('text/html'):
                print(f"歌曲{title}下载失败,已跳过")
                continue
            self.save_data(response.content,title)

    def save_data(self,data,mp3_name):
        """Save one downloaded song.

        Args:
            data: raw bytes of the audio file.
            mp3_name: song title; characters that are illegal in file names
                are replaced with ``-`` to build ``<title>.mp3``.
        """
        song_name = self._ILLEGAL_FILENAME_CHARS.sub('-', mp3_name)
        # The directory may have been removed (or os_path overridden) since
        # the class was defined, so make sure it exists before writing.
        os.makedirs(self.os_path, exist_ok=True)
        with open(os.path.join(self.os_path, song_name+'.mp3'),'wb') as f:
            f.write(data)
        print(f"歌曲{mp3_name}保存成功")
if __name__ == '__main__':
    s = Spider()
    try:
        s.start_url()
    finally:
        # Always shut Chrome down, even when scraping fails, so that no
        # browser / chromedriver process is left running.
        s.browser.quit()