将想要下载的歌曲名字存在列表中,批量搜索并下载代码。
因为webdriver打开网页缓慢的原因,我考虑使用selenium控制已经打开的chrome网页,毕竟直接用chrome搜索歌曲和打开网页还是很快的。首先在pycharm中打开终端,输入以下命令切换到谷歌浏览器目录下:
cd /d "C:\Program Files (x86)\Google\Chrome\Application"
执行命令打开谷歌浏览器并保存配置在本地,这里路径可以选择和代码同级目录:
chrome.exe --remote-debugging-port=9222 --user-data-dir="e:\py_code\Reptile"
执行后会打开谷歌浏览器 ,在地址栏输入我们的网址:
http://www.gequdaquan.net/gqss/index.html
OK,准备工作完成!接下来交给脚本干活。
# Attach to the Chrome instance that was started by hand with
# --remote-debugging-port=9222 instead of launching a new browser.
chrome_options = Options()
# Alternative way of passing the same address:
# chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
chrome_options.debugger_address = "127.0.0.1:9222"
chrome_driver = "chromedriver.exe"
driver = webdriver.Chrome(chrome_driver, chrome_options=chrome_options)
这没啥好讲的,固定格式和端口号,具体参照谷歌中自动控制说明。
因为前一篇文章说了下载歌曲需要点击播放,所以静音好点:
# Best-effort mute. Once the player is already muted the button's class
# changes, so the lookup fails; that failure is expected and ignored.
# `Exception` (not a bare except) keeps Ctrl-C / SystemExit working.
try:
    driver.find_element_by_xpath("//a[@class='player-btn btn-quiet']").click()
except Exception:
    pass
这里用try except框架,因为如果用户已经点击了静音,再次点击时静音按钮的class属性会发生变化,从而找不到对应的xpath并引发异常,所以出现异常时直接跳过即可。
这里初始化搜索的所有歌曲名字,本地保存路径和歌曲下载地址的列表:
# Song titles to search for.
search_name = ["万有引力", "苦笑"]
# Local directory that receives the downloaded files.
savaer_path = "D://music//"
# Download URLs, appended in the same order as search_name.
# NOTE: this name shadows the built-in `list`; it is kept because the
# following snippets refer to it by this name.
list = []
下面这段代码调用我们自己写的getMusicUrl函数获取每个搜索名字对应歌曲的URL:
# Resolve one download URL per search term, keeping the order of search_name.
print("开始获取url")
for name in search_name:
    music_url = getMusicUrl(driver, name)
    list.append(music_url)
函数具体实现和上一篇博客基本相同,不做赘述:
def getMusicUrl(driver, search_name, max_attempts=10, retry_delay=1.0):
    """Search for a song on the already-open page and return its audio URL.

    The function opens the search box, submits ``search_name``, starts
    playback of the first result so that the page's ``<audio>`` element
    receives a source, reads that ``src`` attribute, and pauses playback.

    Args:
        driver: Selenium WebDriver attached to the music search page.
        search_name: Text typed into the search box.
        max_attempts: How many times to try hovering/clicking the first
            result before giving up (values below 1 are treated as 1).
        retry_delay: Seconds to wait between those attempts.

    Returns:
        The ``src`` attribute of the ``<audio>`` element converted with
        ``str()``.

    Raises:
        Exception: Whatever the last hover/click attempt raised, once
            ``max_attempts`` is exhausted.
    """
    search_input = "//div[@class='search-group']/input[@id='search-wd']"
    first_item = "//div[@class='list-item'][1]"
    play_icon = (
        first_item
        + "/span[@class='music-name']/div[@class='list-menu']"
        + "/span[@class='list-icon icon-play']"
    )

    print("打开搜素框")
    driver.find_element_by_xpath("//span[@data-action = \"search\"]").click()

    print("搜索音乐")
    getXpath(driver, search_input).clear()
    getXpath(driver, search_input).send_keys(search_name)
    getXpath(driver, "//div[@class='search-group']/button[@class='search-submit']").submit()
    time.sleep(5)

    print("播放音乐")
    # Searching reloads the result list, so the play button may not be
    # clickable yet. Retry a bounded number of times with a pause instead of
    # spinning forever.
    attempts = max(1, max_attempts)
    for attempt in range(1, attempts + 1):
        try:
            target = getXpath(driver, first_item)
            # The play icon is only reachable after hovering over the row.
            ActionChains(driver).move_to_element(target).perform()
            getXpath(driver, play_icon).click()
            break
        except Exception:
            if attempt == attempts:
                raise
            time.sleep(retry_delay)

    print("获取地址")
    music_url = str(getXpath(driver, "//audio").get_attribute("src"))
    print(music_url)

    print("关闭音乐")
    getXpath(driver, "//a[@class='player-btn btn-play btn-state-paused']").click()
    return music_url
下面这段代码遍历url列表,并批量下载:
# Download every collected URL, naming each file after its search term.
print("开始下载")
for song_name, _url in zip(search_name, list):
    DownloadFile(_url, savaer_path, song_name + ".mp3")
下载代码上一篇博客已经详细介绍,不做赘述:
def DownloadFile(mp3_url, save_url, file_name):
    """Download ``mp3_url`` and store it as ``save_url/file_name``.

    Errors are reported on stdout and never propagated, so one failed
    download does not abort a batch.

    Args:
        mp3_url: URL of the audio resource.
        save_url: Target directory; created if it does not exist.
        file_name: Name of the file to write inside ``save_url``.

    Returns:
        None in every case.
    """
    if mp3_url is None or save_url is None or file_name is None:
        print('参数错误')
        return None

    try:
        # Create the target directory if needed (no-op when it exists).
        os.makedirs(save_url, exist_ok=True)
        file_path = os.path.join(save_url, file_name)

        # Stream the body so the whole file is never held in memory; the
        # context manager releases the connection afterwards.
        with requests.get(mp3_url, stream=True, timeout=30) as res:
            # Fail on HTTP errors instead of saving an error page as .mp3.
            res.raise_for_status()
            print('开始写入文件:', file_path)
            with open(file_path, 'wb') as fd:
                # Explicit chunk size: the default yields one byte at a time.
                for chunk in res.iter_content(chunk_size=64 * 1024):
                    fd.write(chunk)
        print(file_name + ' 成功下载!')
    except Exception as err:
        # Keep the "never raises" contract, but say what went wrong.
        print("程序错误", err)
    return None
# coding=utf-8
"""Batch-search songs in an already-open Chrome page and download them."""
import os
import time

import requests
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains


def DownloadFile(mp3_url, save_url, file_name):
    """Download ``mp3_url`` and store it as ``save_url/file_name``.

    Errors are reported on stdout and never propagated, so one failed
    download does not abort a batch.

    Args:
        mp3_url: URL of the audio resource.
        save_url: Target directory; created if it does not exist.
        file_name: Name of the file to write inside ``save_url``.

    Returns:
        None in every case.
    """
    if mp3_url is None or save_url is None or file_name is None:
        print('参数错误')
        return None

    try:
        # Create the target directory if needed (no-op when it exists).
        os.makedirs(save_url, exist_ok=True)
        file_path = os.path.join(save_url, file_name)

        # Stream the body so the whole file is never held in memory; the
        # context manager releases the connection afterwards.
        with requests.get(mp3_url, stream=True, timeout=30) as res:
            # Fail on HTTP errors instead of saving an error page as .mp3.
            res.raise_for_status()
            print('开始写入文件:', file_path)
            with open(file_path, 'wb') as fd:
                # Explicit chunk size: the default yields one byte at a time.
                for chunk in res.iter_content(chunk_size=64 * 1024):
                    fd.write(chunk)
        print(file_name + ' 成功下载!')
    except Exception as err:
        # Keep the "never raises" contract, but say what went wrong.
        print("程序错误", err)
    return None


def getMusicUrl(driver, search_name, max_attempts=10, retry_delay=1.0):
    """Search for a song on the already-open page and return its audio URL.

    The browser is expected to be showing the music search page already.
    The function opens the search box, submits ``search_name``, starts
    playback of the first result so that the page's ``<audio>`` element
    receives a source, reads that ``src`` attribute, and pauses playback.

    Args:
        driver: Selenium WebDriver attached to the music search page.
        search_name: Text typed into the search box.
        max_attempts: How many times to try hovering/clicking the first
            result before giving up (values below 1 are treated as 1).
        retry_delay: Seconds to wait between those attempts.

    Returns:
        The ``src`` attribute of the ``<audio>`` element converted with
        ``str()``.

    Raises:
        WebDriverException: If the first result still cannot be played
            after ``max_attempts`` tries, or a required element never
            appears.
    """
    search_input = "//div[@class='search-group']/input[@id='search-wd']"
    first_item = "//div[@class='list-item'][1]"
    play_icon = (
        first_item
        + "/span[@class='music-name']/div[@class='list-menu']"
        + "/span[@class='list-icon icon-play']"
    )

    print("打开搜素框")
    driver.find_element_by_xpath("//span[@data-action = \"search\"]").click()

    print("搜索音乐")
    getXpath(driver, search_input).clear()
    getXpath(driver, search_input).send_keys(search_name)
    getXpath(driver, "//div[@class='search-group']/button[@class='search-submit']").submit()
    time.sleep(5)

    print("播放音乐")
    # Searching reloads the result list, so the play button may not be
    # clickable yet. Retry a bounded number of times with a pause instead of
    # spinning forever.
    attempts = max(1, max_attempts)
    for attempt in range(1, attempts + 1):
        try:
            target = getXpath(driver, first_item)
            # The play icon is only reachable after hovering over the row.
            ActionChains(driver).move_to_element(target).perform()
            getXpath(driver, play_icon).click()
            break
        except WebDriverException:
            if attempt == attempts:
                raise
            time.sleep(retry_delay)

    print("获取地址")
    music_url = str(getXpath(driver, "//audio").get_attribute("src"))
    print(music_url)

    print("关闭音乐")
    getXpath(driver, "//a[@class='player-btn btn-play btn-state-paused']").click()
    return music_url


def getXpath(driver, path, timeout=30.0, poll_interval=0.5):
    """Wait until the element at XPath ``path`` exists and return it.

    Args:
        driver: Selenium WebDriver to query.
        path: XPath expression of the wanted element.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between lookups.

    Returns:
        The element returned by ``driver.find_element_by_xpath(path)``.

    Raises:
        WebDriverException: The error of the last lookup if the element
            did not appear within ``timeout`` seconds.
    """
    deadline = time.monotonic() + timeout
    while True:
        try:
            # Return the element from the successful lookup directly rather
            # than querying the page a second time.
            return driver.find_element_by_xpath(path)
        except WebDriverException:
            if time.monotonic() >= deadline:
                raise
            time.sleep(poll_interval)


def main():
    """Collect the audio URL of every configured song, then download them."""
    search_name = ["万有引力", "苦笑"]
    savaer_path = "D://music//"
    music_urls = []

    # Attach to the Chrome instance listening on the remote-debugging port.
    chrome_options = Options()
    # Alternative way of passing the same address:
    # chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    chrome_options.debugger_address = "127.0.0.1:9222"
    chrome_driver = "chromedriver.exe"
    driver = webdriver.Chrome(chrome_driver, chrome_options=chrome_options)

    try:
        # Best-effort mute: when the player is already muted the button has
        # a different class and the lookup fails, which is fine.
        try:
            driver.find_element_by_xpath("//a[@class='player-btn btn-quiet']").click()
        except WebDriverException:
            pass

        print("开始获取url")
        for name in search_name:
            music_urls.append(getMusicUrl(driver, name))
    finally:
        # Release the driver even when a lookup above fails.
        driver.quit()

    print("开始下载")
    for name, url in zip(search_name, music_urls):
        DownloadFile(url, savaer_path, name + ".mp3")


if __name__ == '__main__':
    main()
源码文件加群:1136192749