Python网络爬虫与信息提取!爬虫批量搜索音乐并下载!

目标

将想要下载的歌曲名字存在列表中,通过代码批量搜索并下载这些歌曲。

准备

因为webdriver打开网页缓慢的原因,我考虑使用selenium控制已经打开的chrome网页,毕竟直接用chrome搜索歌曲和打开网页还是很快的。首先在pycharm中打开终端,输入以下命令切换到谷歌浏览器目录下:

cd /d "C:\Program Files (x86)\Google\Chrome\Application"

执行命令打开谷歌浏览器并保存配置在本地,这里路径可以选择和代码同级目录:

chrome.exe --remote-debugging-port=9222 --user-data-dir="e:\py_code\Reptile"

执行后会打开谷歌浏览器 ,在地址栏输入我们的网址:

http://www.gequdaquan.net/gqss/index.html

OK,准备工作完成!接下来交给脚本干活。

代码设计

网页驱动的配置:

	chrome_options = Options()
    # chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    chrome_options.debugger_address = "127.0.0.1:9222"
    chrome_driver = "chromedriver.exe"
    driver = webdriver.Chrome(chrome_driver, chrome_options=chrome_options)

这没啥好讲的,固定格式和端口号,具体参照谷歌中自动控制说明。

网页静音

因为前一篇文章说了下载歌曲需要点击播放,所以静音好点:

    # Mute the page player. This is best effort: once the player is already
    # muted the button's class attribute changes, the XPath no longer matches
    # and the click is simply skipped.
    try:
        # find_element("xpath", ...) is the spelling that works on both
        # Selenium 3 and Selenium 4.
        driver.find_element("xpath", "//a[@class='player-btn btn-quiet']").click()
    except Exception:
        # `Exception` rather than a bare `except` so that Ctrl-C and
        # SystemExit are not swallowed.
        pass

这里用try except结构:如果用户已经点击过静音,静音按钮的class属性会发生变化,再按原来的xpath查找就找不到对应元素并引发异常,所以出现异常时直接跳过即可。

初始化

这里初始化搜索的所有歌曲名字,本地保存路径和歌曲下载地址的列表:

    # Song titles to search for; each title is later reused as the file name
    # of the downloaded .mp3.
    search_name = ["万有引力","苦笑"]
    # Directory the downloaded files are written into.
    savaer_path = "D://music//"
    # Collected audio URLs, filled in the same order as search_name.
    # NOTE(review): this name shadows the built-in `list` type for the rest of
    # the script; consider renaming it together with its later uses.
    list = []


批量获取url

下面这段代码调用我们自己写的getMusicUrl函数获取每个搜索名字对应歌曲的URL:

    # Resolve every requested title to its audio URL, keeping the input order.
    print("开始获取url")
    for song_title in search_name:
        list.append(getMusicUrl(driver, song_title))

函数具体实现和上一篇博客基本相同,不做赘述:

def getMusicUrl(driver, search_name):
    """Search the open page for a song and return the URL of its audio stream.

    The function opens the search box, submits ``search_name``, starts
    playback of the first result so the page fills in its ``<audio>`` element,
    reads that element's ``src`` and finally clicks the player button again to
    stop playback.

    Args:
        driver: Selenium WebDriver attached to the music page.
        search_name: Text typed into the page's search field.

    Returns:
        The ``src`` attribute of the ``<audio>`` element as a string, or
        ``None`` when the attribute is missing or empty.
    """
    # Imported locally so the function carries its own dependency.
    from selenium.common.exceptions import WebDriverException

    search_input = "//div[@class='search-group']/input[@id='search-wd']"
    first_item = "//div[@class='list-item'][1]"
    play_icon = (first_item
                 + "/span[@class='music-name']/div[@class='list-menu']"
                 + "/span[@class='list-icon icon-play']")

    print("打开搜素框")
    # find_element("xpath", ...) works on both Selenium 3 and Selenium 4.
    driver.find_element("xpath", "//span[@data-action = \"search\"]").click()

    print("搜索音乐")
    getXpath(driver, search_input).clear()
    getXpath(driver, search_input).send_keys(search_name)
    getXpath(driver, "//div[@class='search-group']/button[@class='search-submit']").submit()

    # Give the page time to reload with the search results.
    time.sleep(5)
    print("播放音乐")
    # The search reloads the page, so hovering/clicking can fail while the
    # result list is being rebuilt; keep retrying until the click goes through.
    while True:
        try:
            target = getXpath(driver, first_item)
            # The play icon is only clicked after hovering over the result row.
            ActionChains(driver).move_to_element(target).perform()
            getXpath(driver, play_icon).click()
            break
        except WebDriverException:
            # Only browser-side failures are retried; a short pause avoids
            # hammering the driver in a tight loop.
            time.sleep(0.5)

    print("获取地址")
    src = getXpath(driver, "//audio").get_attribute("src")
    # str(None) would yield the bogus URL "None"; report a missing source as None.
    music_url = str(src) if src else None
    print(music_url)

    print("关闭音乐")
    getXpath(driver, "//a[@class='player-btn btn-play btn-state-paused']").click()

    return music_url

批量下载

下面这段代码遍历url列表,并批量下载:

    # Download every collected URL; URLs were appended in the same order as
    # search_name, so the two sequences are walked in lockstep.
    print("开始下载")
    for _url, song_title in zip(list, search_name):
        DownloadFile(_url, savaer_path, song_title + ".mp3")


下载代码在上一篇博客中已经详细介绍,不做赘述:

def DownloadFile(mp3_url, save_url, file_name, timeout=30, chunk_size=64 * 1024):
    """Download one file over HTTP and store it on disk.

    Args:
        mp3_url: URL of the resource to fetch.
        save_url: Directory to save into; created if it does not exist.
        file_name: Name of the file to write inside ``save_url``.
        timeout: Seconds passed to ``requests.get`` as its timeout.
        chunk_size: Number of bytes read from the response per write.

    Returns:
        Always ``None``. Progress and failures are reported on stdout; network
        and file-system errors are caught so a batch of downloads can continue.
    """
    if mp3_url is None or save_url is None or file_name is None:
        print('参数错误')
        return None

    file_path = os.path.join(save_url, file_name)
    try:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(save_url, exist_ok=True)
        # The context manager closes the streamed connection when done.
        with requests.get(mp3_url, stream=True, timeout=timeout) as res:
            # Do not save an HTTP error page under an .mp3 name.
            res.raise_for_status()
            print('开始写入文件:', file_path)
            with open(file_path, 'wb') as fd:
                # iter_content() defaults to 1-byte chunks; read in larger blocks.
                for chunk in res.iter_content(chunk_size=chunk_size):
                    if chunk:
                        fd.write(chunk)
    except (requests.RequestException, OSError) as err:
        # Report the actual cause instead of hiding it behind a blanket except.
        print("程序错误:", err)
        return None

    print(file_name+' 成功下载!')
    return None

运行验证

Python网络爬虫与信息提取!爬虫批量搜索音乐并下载!_第1张图片

完整代码

# coding=utf-8

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains

import requests

import os

import time


def DownloadFile(mp3_url, save_url, file_name, timeout=30, chunk_size=64 * 1024):
    """Download one file over HTTP and store it on disk.

    Args:
        mp3_url: URL of the resource to fetch.
        save_url: Directory to save into; created if it does not exist.
        file_name: Name of the file to write inside ``save_url``.
        timeout: Seconds passed to ``requests.get`` as its timeout.
        chunk_size: Number of bytes read from the response per write.

    Returns:
        Always ``None``. Progress and failures are reported on stdout; network
        and file-system errors are caught so a batch of downloads can continue.
    """
    if mp3_url is None or save_url is None or file_name is None:
        print('参数错误')
        return None

    file_path = os.path.join(save_url, file_name)
    try:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(save_url, exist_ok=True)
        # The context manager closes the streamed connection when done.
        with requests.get(mp3_url, stream=True, timeout=timeout) as res:
            # Do not save an HTTP error page under an .mp3 name.
            res.raise_for_status()
            print('开始写入文件:', file_path)
            with open(file_path, 'wb') as fd:
                # iter_content() defaults to 1-byte chunks; read in larger blocks.
                for chunk in res.iter_content(chunk_size=chunk_size):
                    if chunk:
                        fd.write(chunk)
    except (requests.RequestException, OSError) as err:
        # Report the actual cause instead of hiding it behind a blanket except.
        print("程序错误:", err)
        return None

    print(file_name+' 成功下载!')
    return None


def getMusicUrl(driver, search_name):
    """Search the open page for a song and return the URL of its audio stream.

    The function opens the search box, submits ``search_name``, starts
    playback of the first result so the page fills in its ``<audio>`` element,
    reads that element's ``src`` and finally clicks the player button again to
    stop playback.

    Args:
        driver: Selenium WebDriver attached to the music page.
        search_name: Text typed into the page's search field.

    Returns:
        The ``src`` attribute of the ``<audio>`` element as a string, or
        ``None`` when the attribute is missing or empty.
    """
    # Imported locally so the function carries its own dependency.
    from selenium.common.exceptions import WebDriverException

    search_input = "//div[@class='search-group']/input[@id='search-wd']"
    first_item = "//div[@class='list-item'][1]"
    play_icon = (first_item
                 + "/span[@class='music-name']/div[@class='list-menu']"
                 + "/span[@class='list-icon icon-play']")

    print("打开搜素框")
    # find_element("xpath", ...) works on both Selenium 3 and Selenium 4.
    driver.find_element("xpath", "//span[@data-action = \"search\"]").click()

    print("搜索音乐")
    getXpath(driver, search_input).clear()
    getXpath(driver, search_input).send_keys(search_name)
    getXpath(driver, "//div[@class='search-group']/button[@class='search-submit']").submit()

    # Give the page time to reload with the search results.
    time.sleep(5)
    print("播放音乐")
    # The search reloads the page, so hovering/clicking can fail while the
    # result list is being rebuilt; keep retrying until the click goes through.
    while True:
        try:
            target = getXpath(driver, first_item)
            # The play icon is only clicked after hovering over the result row.
            ActionChains(driver).move_to_element(target).perform()
            getXpath(driver, play_icon).click()
            break
        except WebDriverException:
            # Only browser-side failures are retried; a short pause avoids
            # hammering the driver in a tight loop.
            time.sleep(0.5)

    print("获取地址")
    src = getXpath(driver, "//audio").get_attribute("src")
    # str(None) would yield the bogus URL "None"; report a missing source as None.
    music_url = str(src) if src else None
    print(music_url)

    print("关闭音乐")
    getXpath(driver, "//a[@class='player-btn btn-play btn-state-paused']").click()

    return music_url

def getXpath(driver, path, timeout=None, poll_interval=0.2):
    """Wait until an element matching an XPath exists and return it.

    Args:
        driver: Object exposing Selenium's ``find_element(by, value)``.
        path: XPath expression of the element to wait for.
        timeout: Maximum number of seconds to wait. ``None`` (the default)
            waits indefinitely.
        poll_interval: Seconds to sleep between two lookup attempts.

    Returns:
        The element returned by the first successful lookup.

    Raises:
        TimeoutError: If ``timeout`` is set and elapses before a lookup
            succeeds; the last lookup error is chained as the cause.
    """
    deadline = None if timeout is None else time.monotonic() + timeout
    while True:
        try:
            # Return the element that was just found; looking it up a second
            # time could fail if the page changed in between.
            # find_element("xpath", ...) works on Selenium 3 and Selenium 4.
            return driver.find_element("xpath", path)
        except Exception as err:
            # `Exception` rather than a bare `except`, so Ctrl-C and
            # SystemExit still interrupt the wait.
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(
                    "element not found for xpath: " + str(path)) from err
            # Sleep between attempts so the loop does not spin at full speed.
            time.sleep(poll_interval)


if __name__ == '__main__':
    # Script entry point: attach to an already running Chrome, collect the
    # audio URL of every requested song, then download them all.
    # Imported here because only this entry point needs it.
    from selenium.common.exceptions import WebDriverException

    search_name = ["万有引力", "苦笑"]  # titles to search; reused as file names
    save_path = "D://music//"           # target directory for the .mp3 files
    music_urls = []                     # one URL per title (no longer shadows `list`)

    # Attach to the Chrome instance started with --remote-debugging-port=9222.
    chrome_options = Options()
    chrome_options.debugger_address = "127.0.0.1:9222"
    chrome_driver = "chromedriver.exe"
    # `options=` replaces the deprecated `chrome_options=` keyword.
    # NOTE(review): newer Selenium 4 releases reportedly drop the positional
    # driver path as well and expect a Service object; confirm against the
    # installed version.
    driver = webdriver.Chrome(chrome_driver, options=chrome_options)
    try:
        # Mute the player (best effort): when it is already muted the button
        # has a different class and the lookup fails, which is fine.
        try:
            driver.find_element("xpath", "//a[@class='player-btn btn-quiet']").click()
        except WebDriverException:
            pass

        print("开始获取url")
        for name in search_name:
            music_urls.append(getMusicUrl(driver, name))
    finally:
        # Always end the driver session, even if a lookup failed.
        driver.quit()

    print("开始下载")
    # URLs were collected in the same order as the titles.
    for name, _url in zip(search_name, music_urls):
        DownloadFile(_url, save_path, name + ".mp3")

源码文件加群:1136192749

 

你可能感兴趣的:(Python网络爬虫与信息提取!爬虫批量搜索音乐并下载!)