# Python网易云歌单爬取(非VIP)

import os

import requests
from lxml import etree
# Playlist page to scrape. The address copied from the browser has a "#/" in
# it (https://music.163.com/#/playlist?id=5017583325); the URL that is really
# requested, as found with the browser dev tools (F12), does not.
url = 'https://music.163.com/playlist?id=5017583325'

# Browser-like User-Agent sent along with the HTTP requests.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/83.0.4103.116 Safari/537.36'
    ),
}

# Characters that Windows does not allow in a file name, each mapped to "_".
_INVALID_FILE_NAME_CHARS = str.maketrans({c: '_' for c in '\\/:*?"<>|'})


def _safe_file_name(name, fallback):
    """Return *name* made safe for use as a file name, or *fallback* if nothing is left."""
    # Trailing spaces/dots are also stripped because Windows drops them silently.
    cleaned = name.translate(_INVALID_FILE_NAME_CHARS).strip(' .')
    return cleaned or fallback


def get_content(url, save_dir=r'D:\Desktop\pystudy\网易云音乐'):
    """Download every song linked from a NetEase Cloud Music playlist page.

    The playlist page is fetched with the module-level ``headers``, the song
    links inside ``<ul class="f-hide">`` are extracted, and each song is
    requested through the public "outer link" endpoint and saved as
    ``<title>.mp3`` inside *save_dir*.

    Args:
        url: Address of the playlist page (the form without ``#/``).
        save_dir: Folder the ``.mp3`` files are written to. It is created
            when missing. Defaults to the folder the script originally used.

    Raises:
        requests.RequestException: If the playlist page itself cannot be
            fetched. Failures for individual songs are reported with
            ``print`` and skipped so the remaining songs still download.
    """
    # 1. Fetch the playlist page; fail loudly instead of parsing an error page.
    page = requests.get(url, headers=headers, timeout=30)
    page.raise_for_status()
    html = page.content.decode('utf-8')

    # 2. Parse the HTML so it can be queried with XPath.
    tree = etree.HTML(html)

    # Each playlist entry looks like <a href="/song?id=1450025246">title</a>,
    # and the song id is the part after "=".
    # XPath cheat sheet for the query below:
    #   //   select matching nodes anywhere in the document
    #   []   predicate (filter condition)
    #   /    step to a direct child
    #   @    select an attribute
    # The <a> elements are queried once and both href and title are read from
    # the same element, so a link without text cannot shift the pairing.
    links = tree.xpath('//ul[@class="f-hide"]/li/a')

    os.makedirs(save_dir, exist_ok=True)

    for link in links:
        href = link.get('href')
        if not href:
            continue
        url_id = href.split('=')[-1]
        file_name = ''.join(link.itertext()).strip() or url_id
        print(url_id, file_name)

        # Public outer-link endpoint that serves the audio for a song id.
        music_base = 'http://music.163.com/song/media/outer/url?id=%s' % url_id
        print(music_base)

        # Download first and open the file afterwards, so a failed request
        # does not leave an empty .mp3 behind.
        try:
            req = requests.get(music_base, headers=headers, timeout=60)
            req.raise_for_status()
        except requests.RequestException as exc:
            print('skipped %s: %s' % (file_name, exc))
            continue

        # A textual response (e.g. an HTML error page) is not audio; saving it
        # would only produce a broken .mp3.
        if req.headers.get('Content-Type', '').startswith('text/'):
            print('skipped %s: response is not audio' % file_name)
            continue

        file_path = os.path.join(
            save_dir, '%s.mp3' % _safe_file_name(file_name, url_id)
        )
        with open(file_path, 'wb') as mu:
            mu.write(req.content)
# Start downloading the playlist configured in `url` when the script runs.
get_content(url)

# 你可能感兴趣的:(爬虫,python)