# 爬虫:网易云流行歌手 (Crawler: NetEase Cloud Music popular artists)

import requests
import os
import re
import socket
from bs4 import BeautifulSoup
import urllib.request

# Create the download root folder "流行歌手" ("popular artists") if it does
# not exist yet, and make it the working directory for the whole run.
# os.makedirs(exist_ok=True) replaces the original try/bare-except, which
# silently swallowed *any* error (permissions, bad path, ...) — now only a
# genuinely unexpected OSError propagates.
os.makedirs("流行歌手", exist_ok=True)
os.chdir("流行歌手")

# Remember the download root so the per-artist loop below can chdir back
# to it before creating each artist's folder.
wz = os.getcwd()

# Browser-like request headers for the encrypted /weapi POST below.
# NOTE: the original hard-coded "Content-Length: 408". requests computes
# the correct Content-Length from the encoded body itself, and a stale
# manual value can make the server reject or truncate the request, so
# that header is dropped here.
headers = {
    "Host": "music.163.com",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
    "Accept-Encoding": "gzip, deflate, br",
    "Content-Type": "application/x-www-form-urlencoded",
    "DNT": "1",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Cache-Control": "max-age=0",
    "TE": "Trailers"
}

# Hard-coded NetEase session cookies captured from a logged-in browser
# session. NOTE(review): these are point-in-time values and have almost
# certainly expired — refresh them from a live browser session before use.
cookies = {
    "_iuqxldmzr_": "32",
    "_ntes_nnid": "420d9ef2fe95cfa4416255cb475b8340,1584829204209",
    "_ntes_nuid": "420d9ef2fe95cfa4416255cb475b8340",
    "JSESSIONID-WYYY": "SJfnvo2fe7eHF+bi+iJylYd4KQM0rdtHwPHy79COvbpXA\o5Gxn61Gusrc01Y35DrIjkDwjfmI6VUumeUJ0+MPOVo81\dvk\8OfGMdF\lz10uarhSIegJiqNQ8Ed+lQUP8Dz+0PJCQXo07lyy1QA0fwizKbmyXJ/gjA77liRaaQ3iQcT:1584834484275",
    "WM_NI": "qHZOeNCdPUBdti/l4gOMJEw3IMFhbOOEcHZPYGij9+yLYZ1Se0a4PQy9X6wEBa7O86H9IWvzWErTpacw2R5fT+ojaCUI3vvJo8dSk7gMFh7kTZTocNMUZjr2eFNnvkv9ZFA=",
    "WM_NIKE": "9ca17ae2e6ffcda170e2e6eed3d07ee9889c91e674898e8ea3d45e939a9aafb65a90aff9a8d363818be597fb2af0fea7c3b92ab39896b0e84681bca0b6c170b8b7e18db76d8d9386a6f6398b938a89e1458d9bada9d95c9b99a78bbc21bb9497bbb459a7f0afd5d35bbbe798d7f64f82bff9b0bb3e8ce787b8f572959bbeb5f36ba1a9feb3b86e98f5a18be4499797fd97f542ade8acb4ea3da5938a89b66198869c90e2468f8b968bf13aa79a828ff561b6e796a7c437e2a3",
    "WM_TID": "eV7OJswUsoNABUUVEQYvEf7rDfySR/rg"
}

# Pre-encrypted POST body for the /weapi endpoint. "params" and
# "encSecKey" are produced by NetEase's client-side AES/RSA scheme;
# NOTE(review): these captured values appear tied to the session cookies
# above and will need to be regenerated once they go stale.
data = {
    "params": "h8dUnRbn5YKnkEr4BxNeZ4wWdFnneodXDMFZtJFjaOd0oWHakgW0XFsK28289T9brfPJjkP4sqBrdd0lPQ4izeA2Ah6/6ro7aMZ+6pjcc2PDgDgdoLUONNJ2TO2APPSk",
    "encSecKey": "d312212802283a795933c0a62d04740b865590f5aa83b92113394960b014eb3d10e2f594bca71362122d3a18334eb25ec25792804a682735f36ebe77c16045ecb15e909d56a5662b72dc19ffe0bf3580fbf6d02a8f5746a5a53045fd5cf67ee2d0d84246d4275227f455278f4f2fb9470ea9a8515c9701d071665b44a1ca3e86"
}

# POST the pre-encrypted form data to the top-artist endpoint and pull
# every artist ID out of the JSON response text.
httpname = "https://music.163.com/weapi/artist/top"
response = requests.post(httpname, headers=headers, cookies=cookies, data=data)
# A capture group extracts just the digits, replacing the original
# findall + per-item re.sub strip loop. The result is a list of digit
# strings, exactly as before.
# NOTE(review): the name `id` shadows the builtin; it is kept because
# later code in this script reads it.
id = re.findall(r'"id":(\d+)', response.text)

# Give every subsequent urllib download a 15-second socket timeout so a
# stalled transfer cannot hang the script forever. (The original also
# set `i = 0` here; that was dead code — both later loops rebind their
# own loop variable — so it is removed.)
socket.setdefaulttimeout(15)

# Browser-like headers for the per-artist page requests; the Referer
# keeps the site from treating the GETs as hotlinking.
headers = dict([
    ('Referer', 'http://music.163.com/'),
    ('Host', 'music.163.com'),
    ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36'),
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'),
])

# Iterate over the collected artist IDs; for each one scrape the artist
# page, extract the artist name and the top-song list, and download every
# track into a per-artist folder under the download root.
for artist_id in id:
    # Return to the download root before creating the next artist folder.
    os.chdir(wz)
    play_url = "https://music.163.com/artist?id=" + str(artist_id)
    session = requests.session()
    # Fetch the page once and reuse it for both the regex scan (.text)
    # and the HTML parse (.content) — the original issued the same GET
    # twice.
    page = session.get(play_url, headers=headers, timeout=30)
    # The artist name sits in a '"title": "..."' fragment of the page.
    title_match = re.search(r'"title": ".+"', page.text)
    if title_match is None:
        # Layout changed or the request was blocked — skip this artist
        # instead of crashing on .group() of None.
        continue
    geshouname = re.sub('"title": "', '', title_match.group())
    geshouname = re.sub('"', '', geshouname)
    # Replace characters that are illegal in Windows paths so the folder
    # can always be created from the scraped name.
    geshouname = re.sub(r'[\\/:*?"<>|]', '_', geshouname).strip()
    print(geshouname)
    # makedirs(exist_ok=True) is idempotent and replaces the original
    # try/bare-except around mkdir.
    os.makedirs(geshouname, exist_ok=True)
    os.chdir(geshouname)
    soup = BeautifulSoup(page.content, 'lxml')
    song_list = soup.find('ul', {'class': 'f-hide'})
    if song_list is None:
        # No track list on the page — nothing to download for this artist.
        continue
    songs = []
    for anchor in song_list.find_all('a'):
        # href looks like "/song?id=NNN"; slicing off "/song" leaves the
        # query string for the free outer/url MP3 endpoint.
        song_url = 'http://music.163.com/song/media/outer/url' + anchor['href'][5:] + '.mp3'
        songs.append((anchor.text, song_url))
    for song_name, song_url in songs:
        # Sanitize the song title the same way before using it as a
        # file name.
        safe_name = re.sub(r'[\\/:*?"<>|]', '_', song_name).strip()
        try:
            print('正在下载', song_name)
            urllib.request.urlretrieve(song_url, './%s.mp3' % safe_name)
            print('下载成功')
        except OSError:
            # urlretrieve raises URLError/HTTPError (both OSError) for
            # blocked or missing tracks; report and keep going —
            # narrower than the original bare except.
            print('下载失败')

# Drop a completion report ("报告.txt") in the current directory once the
# whole collection run has finished.
with open("报告.txt", "w+", encoding="utf-8") as report:
    report.write("已经完成了采集任务")

# 注:此脚本采集的是网易云的免费音乐;如今通常改用 API 点歌的方式获取音乐。

# 相关话题: 爬虫, 网易云音乐, Python, 网易