"""简单爬虫下载QQ音乐

环境:python3.6
需要用到的库:requests;json;os;re;multiprocessing
通过使用开发者工具进行比较查看确定需要用到的三个URL

u3 = "https://c.y.qq.com/soso/fcgi-bin/client_search_cp?ct=24&qqmusic_ver=1298&new_json=1&remoteplace=txt.yqq.song&searchid=63514736641951294&t=0&aggr=1&cr=1&catZhida=1&lossless=0&flag_qc=0&p=1&n=20&w=%E5%88%98%E5%BE%B7%E5%8D%8E&g_tk=5381&jsonpCallback=searchCallbacksong1651&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq&needNewCode=0"#取得media_mid

w字段为我们搜索的关键字

u2 = "https://c.y.qq.com/base/fcgi-bin/fcg_music_express_mobile3.fcg?g_tk=5381&jsonpCallback=MusicJsonCallback3987557968181119&loginUin=0&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq&needNewCode=0&cid=205361747&callback=MusicJsonCallback3987557968181119&uin=0&songmid=002Q4Jgw0dxWxt&filename=C400002Q4Jgw0dxWxt.m4a&guid=5939185138"#返回songmid和vkey

请求u2需要提供songmid和filename

u1 = "http://dl.stream.qqmusic.qq.com/C400002Q4Jgw0dxWxt.m4a?vkey=2B7DD0E2584277EE4C9839F48E3F92F573E443960895BCF8CB107114E2753A49FDE61425996E1F984CD7C4970738AA3A878B1CF0D14501D0&guid=5939185138&uin=0&fromtag=66"#通过filename和vkey确定歌曲链接

具体代码为:
"""
import requests
import random
import re
import json
import os
import multiprocessing

class DownLoadMusic(object):
    """Download the songs returned by a QQ Music search for one singer.

    The workflow uses three HTTP endpoints:

    1. a search request that yields one media id ("mid") per song,
    2. a request per mid that yields a ``vkey`` access token,
    3. the stream URL built from the file name and the ``vkey``.

    The response layouts are not documented here; they are parsed with the
    regular expressions below, which were derived from observed responses.
    """

    # Default download root (a Windows path); override it with ``base_dir``.
    DEFAULT_BASE_DIR = 'F:\\test_auto\\spider\\music'

    # Seconds to wait for any single HTTP request before giving up, so a
    # stalled connection cannot block a worker forever.
    REQUEST_TIMEOUT = 30

    SEARCH_URL = (
        "https://c.y.qq.com/soso/fcgi-bin/client_search_cp"
        "?ct=24&qqmusic_ver=1298&new_json=1&remoteplace=txt.yqq.song"
        "&searchid=63514736641951294&t=0&aggr=1&cr=1&catZhida=1&lossless=0"
        "&flag_qc=0&p=1&n=20&w=%s&jsonpCallback=searchCallbacksong1651"
        "&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8"
        "&notice=0&platform=yqq&needNewCode=0"
    )
    VKEY_URL = (
        "https://c.y.qq.com/base/fcgi-bin/fcg_music_express_mobile3.fcg"
        "?g_tk=5381&jsonpCallback=MusicJsonCallback3987557968181119"
        "&loginUin=0&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8"
        "&notice=0&platform=yqq&needNewCode=0&cid=205361747"
        "&callback=MusicJsonCallback3987557968181119&uin=0"
        "&songmid=%s&filename=%s&guid=5939185138"
    )
    STREAM_URL = (
        "http://dl.stream.qqmusic.qq.com/%s"
        "?vkey=%s&guid=5939185138&uin=0&fromtag=66"
    )

    def __init__(self, singer_name, base_dir=None):
        """Remember the search keyword and where downloads are stored.

        Args:
            singer_name: Keyword sent to the search endpoint; also used as
                the name of the download sub-directory.
            base_dir: Directory under which the singer's folder is created.
                Defaults to ``DEFAULT_BASE_DIR``.
        """
        self.singer_name = singer_name
        self.base_dir = self.DEFAULT_BASE_DIR if base_dir is None else base_dir

    def make_directory(self):
        """Create the singer's download folder if needed and return its path.

        Safe to call repeatedly (every worker process calls it).

        Raises:
            OSError: If the directory cannot be created.
        """
        folder_path = os.path.join(self.base_dir, self.singer_name)
        os.makedirs(folder_path, exist_ok=True)
        return folder_path

    def get_mid_by_name(self):
        """Search for the singer and return the media ids of the hits.

        Returns:
            A list of unique ``strMediaMid`` strings in response order; empty
            when the response contains nothing that can be parsed.

        Raises:
            requests.RequestException: If the search request itself fails.
        """
        from urllib.parse import quote

        # Percent-encode the keyword so characters such as '&' or '#' in the
        # user's input cannot alter the query string.
        url = self.SEARCH_URL % quote(self.singer_name)
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        return self._extract_media_mids(response.content.decode('utf-8'))

    def get_music_by_mid(self, mid_):
        """Download the song identified by ``mid_`` into the singer's folder.

        The file is saved as ``<mid_>.m4a`` so that concurrent workers never
        pick the same name. Network or parsing problems are reported on
        stdout and the song is skipped, so one bad song does not abort the
        whole batch.

        Args:
            mid_: Media id as returned by ``get_mid_by_name``.
        """
        folder_path = self.make_directory()
        filename = 'C400' + mid_ + '.m4a'
        try:
            response = requests.get(self.VKEY_URL % (mid_, filename),
                                    timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            vkey = self._extract_vkey(response.content.decode('utf-8'))
            if vkey is None:
                # Without a vkey the stream URL cannot be built.
                print("地址或者网络有点问题...")
                return
            audio = requests.get(self.STREAM_URL % (filename, vkey),
                                 timeout=self.REQUEST_TIMEOUT)
            # Refuse to save an HTTP error page as if it were audio.
            audio.raise_for_status()
        except requests.RequestException:
            print("地址或者网络有点问题...")
            return

        name = mid_ + '.m4a'
        file_path = os.path.join(folder_path, name)
        print("正在下载歌曲%s" % name)
        with open(file_path, 'wb') as f:
            f.write(audio.content)

    @staticmethod
    def _extract_media_mids(text):
        """Return the unique ``strMediaMid`` values found in a search response."""
        # Drop the JSONP wrapper: keep what is inside the outermost braces,
        # then narrow to the first '{' ... last '}' span within it.
        outer = re.search(r'\{(.*)\}', text, re.S)
        if outer is None:
            return []
        inner = re.search(r'\{.*\}', outer.group(1), re.S)
        if inner is None:
            return []

        mids = []
        seen = set()
        # Each hit looks like: "file":{"media_mid":"...","strMediaMid":"...",...}
        for snippet in re.findall(r'\"file.*?\}', inner.group(0), re.S):
            obj = re.search(r'\{.*\}', snippet, re.S)
            if obj is None:
                continue
            try:
                info = json.loads(obj.group(0))
            except ValueError:
                continue
            mid = info.get('strMediaMid')
            if mid and mid not in seen:
                seen.add(mid)
                mids.append(mid)
        return mids

    @staticmethod
    def _extract_vkey(text):
        """Return the ``vkey`` of the first item in a vkey response, or None."""
        items = re.search(r'\[.*?\]', text, re.S)
        if items is None:
            return None
        first = re.search(r'\{.*?\}', items.group(0), re.S)
        if first is None:
            return None
        try:
            return json.loads(first.group(0)).get('vkey') or None
        except ValueError:
            return None

def main():
    """Ask for a singer name and download the matching songs in parallel."""
    singer_name = input('请输入歌手名称:')
    download = DownLoadMusic(singer_name)
    mids = download.get_mid_by_name()
    # Download the songs with four worker processes.
    pool = multiprocessing.Pool(4)
    try:
        pool.map(download.get_music_by_mid, mids)
    finally:
        # Always release the workers, even if a download raised.
        pool.close()
        pool.join()


# The guard keeps worker processes from re-running main() when the module is
# re-imported (multiprocessing does this on Windows).
if __name__ == '__main__':
    main()

# 你可能感兴趣的:(简单爬虫下载QQ音乐)