# 网易云音乐 API 爬取歌手歌词 (scrape singers' lyrics via the NetEase Cloud Music API)

from utils import *
import json
from pypinyin import lazy_pinyin
from time import sleep
from connection import *

# HTTP headers attached to every request sent to music.163.com.
headers = {
    "Referer": "http://music.163.com/",
    "Host": "music.163.com",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/62.0.3202.75 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8"
    ),
}

# Artist ids noted by the author:
#   6452 - Jay Chou
#   9272 - Stefanie Sun
#   3684 - JJ Lin
#   5346 - Wang Leehom
# Only the ids listed below are crawled.
singer_ids = [
    6452,
    9272,
]

# Collects one row per lyric line while the crawl runs.
l_total = list()
# Seconds to wait for each HTTP response. Without a timeout a stalled
# connection would block the whole crawl indefinitely.
REQUEST_TIMEOUT = 10


def _fetch_json(url):
    """Request ``url`` with the shared ``headers`` and return the parsed JSON body."""
    body = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT).content
    # The response bytes are decoded explicitly as UTF-8 before parsing.
    return json.loads(str(body, encoding='utf-8'))


def _split_lyric_lines(raw_lyric):
    """Remove bracketed LRC tags from ``raw_lyric`` and return its non-empty lines."""
    # NOTE(review): `.*` is greedy, so on a line holding several bracketed
    # spans everything from the first '[' to the last ']' is removed.
    text = re.sub(r'\[.*\]', '', raw_lyric)  # bracketed LRC tags
    text = re.sub(r'\n+', '\n', text)  # collapse runs of newlines
    text = re.sub(r'\n +', '\n', text)  # spaces at the start of a line
    return [line for line in text.split('\n') if line != '']


for singer_id in singer_ids:
    # Rows for this singer only. The table is appended to once per singer, so
    # writing the ever-growing `l_total` would re-insert every earlier
    # singer's rows on each later iteration.
    singer_rows = []

    # Artist albums endpoint.
    url_singer = "http://music.163.com/api/artist/albums/%d?id=%d&offset=0&total=true&limit=100" % (singer_id, singer_id)
    res_singer = _fetch_json(url_singer)
    singer_name = res_singer['artist']['name']

    for album in res_singer['hotAlbums']:
        album_name = album['name']
        album_id = album['id']
        album_sub_type = album['subType']
        if album_sub_type != '录音室版':
            # Albums with any other subType are reported and skipped.
            print(album_name, album_id, album_sub_type)
            continue
        print(album_name, album_id)

        # Album songs endpoint.
        url_album = "http://music.163.com/api/album/%d?ext=true&id=%d&offset=0&total=true&limit=20" % (
            album_id, album_id)
        res_album = _fetch_json(url_album)

        for song in res_album['album']['songs']:
            try:
                song_name = song['name']
                song_id = song['id']
                print('    ', song_name, song_id)

                # Song lyric endpoint.
                url_lyric = "http://music.163.com/api/song/lyric?id=%d&lv=-1&kv=-1&tv=-1" % song_id
                res_lyric = _fetch_json(url_lyric)

                for line in _split_lyric_lines(res_lyric['lrc']['lyric']):
                    # Last column: pinyin of the line's first character.
                    singer_rows.append(
                        [singer_name, singer_id, album_name, album_id, song_name, song_id, line,
                         lazy_pinyin(line[0])[0]])
            except Exception as e:
                # Best effort: one failing song (request error, bad JSON,
                # missing lyric) must not abort the crawl. Report the real
                # exception type instead of the bare `Exception` class.
                print(type(e).__name__, e)
            finally:
                # Throttle after every song, including failed ones, to avoid
                # being blocked.
                sleep(0.1)

    # Keep the module-level accumulator populated as before.
    l_total.extend(singer_rows)

    # Build the DataFrame for this singer.
    df_total = pd.DataFrame(singer_rows,
                            columns=['singer_name', 'singer_id', 'album_name', 'album_id', 'song_name', 'song_id',
                                     'content', 'first_py'])
    # Persist this singer's rows.
    to_sql(df_total, 'lyrics', if_exists='append', indices=['first_py'])

# Mark rows as active when their content does not match the pattern '[::]'.
# NOTE(review): the bracket expression lists ':' twice; one of them may
# originally have been a full-width colon - confirm against the source.
sql = "update  lyric_chain.lyrics set is_active=1 where content not regexp '[::]'"
execute_sql(sql)

 

# 你可能感兴趣的:(网易云音乐api爬取歌手歌词)