from utils import *
import json
from pypinyin import lazy_pinyin
from time import sleep
from connection import *
# HTTP request headers passed to every requests.get call in this script.
headers = {
    'Referer': 'http://music.163.com/',
    'Host': 'music.163.com',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36'
    ),
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8'
    ),
}
# Artist IDs noted for reference:
#   6452 - Jay Chou
#   9272 - Stefanie Sun
#   3684 - JJ Lin
#   5346 - Wang Leehom
# Artists crawled on this run.
singer_ids = [
    6452,  # Jay Chou
    9272,  # Stefanie Sun
]
# Collects one row per lyric line across all crawled artists.
l_total = list()
def _fetch_json(url):
    """Fetch *url* with the shared request headers and return the parsed JSON.

    The response body is decoded as UTF-8 before parsing.  A timeout is
    passed so that a stalled connection raises instead of blocking the
    crawl indefinitely.
    """
    response = requests.get(url=url, headers=headers, timeout=10)
    return json.loads(str(response.content, encoding='utf-8'))


def _lyric_lines(raw_lyric):
    """Split an LRC string into its non-empty lyric lines.

    Bracketed LRC tags are removed and leading spaces are stripped from
    every line, including the first one.
    """
    # Non-greedy match: a greedy `.*` would also delete any lyric text
    # sitting between the first and the last bracket on a line.
    text = re.sub(r'\[.*?\]', '', raw_lyric)
    stripped = (line.lstrip(' ') for line in text.split('\n'))
    return [line for line in stripped if line]


for singer_id in singer_ids:
    # Artist albums endpoint.
    url_singer = "http://music.163.com/api/artist/albums/%d?id=%d&offset=0&total=true&limit=100" % (singer_id, singer_id)
    res_singer = _fetch_json(url_singer)
    singer_name = res_singer['artist']['name']
    for album in res_singer['hotAlbums']:
        album_name = album['name']
        album_id = album['id']
        album_sub_type = album['subType']
        if album_sub_type != '录音室版':
            # Only albums with this subType are crawled; print the skipped one.
            print(album_name, album_id, album_sub_type)
            continue
        print(album_name, album_id)
        # Album songs endpoint.
        url_album = "http://music.163.com/api/album/%d?ext=true&id=%d&offset=0&total=true&limit=20" % (
            album_id, album_id)
        res_album = _fetch_json(url_album)
        for song in res_album['album']['songs']:
            try:
                song_name = song['name']
                song_id = song['id']
                print(' ', song_name, song_id)
                # Song lyric endpoint.
                url_lyric = "http://music.163.com/api/song/lyric?id=%d&lv=-1&kv=-1&tv=-1" % song_id
                res_lyric = _fetch_json(url_lyric)
                # A response without an 'lrc' entry (or with an empty
                # lyric) simply contributes no rows.
                raw_lyric = (res_lyric.get('lrc') or {}).get('lyric') or ''
                for line in _lyric_lines(raw_lyric):
                    # Last column: pinyin of the line's first character.
                    l_tmp = [singer_name, singer_id, album_name, album_id, song_name, song_id, line,
                             lazy_pinyin(line[0])[0]]
                    l_total.append(l_tmp)
            except Exception as e:
                # Best-effort crawl: report the failing song with the real
                # exception type and carry on with the next one.
                print(type(e).__name__, e)
            finally:
                # Throttle between songs to avoid being blocked; applied
                # after failures too, so errors do not cause rapid retries.
                sleep(0.1)
# Build a DataFrame from the collected rows, one row per lyric line.
df_total = pd.DataFrame(
    data=l_total,
    columns=[
        'singer_name',
        'singer_id',
        'album_name',
        'album_id',
        'song_name',
        'song_id',
        'content',
        'first_py',
    ],
)
# Store the frame in the 'lyrics' table via the project's to_sql helper.
# Presumably if_exists='append' adds to existing rows and `indices` names the
# columns to index -- confirm against the helper's definition.
to_sql(df_total, 'lyrics', if_exists='append', indices=['first_py'])
# Set is_active=1 on rows whose content does not match the regexp '[::]',
# i.e. rows containing no ':' character.
# NOTE(review): the character class lists ':' twice; one of them may have been
# meant to be the full-width colon -- confirm.
sql = (
    "update lyric_chain.lyrics set is_active=1 "
    "where content not regexp '[::]'"
)
execute_sql(sql)