# 爬取千千音乐的歌词、歌曲到本地

#爬取千千音乐的歌词
import urllib
import urllib.request
from bs4 import BeautifulSoup


def getHtml(url,kname):
    """Download a page and hand its decoded HTML to the matching parser.

    Args:
        url: Address of the page to fetch.
        kname: Base name for the lyric file. An empty string sends the page
            to ``topB`` (print the chart); any other value sends it to
            ``getWord`` (download the lyrics).
    """
    # The request carries a browser-like User-Agent header.
    headers={"User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)"}
    request=urllib.request.Request(url,headers=headers)
    # The context manager closes the HTTP response even when reading or
    # decoding fails, instead of leaving the connection to the garbage collector.
    with urllib.request.urlopen(request) as response:
        html=response.read().decode('utf-8')
    if kname=='':
        topB(html)
    else:
        getWord(html,kname)
     

def cbk(a, b, c):
    """Print download progress; used as the ``urlretrieve`` report hook.

    Args:
        a: Number of blocks transferred so far.
        b: Size of one block in bytes.
        c: Total size in bytes. ``urlretrieve`` reports a non-positive value
            when the server does not announce the size.
    """
    if c <= 0:
        # Without a known total a percentage is meaningless (and c == 0 would
        # divide by zero), so report the transferred byte count instead.
        print('%d bytes' % (a * b))
        return
    # The last block usually overshoots the total, hence the cap at 100.
    per = min(100.0 * a * b / c, 100)
    print('%.2f%%' % per)

def getWord(html,kname):
    """Save the lyric file referenced by a song page to disk.

    Looks for ``<div class="lrc-list">`` elements, reads their
    ``data-lrclink`` attribute and downloads that URL to ``<kname>.txt``
    inside a hard-coded directory, reporting progress through ``cbk``.

    Args:
        html: HTML text of the song page.
        kname: File name (without extension) for the saved lyrics.

    Raises:
        ValueError: If the page contains no lyric link.
    """
    bs = BeautifulSoup(html, "html.parser")
    # When several elements match, the last one that carries a link wins.
    music = None
    for i in bs.find_all('div',class_="lrc-list"):
        link = i.get('data-lrclink')
        if link:
            music = link
    if music is None:
        # Fail with a clear message rather than an unbound-variable error.
        raise ValueError('no lyric link (data-lrclink) found in the page')
    # NOTE(review): the destination directory is hard-coded and must already exist.
    path="H:\\陌心_demo\\python\\"+kname+".txt"
    urllib.request.urlretrieve(music,path,cbk)

def topB(html):
    """Print the link and title of every song found on a chart page.

    Args:
        html: HTML text of the chart page.
    """
    bs=BeautifulSoup(html,"html.parser")
    # Each anchor under a ``.song-title`` element is treated as one song.
    for n in bs.select('.song-title a'):
        # An anchor without href yields None; fall back to an empty string so
        # the concatenation below cannot raise TypeError.
        link=n.get('href') or ''
        authorTitle=n.get_text()  # song title
        print("链接:"+link,"歌曲名称:"+authorTitle)
def kd(url,host_url,kname):
    """Dispatch to a lyric download or, when input is missing, the chart listing.

    Args:
        url: Song page URL entered by the user.
        host_url: Chart page URL used as the fallback.
        kname: File name (without extension) for the saved lyrics.
    """
    # Whitespace-only input counts as missing too; otherwise a lone space
    # would be passed on as a URL and fail inside urlopen.
    url=url.strip()
    if not url or not kname.strip():
        print("信息不能为空,请重新填写!")
        getHtml(host_url,'')
    else:
        getHtml(url,kname)
    
if __name__=="__main__":
    # Leaving the url or the file name empty prints the chart's links and
    # song titles instead of downloading lyrics.
    host_url="http://music.taihe.com/top/dayhot"  # today's hot-song chart
    url=input('请输入千千音乐的热歌榜音乐链接:【例:http://music.taihe.com/song/672865438】')
    kname=input("请输入保存文本的名称:")
    kd(url=url,host_url=host_url,kname=kname)
#单个爬取音乐到本地,如果想全部爬取,可以关联上面的代码,多写一个循环
import requests
import json
import re


def qqmusic(search,params,knum):
    """Download one song through the JSONP API and write it to the working directory.

    Args:
        search: URL of the API endpoint.
        params: Query parameters sent with the API request.
        knum: Song id. Not used by this function; kept so existing callers
            keep working.

    Raises:
        requests.HTTPError: If the API call or the file download returns an
            error status.
        ValueError: If the API response contains no JSON object.
        KeyError: If the response lacks the expected fields.
    """
    r=requests.get(search,params=params)
    # Stop on an HTTP error instead of parsing an error page.
    r.raise_for_status()
    http_json=loads_jsonp(r.text)
    http_json_link=http_json['bitrate']['file_link']  # download link
    file_format=http_json['bitrate']['file_format']  # file extension
    # NOTE(review): the file is named after the album title, not the song
    # title — confirm this is intended.
    file_name=http_json['songinfo']['album_title']
    p=requests.get(http_json_link)
    # Without this check an error body would be saved as the audio file.
    p.raise_for_status()
    # The name comes from the server: replace characters that are path
    # separators or illegal in file names so it cannot leave the directory.
    safe_name=re.sub(r'[\\/:*?"<>|]','_','%s.%s' % (file_name,file_format))
    with open(safe_name,'wb') as f:
        f.write(p.content)
#解析jsonp
def loads_jsonp( _jsonp):
    """Extract and parse the JSON object embedded in a JSONP response.

    The payload is taken from the first ``{`` to the last ``}`` in the text,
    so a wrapper such as ``callback({...});`` is ignored.

    Args:
        _jsonp: Text of the JSONP response.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If the input is not text, contains no ``{...}`` span, or
            the span is not valid JSON.
    """
    try:
        return json.loads(re.match(r".*?({.*}).*", _jsonp, re.S).group(1))
    except (AttributeError, TypeError, ValueError) as exc:
        # AttributeError: no match (re.match returned None); TypeError:
        # non-text input; ValueError: invalid JSON. Unlike a bare except,
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        raise ValueError('Invalid Input') from exc

if __name__=="__main__":
    # Endpoint of the song API; the response comes back wrapped in a JSONP callback.
    search="http://musicapi.taihe.com/v1/restserver/ting?method=baidu.ting.song.playAAC&format=jsonp&callback=jQuery172005759096318752466_1582270788635&from=web"
    knum=input("请输入链接后面的数字:例如【265715650】")
    # The song id typed by the user is sent as the "songid" query parameter.
    params={
        "songid":knum,
        "_":"1582270791657",
    }
    qqmusic(search=search,params=params,knum=knum)

 

# 你可能感兴趣的:(学习笔记,爬虫)