# 今天弄了个网易云音乐的爬虫,奈何打包一直报错,百度了半天,好心累……(代码可以复制粘贴,仅供学习参考)
import time
from tkinter import *
from selenium import webdriver
import os
from urllib.request import urlretrieve
import tkinter.messagebox
# Shared search-result store: three parallel lists, one entry per song.
item = {
    'song_id': [],
    'song_name': [],
    'singer_content': [],
}
# Fetch song information for the search term
def get_music_name():
    """Search NetEase Cloud Music for the entry text and show the results.

    Reads the search term from the global ``entry`` widget, loads the search
    page in a headless Chrome session, scrapes song ids, titles and the
    text of the ``td w1`` cell of every result row, stores them in the
    global ``item`` dict and hands them to ``song_view`` for display.

    Shows an error dialog and returns early when the entry is empty.
    Selenium exceptions are not caught here; they propagate to the caller.
    """
    # Function-scope imports: these names are not imported at file level.
    from urllib.parse import quote
    from selenium.webdriver.common.by import By

    name = entry.get()
    if name == "":
        tkinter.messagebox.showerror(title="搜索内容提示", message="您没有输入任何内容")
        return

    # Percent-encode the term so spaces, '&', '#' etc. cannot break the URL.
    url = "https://music.163.com/#/search/m/?s={}&type=1".format(quote(name, safe=""))

    # Run Chrome without a visible window.
    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    driver = webdriver.Chrome(options=option)

    song_ids = []
    song_names = []
    singers = []
    try:
        driver.get(url)
        # The results live inside the iframe with id "g_iframe".
        driver.switch_to.frame('g_iframe')
        req = driver.find_element(By.ID, 'm-search')

        # Result rows alternate between two class strings; the first class
        # is collected in full before the second, for all three lists, so
        # the lists stay index-aligned with one another.
        for row_class in ("item f-cb h-flag ", "item f-cb h-flag even "):
            row = './/div[@class="{}"]'.format(row_class)
            for link in req.find_elements(By.XPATH, row + '/div[2]/div/div/a'):
                # The song id is whatever follows the last '=' in the href.
                song_ids.append(link.get_attribute("href").split("=")[-1])
            for title in req.find_elements(By.XPATH, row + '/div[2]/div/div//b'):
                song_names.append(title.get_attribute("title"))
            for cell in req.find_elements(By.XPATH, row + '/div[@class="td w1"]'):
                singers.append(cell.text)
    finally:
        # Always shut the browser down, even when scraping fails.
        driver.quit()

    # Replace (rather than extend) the previous results so that listbox row
    # numbers keep matching the indices of the lists in ``item``.
    item['song_id'] = song_ids
    item['song_name'] = song_names
    item['singer_content'] = singers
    listbox1.delete(0, END)
    song_view(item)
# Result display
def song_view(item):
    """Append one listbox row per song in ``item``.

    Each row shows the zero-based index, the song title from
    ``item['song_name']`` and the matching ``item['singer_content']`` entry.
    """
    for index, title in enumerate(item['song_name']):
        row = "{}.歌曲名字:{} 歌手信息:{}".format(index, title, item['singer_content'][index])
        listbox1.insert(END, row)
def song_downland():
    """Download the song selected in the result list into ``music/``.

    Requires a non-empty search entry and a selected listbox row; otherwise
    an error dialog is shown and nothing is downloaded. The user is asked to
    confirm the selection, and cancelling aborts the download. Progress and
    the outcome are appended to the global ``text`` widget.
    """
    selection = listbox1.curselection()  # indices of the selected rows
    if entry.get() == "":
        tkinter.messagebox.showerror(title="搜索框无内容", message="请输入搜索框中的内容")
        return
    if not selection:
        tkinter.messagebox.showerror(title="您没有选中内容", message="请选择下载的内容")
        return

    index = selection[0]
    song_name = item["song_name"][index]
    # Honour the dialog result: Cancel must not start the download.
    confirmed = tkinter.messagebox.askokcancel(
        "提示信息:", message="您选择了{}".format(song_name))
    if not confirmed:
        return

    song_id = item["song_id"][index]
    singer_content = item["singer_content"][index]

    # Show progress, and flush pending redraws before the blocking download.
    text.insert(END, "\n歌曲:{} 正在下载....".format(song_name))
    text.update_idletasks()

    url = "https://music.163.com/song/media/outer/url?id={}.mp3".format(song_id)

    # exist_ok=True: do not fail when the folder already exists.
    os.makedirs("music", exist_ok=True)
    # Replace characters that are not allowed in file names on common
    # platforms, then build the path with the platform's separator.
    file_name = "{}{}.mp3".format(song_name, singer_content)
    file_name = "".join(
        "_" if ch in '\\/:*?"<>|\r\n\t' else ch for ch in file_name)
    path = os.path.join("music", file_name)

    try:
        # urlretrieve blocks until the file is written, so no sleep is needed.
        urlretrieve(url, path)
    except OSError as err:  # URLError and ContentTooShortError derive from OSError
        text.insert(END, "\n{}下载失败".format(song_name))
        tkinter.messagebox.showerror(title="下载失败", message=str(err))
        return

    text.insert(END, "\n{}下载完成".format(song_name))
    # NOTE(review): this clears the clipboard, not the text widget; kept as in
    # the original -- confirm whether that is intended.
    text.clipboard_clear()
def delete_info():
    """Clear the result list, the stored results and the log text.

    When the listbox is empty an info dialog is shown instead. Otherwise the
    user is asked to confirm, and cancelling leaves everything untouched.
    """
    if listbox1.size() == 0:
        tkinter.messagebox.showinfo(title="内容为空", message="小可爱,空内容我清除不了")
        return

    # Honour the dialog result: Cancel must not wipe anything.
    if not tkinter.messagebox.askokcancel(title="提示", message="是否清除?"):
        return

    listbox1.delete(0, END)
    # Reset the shared dict in place so other references to it stay valid.
    item.clear()
    item.update({'song_id': [], 'song_name': [], 'singer_content': []})
    # "1.0" is the first character of a Text widget.
    text.delete("1.0", END)
# Build the user interface
win = Tk()
win.title("网易云音乐:")
win.geometry("800x500+350+100")

label = Label(win, text="请输入下载的歌曲:", font=("华文行楷", 16))
label.grid(row=0, column=0)

# Search input
entry = Entry(win, font=("华文行楷", 20))
entry.grid(row=0, column=1, sticky=W)

# Search results, with a horizontal scrollbar underneath
listbox1 = Listbox(win, font=("宋体", 14), fg="blue", width=45, height=16)
listbox1.grid(row=1, column=0)
sc1 = Scrollbar(win, orient='horizontal')
listbox1.configure(xscrollcommand=sc1.set)
sc1['command'] = listbox1.xview
sc1.grid(row=2, column=0)

# Download log
text = Text(win, font=("华文行楷", 14), width=24, height=12)
text.grid(row=1, columnspan=2, sticky=E)

# Buttons
button1 = Button(win, text="点击搜索", font=("隶书", 15), command=get_music_name)
button1.grid(row=0, column=2, sticky=W)
# Use win.destroy instead of the `quit` builtin: `quit` is installed by the
# `site` module for interactive use and may be missing when the program runs
# without `site` (for example in some packaged builds).
button2 = Button(win, text="退出程序", font=("微软雅黑", 15), command=win.destroy)
button2.grid(row=3, column=1, sticky=E)
button3 = Button(win, text="点击下载", font=("微软雅黑", 15), command=song_downland)
button3.grid(row=4, columnspan=3, sticky=S)
button4 = Button(win, text="点击清除", font=("微软雅黑", 15), command=delete_info)
button4.grid(row=3, column=0, sticky=W)

win.mainloop()