作者介绍:双非本科大三网络工程专业在读,阿里云专家博主,专注于Java领域学习,擅长web应用开发、数据结构和算法,初步涉猎Python人工智能开发和前端开发。
主页:@逐梦苍穹
所属专栏:项目
您的一键三连,是我创作的最大动力
平时如果想要更方便快捷地获取各大平台热搜榜的数据,使用python编写自动化程序是最合适的方式。
下面通过简单的python代码,获取各热搜榜单(某度、某音、某博)的内容并实现可视化。
代码的执行效果:
运行之后,首先看到一个初始化窗口打开在屏幕的正中央:
这个代码只获取前二十条数据,大家可以根据自己屏幕大小,来调整展示数据条数。
这几个网站的热搜榜很好爬,网站没有做这部分的反爬机制,所以这里就不展示详细的过程了。
浏览器F12调试一下便知。
# Fetch the hot-search board from the JSON endpoint and keep the first 20 words.
hot_search = 'https://aweme-hl.snssdk.com/aweme/v1/hot/search/list/?detail_list=1'
headers = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Mobile Safari/537.36"
}
# Without a timeout a stalled connection blocks forever; raise_for_status()
# reports an HTTP error directly instead of failing later while decoding JSON.
response = requests.get(hot_search, headers=headers, timeout=10)
response.raise_for_status()
hot_json = response.json()
# Each entry of data.word_list carries its title under the 'word' key.
hot_list = [data['word'] for data in hot_json['data']['word_list'][:20]]
# Download the realtime board page and pull the first 20 titles out of the HTML.
url = 'https://top.baidu.com/board?tab=realtime'
# Without a timeout a stalled connection blocks forever; raise_for_status()
# keeps an HTTP error page from being parsed as if it were the board.
response = requests.get(url, timeout=10)
response.raise_for_status()
html = response.content
# Parse the page with BeautifulSoup.
soup = BeautifulSoup(html, 'html.parser')
# Titles are the text of <div class="c-single-text-ellipsis"> elements;
# limit=20 stops the search once 20 matches have been collected.
hot_list = [
    item.text
    for item in soup.find_all('div', {'class': 'c-single-text-ellipsis'}, limit=20)
]
# Fetch the side-bar hot-search JSON and keep at most 20 words.
hot_url = 'https://weibo.com/ajax/side/hotSearch'
# Without a timeout a stalled connection blocks forever; raise_for_status()
# reports an HTTP error directly instead of failing later while decoding JSON.
http_response = requests.get(hot_url, timeout=10)
http_response.raise_for_status()
response = http_response.json()
# NOTE(review): 'hotgovs' (pinned entries) is treated as optional so that a
# payload without it does not raise KeyError — confirm against the live API.
hotgovs = response['data'].get('hotgovs') or []
realtime = response['data']['realtime']
# Pinned entries come first, followed by the realtime ranking.
hot_list = [entry['word'] for entry in hotgovs]
hot_list.extend(entry['word'] for entry in realtime)
# Cap the combined list so the chart never shows more than 20 rows.
hot_list = hot_list[:20]
def main():
    """Build the launcher window with one button per board and run the Tk loop."""
    launcher = tk.Tk()
    launcher.title("热搜榜获取")

    # Initial launcher size in pixels (width, height), centred on the screen.
    center_window(launcher, 300, 200)

    # One row per button: (label, click handler, (top, bottom) outer padding).
    board_buttons = (
        ("某音热搜榜", lambda: show_douyin_hot_search(launcher), (30, 10)),
        ("某度热搜榜", lambda: show_baidu_hot_search(launcher), (10, 10)),
        ("某博热搜榜", lambda: show_weibo_hot_search(launcher), (10, 30)),
    )
    for label, handler, padding in board_buttons:
        ttk.Button(launcher, text=label, command=handler).pack(pady=padding)

    launcher.mainloop()
这里还有一个辅助函数 center_window,用于让图形化窗口显示在屏幕的正中央:
def center_window(root, width, height):
    """Resize *root* to ``width`` x ``height`` pixels and centre it on the screen.

    Args:
        root: Any object exposing ``winfo_screenwidth()``,
            ``winfo_screenheight()`` and ``geometry(spec)``.
        width: Desired window width in pixels.
        height: Desired window height in pixels.
    """
    screen_width = root.winfo_screenwidth()
    screen_height = root.winfo_screenheight()
    # Clamp to 0 so a window larger than the screen is anchored at the
    # top-left corner instead of being pushed partly off-screen.
    x_coordinate = max((screen_width - width) // 2, 0)
    y_coordinate = max((screen_height - height) // 2, 0)
    root.geometry(f"{width}x{height}+{x_coordinate}+{y_coordinate}")
draw函数如下:
def draw(hot_list, root, name):
    """Show *hot_list* as a ranked horizontal bar chart in a Matplotlib window.

    Args:
        hot_list: Hot-search titles, ordered from rank 1 downwards.
        root: The hidden launcher window; it is shown again when the chart is
            closed or when drawing fails.
        name: Board name inserted into the chart title.
    """
    fig = None
    try:
        # Use a font with Chinese glyphs so the titles render.
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.rcParams['font.weight'] = 'bold'
        plt.rcParams['axes.unicode_minus'] = False

        # Horizontal bars: the first title gets the longest bar.
        fig, ax = plt.subplots(figsize=(12, 6))
        count = len(hot_list)
        positions = range(count)
        lengths = list(range(count, 0, -1))
        ax.barh(positions, lengths, tick_label=hot_list, height=0.9)
        # Put the first title at the top of the chart.
        ax.invert_yaxis()

        current_datetime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        ax.set_title(f'{name}热搜排行榜({current_datetime})')
        ax.set_xticks(range(1, count + 1))
        fig.subplots_adjust(hspace=0.8, wspace=0.5)

        # "Back" button. The rectangle is [left, bottom, width, height] in
        # figure fractions, i.e. the bottom-left corner of the figure.
        ax_button = fig.add_axes([0.01, 0.01, 0.05, 0.05])
        button = Button(ax_button, '返回', color='lightgoldenrodyellow', hovercolor='0.975')

        def on_button_click(event):
            plt.close(fig)  # close this chart window only
            root.deiconify()  # bring the launcher back

        button.on_clicked(on_button_click)

        def on_close(event):
            # Also restore the launcher when the window is closed directly.
            root.deiconify()
            fig.canvas.mpl_disconnect(cids['close_event'])

        cids = {'close_event': fig.canvas.mpl_connect('close_event', on_close)}

        # NOTE(review): manager.window is assumed to be a Tk window (TkAgg
        # backend); other backends may not provide the methods used here.
        manager = plt.get_current_fig_manager()
        center_window(manager.window, 1200, 600)
        ax.tick_params(axis='y', labelsize=12)

        fig.tight_layout()
        plt.show()
    except Exception as e:
        print(e)
        # Discard a half-built figure and restore the launcher; otherwise a
        # drawing failure leaves the application with no visible window.
        if fig is not None:
            plt.close(fig)
        root.deiconify()
import tkinter as tk
from tkinter import ttk
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
from matplotlib.widgets import Button
import requests
from datetime import datetime
def draw(hot_list, root, name):
    """Show *hot_list* as a ranked horizontal bar chart in a Matplotlib window.

    Args:
        hot_list: Hot-search titles, ordered from rank 1 downwards.
        root: The hidden launcher window; it is shown again when the chart is
            closed or when drawing fails.
        name: Board name inserted into the chart title.
    """
    fig = None
    try:
        # Use a font with Chinese glyphs so the titles render.
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.rcParams['font.weight'] = 'bold'
        plt.rcParams['axes.unicode_minus'] = False

        # Horizontal bars: the first title gets the longest bar.
        fig, ax = plt.subplots(figsize=(12, 6))
        count = len(hot_list)
        positions = range(count)
        lengths = list(range(count, 0, -1))
        ax.barh(positions, lengths, tick_label=hot_list, height=0.9)
        # Put the first title at the top of the chart.
        ax.invert_yaxis()

        current_datetime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        ax.set_title(f'{name}热搜排行榜({current_datetime})')
        ax.set_xticks(range(1, count + 1))
        fig.subplots_adjust(hspace=0.8, wspace=0.5)

        # "Back" button. The rectangle is [left, bottom, width, height] in
        # figure fractions, i.e. the bottom-left corner of the figure.
        ax_button = fig.add_axes([0.01, 0.01, 0.05, 0.05])
        button = Button(ax_button, '返回', color='lightgoldenrodyellow', hovercolor='0.975')

        def on_button_click(event):
            plt.close(fig)  # close this chart window only
            root.deiconify()  # bring the launcher back

        button.on_clicked(on_button_click)

        def on_close(event):
            # Also restore the launcher when the window is closed directly.
            root.deiconify()
            fig.canvas.mpl_disconnect(cids['close_event'])

        cids = {'close_event': fig.canvas.mpl_connect('close_event', on_close)}

        # NOTE(review): manager.window is assumed to be a Tk window (TkAgg
        # backend); other backends may not provide the methods used here.
        manager = plt.get_current_fig_manager()
        center_window(manager.window, 1200, 600)
        ax.tick_params(axis='y', labelsize=12)

        fig.tight_layout()
        plt.show()
    except Exception as e:
        print(e)
        # Discard a half-built figure and restore the launcher; otherwise a
        # drawing failure leaves the application with no visible window.
        if fig is not None:
            plt.close(fig)
        root.deiconify()
def show_douyin_hot_search(root):
    """Fetch the first 20 hot-search words from the JSON endpoint and chart them.

    Args:
        root: The launcher window. It is hidden while the chart is open and
            shown again if the data cannot be fetched.
    """
    root.withdraw()  # hide the launcher
    hot_search = 'https://aweme-hl.snssdk.com/aweme/v1/hot/search/list/?detail_list=1'
    headers = {
        "User-Agent": "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Mobile Safari/537.36"
    }
    try:
        # The timeout keeps a stalled connection from freezing the GUI forever.
        response = requests.get(hot_search, headers=headers, timeout=10)
        response.raise_for_status()
        hot_json = response.json()
        hot_list = [data['word'] for data in hot_json['data']['word_list'][:20]]
    except (requests.RequestException, LookupError, TypeError, ValueError) as e:
        # The launcher is already hidden, so a failure here would otherwise
        # leave the application with no visible window.
        print(e)
        root.deiconify()
        return
    draw(hot_list, root, '某音')
def show_baidu_hot_search(root):
    """Scrape the first 20 titles from the realtime board page and chart them.

    Args:
        root: The launcher window. It is hidden while the chart is open and
            shown again if the page cannot be fetched.
    """
    root.withdraw()  # hide the launcher
    url = 'https://top.baidu.com/board?tab=realtime'
    try:
        # The timeout keeps a stalled connection from freezing the GUI forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        # The launcher is already hidden, so a failure here would otherwise
        # leave the application with no visible window.
        print(e)
        root.deiconify()
        return
    # Parse the page with BeautifulSoup.
    soup = BeautifulSoup(response.content, 'html.parser')
    # Titles are the text of <div class="c-single-text-ellipsis"> elements;
    # limit=20 stops the search once 20 matches have been collected.
    hot_list = [
        item.text
        for item in soup.find_all('div', {'class': 'c-single-text-ellipsis'}, limit=20)
    ]
    draw(hot_list, root, '某度')
def show_weibo_hot_search(root):
    """Fetch pinned and realtime hot-search words (at most 20) and chart them.

    Args:
        root: The launcher window. It is hidden while the chart is open and
            shown again if the data cannot be fetched.
    """
    root.withdraw()  # hide the launcher
    hot_url = 'https://weibo.com/ajax/side/hotSearch'
    try:
        # The timeout keeps a stalled connection from freezing the GUI forever.
        http_response = requests.get(hot_url, timeout=10)
        http_response.raise_for_status()
        payload = http_response.json()['data']
        # NOTE(review): 'hotgovs' (pinned entries) is treated as optional so a
        # payload without it does not fail — confirm against the live API.
        hotgovs = payload.get('hotgovs') or []
        realtime = payload['realtime']
        # Pinned entries come first, followed by the realtime ranking.
        hot_list = [entry['word'] for entry in hotgovs]
        hot_list.extend(entry['word'] for entry in realtime)
    except (requests.RequestException, LookupError, AttributeError,
            TypeError, ValueError) as e:
        # The launcher is already hidden, so a failure here would otherwise
        # leave the application with no visible window.
        print(e)
        root.deiconify()
        return
    # Cap the combined list so the chart never shows more than 20 rows.
    draw(hot_list[:20], root, '某博')
def center_window(root, width, height):
    """Resize *root* to ``width`` x ``height`` pixels and centre it on the screen.

    Args:
        root: Any object exposing ``winfo_screenwidth()``,
            ``winfo_screenheight()`` and ``geometry(spec)``.
        width: Desired window width in pixels.
        height: Desired window height in pixels.
    """
    screen_width = root.winfo_screenwidth()
    screen_height = root.winfo_screenheight()
    # Clamp to 0 so a window larger than the screen is anchored at the
    # top-left corner instead of being pushed partly off-screen.
    x_coordinate = max((screen_width - width) // 2, 0)
    y_coordinate = max((screen_height - height) // 2, 0)
    root.geometry(f"{width}x{height}+{x_coordinate}+{y_coordinate}")
def main():
    """Build the launcher window with one button per board and run the Tk loop."""
    launcher = tk.Tk()
    launcher.title("热搜榜获取")

    # Initial launcher size in pixels (width, height), centred on the screen.
    center_window(launcher, 300, 200)

    # One row per button: (label, click handler, (top, bottom) outer padding).
    board_buttons = (
        ("某音热搜榜", lambda: show_douyin_hot_search(launcher), (30, 10)),
        ("某度热搜榜", lambda: show_baidu_hot_search(launcher), (10, 10)),
        ("某博热搜榜", lambda: show_weibo_hot_search(launcher), (10, 30)),
    )
    for label, handler, padding in board_buttons:
        ttk.Button(launcher, text=label, command=handler).pack(pady=padding)

    launcher.mainloop()
# Start the GUI only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
python获取各热搜榜单并实现可视化