1. 需要的类库
import requests
from bs4 import BeautifulSoup
import pandas as pd
2. 请求榜单
def fetch_ranking_data(url="https://m.xxx.com/rankm/", timeout=10):
    """Download the ranking page and return its raw body.

    Args:
        url: Address of the ranking page. The default is the anonymised
            placeholder host from the original write-up ("some site");
            replace it with the real target.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout ``requests`` does not time out on its own.

    Returns:
        The response body as ``bytes`` when the server answers with
        HTTP 200, otherwise ``None`` (a message is printed to stdout).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Network-level failures (DNS, refused connection, timeout) are
        # reported the same way as a bad status code: print, return None.
        print(f"Error fetching data: {exc}")
        return None

    if response.status_code == 200:
        return response.content

    print(f"Error fetching data. Status code: {response.status_code}")
    return None
3. 解析响应
def parse_html(html_content):
    """Extract rank, title and link for every entry on the ranking page.

    Args:
        html_content: HTML markup of the ranking page, e.g. the value
            returned by ``fetch_ranking_data``. ``None`` or empty input
            yields an empty list instead of raising.

    Returns:
        A list of dicts with the keys ``'Rank'``, ``'Title'`` and
        ``'URL'``, in document order. Entries that lack the rank element,
        the title element, or a link with an ``href`` are skipped.
    """
    # fetch_ranking_data returns None on failure; nothing to parse then.
    if not html_content:
        return []

    soup = BeautifulSoup(html_content, 'html.parser')
    # NOTE(review): per the BeautifulSoup docs a multi-word ``class_`` string
    # matches only tags whose class attribute is exactly this string (same
    # order, no extra classes) - confirm that is intended for this page.
    rank_items = soup.find_all('div', class_='placeholder one-img-plc')

    data = []
    for rank_item in rank_items:
        rank_tag = rank_item.select_one('.rank-num')
        title_tag = rank_item.select_one('.plc-title')
        link_tag = rank_item.select_one('a')
        # select_one returns None when nothing matches; skip incomplete
        # entries rather than aborting the whole parse.
        if rank_tag is None or title_tag is None or link_tag is None:
            continue

        url = link_tag.get('href')
        if url is None:
            continue

        data.append({
            'Rank': rank_tag.text,
            'Title': title_tag.text,
            'URL': url,
        })
    return data
4. 输出文件
def create_excel(data, filename='ranking_data.xlsx'):
    """Write the scraped rows to an Excel workbook.

    Args:
        data: Rows to export; anything ``pandas.DataFrame`` accepts, such
            as the list of dicts produced by ``parse_html``.
        filename: Destination path of the workbook. Defaults to
            ``'ranking_data.xlsx'`` in the current working directory.

    Returns:
        None. The workbook is written to disk and a confirmation message
        is printed to stdout.
    """
    df = pd.DataFrame(data)
    # index=False keeps the DataFrame's row index out of the sheet.
    df.to_excel(filename, index=False)
    print("Excel file created successfully.")
5. 成果展示