# Python exercise: fetch web page data and save it to an Excel file


from bs4 import BeautifulSoup
import urllib.request

# Starting page of the scrape: the Douban movie Top 250 listing.
url = 'https://movie.douban.com/top250'

# Collect the absolute URLs of the pages linked from the page's paginator.
def get_links(url):
    """Return the absolute URLs linked from the paginator of the page at *url*.

    The page is downloaded with a browser-like User-Agent and parsed with
    BeautifulSoup. Every ``<a>`` inside the first ``<div class="paginator">``
    contributes its ``href``, resolved against *url*.

    Args:
        url: Address of the page whose paginator is inspected.

    Returns:
        A list of absolute URLs in document order, without duplicates.
        The list is empty when the page has no paginator element.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
    """
    from urllib.parse import urljoin

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'
    }
    request = urllib.request.Request(url, headers=headers)
    # Read the body inside a context manager so the connection is always closed.
    with urllib.request.urlopen(request) as response:
        html = response.read()

    soup = BeautifulSoup(html, 'lxml')
    paginator = soup.find('div', class_='paginator')
    if paginator is None:
        # A single-page result has nothing to follow.
        return []

    linklist = []
    seen = set()
    for page in paginator.find_all('a'):
        href = page.get('href')
        if not href:
            # Anchors without a target cannot be followed.
            continue
        # urljoin handles query-only, relative and absolute hrefs alike,
        # whereas plain concatenation is only right for query-only ones.
        link = urljoin(url, href)
        # A paginator may list the same target twice (presumably a "next"
        # link next to the numbered one); keep the first occurrence so no
        # page is scraped twice.
        if link not in seen:
            seen.add(link)
            linklist.append(link)
    return linklist


# Scrape movie titles and ratings from one page into the given list.
def get_info(url, score_list):
    """Append the title and rating of every movie found at *url* to *score_list*.

    The page is downloaded with a browser-like User-Agent and parsed with
    BeautifulSoup. Each ``<div class="info">`` is treated as one movie: the
    text of its first ``<span class="title">`` and of its
    ``<span class="rating_num">`` are stored as strings.

    Args:
        url: Address of the page to scrape.
        score_list: List that is extended in place with one
            ``{'title': ..., 'score': ...}`` dict per movie.

    Returns:
        None. Results are delivered through *score_list*.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        AttributeError: If a movie entry lacks the title or rating element.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'
    }
    request = urllib.request.Request(url, headers=headers)
    # Read the body inside a context manager so the connection is always closed.
    with urllib.request.urlopen(request) as response:
        html = response.read()

    soup = BeautifulSoup(html, 'lxml')

    for movie in soup.find_all('div', class_='info'):
        title = movie.find('span', class_='title').get_text()
        rating_num = movie.find('span', class_='rating_num').get_text()
        score_list.append({'title': title, 'score': rating_num})

# Accumulator for the scraped records: one {'title', 'score'} dict per movie.
scoreList = []

# Discover the paginator links of the starting page before scraping anything.
linklist = get_links(url)

# Scrape the starting page first, then every linked page, in order.
for link in [url] + linklist:
    get_info(link, scoreList)

# Export the collected records to an Excel workbook using openpyxl.
from openpyxl import Workbook

wb = Workbook()
sheet = wb.active

# Header row: columns A, B and C of row 1.
for column, heading in enumerate(('number', 'title', 'score'), start=1):
    sheet.cell(row=1, column=column).value = heading

# One row per record, starting at row 2; the first column is a 1-based
# running number.
for number, d in enumerate(scoreList, start=1):
    row = number + 1
    sheet.cell(row=row, column=1).value = number
    sheet.cell(row=row, column=2).value = d['title']
    sheet.cell(row=row, column=3).value = d['score']

# Write the workbook to disk.
wb.save('douban_top_movie.xlsx')


# You may also be interested in: (learning Python, python, excel, programming languages)