import urllib.request
import urllib.parse
# 1. Project requirement: download pages 1-10 of the Douban movie chart.
def creat_request(page, page_size=20):
    """Build the request for one page of the Douban movie chart listing.

    Args:
        page: 1-based page number; page 1 maps to ``start=0``.
        page_size: Number of entries per page. It drives both the ``start``
            offset and the ``limit`` query parameter so the two cannot
            drift apart. Defaults to 20.

    Returns:
        A ``urllib.request.Request`` for the chart endpoint, carrying a
        browser ``User-Agent`` header.
    """
    # The base URL already ends with '&', so the encoded paging parameters
    # can be appended to it directly.
    base_url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100:90&action=&'
    query = urllib.parse.urlencode({
        'start': (page - 1) * page_size,
        'limit': page_size,
    })
    # NOTE(review): presumably sent so the server treats the script like a
    # regular browser -- confirm whether the endpoint actually requires it.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.54'
    }
    return urllib.request.Request(url=base_url + query, headers=headers)
def get_content(request):
    """Send *request* and return the response body decoded as UTF-8.

    Args:
        request: A URL string or ``urllib.request.Request`` accepted by
            ``urllib.request.urlopen``.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response (and its underlying
    # connection) even when read() or decode() raises.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')
def download(page, content):
    """Write *content* to ``豆瓣<page>.json`` in the current directory.

    Args:
        page: Page number used to build the output file name.
        content: Text to store; written as UTF-8. An existing file with
            the same name is overwritten.
    """
    # `with` guarantees the file handle is closed even if write() fails.
    with open('豆瓣' + str(page) + '.json', 'w', encoding='utf-8') as fp:
        fp.write(content)
if __name__ == '__main__':
    # Prompt for the inclusive range of pages to fetch.
    start_page = int(input('请输入需要下载的开始页'))
    end_page = int(input('请输入需要下载的结束页'))
    for page in range(start_page, end_page + 1):
        # Build the request, fetch and decode the response body, then save
        # it to this page's JSON file.
        download(page, get_content(creat_request(page)))