简单网络爬虫

简单网络爬虫

import requests
import re
import os
import time

# Fetch the article page.
# A browser-like User-Agent is sent with every request made by this script.
headers = {
	'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'
}
# A timeout keeps a stalled connection from hanging the script forever.
response = requests.get('https://www.sohu.com/a/223358396_773938', headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page.
response.raise_for_status()
html = response.text

# Parse the page: collect every src="...jpeg" attribute value.
# [^"]* keeps each match inside a single attribute (a greedy .* would run from
# the first src=" to the last .jpeg" on the same line), and \. matches a
# literal dot before the extension.
urls = re.findall(r'src="([^"]*\.jpeg)"', html)
dir_name = './old_1'
# exist_ok avoids the check-then-create race of exists() + mkdir().
os.makedirs(dir_name, exist_ok=True)

# Save the images, one request per second.
for url in urls:
	time.sleep(1)
	# Protocol-relative URLs ("//host/path") have no scheme, which requests
	# rejects; give them one.
	if url.startswith('//'):
		url = 'https:' + url
	# The last path segment is used as the local file name.
	file_name = url.split('/')[-1]
	try:
		response = requests.get(url, headers=headers, timeout=10)
		response.raise_for_status()
	except requests.RequestException as exc:
		# Skip this image but keep downloading the others; never write an
		# error body to disk as if it were an image.
		print('skipped {}: {}'.format(url, exc))
		continue
	with open(os.path.join(dir_name, file_name), 'wb') as f:
		f.write(response.content)

你可能感兴趣的:(开发)