requests 是 Python 中基于网络请求的模块,用于模拟浏览器发送请求
pip install requests
指定 url
发起请求 get/post
获取响应数据
持久化存储
import requests
# Download the Sogou home page, save it to "sougou.html" and echo it to stdout.
url = "https://www.sogou.com/"
# Send a browser-like User-Agent header along with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"
}
# Using the response as a context manager releases the connection even if
# reading the body raises; the timeout keeps the script from hanging forever.
with requests.get(url=url, headers=headers, timeout=10) as resp:
    page = resp.text
# Persist the decoded page text.
with open("sougou.html", "w", encoding="utf-8") as f:
    f.write(page)
print(page)
import requests
# Run a Sogou web search for a user-supplied term, save the result page to
# "<term>.html" and echo it to stdout.
url = "https://www.sogou.com/web"
# Send a browser-like User-Agent header along with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"
}
name = input("请输入要查询的内容:")
# Passed via `params`, so requests encodes it into the URL query string.
data = {
    "query": name
}
# Read the body while the response is still open; the context manager then
# releases the connection even on error. The timeout avoids hanging forever.
with requests.get(url=url, headers=headers, params=data, timeout=10) as resp:
    page = resp.text
# NOTE(review): the file name is taken verbatim from user input; a term
# containing path separators will fail or write outside the current directory.
with open(name + ".html", "w", encoding="utf-8") as f:
    f.write(page)
print(page)
import requests
# Query the Baidu Translate suggestion endpoint for a user-supplied word and
# print each entry of the "data" field of the JSON reply.
url = "https://fanyi.baidu.com/sug"
# Send a browser-like User-Agent header along with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"
}
words = input("请输入要查询的单词:")
# Passed via `data`, so it is sent as the form-encoded POST body.
data = {
    "kw": words
}
# The context manager releases the connection even if JSON decoding raises;
# the timeout keeps the script from hanging forever.
with requests.post(url=url, headers=headers, data=data, timeout=10) as resp:
    response = resp.json()
for i in response["data"]:
    print(i)
请求方法为 POST,需要携带参数
响应数据为 JSON 类型,resp.json() 返回的是 Python 对象(这里是字典类型)
import json
import requests
# Fetch one page of the Douban movie chart as JSON and save it to
# "豆瓣电影排行榜.json".
url = "https://movie.douban.com/j/chart/top_list"
# Send a browser-like User-Agent header along with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"
}
# Passed via `params`, so requests encodes these into the URL query string.
data = {
    "type": 24,
    "interval_id": "100:90",
    "action": "",
    "start": 0,
    "limit": "20"
}
# The context manager releases the connection even if JSON decoding raises;
# the timeout keeps the script from hanging forever.
with requests.get(url=url, headers=headers, params=data, timeout=10) as resp:
    resp_obj = resp.json()
# ensure_ascii=False writes non-ASCII text as-is instead of \uXXXX escapes.
with open("豆瓣电影排行榜.json", "w", encoding="utf-8") as f:
    json.dump(resp_obj, f, ensure_ascii=False)
print("爬取完成!!!")
JSON 数据保存到文件使用 **json.dump(obj, 文件对象, ensure_ascii=False)** 函数(第二个参数是已打开的文件对象,不是文件名;ensure_ascii=False 可让中文按原样写入)
import requests
# Query the KFC store-list endpoint for a user-supplied keyword and save the
# raw response text to "肯德基餐厅位置.text".
url = "http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx"
# Send a browser-like User-Agent header along with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"
}
# Passed via `params`, so it goes into the URL query string.
params = {
    "op": "keyword"
}
# Passed via `data`, so it is sent as the form-encoded POST body; the keyword
# is read from the user while the dict is being built.
data = {
    "cname": "",
    "pid": "",
    "keyword": input("请输入要查询的城市:"),
    "pageIndex": 1,
    "pageSize": 10
}
# The context manager releases the connection even if reading the body raises;
# the timeout keeps the script from hanging forever.
with requests.post(url=url, headers=headers, params=params, data=data, timeout=10) as resp:
    text = resp.text
with open("肯德基餐厅位置.text", "w", encoding="utf-8") as fp:
    fp.write(text)
print("爬取完成!!!")