反爬
反反爬:
1.请求头伪造
2.多次采集数据之间随机延时:time.sleep(随机秒数)
3.ip地址的代理(推荐)
import urllib.request
from urllib import request
# Example 1: fetch a page with a forged browser User-Agent header and save
# the raw response body to disk.
headers = {
    # Adjacent literals are joined by the parser; the trailing space keeps
    # "AppleWebKit/537.36" and "(KHTML, ..." separated in the final value.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36"
    ),
}
url = "https://www.baidu.com"
# NOTE: despite its name, `response` is the outgoing Request object that
# carries the custom headers; the name is kept for compatibility.
response = request.Request(url=url, headers=headers)
# Open the URL inside a context manager so the connection is always closed,
# even if reading the body raises.
with request.urlopen(response) as resp:
    data = resp.read()
print(data)
# `data` is bytes, so the file is opened in binary mode.
with open("baidu.html", "wb") as f:
    f.write(data)
from urllib import request
import random
# Example 2: pick a User-Agent at random for each run.
# Every entry must be followed by a comma: without them Python concatenates
# adjacent string literals and the list would hold one single giant string.
us = [
    (
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 "
        "(KHTML, like Gecko) Version/5.1 Safari/534.50"
    ),
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
    "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
]
headers = {
    "User-Agent": random.choice(us),
}
# Show which User-Agent was selected for this run.
print(headers)
url = "https://www.baidu.com"
# NOTE: despite its name, `response` is the outgoing Request object that
# carries the custom headers; the name is kept for compatibility.
response = request.Request(url=url, headers=headers)
# Context manager guarantees the connection is closed after reading.
with request.urlopen(response) as resp:
    data = resp.read()
print(data)
# Optional: persist the raw bytes (left disabled, as in the original notes).
# with open("qq.html", "wb") as f:
#     f.write(data)
import random
from urllib import request
import chardet
# Example 3: fetch a page with a random User-Agent, detect the body's
# character encoding with chardet, and decode the bytes to text.
# Every entry must be followed by a comma: without them Python concatenates
# adjacent string literals and the list would hold one single giant string.
us = [
    (
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 "
        "(KHTML, like Gecko) Version/5.1 Safari/534.50"
    ),
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
    "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
]
headers = {
    "User-Agent": random.choice(us),
}
url = "http://www.sina.com.cn"
# Build the actual Request object that carries the custom headers.
req = request.Request(url=url, headers=headers)
# Context manager guarantees the connection is closed after reading.
with request.urlopen(req) as resp:
    data = resp.read()
# chardet.detect returns a dict; its "encoding" entry holds the guess.
res = chardet.detect(data)
char = res.get("encoding")
print(char)
# print(res)
# The guess may be None (nothing detected) or slightly wrong, so fall back
# to UTF-8 and replace undecodable bytes instead of raising.
html = data.decode(char or "utf-8", errors="replace")
# Alternative with a hard-coded encoding (bytes -> str):
# html = data.decode("gb2312", errors="ignore")
# print(html)
# Optional: persist the raw bytes (left disabled, as in the original notes).
# with open("qq.html", "wb") as f:
#     f.write(data)
from urllib import request
from urllib import parse
# Example 4: turn an interactively entered keyword into a URL query string.
# Read the keyword first; the prompt text is shown to the user verbatim.
wd = input("请输入你要搜索的关键字:")
# Base search address ending in "?"; it is only defined here and is not
# used further in this snippet.
url = "https://www.baidu.com/s?"
# Single query parameter, keyed by "wd".
params = dict(wd=wd)
# urlencode percent-encodes the value and yields "wd=<encoded keyword>".
ps = parse.urlencode(params)
print(ps)
from urllib import request
from urllib import parse
# Example 5: GET request whose query string is built from user input; the
# raw response body is printed and saved to disk.
headers = {
    # Adjacent literals are joined by the parser; the trailing space keeps
    # "en-us)" and "AppleWebKit/..." separated in the final value.
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) "
        "AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50"
    ),
}
url = "https://www.baidu.com/s?"
wd = input("请输入你要搜索的关键字:")
params = {
    "wd": wd,
}
# Percent-encode the parameters into "wd=<encoded keyword>".
ps = parse.urlencode(params)
print(ps)
# The base URL already ends with "?", so the query is appended directly.
url = url + ps
print(url)
# Without custom headers this would simply be: request.urlopen(url)
req = request.Request(url=url, headers=headers)
# Context manager guarantees the connection is closed after reading.
with request.urlopen(req) as resp:
    data = resp.read()
print(data)
# `data` is bytes, so the file is opened in binary mode.
with open("get.html", "wb") as f:
    f.write(data)
import urllib.request
import urllib
# Example 6: POST form data to a translation endpoint and print the reply.
# Import urllib.parse explicitly instead of relying on urllib.request
# having imported it as a side effect.
import urllib.parse

url = "https://fanyi.youdao.com/translate?"
headers = {
    # Adjacent literals are joined by the parser; the trailing space keeps
    # "en-us)" and "AppleWebKit/..." separated in the final value.
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) "
        "AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50"
    ),
}
word = input("请输入你要翻译的单词:")
# Form fields sent in the request body; "i" carries the text to translate.
from_data = {
    "i": word,
    "from": "AUTO",
    "to": "AUTO",
    "smartresult": "dict",
    "client": "fanyideskweb",
    "doctype": "json",
    "version": "2.1",
    "keyfrom": "fanyi.web",
    # NOTE(review): value contains a lowercase "l" ("REALTlME"); kept exactly
    # as written - confirm against the endpoint before "correcting" it.
    "action": "FY_BY_REALTlME",
}
# The request body must be bytes: url-encode the form, then encode as UTF-8.
data = urllib.parse.urlencode(from_data)
data = data.encode(encoding="utf-8")
# Supplying `data` makes urllib send a POST. The Request object is named
# `req` so it does not overwrite the `request` module alias that earlier
# snippets import with `from urllib import request`.
req = urllib.request.Request(url, data=data, headers=headers)
# Context manager guarantees the connection is closed after reading.
with urllib.request.urlopen(req) as response:
    html = response.read().decode(encoding="utf-8").strip()
print(html)