Simple Script Sharing
Simple page crawling (Python 3.x and 2.x)
import urllib.request  # Python 3.x
url='http://www.baidu.com/'
def getHtml(url):
    page=urllib.request.urlopen(url)
    html=page.read().decode(encoding='utf-8',errors='strict')
    return html
print(getHtml(url))
import requests  # 2.x version (requests works on 3.x as well)
headers = {
    'Connection': 'Keep-Alive',
    'Accept': 'text/html, application/xhtml+xml, */*',
    'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 LB'
}
url="https://www.qq.com"
res=requests.get(url,headers=headers)  # headers must be passed as a keyword argument; a second positional argument is treated as params
print(res.text)
POST parameters, setting a cookie, and slicing a fixed-length chunk of the returned page (Python 2.x)
import requests

url='http://106.75.72.168:2222/index.php'
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, compress',
    'Accept-Language': 'en-us;q=0.5,en;q=0.3',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100101 Firefox/22.0',
    'cookie': 'Hm_lvt_9d483e9e48ba1faa0dfceaf6333de846=1542198011; role=Zjo1OiJucXp2YSI7'
}
payload={'filename':'1.php','data[]':''}
r=requests.post(url,headers=headers,data=payload)
url="http://106.75.72.168:2222"+r.content[82:128]  # slice the path out of the response at fixed offsets
r=requests.get(url)
print r.content
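The fixed slice [82:128] only works for this exact response layout. A more robust sketch (reusing url, headers, and payload from above) pulls the path out with a regular expression; the pattern below is a hypothetical guess at what the page returns and would need adjusting:

import re

r = requests.post(url, headers=headers, data=payload)
m = re.search(r'(/[\w./-]+\.php)', r.text)  # hypothetical pattern; match it to the page's actual output
if m:
    r = requests.get('http://106.75.72.168:2222' + m.group(1))
    print(r.text)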
Wordlist generation (any version)
with open('wordlist.txt','w+') as f:
    for i in range(0,10):
        for j in range(0,10):
            for k in range(0,10):
                for h in range(0,10):
                    f.write('1391040'+str(i)+str(j)+str(k)+str(h)+'\n')
# no explicit close needed: the with statement closes the file automatically
This writes a wordlist to the local file wordlist.txt, generating every 1391040xxxx number by enumerating the last four digits.
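The same wordlist can be produced more compactly with itertools.product; a minimal equivalent sketch:

import itertools

with open('wordlist.txt', 'w') as f:
    # enumerate every 4-digit combination of '0'..'9'
    for tail in itertools.product('0123456789', repeat=4):
        f.write('1391040' + ''.join(tail) + '\n')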
Logging in to a website with Python (3.x)
from urllib import request  # the request submodule of urllib
from urllib import parse  # parse handles URL encoding
from urllib.request import urlopen
values ={'zhanghao':'admin','mima':'admin'}
data=parse.urlencode(values).encode('utf-8')  # the body must be bytes, not str; parse.urlencode turns the dict into URL parameters, which are then UTF-8 encoded so they can go into the HTTP request
url='http://127.0.0.1/login.php'
req=request.Request(url,data)  # use a name other than 'request' so the imported module is not shadowed
response=urlopen(req)
print(response.read().decode())  # decode() so the page's Chinese text displays correctly
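To see what parse.urlencode actually produces, a quick check:

from urllib import parse
print(parse.urlencode({'zhanghao': 'admin', 'mima': 'admin'}))
# zhanghao=admin&mima=admin
print(parse.urlencode({'q': '中文'}))
# q=%E4%B8%AD%E6%96%87 (non-ASCII values are percent-encoded as UTF-8)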
Organized notes (Python 3.x)
import urllib.parse#urlencode
import urllib.request#Request,urlopen
'''
response=urllib.request.urlopen("http://127.0.0.1")
print(response.read().decode())
'''
#setting headers and data
'''
url='http://127.0.0.1/login.php'
user_agent='Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36'
values={'zhanghao':'admin','mima':'admin'}
headers={'User-Agent':user_agent}
data=urllib.parse.urlencode(values).encode('utf-8')
request=urllib.request.Request(url,data,headers)
response=urllib.request.urlopen(request)
page=response.read()
print(page.decode())
'''
#setting a proxy, to avoid being blocked when a single IP makes too many requests
'''
enable_proxy = True
proxy_handler = urllib.request.ProxyHandler({"http":'http://some-proxy.com:8080'})
null_proxy_handler = urllib.request.ProxyHandler({})
if enable_proxy:
    opener = urllib.request.build_opener(proxy_handler)
else:
    opener = urllib.request.build_opener(null_proxy_handler)
urllib.request.install_opener(opener)
'''
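Once install_opener has run, every subsequent urlopen call in the process is routed through that opener, so the proxy applies globally:
'''
response = urllib.request.urlopen('http://127.0.0.1')  # goes through http://some-proxy.com:8080 when enable_proxy is True
print(response.read().decode())
'''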
#setting a timeout
# for the difference between urlopen and Request, see https://blog.csdn.net/tao3741/article/details/75207879
'''
response=urllib.request.urlopen('http://127.0.0.1',timeout=10)
print(response.read().decode())
'''
#POST, PUT, and other request methods
'''
request=urllib.request.Request(url,data,headers)  # POST: the body goes in data
request=urllib.request.Request('http://127.0.0.1?a=1')  # GET: parameters go straight in the URL
request = urllib.request.Request(url, data=data)  # PUT and DELETE:
request.get_method = lambda:'PUT'  # override get_method to return 'PUT' or 'DELETE'
'''
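Since Python 3.3, Request also takes a method keyword argument, which avoids the get_method override:
'''
request = urllib.request.Request(url, data=data, method='PUT')
response = urllib.request.urlopen(request)
'''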
#using DebugLog to print the contents of sent and received packets on screen
'''
httpHandler = urllib.request.HTTPHandler(debuglevel=1)
httpsHandler = urllib.request.HTTPSHandler(debuglevel=1)
opener = urllib.request.build_opener(httpHandler, httpsHandler)
urllib.request.install_opener(opener)
response = urllib.request.urlopen('http://127.0.0.1', timeout = 5)
'''
#distinguishing URLError exceptions by attribute
'''
request=urllib.request.Request('http://127.0.0.999')
try:
    urllib.request.urlopen(request)
except urllib.error.URLError as e:
    if hasattr(e, "code"):  # hasattr checks whether an object has a given attribute
        print(e.code)
    if hasattr(e, "reason"):
        print(e.reason)
else:
    print("OK")
'''
Reference: https://www.cnblogs.com/dplearning/p/4854746.html