Note: all code in this article is Python 2.7.
We often need a proxy, and frequently a global one: it hides our identity and lets us reach destinations that would otherwise be unreachable over the network.
1. HTTP proxy
import requests

proxies = {
    "http": "http://10.10.1.10:3128",
    "https": "http://10.10.1.10:1080",
}
requests.get("http://example.org", proxies=proxies)
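requests also honors the standard HTTP_PROXY / HTTPS_PROXY environment variables, so a process-wide proxy can be configured without passing proxies to every call. A minimal sketch (the proxy addresses are placeholders):

import os
import requests

# Placeholder proxy addresses; requests picks these env vars up automatically
os.environ['HTTP_PROXY'] = 'http://10.10.1.10:3128'
os.environ['HTTPS_PROXY'] = 'http://10.10.1.10:1080'
requests.get('http://example.org')  # this request goes through the proxy above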
2. SOCKS proxy
You first need to install an extra module: pip install requests[socks]
import requests

proxies = {
    'http': 'socks5://user:pass@host:port',
    'https': 'socks5://user:pass@host:port',
}
requests.get('http://www.baidu.com', proxies=proxies)
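If DNS resolution should also happen on the proxy side (useful when the target hostname only resolves inside the proxied network), recent requests versions accept the socks5h:// scheme in place of socks5://:

proxies = {
    'http': 'socks5h://user:pass@host:port',
    'https': 'socks5h://user:pass@host:port',
}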
1. HTTP proxy (urllib2)
This sets a proxy for the current connection only. During CC or DDoS testing we may want random, per-request proxies like this to simulate normal access behavior.
import urllib2
proxy_handler = urllib2.ProxyHandler({'http': '121.193.143.249:80'})
opener = urllib2.build_opener(proxy_handler)
r = opener.open('http://httpbin.org/ip')
print(r.read())
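To get the random, per-request proxies mentioned above, one simple approach (a sketch; the proxy pool addresses are placeholders) is to build a fresh opener from a randomly chosen proxy for each request:

import random
import urllib2

# Placeholder proxy pool
proxy_pool = ['121.193.143.249:80', '10.10.1.10:3128']

def open_via_random_proxy(url):
    # Pick a proxy at random and build a one-off opener for this request
    handler = urllib2.ProxyHandler({'http': random.choice(proxy_pool)})
    opener = urllib2.build_opener(handler)
    return opener.open(url)

print(open_via_random_proxy('http://httpbin.org/ip').read())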
Global proxy: when writing application code we usually want one global proxy. That said, requests is still the recommended option here.
import urllib2
proxy = urllib2.ProxyHandler({'http': '127.0.0.1:8087'})
opener = urllib2.build_opener(proxy)
urllib2.install_opener(opener)
response = urllib2.urlopen('http://feeds2.feedburner.com/MobileOrchard')
print response.read()
JSON is a lightweight data-interchange format. We often need it when talking to various site APIs; XML used to be common for this, but JSON has largely replaced it.
Function | Description |
---|---|
json.dumps | Encode a Python object into a JSON string |
json.loads | Decode a JSON string back into a Python object |
>>> import json
>>> # encode to JSON
>>> data = {'aa': 1, 'bb': 2, 'cc': 3}
>>> j = json.dumps(data)
>>> type(j)
<type 'str'>
>>> # decode from JSON
>>> jsonData = '{"a":1,"b":2,"c":3,"d":4,"e":5}'
>>> text = json.loads(jsonData)
>>> type(text)
<type 'dict'>
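json.dumps escapes non-ASCII characters by default; for readable Chinese output you can pass the standard ensure_ascii parameter (this assumes a UTF-8 terminal):

>>> print json.dumps({'city': u'武汉'}, ensure_ascii=False)
{"city": "武汉"}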
#### JSON API usage example
#coding=utf-8
import requests
import json

r = requests.get('http://www.sojson.com/open/api/weather/json.shtml?city=武汉')
aa = json.loads(r.text)
print aa['city']
print aa['data']['yesterday']['notice']
print aa['data']['yesterday']['high']
print aa['data']['yesterday']['low']
# forecast data; it has not been flattened into a single dict here
for i in aa['data']['forecast']:
    for x in i:
        print i[x]
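To see the whole forecast structure at a glance instead of bare values, json.dumps can pretty-print it (indent and ensure_ascii are standard parameters; again assuming a UTF-8 terminal):

print json.dumps(aa['data']['forecast'], indent=2, ensure_ascii=False)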
By scanning the known directories of sites built on a particular template or framework, we can guess the web site's directory structure and, in particular, locate login pages, admin backends, query interfaces, and so on.
#### Key code implementation
#coding=utf-8
import urllib2

def scan_url(url):
    try:
        header = {}
        header["User-Agent"] = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        r = urllib2.Request(url, headers=header)
        res = urllib2.urlopen(r)
        if len(res.read()):  # does the response have a body?
            print "[%d] => %s" % (res.code, url)  # %d because the HTTP status code is an integer
    except urllib2.URLError as e:
        if hasattr(e, 'code') and e.code != 404:  # hasattr checks whether the object has that attribute; covered later
            print "!!! %d => %s" % (e.code, url)
The full version below involves threads and queues, which are explained later.
import urllib
import urllib2
import threading
import Queue

threads = 5
target_url = "http://testphp.vulnweb.com"
wordlist_file = "/tmp/all.txt"  # from SVNDigger
resume = None  # marker word for resuming an interrupted scan
user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:19.0) Gecko/20100101 Firefox/19.0"

def build_wordlist(wordlist_file):
    # read in the word list
    fd = open(wordlist_file, "rb")
    raw_words = fd.readlines()
    fd.close()

    found_resume = False
    words = Queue.Queue()
    for word in raw_words:
        word = word.rstrip()  # strip trailing spaces and newlines
        if resume is not None:
            if found_resume:
                words.put(word)
            else:
                if word == resume:
                    found_resume = True
                    print "Resuming wordlist from: %s" % resume
        else:
            words.put(word)
    return words

def dir_bruter(word_queue, extensions=None):
    while not word_queue.empty():
        attempt = word_queue.get()
        attempt_list = []

        # check to see if there is a file extension; if not, it's a directory
        # path we're bruting
        if "." not in attempt:
            attempt_list.append("/%s/" % attempt)
        else:
            attempt_list.append("/%s" % attempt)

        # if we want to bruteforce extensions
        if extensions:
            for extension in extensions:
                attempt_list.append("/%s%s" % (attempt, extension))

        # iterate over our list of attempts
        for brute in attempt_list:
            url = "%s%s" % (target_url, urllib.quote(brute))
            try:
                headers = {}
                headers["User-Agent"] = user_agent
                r = urllib2.Request(url, headers=headers)
                response = urllib2.urlopen(r)
                if len(response.read()):
                    print "[%d] => %s" % (response.code, url)
            except urllib2.URLError as e:
                if hasattr(e, 'code') and e.code != 404:  # needs some knowledge of classes, covered later
                    print "!!! %d => %s" % (e.code, url)

word_queue = build_wordlist(wordlist_file)
extensions = [".php", ".bak", ".orig", ".inc"]
for i in range(threads):
    t = threading.Thread(target=dir_bruter, args=(word_queue, extensions))
    t.start()
1. How can the brute-force speed be improved?
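One idea (a sketch, not from the original text): skip downloading response bodies by issuing HEAD requests, so each probe costs only a round trip for the headers. urllib2 has no HEAD helper, but overriding get_method on Request works:

import urllib2

class HeadRequest(urllib2.Request):
    # Force urllib2 to send HEAD instead of GET
    def get_method(self):
        return "HEAD"

response = urllib2.urlopen(HeadRequest("http://testphp.vulnweb.com/"))
print response.code

Raising the threads value above is the other obvious lever, at the cost of hammering the target harder.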