# WAF detection rule table.
#
# regexes["regex"] maps a rule-group name to a list of rule dicts.  Every rule
# has three string fields: "regex" (the pattern source handed to re.compile),
# "remarks" and "score" (both empty for all rules below).
# "version", "name" and "update" are metadata about the rule set itself.
regexes = {
    "regex": {
        "regexesSqlInjection": [
            {"regex": "select.+(from|limit)", "remarks": "", "score": ""},
            {"regex": "(?:(union(.*?)select))", "remarks": "", "score": ""},
            {"regex": "group(\\s*)+by(\\s*)", "remarks": "", "score": ""},
            {"regex": "order(\\s*)+by(\\s*)", "remarks": "", "score": ""},
            {"regex": "waitfor(\\s*)+delay(\\s*)+'\\d", "remarks": "", "score": ""},
            {"regex": "select([\\s\\S]*?)case([\\s\\S]*?)when([\\s\\S]*?)then([\\s\\S]*?)else", "remarks": "", "score": ""},
            {"regex": "convert\\(+(int|char)", "remarks": "", "score": ""},
            {"regex": "sleep\\((\\s*)(\\d*)(\\s*)\\)", "remarks": "", "score": ""},
            {"regex": "(insert|replace)(\\s*)+into.+values(\\s|)\\(", "remarks": "", "score": ""},
            {"regex": "exec.+(xp_cmdshell|master\\.dbo\\.)", "remarks": "", "score": ""},
            {"regex": "declare.+@.+exec.+@", "remarks": "", "score": ""},
        ],
        "regexesSqlInjectionLow": [
            # NOTE(review): the trailing space is part of this pattern - confirm it is intended.
            {"regex": "benchmark\\((.*)\\,(.*)\\) ", "remarks": "", "score": ""},
            {"regex": "base64_decode\\(", "remarks": "", "score": ""},
            {"regex": "(?:from(\\s*)+information_schema(\\s*))", "remarks": "", "score": ""},
            {"regex": "(?:(?:current_)user|database|schema|connection_id)\\s*\\(", "remarks": "", "score": ""},
            {"regex": "into(\\s+)+(?:dump|out)file\\s*", "remarks": "", "score": ""},
        ],
        "regexesXss": [
            # NOTE(review): a pattern that is (or ends with) a lone backslash is
            # not a valid regex - re.compile raises on it.  The intended patterns
            # for these entries cannot be recovered from this file; restore them
            # from the original rule source.
            {"regex": "\\", "remarks": "", "score": ""},
            {"regex": "\\<(script|script/xss)(\\s*)+(.|)+src(\\s*|)+\\=(\\s*|)+('|\"|)(\\s*|)+(http:|https:|//).+\\>(.|)+\\", "remarks": "", "score": ""},
            {"regex": "\\<(img|body|input|iframe|frame|embed|bgsound)(\\s*)+(.|)+src(\\s*|)+\\=(\\s*|)+('|\"|)(\\s*|)+(http:|https:|//|redirect|javascript:|vbscript:).+\\>", "remarks": "", "score": ""},
            {"regex": "\\<(base|link|a)(\\s*)+(.|)+href(\\s*|)+\\=(\\s*|)+('|\")(\\s*|)+(http:|https:|//|javascript:).+\\>", "remarks": "", "score": ""},
            {"regex": "\\", "remarks": "", "score": ""},
            {"regex": "\\<(body|table)(\\s*)+(.|)+background(\\s*|)+\\=(\\s*|)+('|\"|)(\\s*|)+(http:|https:|//|javascript:).+\\>", "remarks": "", "score": ""},
            {"regex": "\\", "remarks": "", "score": ""},
            {"regex": "\\", "remarks": "", "score": ""},
            {"regex": "\\", "remarks": "", "score": ""},
            {"regex": "\\", "remarks": "", "score": ""},
        ],
        "regexesBrowseDirectory": [
            {"regex": "\\.\\./\\.\\./+\\W", "remarks": "", "score": ""},
            {"regex": "(?:etc\\/\\W*passwd)", "remarks": "", "score": ""},
            # Backslashes are escaped for the regex engine so the pattern matches
            # the literal path c:\winnt\system32 (an unescaped \w / \s would be
            # read as the word / whitespace character classes).
            {"regex": "c:\\\\winnt\\\\system32", "remarks": "", "score": ""},
            {"regex": "\\.(svn|htaccess|bash_history)", "remarks": "", "score": ""},
        ],
        "regexesElse": [
            {"regex": "(curl|wget)([\\s\\S]*?).php", "remarks": "", "score": ""},
            {"regex": "\\$_(get|post|cookie|files|session|env|phplib|globals|server)\\[", "remarks": "", "score": ""},
            {"regex": "/(attachments|upimg|images|css|uploadfiles|html|uploads|templets|static|template|data|inc|forumdata|upload|includes|cache|avatar)/(\\w+).(php|jsp)", "remarks": "", "score": ""},
            {"regex": "(?:define|eval|file_get_contents|include|require|require_once|shell_exec|phpinfo|system|passthru|preg_\\w+|execute|echo|print|print_r|var_dump|(fp)open|alert|showmodaldialog)\\(", "remarks": "", "score": ""},
            {"regex": "/+\\w+\\.(bak|inc|old|mdb|sql|backup|java|class)$", "remarks": "", "score": ""},
            {"regex": "(vhost|bbs|host|wwwroot|www|site|root|hytop|flashfxp).*\\.rar", "remarks": "", "score": ""},
            {"regex": "(onmouseover|onerror|onload)\\='", "remarks": "", "score": ""},
            # NOTE(review): several patterns below end with a space that is part
            # of the pattern - confirm it is intended.
            {"regex": "xwork.methodaccessor ", "remarks": "", "score": ""},
            # NOTE(review): mixed-case pattern; it cannot match input that has
            # been lower-cased before matching.
            {"regex": "xwork\\.MethodAccessor", "remarks": "", "score": ""},
            {"regex": "(gopher|doc|php|glob|file|phar|zlib|ftp|ldap|dict|ogg|data)\\:\\/ ", "remarks": "", "score": ""},
            {"regex": "(phpmyadmin|jmx-console|jmxinvokerservlet)", "remarks": "", "score": ""},
            {"regex": "java\\.lang ", "remarks": "", "score": ""},
            {"regex": "\\:\\$", "remarks": "", "score": ""},
        ],
    },
    "version": "0.1",
    "name": "wafRegex",
    "update": "20170106",
}
# -*- coding: UTF-8 -*
import sys
sys.path.append('../vsm')
sys.path.append('../bayes')
reload(sys)
sys.setdefaultencoding('utf8')
import re
import urllib
import pandas as pd
from baishan_regex import regexes as regexes1
#from baishan_rules4 import regexes as regexes1
def uriFilter(regexes, uri):
    """Classify a value as 'attack' or 'normal' using a regex rule set.

    The value is coerced to text, URL-decoded once and lower-cased, then
    searched with every rule of every group in ``regexes["regex"]``.  All
    rules are tried (there is no early exit); each hit prints a
    ``regex-<pattern> is matched:`` line followed by the rule's score.
    Rules whose pattern cannot be compiled are printed and skipped.

    Args:
        regexes: Rule set shaped like
            ``{"regex": {group_name: [{"regex": ..., "score": ...}, ...]}}``.
            ``None`` falls back to the module-level ``regexes1``, which is
            what this function used unconditionally before.
        uri: Value to inspect; non-string values are converted with ``str``.

    Returns:
        'attack' if at least one rule matched, otherwise 'normal'.
    """
    # urllib.unquote only exists on Python 2; Python 3 moved it to urllib.parse.
    try:
        unquote = urllib.unquote
    except AttributeError:
        from urllib.parse import unquote

    text = uri if isinstance(uri, bytes) else str(uri)
    if isinstance(text, bytes):
        # Byte strings are decoded with the interpreter's default encoding;
        # undecodable bytes are dropped rather than raising.
        text = text.decode(sys.getdefaultencoding(), 'ignore')
    text = unquote(text).lower()  # decode percent-escapes, then lower-case

    rule_groups = (regexes1 if regexes is None else regexes)["regex"]

    # The hit counter is deliberately separate from any loop variable so that
    # iterating over the rules can never overwrite it.
    hits = 0
    for group in rule_groups.values():
        for rule in group:
            try:
                pattern = re.compile(rule["regex"])
            except (re.error, TypeError):
                # Report the unusable pattern and move on; never search with
                # a pattern left over from a previous rule.
                print(rule["regex"])
                continue
            if pattern.search(text):
                print('regex-%s is matched:' % (rule["regex"]))
                print(rule["score"])
                hits += 1
    return 'attack' if hits else 'normal'
#---------------------extract parameter names------------------
def getParam(url):
    """Return the parameter-name prefixes ('name=') found in a URL.

    Only the text after the first '?' is considered; when there is no '?'
    the whole string is treated as the query.  If the query contains '&',
    every '&'-separated segment contributes the text before its first '='
    (the whole segment when it has no '=') with '=' appended.  A query
    without '&' contributes one entry if it contains '=', otherwise none.

    Args:
        url: URL or request text.

    Returns:
        A list of strings such as ['format=', 'id='].
    """
    # [-1] yields the part after the first '?', or the whole text if no '?'.
    query = url.split('?', 1)[-1]
    if '&' in query:
        return [segment.split('=', 1)[0] + '=' for segment in query.split('&')]
    if '=' in query:
        return [query.split('=', 1)[0] + '=']
    return []
#---------------------get parameter value (lexicon entry)---------------------
def getValue(line, splits):
    """Return the text that follows the first occurrence of *splits* in *line*.

    The result stops at the next '&' (exclusive) or runs to the end of the
    line when there is none.  An empty string is returned when *splits* does
    not occur in *line*.

    Args:
        line: Text to search, e.g. a request line.
        splits: Marker to look for, e.g. 'format='.

    Returns:
        The extracted value, or '' when the marker is absent.
    """
    _, marker, remainder = line.partition(splits)
    if not marker:
        # Marker not present in the line.
        return ''
    # Keep only what precedes the next parameter separator.
    return remainder.split('&', 1)[0]
def testFilter(regexes, logPath):
    """Run every query-parameter value found in a log file through uriFilter.

    Each line is split on single spaces at most twice and everything after
    the second space is treated as the request text.  The parameter names
    are taken from that text with getParam, and each parameter's value
    (getValue) is classified with uriFilter.  Checking of a line stops at
    its first value classified as 'attack'.

    Args:
        regexes: Rule set forwarded unchanged to uriFilter.
        logPath: Path of the log file to read.

    Returns:
        None.  Any output is whatever uriFilter prints.
    """
    # The context manager closes the log even if a line raises midway.
    with open(logPath) as log_file:
        for line in log_file:
            fields = line.split(' ', 2)
            if len(fields) < 3:
                # Blank or truncated line: there is no request part to inspect.
                continue
            request = fields[2]
            # Extract the parameter names once per line.
            for param in getParam(request):
                if uriFilter(regexes, getValue(request, param)) == 'attack':
                    break
# Scan the sample log with the imported rule set; matches are reported on
# standard output by uriFilter.
testFilter(regexes1, 't01.log')

# Count how often each "regex-..." token occurs in the results file.
# NOTE(review): presumably this file holds captured output of a scan like the
# one above - nothing in this script writes it; confirm how it is produced.
counts = dict()
with open('testRulesResultdata0.txt') as fhand:
    for line in fhand:
        if line.startswith("regex-"):
            # A line starting with "regex-" always has a first token.
            label = line.split()[0]
            counts[label] = counts.get(label, 0) + 1

# The loop variable is not called `str`: rebinding the builtin at module
# level would break every later str(...) call in this module.
for label in counts:
    print('%s %s' % (label, counts[label]))