# python + 处理日志 + 处理URL防止SQL注入 (Python: process logs and inspect URLs to guard against SQL injection)

# WAF-style rule table (name "wafRegex", version "0.1", update "20170106").
#
# Layout: regexes["regex"] maps a category name to a list of rule dicts, each
# with the keys "regex" (pattern source), "remarks" and "score" (both empty
# strings for every rule in this table).  Categories present:
#   regexesSqlInjection, regexesSqlInjectionLow, regexesXss,
#   regexesBrowseDirectory, regexesElse
#
# NOTE(review): the script further down passes `regexes1` (imported from
# baishan_regex) to testFilter, not this `regexes` table; presumably this is
# an inline copy of that module's content -- confirm.
# NOTE(review): several regexesXss patterns end in a lone backslash (for
# example "\\" and "\\.+\\"), which re.compile rejects.  They look truncated,
# possibly because markup was stripped from the original text -- restore them
# from the original rule set before relying on this table.
# NOTE(review): a few patterns carry trailing spaces ("benchmark...)  ",
# "xwork.methodaccessor ", "...\\:\\/  ", "java\\.lang   "), so they only match
# when those spaces are present in the input -- likely unintended, verify.
# NOTE(review): in "c:\\winnt\\system32" the regex engine reads \w and \s as
# character classes, not as a literal backslash followed by a letter.
regexes = {"regex": {"regexesSqlInjection": [{"regex": "select.+(from|limit)", "remarks": "", "score": ""},
                                             {"regex": "(?:(union(.*?)select))", "remarks": "", "score": ""},
                                             {"regex": "group(\\s*)+by(\\s*)", "remarks": "", "score": ""},
                                             {"regex": "order(\\s*)+by(\\s*)", "remarks": "", "score": ""},
                                             {"regex": "waitfor(\\s*)+delay(\\s*)+'\\d", "remarks": "", "score": ""}, {
                                                 "regex": "select([\\s\\S]*?)case([\\s\\S]*?)when([\\s\\S]*?)then([\\s\\S]*?)else",
                                                 "remarks": "", "score": ""},
                                             {"regex": "convert\\(+(int|char)", "remarks": "", "score": ""},
                                             {"regex": "sleep\\((\\s*)(\\d*)(\\s*)\\)", "remarks": "", "score": ""},
                                             {"regex": "(insert|replace)(\\s*)+into.+values(\\s|)\\(", "remarks": "",
                                              "score": ""},
                                             {"regex": "exec.+(xp_cmdshell|master\\.dbo\\.)", "remarks": "",
                                              "score": ""},
                                             {"regex": "declare.+@.+exec.+@", "remarks": "", "score": ""}],
                     "regexesSqlInjectionLow": [{"regex": "benchmark\\((.*)\\,(.*)\\)  ", "remarks": "", "score": ""},
                                                {"regex": "base64_decode\\(", "remarks": "", "score": ""},
                                                {"regex": "(?:from(\\s*)+information_schema(\\s*))", "remarks": "",
                                                 "score": ""},
                                                {"regex": "(?:(?:current_)user|database|schema|connection_id)\\s*\\(",
                                                 "remarks": "", "score": ""},
                                                {"regex": "into(\\s+)+(?:dump|out)file\\s*", "remarks": "",
                                                 "score": ""}],
                     "regexesXss": [{"regex": "\\.+\\", "remarks": "", "score": ""}, {
                         "regex": "\\<(script|script/xss)(\\s*)+(.|)+src(\\s*|)+\\=(\\s*|)+('|\"|)(\\s*|)+(http:|https:|//).+\\>(.|)+\\",
                         "remarks": "", "score": ""}, {
                                        "regex": "\\<(img|body|input|iframe|frame|embed|bgsound)(\\s*)+(.|)+src(\\s*|)+\\=(\\s*|)+('|\"|)(\\s*|)+(http:|https:|//|redirect|javascript:|vbscript:).+\\>",
                                        "remarks": "", "score": ""}, {
                                        "regex": "\\<(base|link|a)(\\s*)+(.|)+href(\\s*|)+\\=(\\s*|)+('|\")(\\s*|)+(http:|https:|//|javascript:).+\\>",
                                        "remarks": "", "score": ""}, {
                                        "regex": "\\",
                                        "remarks": "", "score": ""}, {
                                        "regex": "\\<(body|table)(\\s*)+(.|)+background(\\s*|)+\\=(\\s*|)+('|\"|)(\\s*|)+(http:|https:|//|javascript:).+\\>",
                                        "remarks": "", "score": ""}, {
                                        "regex": "\\+(.|)+\\{(.|)+background(|-image)(\\s*|)+:(.|)+url(\\s*|)+\\((\\s*|)+('|\"|)(\\s*|)+(http:|https:|javascript:).+\\",
                                        "remarks": "", "score": ""}, {
                                        "regex": "\\",
                                        "remarks": "", "score": ""}, {
                                        "regex": "\\",
                                        "remarks": "", "score": ""}, {
                                        "regex": "\\+(.|)+\\{(.|)+list-style-image(\\s*|)+:(.|)+url(\\s*|)+\\((\\s*|)+('|\"|)(\\s*|)+(http:|https:|//|javascript:).+\\",
                                        "remarks": "", "score": ""}],
                     "regexesBrowseDirectory": [{"regex": "\\.\\./\\.\\./+\\W", "remarks": "", "score": ""},
                                                {"regex": "(?:etc\\/\\W*passwd)", "remarks": "", "score": ""},
                                                {"regex": "c:\\winnt\\system32", "remarks": "", "score": ""},
                                                {"regex": "\\.(svn|htaccess|bash_history)", "remarks": "",
                                                 "score": ""}],
                     "regexesElse": [{"regex": "(curl|wget)([\\s\\S]*?).php", "remarks": "", "score": ""},
                                     {"regex": "\\$_(get|post|cookie|files|session|env|phplib|globals|server)\\[",
                                      "remarks": "", "score": ""}, {
                                         "regex": "/(attachments|upimg|images|css|uploadfiles|html|uploads|templets|static|template|data|inc|forumdata|upload|includes|cache|avatar)/(\\w+).(php|jsp)",
                                         "remarks": "", "score": ""}, {
                                         "regex": "(?:define|eval|file_get_contents|include|require|require_once|shell_exec|phpinfo|system|passthru|preg_\\w+|execute|echo|print|print_r|var_dump|(fp)open|alert|showmodaldialog)\\(",
                                         "remarks": "", "score": ""},
                                     {"regex": "/+\\w+\\.(bak|inc|old|mdb|sql|backup|java|class)$", "remarks": "",
                                      "score": ""},
                                     {"regex": "(vhost|bbs|host|wwwroot|www|site|root|hytop|flashfxp).*\\.rar",
                                      "remarks": "", "score": ""},
                                     {"regex": "(onmouseover|onerror|onload)\\='", "remarks": "", "score": ""},
                                     {"regex": "xwork.methodaccessor ", "remarks": "", "score": ""},
                                     {"regex": "xwork\\.MethodAccessor", "remarks": "", "score": ""},
                                     {"regex": "(gopher|doc|php|glob|file|phar|zlib|ftp|ldap|dict|ogg|data)\\:\\/  ",
                                      "remarks": "", "score": ""},
                                     {"regex": "(phpmyadmin|jmx-console|jmxinvokerservlet)", "remarks": "",
                                      "score": ""}, {"regex": "java\\.lang   ", "remarks": "", "score": ""},
                                     {"regex": "\\:\\$", "remarks": "", "score": ""}]}, "version": "0.1",
           "name": "wafRegex", "update": "20170106"}
# -*- coding: UTF-8 -*
import sys
sys.path.append('../vsm')
sys.path.append('../bayes')
reload(sys)
sys.setdefaultencoding('utf8')

import re
import urllib
import pandas as pd

from baishan_regex import regexes as regexes1
#from baishan_rules4 import regexes as regexes1

def uriFilter(regexes, uri):
    """Classify one URI fragment as ``'attack'`` or ``'normal'``.

    The fragment is converted to text, URL-decoded and lower-cased, then
    searched with every pattern found under ``regexes["regex"]``.

    For each matching rule two lines are printed: ``regex-<pattern> is
    matched:`` followed by the rule's ``"score"`` value.

    Args:
        regexes: rule table shaped like ``{"regex": {category: [rule, ...]}}``
            where every rule is a dict with at least the keys ``"regex"``
            and ``"score"``.
        uri: the value to inspect; it is passed through ``str()`` first.

    Returns:
        ``'attack'`` if at least one rule matched, otherwise ``'normal'``.
    """
    # unquote lives in urllib on Python 2 and in urllib.parse on Python 3.
    try:
        from urllib import unquote
    except ImportError:
        from urllib.parse import unquote

    text = str(uri)
    if isinstance(text, bytes):
        # Python 2 only (str is bytes there): decode with the default
        # encoding and drop undecodable bytes.
        text = text.decode(errors='ignore')
    text = unquote(text).lower()

    # Dedicated hit counter: the verdict must depend only on matches, never
    # on a loop index.
    hits = 0
    for rules in regexes["regex"].values():
        for rule in rules:
            try:
                pattern = re.compile(rule["regex"])
            except re.error:
                # Report the broken pattern and skip it instead of searching
                # with a stale (or undefined) compiled pattern.
                print(rule["regex"])
                continue
            if pattern.search(text):
                print('regex-%s is matched:' % (rule["regex"]))
                print(rule["score"])
                hits += 1
    return 'attack' if hits else 'normal'

#---------------------获取参数名------------------
def getParam(url):
    """Return the query parameter names of *url*, each suffixed with ``'='``.

    Everything after the first ``'?'`` is treated as the query string; when
    there is no ``'?'`` the whole input is treated as the query string.  The
    query is split on ``'&'`` and, for every segment containing ``'='``, the
    text before the first ``'='`` plus ``'='`` is collected
    (``"format=json"`` yields ``"format="``).

    Segments without ``'='`` (empty segments, bare flags) are skipped, which
    is the same rule the single-parameter case has always applied.

    Args:
        url: URL or raw query string.

    Returns:
        List of ``"name="`` strings in order of appearance; empty when no
        ``name=value`` pair is present.
    """
    _, marker, query = url.partition('?')
    if not marker:
        # No '?': treat the whole input as the query string.
        query = url
    return [pair.split('=', 1)[0] + '='
            for pair in query.split('&')
            if '=' in pair]

#---------------------得到词库---------------------
def getValue(line,splits):
    """Extract the text that follows the first occurrence of *splits* in *line*.

    The result runs from just after *splits* up to (not including) the next
    ``'&'``, or to the end of *line* when no ``'&'`` follows.  An empty
    string is returned when *splits* does not occur in *line*.
    """
    _, found, remainder = line.partition(splits)
    if not found:
        # Marker absent: nothing to extract.
        return ''
    # Keep only the part before the next parameter separator.
    return remainder.partition('&')[0]

def testFilter(regexes,logPath):
    """Run the rule set over every line of a tab-separated log file.

    Each line is split on tab at most twice and everything from the third
    field onward is treated as the request text.  For every parameter name
    found by ``getParam`` the corresponding value (``getValue``) is checked
    with ``uriFilter``; checking of a line stops at the first value
    classified as ``'attack'``.

    Lines with fewer than three tab-separated fields are skipped.

    Args:
        regexes: rule table forwarded unchanged to ``uriFilter``.
        logPath: path of the log file to read.

    Returns:
        None.  Any output comes from ``uriFilter``.
    """
    # The context manager guarantees the log file is closed, even on error.
    with open(logPath) as log:
        for line in log:
            fields = line.split('\t', 2)
            if len(fields) < 3:
                # Malformed line: no request field to inspect.
                continue
            request = fields[2]
            # getParam is evaluated once per line; an empty list simply
            # means there is nothing to check.
            for name in getParam(request):
                if uriFilter(regexes, getValue(request, name)) == 'attack':
                    break

# Run the imported rule set over the sample log.
testFilter(regexes1,'t01.log')

# Tally how often each "regex-..." line occurs in the result file, keyed by
# the first whitespace-delimited token of the line.
# NOTE(review): presumably testRulesResultdata0.txt holds captured output of
# an earlier run -- nothing in this script writes it; confirm.
counts = dict()
with open('testRulesResultdata0.txt') as fhand:
    for line in fhand:
        if line.startswith("regex-"):
            # A line starting with "regex-" always has a first token.
            key = line.split()[0]
            counts[key] = counts.get(key, 0) + 1

# Avoid naming the loop variable `str`: that would shadow the builtin for
# the rest of the module.
for key in counts:
    print('%s %s' % (key, counts[key]))



# 你可能感兴趣的:(数据库后台) (site footer: "You may also be interested in: database backend")