有关识别CMS(一)

下面有两个CMS识别程序:第一个是在网上找到的;第二个是自己写的,从MongoDB读取规则进行判断。

#!/usr/bin/python
# -*- coding: UTF-8 -*-

import os
import sys
import threading
import urllib2
import random

# Shared state for the worker threads started in the __main__ section.
# Set to True by the first worker whose rule matches; every worker checks it
# at the top of its loop and stops once it is set.
is_identification = False
# Index into the rule list (cms) of the next rule to hand out to a worker.
g_index = 0
# Held while a worker reads cms[g_index] and increments g_index.
lock = threading.Lock()

def usage():
    """Print the command-line help: argument order and two example calls.

    The second example shows that the scheme may be omitted from the target.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('''
    python whatweb.py 线程数 目标网站
    example:
    python whatweb.py 20 http://www.baidu.com
    python whatweb.py 15 www.baidu.com
    ''')

def list_file(dir):
    """Return the names of the entries found directly inside ``dir``.

    Thin wrapper over ``os.listdir``: names only (no path prefix), in whatever
    order the OS reports them; errors from ``os.listdir`` propagate.
    """
    return os.listdir(dir)

def request_url(url='', data=None, header=None):
    """Fetch ``url`` and return the response body, or '' if the fetch fails.

    Args:
        url: Address to request.
        data: Optional request body, passed straight to ``urllib2.Request``.
        header: Optional dict of request headers; ``None`` means no extra
            headers.

    Returns:
        The raw body returned by ``response.read()``, or an empty string when
        opening or reading the URL raises.
    """
    page_content = ''
    # Build a fresh dict per call instead of sharing one mutable default.
    request = urllib2.Request(url, data, {} if header is None else header)

    try:
        response = urllib2.urlopen(request)
        try:
            page_content = response.read()
        finally:
            # Always release the connection; each probe used to leak one.
            response.close()
    except Exception:
        # Best effort by design: an unreachable or erroring URL simply
        # yields an empty body so the caller's substring test fails.
        pass

    return page_content

def whatweb(target):
    """Worker loop: try fingerprint rules against ``target`` until one matches.

    Each iteration claims the next line of the shared rule list ``cms``.
    A rule has the form ``path------pattern------cmsname``; ``target + path``
    is fetched and the rule matches when ``pattern`` occurs in the body
    (case-insensitive). Blank lines, lines starting with ``#`` and lines that
    do not split into exactly three fields are skipped.

    The loop ends when the rules are exhausted, when this worker finds a
    match (it then sets ``is_identification`` and prints the result), or when
    another worker has already set ``is_identification``.

    Args:
        target: Base URL of the site, without a trailing slash.
    """
    global is_identification
    global g_index
    global cms

    while True:
        # Another worker already identified the CMS.
        if is_identification:
            break

        # The bounds check and the increment must share one critical
        # section: checked outside the lock, two workers could both pass it
        # for the last rule and the slower one would raise IndexError.
        # ``with`` also guarantees the lock is released on any exception.
        with lock:
            if g_index >= len(cms):
                break
            eachline = cms[g_index]
            g_index = g_index + 1
            claimed = g_index  # this worker's position, for the progress display

        if len(eachline.strip()) == 0 or eachline.startswith('#'):
            continue

        fields = eachline.split('------')
        if len(fields) != 3:
            # Malformed rule: skip it rather than let the unpacking
            # ValueError terminate this worker thread.
            continue
        url, pattern, cmsname = fields

        html = request_url(target + url)
        ratenum = int(100 * float(claimed) / float(len(cms)))
        # '\r' rewrites the same console line so progress updates in place.
        sys.stdout.write(random.choice('x+') + ' ' + str(ratenum) + '% ' + target + url + "\r")
        sys.stdout.flush()

        if pattern.upper() in html.upper():
            is_identification = True
            print("[*] 成功识别CMS:%s,匹配的URL:%s,匹配的规则:%s" % (cmsname.strip('\n').strip('\r'), url, pattern))
            break
    return


if __name__ == '__main__':
    # Script entry point: parse "<threads> <target>", load the rule file,
    # normalise the target URL, then run the workers to completion.

    if len(sys.argv) != 3:
        usage()
        sys.exit()

    try:
        threadnum = int(sys.argv[1])
    except ValueError:
        threadnum = 0
    if threadnum < 1:
        # A non-numeric or non-positive thread count is a usage error;
        # show the help instead of a traceback or a silent no-op run.
        usage()
        sys.exit(1)
    target_url = sys.argv[2]

    # Read all rules up front; the context manager closes the file, which
    # previously stayed open for the whole run.
    with open('./cms.txt') as f:
        cms = f.readlines()
    threads = []

    # Rule paths are appended directly to the target, so drop one trailing '/'.
    if target_url.endswith('/'):
        target_url = target_url[:-1]

    # Default to plain HTTP when the user gave no scheme.
    if not target_url.startswith(('http://', 'https://')):
        target_url = 'http://' + target_url

    for i in range(threadnum):
        t = threading.Thread(target=whatweb, args=(target_url,))
        threads.append(t)

    print(u'[*] 开启%d线程' % threadnum)

    for t in threads:
        t.start()

    for t in threads:
        t.join()

    print(u"\n[*] All threads exit")

第二个程序:从数据库读取规则,并把结果存入MongoDB中

import mongo
import pymongo
import builtwith
from Config import FileConfig

import sys

# Python 2-only idiom: reload sys, then switch the interpreter-wide default
# string encoding to UTF-8.
# NOTE(review): presumably the reload is what makes setdefaultencoding
# callable again after interpreter start-up -- confirm before porting.
reload(sys)

sys.setdefaultencoding('utf8')

#----------------------------------------------------------------------
def get_cms_rules(url):
    """Fingerprint ``url`` with builtwith and upsert the result into MongoDB.

    Calls ``builtwith.parse(url)``, prints the result, and writes it with
    ``$set`` into the ``mongo.ls_Info`` document whose ``URL`` field equals
    ``url`` (``upsert=True``, so a missing document is created). Prints
    ``success`` once the update call returns.

    Any exception from the lookup or the database call is reported on stderr
    and then dropped, so the function never raises; it returns ``None``.

    Args:
        url: Site address to analyse; also used as the document key.
    """
    try:
        # NOTE(review): assumes builtwith.parse returns a dict that is
        # acceptable as a $set document -- confirm.
        s = builtwith.parse(url)
        print(s)
        mongo.ls_Info.update({"URL": url},
                             {"$set": s},
                             upsert=True)
        print('success')
    except Exception as e:
        # Still best effort, but say what went wrong instead of failing
        # silently, which made lookup and database errors invisible.
        sys.stderr.write('[-] get_cms_rules failed for %s: %s\n' % (url, e))

    
'''
#----------------------------------------------------------------------
def get_cms_rules(url):
    """"""
    cms_num = 1
    conn = mongo.ls_db.CMS_Rules
    cms_data = conn.find()
    for i in cms_data:
        for cms_rules in i['cms_rules']['value']:
            if len(cms_rules.strip()) == 0 or cms_rules[0] == '#':
                pass
            else:
                url_path, pattern, cms_name = cms_rules.split('------')
                html = link_code(str(url)+str(url_path))
                print  "[+] NO." + str(cms_num) + " cms_rule"
                cms_num = int(cms_num) + 1
                
                print str(url)+str(url_path)
                print len(html)
                if pattern.upper() in html.upper():
                    print '[+]url: ' + url + 'path:' + url_path + 'cms' + cms_name
                    mongo.ls_Info.update({'URL': url},
                                            {"$set":{'url_path':url_path,'cms_name':cms_name.strip('\n').strip('\r')}},
                                            upsert = True)
                    print 'one step'
                    break
                
#get_cms_rules('https://www.www.me/')
'''

你可能感兴趣的:(有关识别CMS(一))