[python] CVE信息大集合

这段代码从 cve.mitre.org 和 cve.scap.org.cn 两个网站爬取指定漏洞的相关信息,并以该漏洞的 CVE 编号作为文件名,将结果保存到文本文件中。

代码运行的情况如下:

[python] CVE信息大集合_第1张图片

此时文件夹下会产生新的文件:

[python] CVE信息大集合_第2张图片

文件的内容为:

[python] CVE信息大集合_第3张图片

之后进行容错测试。CVE-2005-2125 属于有编号但无记录的情况,程序运行之后,生成的文本内容如下:

[python] CVE信息大集合_第4张图片

再以CVE-2016-0001为例,看编号不存在的情况下,生成文件的内容:

[python] CVE信息大集合_第5张图片


程序代码:

# -*- coding: utf-8 -*-
import urllib2,httplib
import re
import sys,time
import ctypes

# Module-level result slots. eninfo() and cninfo() overwrite these through
# `global` statements, and the __main__ section writes them to the output file.
TITLE = 'default title'
ENDESCRIPTION = 'default english description'
CNDESCRIPTION = 'default chinese description'
TIME = 'default time'
BULLETIN = 'default bulletin'
# Argument passed to GetStdHandle below to select the standard output device.
STD_OUTPUT_HANDLE = -11
# Colour attribute values handed to SetConsoleTextAttribute.
# NOTE(review): 0x0c / 0x09 / 0x0a appear to include the 0x08 intensity bit,
# i.e. they would be the bright variants of red / blue / green -- confirm
# against the Win32 console documentation.
FOREGROUND_DARKSKYBLUE = 0x03 # dark sky blue.
FOREGROUND_RED = 0x0c # red.
FOREGROUND_BLUE = 0x09 # blue.
FOREGROUND_GREEN = 0x0a # green.

# Look up the console's stdout handle once at import time. This goes through
# ctypes.windll, so importing the module requires Windows.
std_out_handle = ctypes.windll.kernel32.GetStdHandle(STD_OUTPUT_HANDLE)
 
def set_cmd_text_color(color, handle=std_out_handle):
    """Apply the text attribute *color* to the console behind *handle*.

    *handle* defaults to the stdout handle captured at import time.
    Returns whatever kernel32.SetConsoleTextAttribute returns.
    """
    return ctypes.windll.kernel32.SetConsoleTextAttribute(handle, color)
 
def resetColor():
    """Switch the console text back to white (red, green and blue combined)."""
    white = FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE
    set_cmd_text_color(white)

def printDarkSkyBlue(mess):
    """Write *mess* to stdout in dark sky blue, then restore the white colour.

    No newline is appended. The colour is restored even when writing fails,
    so an error here cannot leave the console stuck in the highlight colour.
    """
    set_cmd_text_color(FOREGROUND_DARKSKYBLUE)
    try:
        sys.stdout.write(mess)
        # The console attribute applies to text at the moment it reaches the
        # console, so push any buffered output out before the colour is reset.
        sys.stdout.flush()
    finally:
        resetColor()

def eninfo(id):
	"""Fetch the cve.mitre.org entry for a CVE and store its English details.

	Requests http://cve.mitre.org/cgi-bin/cvename.cgi?name=<id> with
	browser-like headers and fills two module globals from the page:
	ENDESCRIPTION (falls back to a Chinese "not yet recorded" message when
	nothing matches) and TIME (falls back to an empty string).

	Also prints a cosmetic progress counter: 0-19 after the description step
	and 21-39 after the time step, sleeping 0.05 s per tick.

	id -- CVE identifier string, appended verbatim to the query URL.

	Returns None. Errors raised by urllib2.urlopen are not caught here.
	"""
	global ENDESCRIPTION
	global TIME
	enurl = "http://cve.mitre.org/cgi-bin/cvename.cgi?name="+id
	# (disabled debug output: would print the URL being fetched)
	header = {
	'Host': 'cve.mitre.org',
	'Proxy-Connection': 'keep-alive',
	'Cache-Control': 'max-age=0',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
	'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.132 Safari/537.36',
	'Accept-Language': 'zh-CN,zh;q=0.8',
	'Cookie': '__utmt=1; __utma=78438598.677229659.1437618458.1438155430.1438843412.3; __utmb=78438598.2.10.1438843412; __utmc=78438598; __utmz=78438598.1438843412.3.3.utmcsr=baidu|utmccn=(organic)|utmcmd=organic'
	}
	req = urllib2.Request(enurl,None,header)
	response = urllib2.urlopen(req)	
	htmlpage = response.read()
	
	# NOTE(review): the patterns below look truncated -- a trailing non-greedy
	# '.*?' with no closing delimiter matches nothing beyond the literal prefix.
	# Presumably the HTML tags that bounded them were lost when this listing
	# was published; confirm against the original script before relying on it.
	try:
		ENDESCRIPTION = re.compile(r'Description.*?',re.DOTALL).findall(htmlpage)# capture the English description within a fairly broad span
		ENDESCRIPTION = re.compile(r'.*?',re.DOTALL).findall(ENDESCRIPTION[0])# narrow the span down to the description itself
		ENDESCRIPTION = ENDESCRIPTION[0][16:-7]# keep only the description text (drops 16 leading / 7 trailing characters)
	except IndexError,e:
		# No match at all: report that the vulnerability is not recorded yet.
		ENDESCRIPTION = '此漏洞还未被收录'
	# Cosmetic progress display; '\r' plus the trailing comma keeps it on one line.
	for i in range(20):
		print "PROCESS:"+str(i)+"%\r",
		sys.stdout.flush()
		time.sleep(0.05)

	try:
		TIME = re.compile(r'Assigned.*?',re.DOTALL).findall(htmlpage)# capture the time information within a fairly broad span
		TIME = re.compile(r'\(.*?\)',re.DOTALL).findall(TIME[0])
		TIME = TIME[0][1:-1]# strip the surrounding parentheses
	except IndexError,e:
		TIME = ''
	# Second progress segment; note that 20 itself is never printed.
	for i in range(21,40):
		print "PROCESS:"+str(i)+"%\r",
		sys.stdout.flush()
		time.sleep(0.05)
	
def _show_progress(start, stop):
    """Print the cosmetic "PROCESS:n%" counter for n in [start, stop).

    Each tick overwrites the previous one via '\\r' and sleeps 0.05 s.
    """
    for pct in range(start, stop):
        sys.stdout.write("PROCESS:"+str(pct)+"%\r")
        sys.stdout.flush()
        time.sleep(0.05)


def cninfo(id):
    """Fetch the cve.scap.org.cn page for a CVE and store its Chinese details.

    Requests http://cve.scap.org.cn/<id>.html with browser-like headers and
    fills three module globals from the page: TITLE, CNDESCRIPTION (falls
    back to a Chinese "no Chinese description" message) and BULLETIN (falls
    back to a Chinese "no patch information" message).

    Prints the progress counter for 41-59, 61-79 and 81-100 along the way.

    id -- CVE identifier string, inserted verbatim into the page URL.

    Returns None. Errors raised by urllib2.urlopen are not caught, and a
    failed TITLE match raises IndexError (it has no fallback).

    NOTE(review): this listing was published with its HTML tags stripped, so
    the tag delimiters inside the regex patterns and inside two of the
    replace() arguments are missing. They are reproduced exactly as they
    appear (line breaks written as \\n); restore the real delimiters from the
    original script before relying on this function.
    """
    global CNDESCRIPTION
    global TITLE
    global BULLETIN
    cnurl = "http://cve.scap.org.cn/"+id+".html"
    header = {
        'Host': 'cve.scap.org.cn',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.132 Safari/537.36',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cookie': 'bdshare_firstime=1437619489480; _pk_ref.1.4171=%5B%22%22%2C%22%22%2C1438843771%2C%22http%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DAIqDjeti4hvqQIM_gh8hcn3M5yZnr8pBsmByN4KWhtKKL9mJ3moNKZKir4C6scKb%26wd%3D%26eqid%3Deef55dd50001d8db0000000355c30207%22%5D; Hm_lvt_ca227db14814d01f2e44f01433e48552=1437619469,1438843770; Hm_lpvt_ca227db14814d01f2e44f01433e48552=1438843890; _pk_id.1.4171=d3473bd4dc121139.1437619469.2.1438843890.1437619490.'
    }
    req = urllib2.Request(cnurl,None,header)
    response = urllib2.urlopen(req)
    try:
        htmlpage = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    # NOTE(review): the first pattern is empty in the published listing.
    # The second step takes the 2nd double-quoted string of the first match
    # and strips its quotes.
    TITLE = re.compile(r'',re.DOTALL).findall(htmlpage)
    TITLE = re.compile(r'".*?"',re.DOTALL).findall(TITLE[0])
    TITLE = TITLE[1][1:-1]
    _show_progress(41, 60)

    try:
        # NOTE(review): both patterns lost their surrounding tags (see docstring).
        CNDESCRIPTION = re.compile(r'\n\n.*?\n\n',re.DOTALL).findall(htmlpage)
        CNDESCRIPTION = re.compile(r'\n\n.*?\n\n',re.DOTALL).findall(CNDESCRIPTION[0])
        CNDESCRIPTION = CNDESCRIPTION[0][51:-4]  # the start offset used to be 3
        # NOTE(review): '?' is presumably a garbled character or entity -- confirm.
        CNDESCRIPTION = CNDESCRIPTION.replace('?','')
        # NOTE(review): presumably removed a line-break tag originally -- confirm.
        CNDESCRIPTION = CNDESCRIPTION.replace('\n','')
    except IndexError:
        CNDESCRIPTION = '此漏洞无中文描述'
    _show_progress(61, 80)

    try:
        # NOTE(review): closing delimiters of both patterns are missing.
        BULLETIN = re.compile(r'bulletin_cnnvd">.*?',re.DOTALL).findall(htmlpage)
        BULLETIN = re.compile(r'.*?',re.DOTALL).findall(BULLETIN[0])
        BULLETIN = BULLETIN[0][56:-5]  # the start offset used to be 4
        BULLETIN = BULLETIN.replace('?','')
        BULLETIN = BULLETIN.replace('\n','')
    except IndexError:
        BULLETIN = '此漏洞无补丁信息'
    _show_progress(81, 101)


if __name__=="__main__":
    # Ask for the CVE id, collect both sources, then dump everything to
    # "<id>.txt" in the current directory.
    cveid=raw_input("输入待查询的CVE:")
    eninfo(cveid)
    cninfo(cveid)
    # `with` closes the file even if one of the writes fails.
    with open(cveid+'.txt','w') as fobj:
        fobj.write("TITLE:\n"+TITLE+"\n\n")
        fobj.write("TIME:\n"+TIME+"\n\n")
        fobj.write("CNDESCRIPTION:\n"+CNDESCRIPTION+"\n\n")
        fobj.write("ENDESCRIPTION:\n"+ENDESCRIPTION+"\n\n")
        fobj.write("BULLETIN:\n"+BULLETIN+"\n")
    print("漏洞信息已经保存在"+cveid+".txt")
    printDarkSkyBlue("Author: WANG,Peng")

完事儿

你可能感兴趣的:(python)