因为项目需要,长期使用360的Quake和奇安信的Hunter进行资产收集。但是收集到的资产有很多无法访问,而Hunter的结果都是200。所以写了一个简单的脚本:先将Quake和Hunter的结果进行对比,去掉重复的部分,然后探测存活,并把存活的URL写入新的文本文件。
# -*- coding: utf-8 -*-
from modulefinder import STORE_GLOBAL
import os
from tkinter import E
import requests
def qc(url1, url2):
    """Return the URLs from ``url2`` that do not also appear in ``url1``.

    Lines are stripped of surrounding whitespace and blank lines are
    ignored. Each entry of ``url2`` that is also present in ``url1`` is
    reported on stdout and dropped.

    Args:
        url1: Iterable of text lines (e.g. an open file) with the reference
            URLs (the QiAnXin/Hunter export).
        url2: Iterable of text lines (e.g. an open file) with the candidate
            URLs (the 360/Quake export).

    Returns:
        A new list holding the remaining entries of ``url2`` in their
        original order.
    """
    # Keep all state local so the function works when imported and does not
    # accumulate entries across repeated calls.
    known = set()
    for line in url1:
        entry = line.strip()
        if entry:
            known.add(entry)

    print("============开始去重================")
    remaining = []
    for line in url2:
        entry = line.strip()
        if not entry:
            continue
        # Set lookup keeps the comparison linear in the size of the inputs.
        if entry in known:
            print("重复的url为:" + entry)
        else:
            remaining.append(entry)
    return remaining
def url_scan(urls):
    """Probe each URL and record the ones that answer with HTTP 200.

    Every URL is requested with a GET (5-second timeout, fixed Firefox
    User-Agent). URLs whose response status is 200 are printed and written,
    one per line, to ``360_OK.txt`` in the current directory; the file is
    overwritten on each call. A summary line with the number of URLs probed
    and the number found alive is printed at the end.

    Args:
        urls: Iterable of URL strings to probe.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0'}
    total = 0
    alive = 0
    # The context manager guarantees the result file is flushed and closed
    # even if probing is interrupted.
    with open("360_OK.txt", 'w', encoding="utf-8") as url_save:
        for url in urls:
            total += 1
            try:
                status = requests.get(url, headers=headers, timeout=5).status_code
            except requests.RequestException:
                # Only request-level failures (timeouts, connection errors,
                # malformed URLs) are treated as "unreachable"; Ctrl-C and
                # programming errors still propagate.
                print("\033[0;32;40m访问失败:\033[0m" + url)
                continue
            if status == 200:
                alive += 1
                print("\033[0;31;40m目标存活:\033[0m" + url)
                url_save.write(url + '\n')
    print("去重后共有url:%d 存活:%d" % (total, alive))
if __name__ == "__main__":
    # Input files: the QiAnXin/Hunter export and the 360/Quake export,
    # one URL per line.
    hunter = "qianxin.txt"
    quake = "360.txt"
    # qc may use these module-level lists as its working buffers, so they
    # must exist before it is called.
    str1 = []
    str2 = []
    # Open both inputs in one context manager so they are closed as soon as
    # the de-duplication step has consumed them.
    with open(hunter, 'r') as url1, open(quake, 'r') as url2:
        url_list = qc(url1, url2)
    print("=============开始探测存活===============")
    url_scan(url_list)
    print("\033[0;32;40m=====探测结束,存活已导入360_OK.txt=====\033[0m")
代码很简单,只把返回状态码为200的URL视为存活。如有不足之处,还望各位大佬指正。