今天想上fofa找几个网站实战下,fofa高级会员有点贵,用的一个共享号
当天请求过多,明天再试,嗯,那就不调用api了,自己写一个爬虫,爬取web界面的数据总行了吧
多线程爬取 3.8s
下面是异步爬取 2.16s,还是要快一点的哈
代码都放下边 只需要将'authorization' 换成自己的 上号后F12就行了
fofa的普通会员跟高级会员网站查询量是一样的,也就是普通会员也能从api的100条用这个脚本快速获取10000条数据
如果是普通用户,只能获取50个数据,还有一个思路就是使用日期
语法: app="phpStudy探针" && before="2021-05-25"
这样获取到的就是5-25号之前的,更改日期 24号之前 23号之前,4 月 3月 然后添加到列表的时候记得去重,这个就不先折腾了
异步的
import requests
import base64
import time
from concurrent.futures import ThreadPoolExecutor
import aiohttp
import asyncio
from functools import wraps
from asyncio.proactor_events import _ProactorBasePipeTransport
def silence_event_loop_closed(func):
    """Decorator that swallows the spurious ``RuntimeError: Event loop is closed``
    raised from ``_ProactorBasePipeTransport.__del__`` during interpreter
    shutdown on Windows; any other RuntimeError is re-raised unchanged."""
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except RuntimeError as exc:
            # Only the benign shutdown noise is suppressed.
            if str(exc) != 'Event loop is closed':
                raise

    return wrapper


# Patch the transport destructor so the traceback spam disappears at exit.
_ProactorBasePipeTransport.__del__ = silence_event_loop_closed(_ProactorBasePipeTransport.__del__)
ip_list=[]  # accumulates the asset ids returned by every crawled page
# The 'authorization' value below must be copied manually from your own
# logged-in session (F12 → request headers).
async def download(page):
    """Fetch one result page from the fofa web API and append each asset id
    to the module-level ``ip_list``.

    Relies on the module-level ``qbase64`` (base64-encoded query) being set
    before the event loop starts.

    :param page: 1-based page number to request.
    """
    url = 'https://api.fofa.so/v1/search?'
    headers = {
        # Copy your own JWT here after logging in (F12 → request headers).
        'authorization': 'eyJhbGciOiJIUzUxMiIsImtpZCttIk5XWTVZakF4TVRkalltSTJNRFZsWXpRM05EWXdaakF3TURVMlkyWTNZemd3TUdRd1pUTmpZUT09IiwidHlwIjoiSldUIn0.eyJpZCI6MTAzNTU2LCJtaWQiOjEwMDA2MjkyNSwidXNlcm5hbWUiOiJiZ2JpbmdzZWMiLCJleHAiOjE2MjE5ODA3NzQuNDc5NTg0LCJpc3MiOiJyZWZyZXNoIn0.txor3e8ydq4PlhuSXXVuyBMtBoPgA1aDfqVBQlj0U2hsfmPx9DojqIYMunjk5BwKjmOTQddMM7WBj5kdHNwBug'
    }
    print(f'正在爬取第{page}页')
    params = {
        'q': 'app="phpStudy探针"',
        'qbase64': qbase64,
        'full': 'false',
        'pn': page,
        'ps': '10'
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(url=url, headers=headers, params=params) as resp:
            payload = await resp.json()
    # Iterate the assets actually returned instead of blindly indexing
    # 0..9 — the original raised IndexError whenever a page (typically the
    # last one, or an error response) held fewer than 10 results.
    assets = (payload.get('data') or {}).get('assets') or []
    for asset in assets[:10]:
        ip_list.append(asset['id'])
    print(f'第{page}页,爬取完成')
async def main():
    """Crawl result pages 1-100 concurrently and wait for all of them."""
    # asyncio.wait() with bare coroutine objects has been deprecated since
    # 3.8 and raises TypeError on 3.11+; gather() also propagates worker
    # exceptions instead of silently discarding them.
    await asyncio.gather(*(download(i) for i in range(1, 101)))
if __name__ == '__main__':
    # The raw fofa query (e.g. app="phpStudy探针") must be base64-encoded
    # for the qbase64 request parameter.
    qbase64 = base64.b64encode(input("输入获取的:").encode('utf-8')).decode('utf-8')
    star = time.time()
    asyncio.run(main())
    print(ip_list)
    print(len(ip_list))
    print(time.time() - star)
    # Persist one asset id per line.
    with open('./fofa.txt', 'w', encoding='utf-8') as f:
        f.writelines(item + '\n' for item in ip_list)
多线程的
import requests
import base64
import time
from concurrent.futures import ThreadPoolExecutor
ip_list=[]  # accumulates the asset ids returned by every crawled page
# The 'authorization' value below must be copied manually from your own
# logged-in session (F12 → request headers).
def get_one_page(page, qbase64):
    """Fetch one result page from the fofa web API and append each asset id
    to the module-level ``ip_list``.

    :param page: 1-based page number to request.
    :param qbase64: base64-encoded fofa query string.
    """
    url = 'https://api.fofa.so/v1/search?'
    headers = {
        # Copy your own JWT here after logging in (F12 → request headers).
        'authorization': 'eyJhbGciOiJIUzUxMiIsImtpZCnsIk5XWTVZakF4TVRkalltSTJNRFZsWXpRM05EWXdaakF3TURVMlkyWTNZemd3TUdRd1pUTmpZUT09IiwidHlwIjoiSldUIn0.eyJpZCI6MTAzNTU2LCJtaWQiOjEwMDA2MjkyNSwidXNlcm5hbWUiOiJiZ2JpbmdzZWMiLCJleHAiOjE2MjE5ODA3NzQuNDc5NTg0LCJpc3MiOiJyZWZyZXNoIn0.txor3e8ydq4PlhuSXXVuyBMtBoPgA1aDfqVBQlj0U2hsfmPx9DojqIYMunjk5BwKjmOTQddMM7WBj5kdHNwBug'
    }
    print(f'正在爬取第{page}页')
    params = {
        'q': 'app="phpStudy探针"',
        'qbase64': qbase64,
        'full': 'false',
        'pn': page,
        'ps': '10'
    }
    payload = requests.get(url, headers=headers, params=params).json()
    # Iterate the assets actually returned instead of blindly indexing
    # 0..9 — the original raised IndexError whenever a page (typically the
    # last one, or an error response) held fewer than 10 results.
    assets = (payload.get('data') or {}).get('assets') or []
    for asset in assets[:10]:
        ip_list.append(asset['id'])
    print(f'第{page}页,爬取完成')
def main():
    """Read the fofa query from stdin, then crawl pages 1-100 with a
    50-thread pool."""
    # Decode to str so the query parameter matches the async variant
    # (b64encode alone returns bytes).
    qbase64 = base64.b64encode(input("输入获取的:").encode('utf-8')).decode('utf-8')
    with ThreadPoolExecutor(50) as pool:
        futures = [pool.submit(get_one_page, page=i, qbase64=qbase64)
                   for i in range(1, 101)]
    # Exceptions raised inside workers were silently discarded before;
    # report them so failed pages are at least visible.
    for fut in futures:
        exc = fut.exception()
        if exc is not None:
            print(f'页面抓取失败: {exc}')
if __name__ == '__main__':
    star = time.time()
    main()
    print(ip_list)
    print(len(ip_list))
    print(time.time() - star)
    # Persist one asset id per line.
    with open('./fofa.txt', 'w', encoding='utf-8') as f:
        f.writelines(item + '\n' for item in ip_list)
今天写脚本遇到的坑,代码没问题,
就是会报RuntimeError: Event loop is closed,虽然也能正常爬取完并保存下来,但是结尾一串红字看着总不舒服
解决办法来源 https://blog.csdn.net/L_W_D_/article/details/113684744