fofa爬取ip

import os
import re
from time import sleep

import requests
from bs4 import BeautifulSoup
def build_headers(cookie=None):
    """Return the HTTP request headers sent with every FOFA request.

    Args:
        cookie: Raw ``Cookie`` header value copied from a logged-in browser
            session. When it is empty or ``None`` no cookie header is sent.

    Returns:
        A new dict mapping header names to header values.
    """
    result = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    }
    if cookie:
        result['cookie'] = cookie
    return result


# The session cookie carries account tokens, so it is read from the
# FOFA_COOKIE environment variable instead of being committed to the source.
headers = build_headers(os.environ.get("FOFA_COOKIE"))
def file_put(str, path="ip.txt"):
    """Append text to the results file.

    Args:
        str: Text to append, written as-is (callers add their own newline).
            The name shadows the builtin but is kept so that existing
            callers passing it by keyword keep working.
        path: File to append to; defaults to ``ip.txt`` in the current
            working directory.
    """
    # Explicit UTF-8 so that scraped non-ASCII text never depends on the
    # platform's default encoding.
    with open(path, "a", encoding="utf-8") as f:
        f.write(str)
def spider_link(url):
    """Fetch one result page and append its link texts to the results file.

    Every ``<a target="_blank">`` element on the page is inspected. The text
    that directly follows the opening tag is printed and handed to
    ``file_put``, one entry per line. Anchors with no such text are skipped.

    Args:
        url: Full URL of the result page to download.
    """
    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(url, headers=headers, timeout=15)
    except requests.RequestException as exc:
        print("request failed for " + url + ": " + repr(exc))
        return
    if response.status_code != 200:
        # An error page carries no results; report it instead of silently
        # producing nothing for this page.
        print("unexpected HTTP status %d for %s" % (response.status_code, url))
        return

    soup = BeautifulSoup(response.text, 'lxml')
    anchors = soup.find_all('a', attrs={'target': '_blank'})

    # Capture only the text up to the next tag or line end, so trailing
    # markup such as "</a>" never ends up in the saved value.
    pattern = re.compile(r'target="_blank">([^<\n]*)')
    for anchor in anchors:
        match = pattern.search(str(anchor))
        if match is None:
            continue
        ip = match.group(1).strip()
        if not ip:
            continue
        # Pause between entries; this also spaces out successive page
        # requests made by the caller.
        sleep(2)
        print(ip)
        file_put(ip + "\n")
# Crawl pages 0 through 49 of one fixed query, ten results per page.
# NOTE(review): qbase64 looks like the base64 form of country="US" - confirm.
PAGE_URL_PREFIX = "https://fofa.so/result?qbase64=Y291bnRyeT0iVVMi&page="
PAGE_URL_SUFFIX = "&page_size=10"
for page_number in range(50):
    spider_link(PAGE_URL_PREFIX + str(page_number) + PAGE_URL_SUFFIX)

fofa爬取ip_第1张图片
fofa爬取ip_第2张图片
运行后可以成功将结果写入 txt 文件,使用时只需把 cookie 换成自己的即可。目前存在一个问题:爬取的页数较多时会丢失数据,之后再修改,这里先做个记录。

你可能感兴趣的:(爬虫)