用途:个人学习笔记,有所借鉴,欢迎指正
主要包含 requests 库和网页解析库 BeautifulSoup 的使用,用于 Python 爬虫自动化和批量信息收集。
Python开发工具:PyCharm 2022.1 激活破解码_安装教程 (2022年8月25日更新~)-小白学堂
import requests,time
from bs4 import BeautifulSoup
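# Sample row of the ranking table that get_eduName() parses (apparently the
# HTML markup around these values was lost when the notes were extracted):
#   1 | 上海交通大学 | 3994 | 10523
# NOTE(review): presumably rank, organisation name and two counters -- confirm
# the column meanings against the live page.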
def get_eduName():
    """Crawl the SJTU SRC firm ranking and append every firm name to eduname.txt.

    Requests pages 1..208 of ``https://src.sjtu.edu.cn/rank/firm/0/``, reads
    the text of the first link in every ``<tr>`` of each page, prints it and
    appends it (one name per line, UTF-8) to ``eduname.txt`` in the current
    working directory.

    A page that cannot be downloaded or written is reported on stdout and
    skipped after a one-second pause; the crawl then continues with the next
    page. Rows without a usable link (for example a header row) are ignored.

    Returns:
        None. Results are printed and written to ``eduname.txt``.
    """
    for i in range(1, 209):
        url = 'https://src.sjtu.edu.cn/rank/firm/0/?page=%s' % str(i)
        try:
            # A timeout keeps one stalled connection from hanging the crawl.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            print('------->正在获取第%s页面数据' % str(i))
            soup = BeautifulSoup(response.text, 'lxml')

            names = []
            for edu in soup.find_all('tr'):
                link = edu.a
                # Skip rows with no link, and links whose text is not a
                # single string (``.string`` is None in that case), so one
                # odd row does not discard the rest of the page.
                if link is None or link.string is None:
                    continue
                edu_name = str(link.string)
                print(edu_name)
                names.append(edu_name)

            if names:
                # Open the output once per page; ``with`` closes the file.
                with open('eduname.txt', 'a', encoding='utf-8') as f:
                    f.writelines(name + '\n' for name in names)
        except (requests.RequestException, OSError) as e:
            # Best effort: report the failing page, back off, carry on.
            print('------->第%s页获取失败: %s' % (str(i), e))
            time.sleep(1)
# Script entry point: start the crawl only when this file is run directly,
# not when it is imported.
if __name__ == "__main__":
    get_eduName()
import requests
from bs4 import BeautifulSoup
# Scrape the FOFA web UI search results for a fixed query: read the total
# result count from the first page, then walk every result page and print the
# host of each hit.
header = {
    # Log in to FOFA and copy the ``fofa_token`` login credential from the
    # request headers shown in the browser.
    # NOTE(review): a real session token is hard-coded below; it should be
    # loaded from configuration/environment instead and the token rotated.
    'cookie': 'fofa_token=eyJhbGciOiJIUzUxMiIsImtpZCI6Ik5XWTVZakF4TVRkalltSTJNRFZsWXpRM05EWXdaakF3TURVMlkyWTNZemd3TUdRd1pUTmpZUT09IiwidHlwIjoiSldUIn0.eyJpZCI6MjgyNzMsIm1pZCI6MTAwMDIxOTg4LCJ1c2VybmFtZSI6InhpYW9kaXNlYyIsImV4cCI6MTY3MTI4MjUzOH0.0ukMGFIrIvzDOzpUl9JglOoMpzbIPCczGRDeqKdmFYHfStd2jdwc6LGby3Ke0UR2suvErzhOTPYL2ACe4Goi8Q; '
}
url = 'https://fofa.info/result?qbase64=dGl0bGU9IuS4iua1t%2BS6pOmAmuWkp%2BWtpiIgJiYgY291bnRyeT0iQ04i'
s = requests.get(url, headers=header, timeout=10).text
soup = BeautifulSoup(s, 'lxml')

# Work out the number of result pages (10 results per page). ``yeshu`` stays
# 0 when the counter element is missing, so the page loop below is skipped
# instead of failing on an undefined name.
yeshu = 0
edu1 = soup.find_all('p', attrs={'class': 'hsxa-nav-font-size'})
for edu in edu1:
    if edu.span is None:
        continue
    edu_name = edu.span.get_text()
    try:
        # Tolerate surrounding whitespace and thousands separators.
        total = int(edu_name.strip().replace(',', ''))
    except ValueError:
        continue
    # Ceiling division: an exact multiple of 10 must not add an empty page.
    yeshu = -(-total // 10)
    print(yeshu)

for ye in range(1, yeshu + 1):
    url = 'https://fofa.info/result?qbase64=dGl0bGU9IuS4iua1t%2BS6pOmAmuWkp%2BWtpiIgJiYgY291bnRyeT0iQ04i&page=' + str(ye) + '&page_size=10'
    print(url)
    s = requests.get(url, headers=header, timeout=10).text
    # Parse the page that was just downloaded; reusing the soup built from
    # the first request would print the same hosts for every page.
    soup = BeautifulSoup(s, 'lxml')
    edu1 = soup.find_all('span', attrs={'class': 'hsxa-host'})
    for edu in edu1:
        # Fall back to the span's own text when the host is not a link.
        node = edu.a if edu.a is not None else edu
        edu_name = node.get_text().strip()
        print(edu_name)
Python代码实现:
import requests
import base64
#https://fofa.info/api/v1/search/all?email=your_email&key=your_key&qbase64=dGl0bGU9ImJpbmci
def get_fofa_data(email, apikey):
    """Query the FOFA search API once for every name listed in eduname.txt.

    For each non-blank line of ``eduname.txt`` (UTF-8) the query
    ``"<name>" && country="CN" && title=="Error 404--Not Found"`` is
    base64-encoded and sent to ``/api/v1/search/all``. The first field of
    every returned result row is printed.

    Args:
        email: FOFA account e-mail, sent as the ``email`` query parameter.
        apikey: FOFA API key, sent as the ``key`` query parameter.

    Returns:
        None. All output goes to stdout. A request that fails, or an answer
        that is not JSON or carries an ``error`` flag, is reported and the
        loop moves on to the next name.
    """
    api = 'https://fofa.info/api/v1/search/all'
    # ``with`` guarantees the name list is closed when the loop ends.
    with open('eduname.txt', encoding='utf-8') as names:
        for eduname in names:
            e = eduname.strip()
            if not e:
                # A blank line would search for the empty string "".
                continue
            search = '"%s" && country="CN" && title=="Error 404--Not Found"' % e
            b = base64.b64encode(search.encode('utf-8')).decode('utf-8')
            try:
                # Passing ``params`` lets requests percent-encode the base64
                # text; a raw '+', '/' or '=' in the URL would corrupt it.
                resp = requests.get(
                    api,
                    params={'email': email, 'key': apikey, 'qbase64': b},
                    timeout=10,
                )
                s = resp.json()
            except (requests.RequestException, ValueError) as err:
                print('查询->' + e)
                print('查询失败: %s' % err)
                continue
            # Print the stripped name so no stray newline splits the output.
            print('查询->' + e)
            print(resp.url)
            if s.get('error'):
                # NOTE(review): assumes FOFA reports failures through
                # ``error``/``errmsg`` fields -- confirm against the API docs.
                print('查询失败: %s' % s.get('errmsg'))
                continue
            if s.get('size', 0) != 0:
                print(e + '有数据啦!')
                for ip in s.get('results') or []:
                    print(ip[0])
            else:
                print('没有数据')
# Script entry point: run the FOFA API lookup with the account credentials
# below when this file is executed directly.
if __name__ == "__main__":
    # NOTE(review): credentials are hard-coded; replace them with your own
    # and avoid committing a real API key.
    email, apikey = (
        '[email protected]',  # your own FOFA account e-mail
        '0fccc926c6d0c4922cbdc620659b9a42',  # API key from the FOFA personal center
    )
    get_fofa_data(email=email, apikey=apikey)