绿盟扫描报告信息提取

1、下载以下第三方扩展库，并加入项目的 classpath

 filterbuilder.jar
 htmllexer.jar
 htmlparser.jar
 jsoup-1.9.2.jar
 junit.jar
 sax2.jar
 thumbelina.jar


2、将以下代码保存为 HtmlLook.java，并运行其中的 main 方法

package com.zgs.look;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.filters.OrFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Extracts vulnerability entries from a scan report saved as HTML.
 *
 * <p>The report is expected to contain an element with id {@code vulDataTable}.
 * Inside it, every {@code <a>} tag carrying an {@code onclick} attribute and a
 * {@code vul-} marker starts a new vulnerability entry, and every table with
 * class {@code cmn_table plumb} supplies the key/value summary for the entry
 * currently being collected. Completed entries are printed to standard output.
 */
public class HtmlLook {
	/** Charset name used to read the report file and to configure the HTML parser. */
	private static final String ENCODE = "UTF-8";

	/** Report that is read when no path is given on the command line. */
	private static final String DEFAULT_REPORT_PATH = "d:/index.html";

	/**
	 * Captures the inner HTML of the first anchor element. DOTALL lets the
	 * captured text span line breaks; CASE_INSENSITIVE accepts {@code <A>}.
	 */
	private static final Pattern LINK_CONTENT_PATTERN =
			Pattern.compile("<a[^>]*>(.*?)</a>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

	/**
	 * Reads the report, walks all table and link nodes below
	 * {@code vulDataTable} and prints each vulnerability name with its summary.
	 *
	 * @param args optional; {@code args[0]} is the path of the report file,
	 *             defaulting to {@code d:/index.html} when absent
	 */
	public static void main(String[] args) {
		String reportPath = (args != null && args.length > 0) ? args[0] : DEFAULT_REPORT_PATH;
		String szContent = openFile(reportPath);
		if (szContent.isEmpty()) {
			System.err.println("Report file is empty or could not be read: " + reportPath);
			return;
		}
		try {
			Document doc = Jsoup.parse(szContent);
			Elements elList = doc.getElementsByAttributeValue("id", "vulDataTable");
			szContent = elList.outerHtml();

			Parser parser = Parser.createParser(szContent, ENCODE);
			NodeFilter[] filters = new NodeFilter[] {
				new NodeClassFilter(TableTag.class),
				new NodeClassFilter(LinkTag.class)
			};
			NodeList list = parser.extractAllNodesThatMatch(new OrFilter(filters));

			String ldName = "";
			String ldJianjie = "";
			for (int i = 0; i < list.size(); i++) {
				Node node = list.elementAt(i);
				if (node instanceof LinkTag) {
					String nodeHtml = node.toHtml();
					if (nodeHtml.contains("onclick") && nodeHtml.contains("vul-")) {
						// A new vulnerability link closes the entry collected so far.
						if (commitIfComplete(ldName, ldJianjie)) {
							ldName = "";
							ldJianjie = "";
						}
						ldName = getLinkTagContent(nodeHtml) + "-----" + severityOf(nodeHtml) + "------";
					}
				} else {
					// Tables without the summary class yield "", which must not
					// wipe out a summary that was already collected.
					String summary = getTableTagContent(node.toHtml());
					if (!summary.isEmpty()) {
						ldJianjie = summary;
					}
				}
			}
			// The last entry has no following link to trigger its commit.
			commitIfComplete(ldName, ldJianjie);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Prints one vulnerability entry when both its name and summary are present.
	 *
	 * @param name    collected name line of the entry
	 * @param summary collected summary of the entry
	 * @return {@code true} if the entry was printed
	 */
	private static boolean commitIfComplete(String name, String summary) {
		if ("".equals(name) || "".equals(summary)) {
			return false;
		}
		System.out.println("---commit---漏洞名称-------" + name);
		System.out.println("---commit---漏洞简介-------" + summary);
		return true;
	}

	/**
	 * Maps the severity marker found in a link's HTML to its display label.
	 *
	 * @param nodeHtml HTML of the vulnerability link
	 * @return the label for {@code vul-vh}, {@code vul-vm} or {@code vul-vl},
	 *         or an empty string when none of them is present
	 */
	private static String severityOf(String nodeHtml) {
		if (nodeHtml.contains("vul-vh")) {
			return "高危漏洞";
		}
		if (nodeHtml.contains("vul-vm")) {
			return "中危漏洞";
		}
		if (nodeHtml.contains("vul-vl")) {
			return "低危漏洞";
		}
		return "";
	}

	/**
	 * Reads a whole text file using the {@link #ENCODE} charset.
	 *
	 * @param szFileName path of the file to read
	 * @return the file content with every line terminated by {@code "\n"}, or
	 *         an empty string if the path is {@code null} or the file cannot be read
	 */
	public static String openFile(String szFileName) {
		if (szFileName == null) {
			return "";
		}
		StringBuilder content = new StringBuilder();
		// try-with-resources closes the reader even when readLine() fails.
		try (BufferedReader reader = new BufferedReader(
				new InputStreamReader(new FileInputStream(new File(szFileName)), ENCODE))) {
			String line;
			while ((line = reader.readLine()) != null) {
				content.append(line).append('\n');
			}
			return content.toString();
		} catch (IOException | SecurityException e) {
			// Callers rely on "" for unreadable files; report the cause instead of hiding it.
			System.err.println("Failed to read " + szFileName + ": " + e.getMessage());
			return "";
		}
	}

	/**
	 * Extracts the content between the opening and closing tag of the first
	 * {@code <a>} element in the given HTML.
	 *
	 * @param link HTML fragment containing an anchor element
	 * @return the inner HTML of the first anchor, or an empty string when
	 *         {@code link} is {@code null} or contains no complete anchor
	 */
	public static String getLinkTagContent(String link) {
		if (link == null) {
			return "";
		}
		Matcher matcher = LINK_CONTENT_PATTERN.matcher(link);
		return matcher.find() ? matcher.group(1) : "";
	}

	/**
	 * Flattens the first table with class {@code cmn_table plumb} into a
	 * single line of {@code |key-value} pairs, one pair per table row.
	 *
	 * <p>For each row the key is the text of the first {@code td} and the
	 * value is the text of the last of the remaining {@code td} cells; a row
	 * without {@code td} cells contributes an empty pair.
	 *
	 * @param table HTML fragment to search
	 * @return the concatenated pairs, or an empty string when {@code table} is
	 *         {@code null} or holds no table with the expected class
	 */
	public static String getTableTagContent(String table) {
		if (table == null) {
			return "";
		}
		Document doc = Jsoup.parse(table);
		Element el = doc.getElementsByAttributeValue("class", "cmn_table plumb").first();
		if (el == null) {
			// first() yields null for an empty selection; not every table is a summary table.
			return "";
		}
		StringBuilder content = new StringBuilder();
		Elements trLists = el.select("tr");
		for (int i = 0; i < trLists.size(); i++) {
			Elements tds = trLists.get(i).select("td");
			String key = "";
			String val = "";
			for (int j = 0; j < tds.size(); j++) {
				String text = tds.get(j).text();
				if (j == 0) {
					key = text;
				} else {
					val = text;
				}
			}
			content.append('|').append(key).append('-').append(val);
		}
		return content.toString();
	}
}


你可能感兴趣的:(Web安全)