如何使用Java提取html表单元素

    最近需要用Java提取html表单元素,于是学习了使用htmlparser提取表单元素的一些简单常用的方法,在此总结一下!

第一步:读取指定的html文件

public static String ReadFile(String filepaths) throws IOException, ParserException
{
InputStreamReader istrem=null;
File file=new File(filepaths);
String readStr="";
try {
   istrem=new InputStreamReader(new FileInputStream(file),"unicode");
   BufferedReader iread=new BufferedReader(istrem);
   while(null!=(readStr=iread.readLine()))
   {
//      System.out.println(readStr);
    filepaths+=readStr.toString();
   }
} catch (FileNotFoundException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
}
return filepaths;
}

第二步:下载辅助工具包htmllexer.jar和htmlparser.jar,并导入项目

第三步:定义用于保存待获取信息的类

eg:public class InputT {
// Value holder for the attributes read from one <input> element.

private String id;
    private String name;
    private String onkeydown;
    // NOTE(review): presumably holds the "PropBindings" attribute value — confirm against the caller.
    private String poppding;

// Encapsulate the fields above (getters and setters are omitted in this listing).

}

第四步:将其存入map中

public class MapCollection {

public String id;
private InputT input;
Map<String,InputT> IMap=new HashMap<String, InputT>();
//以次将要用的的类封装到map中以便以后调用

public Map<String, InputT> getIMap() {
IMap.put(id,input);
return IMap;
}
public void setIMap(Map<String, InputT> iMap) {
IMap = iMap;
}


}
}


第五步:获取想要的信息

package com.text;

import java.util.HashMap;
import java.util.Map;
import org.htmlparser.NodeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.Div;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.InputTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.SelectTag;
import org.htmlparser.tags.TextareaTag;
import org.htmlparser.util.NodeList;

public class SplitToHtml {

// Source of the result maps below; all but Smap are obtained from it.
MapCollection mc=new MapCollection();
    // Button descriptors keyed by id (filled by getSpitInput).
    Map<String,Button> map= mc.getMap();   
// Created locally rather than taken from mc. NOTE(review): presumably for <select> elements — confirm.
Map<String,SelectT> Smap=new HashMap<String,SelectT>();
    // Calendar-control descriptors keyed by id (filled by getSpitInput).
    Map<String,RiliT> Rmap=mc.getRmap();
    // Plain text-box descriptors keyed by id (filled by getSpitInput).
    Map<String,InputT> input=mc.getIMap();
    // Text boxes that carry an onkeydown handler, keyed by id (filled by getSpitInput).
    Map<String,Inputdown> inputDw=mc.getIDmap();
    // The remaining maps are not used by the code shown here.
    // NOTE(review): presumably for image, textarea, div and link elements — confirm.
    Map<String,Image> ImgMap=mc.getImgMap();
    Map<String,TextAreaT> TAMap=mc.getTAMap();
    Map<String,Divs> divMap=mc.getDivMap();
    Map<String,Alink> AMap=mc.getAMap();
   
    public void getSpitInput(String cssandtitle) //分隔文本框
    {
NodeFilter filter=new TagNameFilter("input"); //设置过滤器
        NodeList nodelt=Parsers.getParsers(cssandtitle).extractAllNodesThatMatch(filter,true); //节点列表中的匹配filter的节点
        System.out.println("匹配节点个数:"+nodelt.size());
        InputTag link=null;
        for(int i=0;i<nodelt.size();i++)
        {
         link=(InputTag) nodelt.elementAt(i);
         String id=link.getAttribute("id");             //获取熟悉值
      String onclick = link.getAttribute("onclick");      //获取熟悉值
   String ponding = link.getAttribute("PropBindings");     //获取熟悉值
   String name = link.getAttribute("name");               //获取熟悉值
   String onkeydown = link.getAttribute("onkeydown");      //获取熟悉值
   String type = link.getAttribute("type");                //获取熟悉值
   String expression=link.getAttribute("expression");      //获取表达式值
            if(onclick!=null&&ponding==null&&expression!=null) //判断按钮
            {
             Button bt=new Button();
             bt.setExpression(expression);
                bt.setId(id);
             bt.setName(name);
             bt.setOnclick(onclick);
                String ids=bt.getId();
               link.setAttribute("id","<<<<<<<<<<<<<<<<<<"); //修改文本框id的值为"<<<<<<<<<<<<<<<<<<"
               link.setAttribute("onclick", "__________________"); //修改文本框的onclick事件为__________________
//                String str=nodelt.toHtml();
//                System.out.println("???????????????????????");
//                System.out.println(str);
//                System.out.println("???????????????????????");
             map.put(ids,bt);
            }
          if(onclick!=null&&ponding!=null){ //判断日历控件
                RiliT rt=new RiliT();
                rt.setId(id);
                   rt.setName(name);
                   rt.setOnclick(onclick);
                   String key=rt.getId();
                   Rmap.put(key,rt);
             }
           
            if(ponding!=null&&onkeydown==null&&onclick==null){//判断文本框
                InputT t = new InputT();
                   t.setId(id);
                   t.setName(name);
                   t.setPoppding(ponding);
                   String key=t.getId();
                   input.put(key, t);
            }
               if(ponding!=null&&onkeydown!=null){ //带回车事件的文本框
               
                Inputdown ind=new Inputdown();
                ind.setId(id);
                ind.setName(name);
                ind.setOnkeydown(onkeydown);
                String key=ind.getId();
                inputDw.put(key,ind);
               }
        }
    }
  

第六步:调用此方法

public static void main(String[] args) throws IOException, ParserException {

       String filepaths = "http://www.baidu.com/index.html";


        String cssandtitle=ReadFile(filepaths);
        SplitToHtml sph=new SplitToHtml();
       sph.getSplitToSelect(cssandtitle);
        }

你可能感兴趣的:(java,html)