Java 解析 HTML

import java.io.File;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Prints the {@code name} and {@code value} attributes of every {@code <input>}
 * element found inside the first {@code <form>} of an HTML file.
 *
 * <p>The file is read with the JAXP XML parser, so it has to be well-formed XML
 * (i.e. XHTML). Tag names are matched case-sensitively: only lower-case
 * {@code form} and {@code input} elements are recognised.
 */
public class ParserHTML {

    /** File that is parsed when no path is given on the command line. */
    private static final String DEFAULT_FILE_PATH = "d:\\xx.html";

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the path of the file to parse.
     *             When absent, {@code d:\xx.html} is used.
     */
    public static void main(String[] args) {
        String filePath = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_PATH;
        parseHTML(filePath);
    }

    /**
     * Parses the file and prints one {@code name=...  value=...} line per input
     * element of the first form, in document order.
     *
     * <p>Problems are reported on standard output as a line starting with
     * {@code errMsg: }; nothing is thrown to the caller.
     *
     * @param filePath path of the file to parse
     */
    private static void parseHTML(String filePath) {
        try {
            Document document = getDocumentInstance(filePath);
            Element root = document.getDocumentElement();

            NodeList forms = root.getElementsByTagName("form");
            if (forms.getLength() == 0) {
                // item(0) would be null here; report it instead of failing with an NPE.
                System.out.println("errMsg: no <form> element found in " + filePath);
                return;
            }

            // getElementsByTagName only ever returns Element nodes, so the casts are safe.
            Element form = (Element) forms.item(0);

            // Search all descendants, not just direct children: inputs are usually
            // wrapped in layout elements such as <div>, <p> or <table>.
            NodeList inputs = form.getElementsByTagName("input");
            for (int i = 0; i < inputs.getLength(); i++) {
                Element input = (Element) inputs.item(i);
                // Element.getAttribute yields "" for a missing attribute, so inputs
                // without a name or value (e.g. submit buttons) no longer abort the loop.
                String name = input.getAttribute("name");
                String value = input.getAttribute("value");
                System.out.println("name=" + name + "  value=" + value);
            }
        } catch (Exception e) {
            // Some exceptions carry no message; fall back to the exception's own description.
            String detail = (e.getMessage() != null) ? e.getMessage() : e.toString();
            System.out.println("errMsg: " + detail);
        }
    }

    /**
     * Parses the given file into a DOM document.
     *
     * @param filePath path of the file to parse
     * @return the parsed document
     * @throws Exception if the parser cannot be created or the file cannot be read or parsed
     */
    private static Document getDocumentInstance(String filePath) throws Exception {
        // NOTE(review): the factory is used with its default configuration. If this is
        // ever pointed at untrusted files, DTDs / external entities should be disabled
        // here to prevent XXE; that is not done now because it may change how XHTML
        // files with a DOCTYPE are parsed.
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = factory.newDocumentBuilder();
        return db.parse(new File(filePath));
    }
}

用 Java 解析 HTML 表单以获取 input 的值，源码已经过测试，留作日后备用。

你可能感兴趣的:(java,html,xml)