Java 解析 HTML（基于 JAXP DOM 解析器）

import java.io.File;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Prints the {@code name} and {@code value} attributes of every {@code <input>}
 * element that is a direct child of the first {@code <form>} in a document.
 *
 * <p>The file is read with the JAXP DOM (XML) parser, so it must be well-formed
 * XML (e.g. XHTML); tag names are matched case-sensitively.
 */
public class ParserHTML {

	/**
	 * Entry point: parses the hard-coded sample file and prints its form inputs.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		String filePath = "d:\\xx.html";
		parseHTML(filePath);
	}

	/**
	 * Parses the file and prints one {@code name=...  value=...} line per
	 * {@code <input>} that is a direct child of the first {@code <form>}.
	 *
	 * <p>A missing {@code name} or {@code value} attribute is printed as an empty
	 * string. Any failure (unreadable file, malformed markup, no form) is reported
	 * on standard output as a single {@code errMsg: ...} line instead of being
	 * propagated.
	 *
	 * @param filePath path of the file to parse
	 */
	private static void parseHTML(String filePath) {
		try {
			Document document = getDocumentInstance(filePath);
			Element root = document.getDocumentElement();

			// NodeList.item() returns null when the index is out of range, i.e.
			// when the document contains no <form> below the root element.
			Node form = root.getElementsByTagName("form").item(0);
			if (form == null) {
				System.out.println("errMsg: no <form> element found in "
						+ filePath);
				return;
			}

			NodeList nodeList = form.getChildNodes();
			for (int i = 0; i < nodeList.getLength(); i++) {
				Node subNode = nodeList.item(i);
				if (subNode.getNodeType() != Node.ELEMENT_NODE) {
					continue; // skip text, comments, etc.
				}
				Element eNode = (Element) subNode;
				if (!"input".equals(eNode.getTagName())) {
					continue;
				}
				// Element.getAttribute() yields "" for an absent attribute, so an
				// <input> without name/value no longer aborts the whole loop.
				String name = eNode.getAttribute("name");
				String value = eNode.getAttribute("value");
				System.out.println("name=" + name + "  value=" + value);
			}
		} catch (Exception e) {
			// getMessage() may be null; fall back to toString() so the exception
			// type is still visible.
			String detail = e.getMessage() != null ? e.getMessage() : e.toString();
			System.out.println("errMsg: " + detail);
		}
	}

	/**
	 * Loads the file at {@code filePath} into a DOM {@link Document}.
	 *
	 * <p>NOTE(review): the factory is used with its default configuration. If this
	 * is ever fed untrusted files, restrict DTD / external-entity processing
	 * first (XXE).
	 *
	 * @param filePath path of the file to parse
	 * @return the parsed DOM document
	 * @throws Exception if the parser cannot be created or the file cannot be
	 *         read or parsed
	 */
	private static Document getDocumentInstance(String filePath)
			throws Exception {
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
		DocumentBuilder db = factory.newDocumentBuilder();
		return db.parse(new File(filePath));
	}
}

 

你可能感兴趣的:(java,html,xml)