HTML 标签闭合性检测

用于校验 HTML 标签是否合法、是否闭合,使用的是 HtmlParser 开源包。


package com.lhb.client.util;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;

import org.apache.commons.lang.StringUtils;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.PrototypicalNodeFactory;
import org.htmlparser.nodes.RemarkNode;
import org.htmlparser.nodes.TagNode;
import org.htmlparser.tags.CompositeTag;
import org.htmlparser.util.NodeIterator;


public class ValidateHTML {

/** The raw HTML text being validated; kept so tag offsets can be compared against it. */
private final String CONTENT;

/** HTMLParser instance created over {@link #CONTENT}. */
private Parser parser;

/**
 * Creates a validator for the given HTML text.
 *
 * @param content the HTML text to validate; it is stored as-is and handed
 *                to {@code Parser.createParser} together with the charset
 *                name "GBK"
 */
public ValidateHTML(String content) {
    this.CONTENT = content;
    this.parser = Parser.createParser(this.CONTENT, "GBK");
}

/**
 * Loads the classpath resource {@code content.txt} (resolved relative to
 * this class) and decodes its bytes as GBK.
 *
 * @return the decoded text, or an empty string when the resource is
 *         missing, cannot be read, or the GBK charset is unsupported
 */
private static String getContent() {
    InputStream in = ValidateHTML.class.getResourceAsStream("content.txt");
    if (in == null) {
        // getResourceAsStream returns null when the resource does not exist;
        // fall back to the same empty result the other error paths use.
        System.err.println("resource not found: content.txt");
        return "";
    }

    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    try {
        // available() is only an estimate and one read() may deliver fewer
        // bytes than requested, so keep reading until end of stream.
        byte[] chunk = new byte[4096];
        int count;
        while ((count = in.read(chunk)) != -1) {
            buffer.write(chunk, 0, count);
        }
    } catch (IOException e) {
        e.printStackTrace();
        return "";
    } finally {
        // Always release the stream, even when reading failed.
        try {
            in.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    try {
        return buffer.toString("GBK");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
        return "";
    }
}

/**
 * Visits every node supplied by the iterator, descending into each node's
 * children before moving on to the next sibling, and throws at the first
 * problem found.
 *
 * For a tag node the checks run in this order: {@code isClosed},
 * {@code moreTag}, then {@code isIgnored} on its end tag. A tag node with
 * no end tag is skipped without visiting its children. For a remark
 * (comment) node the rendered HTML must end with {@code -->}.
 *
 * @param iterator the nodes to inspect
 * @throws Exception when a tag is malformed, an end tag is surplus, a tag
 *                   is not closed, or a comment is not closed
 */
private void recusive(NodeIterator iterator) throws Exception {
    while (iterator.hasMoreNodes()) {
        final Node current = iterator.nextNode();

        if (current instanceof TagNode) {
            final TagNode tag = (TagNode) current;

            final boolean wellFormed = isClosed(tag);
            if (!wellFormed) {
                throw new Exception("发现不完整的错误标签");
            }

            final boolean surplusEndTag = moreTag(tag);
            if (surplusEndTag) {
                throw new Exception("发现多余的结束标签");
            }

            final TagNode closing = (TagNode) tag.getEndTag();
            if (closing == null) {
                // No end tag: nothing further to verify, move to the next node.
                continue;
            }

            if (isIgnored(closing)) {
                throw new Exception("发现没有闭合的标签");
            }
        } else if (current instanceof RemarkNode) {
            final String remarkHtml = ((RemarkNode) current).toHtml();
            if (!remarkHtml.endsWith("-->")) {
                throw new Exception("发现没有闭合的注释标签");
            }
        }

        // Only nodes that actually carry children are descended into.
        if (current.getChildren() != null) {
            recusive(current.getChildren().elements());
        }
    }
}

/**
 * Checks whether the given end tag is missing from the original text.
 *
 * The slice of {@code CONTENT} starting at the tag's begin offset is
 * compared, ignoring case, with the literal closing tag
 * {@code </NAME>}; a mismatch means the end tag is not written in the
 * source at that position.
 * NOTE(review): assumes {@code getTagBegin()} is a character offset into
 * {@code CONTENT} — confirm against the HTMLParser documentation.
 *
 * @param tagNode the end tag reported by the parser
 * @return {@code true} when the text at the tag's offset is not
 *         {@code </NAME>} (or no text could be extracted), {@code false}
 *         when the closing tag is literally present
 */
private boolean isIgnored(TagNode tagNode) {
    String tagName = tagNode.getTagName();
    int position = tagNode.getTagBegin();

    // The expected text is "</" + name + ">", i.e. name.length() + 3 characters.
    String expected = "</" + tagName + ">";
    String actual = StringUtils.substring(CONTENT, position, position + expected.length());
    if (actual == null) {
        return true;
    }

    // Comparing against an empty literal would flag every non-empty slice;
    // the slice has to be matched against the closing tag itself.
    return !actual.equalsIgnoreCase(expected);
}


private boolean moreTag(TagNode tagNode) {
return tagNode.toHtml().startsWith(" }


//检测不完整的标签或错误标签,例如

你可能感兴趣的:(技术交流)