package com.ri;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.io.IOUtils;
import org.springframework.util.ObjectUtils;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xmlunit.builder.DiffBuilder;
import org.xmlunit.builder.Input;
import org.xmlunit.diff.Comparison;
import org.xmlunit.diff.ComparisonResult;
import org.xmlunit.diff.ComparisonType;
import org.xmlunit.diff.DefaultNodeMatcher;
import org.xmlunit.diff.Diff;
import org.xmlunit.diff.Difference;
import org.xmlunit.diff.DifferenceEvaluator;
import org.xmlunit.diff.ElementSelectors;
public class HtmlDiffExample {
public static void main(String[] args) {
// =====getControlDetails====== CHILD_NODELIST_LENGTH
// =====getControlDetails====== CHILD_LOOKUP
// =====getTestDetails====== CHILD_LOOKUP
// =====getTestDetails====== CHILD_LOOKUP
//由此可见xmlunit对差异会生成响应的类型,但是由于结构和长度标签不同差异getControlDetails或者getTestDetails会为空所以分别判断赋值。也有同时它俩都不为空
String html1 = "Q
";
//html1=Jsoup.parse(html1).body().html();
//html1=Jsoup.parseBodyFragment(html1).html();
String html2 = "Q
Q2
Q
";
//html2=Jsoup.parse(html2).body().html();
//html2=Jsoup.parseBodyFragment(html2).html();
// 创建一个记录并标记差异的DifferenceEvaluator
List<Object> differencesList = new ArrayList<>();
DifferenceEvaluator evaluator = new DifferenceEvaluator() {
@Override
public ComparisonResult evaluate(Comparison comparison, ComparisonResult originalResult) {
differencesList.add(comparison);
return originalResult;
}
};
// 使用builders创建DOM对象,并忽略空白字符差异
Diff myDiff = DiffBuilder.compare(Input.fromString(html1))
.withTest(Input.fromString(html2))
.ignoreWhitespace()
//.withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byName))// 对于元素匹配,我们只关心名称相同的元素(这将忽略属性顺序等差异)
.withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndText))
.withDifferenceEvaluator(evaluator)
.build();
if (myDiff.hasDifferences()) {
Document doc1AsDom;
Document doc2AsDom;
try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(false);
doc1AsDom = factory.newDocumentBuilder().parse(IOUtils.toInputStream(html1));
doc2AsDom = factory.newDocumentBuilder().parse(IOUtils.toInputStream(html2));
XPath xpath = XPathFactory.newInstance().newXPath();
Iterable<Difference> differences = myDiff.getDifferences();
for (Difference difference : differences) {
ComparisonType comparisonType = difference.getComparison().getType();
if (difference.getComparison()!= null&&difference.getComparison().getControlDetails()!= null&&!ObjectUtils.isEmpty(difference.getComparison().getControlDetails().getXPath())) {
System.out.println("=====getControlDetails======"+"\t"+comparisonType);
Node controlNode = (Node) xpath.evaluate(difference.getComparison().getControlDetails().getXPath(), doc1AsDom, XPathConstants.NODE);
// 设置属性以标记差异
Attr attr = null;
switch (comparisonType) {
case CHILD_LOOKUP:
//if (controlNode.getNodeType() != Node.TEXT_NODE) {
//attr=doc1AsDom.createAttribute("data-diff");
attr=controlNode.getOwnerDocument().createAttribute("data-diff");
attr.setValue("deleted");
controlNode.getAttributes().setNamedItem(attr);
//}
break;
case CHILD_NODELIST_LENGTH:
break;
case TEXT_VALUE:
Node parentNode = controlNode.getParentNode();
attr=parentNode.getOwnerDocument().createAttribute("data-diff");
attr.setValue("deleted");
parentNode.getAttributes().setNamedItem(attr);
break;
}
}else if(difference.getComparison()!= null&&difference.getComparison().getTestDetails()!= null&&!ObjectUtils.isEmpty(difference.getComparison().getTestDetails().getXPath())) {
System.out.println("=====getTestDetails======"+"\t"+comparisonType);
switch (comparisonType) {
case CHILD_LOOKUP:
Node testNode = (Node) xpath.evaluate(difference.getComparison().getTestDetails().getXPath(), doc2AsDom, XPathConstants.NODE);
//Attr attr = doc2AsDom.createAttribute("data-diff");
Attr attr = testNode.getOwnerDocument().createAttribute("data-diff");
attr.setValue("update");
testNode.getAttributes().setNamedItem(attr);
break;
}
}
}
// 输出整个带有标记的HTML内容
StringWriter writer1 = new StringWriter();
Transformer transformer1 = TransformerFactory.newInstance().newTransformer();
transformer1.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
transformer1.transform(new DOMSource(doc1AsDom), new StreamResult(writer1));
String modifiedHtml1 = writer1.toString();
StringWriter writer2 = new StringWriter();
Transformer transformer2 = TransformerFactory.newInstance().newTransformer();
transformer2.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
transformer2.transform(new DOMSource(doc2AsDom), new StreamResult(writer2));
String modifiedHtml2 = writer2.toString();
System.out.println("Original HTML with marked differences: " + modifiedHtml1);
System.out.println("Modified HTML with marked differences: " + modifiedHtml2);
} catch (Exception e) {
throw new RuntimeException(e);
}
} else {
System.out.println("Documents are similar.");
}
}
}
/*
Sample console output of HtmlDiffExample.main:

=====getControlDetails====== CHILD_NODELIST_LENGTH
=====getTestDetails====== CHILD_LOOKUP
=====getTestDetails====== CHILD_LOOKUP
Original HTML with marked differences: <html>
<body>
<p>Q</p>
</body>
</html>
Modified HTML with marked differences: <html>
<body>
<p>Q</p>
<h1 data-diff="update">Q2</h1>
<h1 data-diff="update">Q</h1>
</body>
</html>
*/