xmlunit比较HTML文档差异

package com.ri;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.io.IOUtils;
import org.springframework.util.ObjectUtils;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xmlunit.builder.DiffBuilder;
import org.xmlunit.builder.Input;
import org.xmlunit.diff.Comparison;
import org.xmlunit.diff.ComparisonResult;
import org.xmlunit.diff.ComparisonType;
import org.xmlunit.diff.DefaultNodeMatcher;
import org.xmlunit.diff.Diff;
import org.xmlunit.diff.Difference;
import org.xmlunit.diff.DifferenceEvaluator;
import org.xmlunit.diff.ElementSelectors;

public class HtmlDiffExample {

    public static void main(String[] args) {
//    	=====getControlDetails======	CHILD_NODELIST_LENGTH
//		=====getControlDetails======	CHILD_LOOKUP
//		=====getTestDetails======	CHILD_LOOKUP
//		=====getTestDetails======	CHILD_LOOKUP
    	//由此可见xmlunit对差异会生成响应的类型,但是由于结构和长度标签不同差异getControlDetails或者getTestDetails会为空所以分别判断赋值。也有同时它俩都不为空

        String html1 = "

Q

"
; //html1=Jsoup.parse(html1).body().html(); //html1=Jsoup.parseBodyFragment(html1).html(); String html2 = "

Q

Q2

Q

"
; //html2=Jsoup.parse(html2).body().html(); //html2=Jsoup.parseBodyFragment(html2).html(); // 创建一个记录并标记差异的DifferenceEvaluator List<Object> differencesList = new ArrayList<>(); DifferenceEvaluator evaluator = new DifferenceEvaluator() { @Override public ComparisonResult evaluate(Comparison comparison, ComparisonResult originalResult) { differencesList.add(comparison); return originalResult; } }; // 使用builders创建DOM对象,并忽略空白字符差异 Diff myDiff = DiffBuilder.compare(Input.fromString(html1)) .withTest(Input.fromString(html2)) .ignoreWhitespace() //.withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byName))// 对于元素匹配,我们只关心名称相同的元素(这将忽略属性顺序等差异) .withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndText)) .withDifferenceEvaluator(evaluator) .build(); if (myDiff.hasDifferences()) { Document doc1AsDom; Document doc2AsDom; try { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(false); doc1AsDom = factory.newDocumentBuilder().parse(IOUtils.toInputStream(html1)); doc2AsDom = factory.newDocumentBuilder().parse(IOUtils.toInputStream(html2)); XPath xpath = XPathFactory.newInstance().newXPath(); Iterable<Difference> differences = myDiff.getDifferences(); for (Difference difference : differences) { ComparisonType comparisonType = difference.getComparison().getType(); if (difference.getComparison()!= null&&difference.getComparison().getControlDetails()!= null&&!ObjectUtils.isEmpty(difference.getComparison().getControlDetails().getXPath())) { System.out.println("=====getControlDetails======"+"\t"+comparisonType); Node controlNode = (Node) xpath.evaluate(difference.getComparison().getControlDetails().getXPath(), doc1AsDom, XPathConstants.NODE); // 设置属性以标记差异 Attr attr = null; switch (comparisonType) { case CHILD_LOOKUP: //if (controlNode.getNodeType() != Node.TEXT_NODE) { //attr=doc1AsDom.createAttribute("data-diff"); attr=controlNode.getOwnerDocument().createAttribute("data-diff"); attr.setValue("deleted"); controlNode.getAttributes().setNamedItem(attr); //} break; case CHILD_NODELIST_LENGTH: break; case TEXT_VALUE: Node parentNode = controlNode.getParentNode(); attr=parentNode.getOwnerDocument().createAttribute("data-diff"); attr.setValue("deleted"); parentNode.getAttributes().setNamedItem(attr); break; } }else if(difference.getComparison()!= null&&difference.getComparison().getTestDetails()!= null&&!ObjectUtils.isEmpty(difference.getComparison().getTestDetails().getXPath())) { System.out.println("=====getTestDetails======"+"\t"+comparisonType); switch (comparisonType) { case CHILD_LOOKUP: Node testNode = (Node) xpath.evaluate(difference.getComparison().getTestDetails().getXPath(), doc2AsDom, XPathConstants.NODE); //Attr attr = doc2AsDom.createAttribute("data-diff"); Attr attr = testNode.getOwnerDocument().createAttribute("data-diff"); attr.setValue("update"); testNode.getAttributes().setNamedItem(attr); break; } } } // 输出整个带有标记的HTML内容 StringWriter writer1 = new StringWriter(); Transformer transformer1 = TransformerFactory.newInstance().newTransformer(); transformer1.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); transformer1.transform(new DOMSource(doc1AsDom), new StreamResult(writer1)); String modifiedHtml1 = writer1.toString(); StringWriter writer2 = new StringWriter(); Transformer transformer2 = TransformerFactory.newInstance().newTransformer(); transformer2.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); transformer2.transform(new DOMSource(doc2AsDom), new StreamResult(writer2)); String modifiedHtml2 = writer2.toString(); System.out.println("Original HTML with marked differences: " + modifiedHtml1); System.out.println("Modified HTML with marked differences: " + modifiedHtml2); } catch (Exception e) { throw new RuntimeException(e); } } else { System.out.println("Documents are similar."); } } }
=====getControlDetails======	CHILD_NODELIST_LENGTH
=====getTestDetails======	CHILD_LOOKUP
=====getTestDetails======	CHILD_LOOKUP
Original HTML with marked differences: <html>
<body>
<p>Q</p>
</body>
</html>

Modified HTML with marked differences: <html>
<body>
<p>Q</p>
<h1 data-diff="update">Q2</h1>
<h1 data-diff="update">Q</h1>
</body>
</html>


你可能感兴趣的:(java进阶综合提升,java)