XML 处理实践
一: DOM解析
1. 把XML文档以String读出
// Read the whole XML file into a String using the given encoding.
// FileUtils comes from Apache Commons IO (commons-io-1.4.jar).
File docFile = new File(fileName);
// Reuse docFile instead of constructing a second File for the same path.
String result = FileUtils.readFileToString(docFile, UTF_8);
2. 把String写出到XML文档
// Write the XML text held in `contents` to the target file using the given encoding.
// FileUtils comes from Apache Commons IO (commons-io-1.4.jar).
File dest = new File(fileName);
FileUtils.writeStringToFile(dest, contents, UTF_8);
3. String to org.w3c.dom.Document
// DocumentBuilderFactory / DocumentBuilder are the JAXP entry points (xml-apis.jar).
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
// Placeholder: nodeData stands for the XML text that should be parsed.
String nodeData = xml content;
// Wrap the text in a StringReader-backed InputSource and parse it into a DOM Document.
Document result = builder.parse(new InputSource(new StringReader(nodeData)));
4. String to org.apache.xerces.dom.DocumentImpl
// DocumentImpl is the Xerces DOM implementation class (xercesImpl.jar).
DocumentImpl result = null;
// DOMParser is the Xerces DOM parser (xercesImpl.jar).
DOMParser parser = new DOMParser();
// DefaultHandler comes from the SAX API (xml-apis.jar); it is installed here as the error handler.
DefaultHandler defaultHandler = new DefaultHandler();
parser.setErrorHandler(defaultHandler);
// nodeData is the XML text to parse; it is not declared in this snippet.
InputSource is = new InputSource(new StringReader(nodeData));
parser.parse(is);
// The parsed document is cast to the Xerces-specific DocumentImpl type.
result = (DocumentImpl) parser.getDocument();
5. 通过父节点, 返回一个 (所有)的子节点
/**
 * Returns the first element named <code>childTagName</code> found beneath
 * <code>parentElement</code>.
 * <p>
 * The lookup uses <code>getElementsByTagName</code>, so it matches descendants at any
 * depth, not only direct children. When several elements match, only the first is returned.
 * If nothing matches and <code>addChildIfNotPresent</code> is true, a new empty element
 * is created, appended to the parent and returned.
 *
 *@param parentElement element to search beneath; may be null
 *@param childTagName tag name to look for
 *@param addChildIfNotPresent whether to create and append the element when none is found
 *@return the matching (or newly created) element, or null when the parent is null or
 *        nothing was found and creation was not requested
 */
public static Element findChildForParent(Element parentElement, String childTagName, boolean addChildIfNotPresent) {
    if (parentElement == null) {
        return null;
    }
    NodeList list = parentElement.getElementsByTagName(childTagName);
    if (list != null && list.getLength() > 0) {
        return (Element) list.item(0);
    }
    if (!addChildIfNotPresent) {
        return null;
    }
    // addElement takes (parent, tagName, value); a null value creates the element without a text node.
    return addElement(parentElement, childTagName, null);
}
/**
 * Creates an element named <code>childTagName</code> and appends it to
 * <code>parentElement</code>, optionally giving it a text node.
 *
 *@param parentElement element that receives the new child; may be null
 *@param childTagName tag name of the element to create
 *@param value text content for the new element; when null no text node is added
 *@return the newly appended element, or null when <code>parentElement</code> is null
 */
public static Element addElement(Element parentElement, String childTagName, String value) {
    if (parentElement == null) {
        return null;
    }
    Element created = parentElement.getOwnerDocument().createElement(childTagName);
    parentElement.appendChild(created);
    if (value == null) {
        return created;
    }
    Node textNode = parentElement.getOwnerDocument().createTextNode(value);
    created.appendChild(textNode);
    return created;
}
6. 取得text文本的一些方法
a. //xml-apis.jar
/**
 * Returns the value of the first text or CDATA child of an element.
 *
 *@param textElement the element to read from; may be null
 *@return the node value of the first TEXT_NODE or CDATA_SECTION_NODE child, or null
 *        when the element is null or has no such child
 */
public static String textForElement(Element textElement) {
    if (textElement == null) {
        return null;
    }
    // Walk the direct children in document order and stop at the first text-like node.
    for (Node current = textElement.getFirstChild(); current != null; current = current.getNextSibling()) {
        short type = current.getNodeType();
        if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
            return current.getNodeValue();
        }
    }
    return null;
}
b. // CachedXPathAPI come from xalan.jar
//org.w3c.dom.Node implement in xml-apis.jar
/**
 * Evaluates an XPath expression against a context node and returns the text of the
 * selected element.
 *
 *@param xpathToElement XPath expression expected to select an element
 *@param node context node the expression is evaluated against
 *@param cachedXpath evaluator to use; when null the call is forwarded to
 *        <code>textForElement(xpathToElement, node)</code>
 *@return the text obtained from <code>textForElement(Element)</code> for the selected
 *        node, or null when nothing is selected or the evaluation fails
 *@exception CVXMLException declared for callers; failures inside the cached-XPath
 *        branch are logged at debug level and not rethrown
 */
public static String textForElement(String xpathToElement, Node node, org.apache.xpath.CachedXPathAPI cachedXpath) throws CVXMLException {
    if (cachedXpath == null) {
        return textForElement(xpathToElement, node);
    }
    String text = null;
    try {
        if (logger.isDebugEnabled()) {
            logger.debug("getting textForElement using cachedxpath and xpath : " + cachedXpath + " -- " + xpathToElement);
        }
        Node selected = cachedXpath.selectSingleNode(node, xpathToElement);
        if (selected != null) {
            text = textForElement((Element) selected);
        }
    } catch (Throwable failure) {
        // Best effort: any failure (including a non-element match) is only logged, and null is returned.
        if (logger.isDebugEnabled()) {
            logger.debug("Error getting textForElement", failure);
        }
    }
    return text;
}
c. Using xpath get the text from Document.
//XPathAPI. xalan.jar
/**
 * Evaluates an XPath expression against the document element and returns the text of
 * the selected element, using the non-cached <code>XPathAPI</code>.
 *
 *@param xpathToElement XPath expression expected to select an element
 *@param doc document whose root element is the evaluation context
 *@return the text obtained from <code>textForElement(Element)</code> for the selected
 *        node, or null when nothing is selected or the evaluation fails
 *@exception CVXMLException rethrown after the failing XPath has been appended to its log info
 */
public static String textForElement(String xpathToElement, Document doc) throws CVXMLException {
    String results = null;
    try {
        if (logger.isDebugEnabled()) logger.debug("getting textForElement using old way -- no cachedxpathapi");
        // Fixed: evaluate the expression that was passed in; the original referenced an
        // undefined name (xpathExpression) here.
        Node n = XPathAPI.selectSingleNode(doc.getDocumentElement(), xpathToElement);
        if (n != null) {
            results = textForElement((Element) n);
        }
    } catch (CVXMLException e) {
        e.appendLogInfo("Unable to find a node using the xpath : " + xpathToElement);
        throw e;
    } catch (Throwable t) {
        // Best effort: every other failure is only logged at debug level, and null is returned.
        if (logger.isDebugEnabled()) logger.debug("Error getting textForElement", t);
    }
    return results;
}
7. 取得Attribute的方法
//xml-apis.jar
/**
 * Returns the value of a named attribute of an element.
 *
 *@param parentElement element carrying the attribute; may be null
 *@param attributeName name of the attribute to read
 *@return the attribute value, or null when the element is null or has no such attribute
 */
public static String textForAttribute(Element parentElement, String attributeName) {
    // Null-tolerant like the other lookup helpers in these notes, instead of throwing.
    if (parentElement == null) {
        return null;
    }
    // getAttributeNode distinguishes a missing attribute (null) from an empty value ("").
    Attr attribute = parentElement.getAttributeNode(attributeName);
    return attribute == null ? null : attribute.getValue();
}
8. 设置text和attribute的方法
//xml-apis.jar
/**
 * Sets the text of an element, replacing the value of its first text or CDATA child
 * or, when it has none, appending a new CDATA section.
 * <p>
 * Non-empty text is first passed through
 * <code>StringUtils.removeIncompatibilityCharacters</code>. A null text leaves the
 * element untouched.
 *
 *@param textElement element to modify
 *@param textToBeUsed new text; null means "do nothing"
 */
public static void setTextForElement(Element textElement, String textToBeUsed) {
    if (textToBeUsed == null) {
        return;
    }
    String sanitized = textToBeUsed;
    if (sanitized.length() > 0) {
        sanitized = StringUtils.removeIncompatibilityCharacters(sanitized);
    }

    // Find the first direct child that is a text or CDATA node, if any.
    Node target = textElement.getFirstChild();
    while (target != null
            && target.getNodeType() != Node.TEXT_NODE
            && target.getNodeType() != Node.CDATA_SECTION_NODE) {
        target = target.getNextSibling();
    }

    if (target != null) {
        target.setNodeValue(sanitized);
    } else {
        // No text-like child exists: append a CDATA section rather than a plain text node.
        textElement.appendChild(textElement.getOwnerDocument().createCDATASection(sanitized));
    }
}
/**
 * Strips characters that are invalid or discouraged in XML 1.0 text
 * (see http://www.w3.org/TR/2006/REC-xml-20060816/#charsets).
 * <p>
 * Removed: undefined BMP characters, the C0 controls other than TAB (0x09) and
 * LF (0x0A) -- note that CR (0x0D) is removed as well --, DEL and the C1 controls other
 * than NEL (0x85), the noncharacters U+FDD0..U+FDEF, and the two noncharacters at the
 * end of every supplementary plane (U+1FFFE/U+1FFFF .. U+10FFFE/U+10FFFF).
 * <p>
 * The text is walked by code point. Iterating by <code>char</code> can never see a value
 * above 0xFFFF, which made the supplementary-plane checks unreachable.
 *
 *@param textStr text to clean; must not be null
 *@return the text with the incompatible characters removed
 */
public static String removeIncompatibilityCharacters(String textStr) {
    StringBuilder cleaned = new StringBuilder(textStr.length());
    int index = 0;
    while (index < textStr.length()) {
        int codePoint = textStr.codePointAt(index);
        index += Character.charCount(codePoint);
        if (!isIncompatibleCodePoint(codePoint)) {
            cleaned.appendCodePoint(codePoint);
        }
    }
    return cleaned.toString();
}

/** Tells whether a single code point belongs to one of the ranges that must be stripped. */
private static boolean isIncompatibleCodePoint(int codePoint) {
    if (codePoint > 0xFFFF) {
        // Supplementary planes: only the last two code points of each plane are noncharacters.
        return (codePoint & 0xFFFE) == 0xFFFE;
    }
    if (!Character.isDefined((char) codePoint)) {
        return true;
    }
    return codePoint <= 0x08                              // NUL .. BS
            || (codePoint >= 0x0B && codePoint <= 0x1F)     // VT, FF, CR, SO .. US
            || (codePoint >= 0x7F && codePoint <= 0x84)     // DEL and C1 controls before NEL
            || (codePoint >= 0x86 && codePoint <= 0x9F)     // C1 controls after NEL
            || (codePoint >= 0xFDD0 && codePoint <= 0xFDEF); // BMP noncharacter block
}
/**
 * Sets an attribute on an element by delegating to <code>Element.setAttribute</code>.
 *
 *@param parentElement element that receives the attribute
 *@param attributeName name of the attribute to set
 *@param attributeValue value to assign
 */
public static void setTextForAttribute(Element parentElement, String attributeName, String attributeValue) {
parentElement.setAttribute(attributeName, attributeValue);
}
9. 创建Document
a. //xml-apis.jar
/**
 * Creates a new document whose root element is named <code>rootElementName</code>.
 *
 *@param rootElementName name of the root element; null or blank yields no document
 *@param x not used by this method
 *@return the new document, or null when the name is null/blank or the document
 *        could not be created
 */
public static Document createDocumentWithRoot(String rootElementName, boolean x) {
    if (rootElementName == null || rootElementName.trim().equals("")) {
        return null;
    }
    Document newRecordDoc = createDocument(true);
    // createDocument returns null when no builder could be instantiated, so guard
    // before dereferencing instead of failing with a NullPointerException.
    if (newRecordDoc != null) {
        newRecordDoc.appendChild(newRecordDoc.createElement(rootElementName));
    }
    return newRecordDoc;
}
b. //Creates a new document object without a root node being set on it
/**
 * Creates an empty DOM document with no root element.
 *
 *@param x not used by this method
 *@return a new empty document, or null when the builder could not be created (the
 *        failure is reported through <code>ExceptionLogWriter</code>)
 */
public static Document createDocument(boolean x) {
    try {
        return DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    } catch (Exception e) {
        ExceptionLogWriter.logException(e, "DOMHelper.createDocument(). Problem instantiating a new DocumentBuilder");
        return null;
    }
}
10. 替换节点
a. //xml-apis.jar
/**
 * Parses an XML string and renames the root element of the resulting document.
 *
 *@param xml XML text, converted with <code>stringToDocument(xml, true)</code>
 *@param newValue new tag name for the root element
 *@return the parsed document after its root element has been renamed
 *@exception CVXMLException declared for callers; presumably raised by stringToDocument -- verify
 */
public static Document replaceRootNodeName(String xml, String newValue) throws CVXMLException {
Document doc = stringToDocument(xml, true);
replaceRootNodeName(doc, newValue);
return doc;
}
b.
/**
 * Renames the root element of a document by delegating to <code>replaceNodeName</code>
 * with the document element.
 *
 *@param doc document whose root element is renamed
 *@param newValue new tag name for the root element
 */
public static void replaceRootNodeName(Document doc, String newValue) {
replaceNodeName(doc, doc.getDocumentElement(), newValue);
}
c.
/**
 * Renames an element by building a replacement with the new tag name, copying the
 * attributes, moving all children across, and swapping it into the parent.
 *
 *@param doc document that owns <code>element</code>
 *@param element element to rename; it must currently have a parent node
 *@param newValue new tag name
 */
public static void replaceNodeName(Document doc, Element element, String newValue) {
    // Create an element with the new name. The original declared this variable as
    // "element", shadowing the parameter, while the rest of the body used "element2".
    Element element2 = doc.createElement(newValue);

    // Copy the attributes to the new element.
    NamedNodeMap attrs = element.getAttributes();
    for (int i = 0; i < attrs.getLength(); i++) {
        Attr attr2 = (Attr) doc.importNode(attrs.item(i), true);
        element2.getAttributes().setNamedItem(attr2);
    }

    // Move all the children; appendChild detaches each node from the old element.
    while (element.hasChildNodes()) {
        element2.appendChild(element.getFirstChild());
    }

    // Replace the old node with the new node.
    element.getParentNode().replaceChild(element2, element);
}
二. SAX 解析
/**
 * SAX handler that assembles an org.w3c.dom.Document from parse events.
 * Elements created in startElement are appended directly to the root element; this
 * class does not override endElement, so the nesting of the parsed input is not reproduced.
 * An extra attribute can be requested for elements of one given name via addAttribute.
 */
class XMLDocument extends DefaultHandler {
// Document being populated; either supplied to the constructor or created in startDocument.
Document doc;
// Element name / attribute name / attribute value recorded by addAttribute.
String elementName = "";
String attribName = "";
String attribVal = "";
// True once addAttribute has been called.
boolean addAttr = false;
// Most recently created element; characters() appends text to it.
Element currElement;
// Root element that created elements are appended to.
Element root;
// Root element name recorded by setRoot.
String rootName;
// True when a Document was passed to the constructor.
boolean hasDocument = false;
// True once setRoot has been called.
boolean hasRoot = false;
// Number of elements seen by startElement while no root name was set.
int count = 0;
/** Creates a handler without a document; one is created in startDocument. */
XMLDocument() {
super();
hasDocument = false;
}
/**
 * Creates a handler that populates an existing document.
 *
 *@param val document to populate
 */
XMLDocument(Document val) {
super();
doc = val;
hasDocument = true;
}
/**
 * Records the name of the root element, which startDocument then creates.
 *
 *@param val the root element name
 */
public void setRoot(String val) {
rootName = val;
hasRoot = true;
}
/**
 * Requests an additional attribute for elements with the given name. Only the most
 * recent request is kept; it is applied in startElement when a root name is set and
 * the element is not the root element itself.
 *
 *@param element name of the elements that receive the attribute
 *@param attribute name of the attribute to add
 *@param value value of the attribute to add
 */
public void addAttribute(String element, String attribute, String value) {
elementName = element;
attribName = attribute;
attribVal = value;
addAttr = true;
}
/**
 * Prepares the target document. When none was supplied, a new one is created and a
 * root name is required; when a root name is set, the root element is created and
 * appended to the document.
 *
 *@exception SAXException if no document was supplied and either the document cannot
 *           be created or the root element name is null or empty
 */
public void startDocument() throws SAXException {
if (!hasDocument) {
try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder parser = factory.newDocumentBuilder();
doc = parser.newDocument();
} catch (Exception e) {
throw new SAXException(e);
}
// NOTE(review): this check also fires when setRoot was never called, so the
// no-root branch of startElement appears reachable only with a supplied Document -- confirm.
if (rootName == null || rootName.equals("")) {
throw new SAXException("Missing root element name, call setRoot() first");
}
}
if (hasRoot) {
root = (Element)doc.createElement(rootName);
doc.appendChild(root);
}
}
/**
 * Creates a DOM element for the started tag, copies its attributes, and, except for
 * a tag whose name matches the root name, appends it to the root element.
 *
 *@param uri namespace URI reported by the parser (not used here)
 *@param localName local name reported by the parser (not used here)
 *@param rawName qualified name; used as the tag name of the created element
 *@param atts attributes of the started element
 *@exception SAXException declared by the SAX callback signature
 */
public void startElement(String uri, String localName, String rawName, Attributes atts) throws SAXException {
// Assume the first element encountered is the root element
// Check the element to see if it is one we are interested in processing
if (hasRoot) {
// Root name comparison is case-insensitive.
if (!rawName.equalsIgnoreCase(rootName)) {
currElement = (Element)doc.createElement(rawName);
for (int i = 0; i < atts.getLength(); i++) {
currElement.setAttribute(atts.getQName(i), atts.getValue(i));
}
if (rawName.equals(elementName) && addAttr) {
// Add the new attribute if specified
currElement.setAttribute(attribName, attribVal);
}
root.appendChild(currElement);
} else {
// Element with the root's name: created and given its attributes, but not appended anywhere.
currElement = (Element)doc.createElement(rawName);
for (int i = 0; i < atts.getLength(); i++) {
currElement.setAttribute(atts.getQName(i), atts.getValue(i));
}
}
} else {
// Create a new element - used for stringToDoc(String) method
if (count == 0) {
// The first element seen supplies the root element's name.
root = (Element)doc.createElement(rawName);
doc.appendChild(root);
}
// Every element (including the first) is also created again and appended under root.
currElement = (Element)doc.createElement(rawName);
for (int i = 0; i < atts.getLength(); i++) {
currElement.setAttribute(atts.getQName(i), atts.getValue(i));
}
root.appendChild(currElement);
count++;
}
}
/**
 * Appends the reported characters as a text node to the current element, if there is one.
 *
 *@param ch buffer holding the characters
 *@param start index of the first character to use
 *@param length number of characters to use
 *@exception SAXException declared by the SAX callback signature
 */
public void characters(char[] ch, int start, int length) throws SAXException {
Text text = doc.createTextNode(new String(ch, start, length));
if (currElement != null) {
currElement.appendChild(text);
}
}
/**
 * Returns the document held by this handler.
 *
 *@return the document, which may be null if none was supplied and startDocument has not run
 */
public Document document() {
return doc;
}
}
//xml-apis.jar
public Object process( InputSource is ) {
protected List tags = new ArrayList(0);
SAXParserFactory spf = SAXParserFactory.newInstance();
spf.setValidating( strict );
SAXParser sp = spf.newSAXParser();
XMLReader xmlReader = sp.getXMLReader();
xmlReader.setContentHandler( this );
xmlReader.setErrorHandler( this );
if( entityResolver != null ) {
xmlReader.setEntityResolver( entityResolver );
}
xmlReader.parse( is );
if( tags.size() == 0 ) return finish( null );
return finish( tags.remove(0) );
}
// Usage example; conf and rc are defined elsewhere.
MyProcess processor = new ProcessImpl(conf, rc);
// XMLBuilder extends XMLDocument (strategy pattern).
XMLBuilder xb = new XMLBuilder(processor);
// NOTE(review): this passes a StringReader, while the process() shown above takes an InputSource -- presumably another overload exists.
xb.process(new StringReader( ""));
三. xsl parse xml
//xml-apis.jar
/**
 * Applies an XSL stylesheet to an XML string, directing the output into an in-memory
 * StringWriter.
 * NOTE(review): this listing is incomplete -- neither try block has a catch/finally clause
 * and the method's closing brace is missing. The path parameter is not used, xslFileName
 * is not declared here, and the transformed text in writer is never read or returned.
 *
 *@param path not used in the visible code
 *@param documentXml XML text to transform
 */
protected void transformXml(String path, String documentXml) {
// Stylesheet source, read from the file named by xslFileName.
StreamSource xsl = new StreamSource(new File(xslFileName));
// Input document, read from the in-memory string.
StringReader reader = new StringReader(documentXml);
StreamSource xml = new StreamSource(reader);
// Output sink: the result is collected in a StringWriter.
StringWriter writer = new StringWriter();
StreamResult result = new StreamResult(writer);
Transformer transformer = null;
try {
// Compile the stylesheet into a Transformer.
transformer = TransformerFactory.newInstance().newTransformer(xsl);
}
try {
// Only transform when the stylesheet was compiled successfully.
if(null != transformer) {
transformer.transform(xml, result);
}
}
xsl:
<root>
<xsl:template match="/book">
<xsl:if test="date/text()" >
<newnode><xsl:value-of select="date"/></newnode>
</xsl:if>
<xsl:apply-templates select="Auther/lan"/>
</xsl:template>
</root>
<xsl:template match="Auther/lan">
<xsl:if test="position()=1">
<p><xsl:value-of select="key"/></p>
</xsl:if>
</xsl:template>