大部分程序员,平时工作中除了与Bug相伴之外,想必也会和多种多样的文件打交道吧。当然,XML 就是其中之一,获取交互数据,创建规则等等,都离不开它。XML是个非常强大的描述语言,相比而言,txt之流则功力较弱了些。
XML那么重要,但解析XML的工作却繁杂无聊。原因如下:解析XML工具繁杂,每个人掌握的东西不一样;学习新工具又浪费时间;解析规则只是个体力活。
程序员的辛苦,也就体现于此了。一般程序员总是抱怨忙啊忙,累啊累。我们作为程序员来说,更应该想方设法把东西提炼一下,减少重复的工作。 说这些题外话,是有原因的。我现在的大部分同事,加班加点在那加班,貌似很辛苦,看一下他们的工作,让人很无奈。代码是一大段一大段的写,确实一晚上写个百十行,是非常“有成就感”的。
如生成XML 文件,用拼串的方式实现。一个小小的xml,往往都50多行。让人很无语。
解析XML, 往往都需要有一套解析规则。而我们可以动态创建解析规则,然后根据规则将xml文件数据放到Map中。思路简单至极。其他扩展,暂时不考虑。
主要步骤 分为2部分
第一部分 使用Digester实现可配置的规则树结构
1) 规则节点对象
/**
 * One node of the configurable rule tree.
 *
 * <p>A node carries the name of the XML element it stands for, an optional
 * comma-separated list of attribute names, and its child nodes. It is a plain
 * bean with a public no-argument constructor and getter/setter pairs for
 * {@code nodeName} and {@code attrs}.
 */
public class Node {

    /** Name of the XML element this node describes. */
    private String nodeName;

    /** Child nodes, in the order in which they were added. */
    private final List<Node> children = new ArrayList<Node>();

    /** Attribute names, separated by commas; empty when there are none. */
    private String attrs = "";

    /**
     * @return the comma-separated attribute names (empty string by default)
     */
    public String getAttrs() {
        return attrs;
    }

    /**
     * @param values comma-separated attribute names
     */
    public void setAttrs(String values) {
        this.attrs = values;
    }

    /**
     * @return the element name this node describes
     */
    public String getNodeName() {
        return nodeName;
    }

    /**
     * @param nodeName the element name this node describes
     */
    public void setNodeName(String nodeName) {
        this.nodeName = nodeName;
    }

    /**
     * @return the live list of child nodes (not a copy)
     */
    public List<Node> getChildren() {
        return children;
    }

    /**
     * Appends a child node.
     *
     * @param node the child to append
     */
    public void addChildren(Node node) {
        this.children.add(node);
    }

    @Override
    public String toString() {
        return this.nodeName + ":" + "have" + this.getChildren().size() + "子节点";
    }
}
2) 定义xml 解析规则文件,当然也可以用编码实现。
<?xml version="1.0"?>
<!DOCTYPE digester-rules PUBLIC "-//Jakarta Apache //DTD digester-rules XML V1.0//EN" "http://jakarta.apache.org/commons/digester/dtds/digester-rules.dtd">
<digester-rules>
  <pattern value="root">
    <object-create-rule classname="com.aisino.common.parse.Node"/>
    <set-properties-rule/>
    <set-next-rule methodname="add"/>
    <pattern value="level1">
      <object-create-rule classname="com.aisino.common.parse.Node"/>
      <set-properties-rule/>
      <set-next-rule methodname="addChildren"/>
      <pattern value="level2">
        <object-create-rule classname="com.aisino.common.parse.Node"/>
        <set-properties-rule/>
        <!--
        <call-method-rule methodname="setNodeName" paramcount="1" paramtypes="java.lang.String"/>
        <call-param-rule paramnumber="0"/>
        -->
        <pattern value="level3">
          <object-create-rule classname="com.aisino.common.parse.Node"/>
          <set-properties-rule/>
          <set-next-rule methodname="addChildren"/>
          <pattern value="level4">
            <object-create-rule classname="com.aisino.common.parse.Node"/>
            <set-properties-rule/>
            <set-next-rule methodname="addChildren"/>
            <pattern value="level5">
              <object-create-rule classname="com.aisino.common.parse.Node"/>
              <set-properties-rule/>
              <set-next-rule methodname="addChildren"/>
            </pattern>
          </pattern>
        </pattern>
        <set-next-rule methodname="addChildren"/>
      </pattern>
    </pattern>
  </pattern>
</digester-rules>
3) 规则解析类RuleLoader
public class RuleLoader {
private static final Log logger = LogFactory.getLog(RuleLoader. class);
private URL digesterRulesURL;
private URL fileURL;
/**
 * Builds a loader from two location strings.
 *
 * <p>Both strings are turned into URLs by {@code getURL}, the rule location
 * first and the file location second, and then handed to the URL-based
 * constructor.
 *
 * @param rulePath location of the Digester rule definition
 * @param filePath location of the XML file the rules are applied to
 */
public RuleLoader(String rulePath,String filePath){
    // Arguments are evaluated left to right, so rulePath is resolved before filePath.
    this(getURL(rulePath), getURL(filePath));
}
/**
 * Builds a loader from already-resolved URLs.
 *
 * @param digesterRulesURL URL of the Digester rule definition
 * @param fileURL          URL of the XML file the rules are applied to
 */
public RuleLoader(URL digesterRulesURL,URL fileURL){
    this.fileURL = fileURL;
    this.digesterRulesURL = digesterRulesURL;
}
/**
 * Creates a loader that applies the bundled {@code xml-rules.xml} rule
 * definition to the given file.
 *
 * @param filePath location of the XML file to read, resolved by {@code getURL}
 * @return a loader wired to the bundled rule definition and the given file
 */
public static RuleLoader getXmlRuleLoader(String filePath){
    URL bundledRules = getURL( "classpath:com/aisino/common/parse/xml-rules.xml");
    URL target = getURL(filePath);
    return new RuleLoader(bundledRules, target);
}
/**
 * Applies the Digester rule definition at {@code digesterRulesURL} to the XML
 * file at {@code fileURL}, using a fresh list as the root object.
 *
 * <p>Writing a custom rule definition requires familiarity with Digester's
 * rule syntax.
 *
 * <p>Failures are logged together with their cause and are not rethrown; in
 * that case the list is returned in whatever state it reached.
 *
 * @return the root list handed to Digester; never {@code null}
 */
public List parseRules(){
    List root = new ArrayList();
    try {
        DigesterLoader.load(digesterRulesURL, getClass().getClassLoader(), fileURL, root);
    } catch (IOException e) {
        // Pass the exception along so the log keeps the stack trace.
        logger.error( "IOException", e);
    } catch (SAXException e) {
        logger.error( "SAX error", e);
    } catch (DigesterLoadingException e) {
        logger.error( "an error occurs while parsing XML into Digester rules", e);
    }
    return root;
}
/**
 * Parses XML data and stores the values in a map.
 *
 * <p>The structure to extract is described by a rule file whose elements are
 * named {@code root}, {@code level1} ... {@code level5}; at most five levels
 * below the root are supported. Each element names the data element it
 * stands for through its {@code nodeName} attribute, for example:
 *
 * <pre>
 * &lt;root nodeName="dataExchangePackage"&gt;
 *   &lt;level1 nodeName="envelopeInfo"&gt;
 *     &lt;level2 nodeName="sourceID" /&gt;
 *     &lt;level2 nodeName="destinationID" /&gt;
 *     &lt;level2 nodeName="destinationAppID" /&gt;
 *     &lt;level2 nodeName="businessType" /&gt;
 *     &lt;level2 nodeName="globalBusinessID" /&gt;
 *   &lt;/level1&gt;
 *   &lt;level1&gt;
 *     ...
 *   &lt;/level1&gt;
 * &lt;/root&gt;
 * </pre>
 *
 * <p>The returned map holds the top-level entries of the parsed map as they
 * are, plus one entry per non-map value keyed by its slash-separated path
 * (built by {@code afterRootMap}). A leaf key therefore yields a string and
 * a non-leaf key yields a nested {@code Map}. The original notes give
 * {@code /dataExchangePackage/envelopeInfo/sourceID} as the key for
 * {@code sourceID}.
 * NOTE(review): the exact leading path segment depends on
 * {@code MapSetNextRule}, which is not visible here - confirm.
 *
 * @param filePath location of the rule file describing the structure
 * @param input    the XML data to parse
 * @return the populated map, or {@code null} when no rules were loaded, when
 *         the parse produced no root object, or when parsing failed
 */
public static Map reParseRules(String filePath,InputStream input){
    List rules = RuleLoader.getXmlRuleLoader(filePath).parseRules();
    if (rules == null || rules.isEmpty()) {
        return null;
    }

    Digester digester = new Digester();
    Node root = (Node) rules.get(0); // root node of the rule tree
    MapSetNextRule rule = new MapSetNextRule( "put");
    addRule2Dister(digester, root, "", rule, true);

    try {
        Map parseMap = (Map) digester.parse(input);
        if (parseMap == null) {
            // Without this guard putAll(null) would throw a NullPointerException.
            logger.error( "Digester produced no root object for rule file " + filePath);
            return null;
        }
        Map valueMap = new HashMap();
        valueMap.putAll(parseMap);
        afterRootMap(parseMap, valueMap, "");
        return valueMap;
    } catch (IOException e) {
        logger.error( "I/O error while parsing XML data with rule file " + filePath, e);
    } catch (SAXException e) {
        logger.error( "XML data could not be parsed with rule file " + filePath, e);
    }
    return null;
}
/**
 * Recursively flattens a nested map into {@code destMap}.
 *
 * <p>Every value that is not itself a {@code Map} is stored in
 * {@code destMap} under {@code pattern + "/" + key}; nested maps are
 * descended into with that extended path as the new prefix.
 *
 * @param valueMap the (possibly nested) map to walk
 * @param destMap  receives the flattened path-to-value entries
 * @param pattern  path prefix accumulated so far
 */
private static void afterRootMap(Map valueMap,Map destMap,String pattern){
    for (Object item : valueMap.entrySet()) {
        Map.Entry entry = (Map.Entry) item;
        Object value = entry.getValue();
        String path = pattern + "/" + entry.getKey();
        if (!(value instanceof Map)) {
            logger.debug(path + ">>>>对应元素>" + value + " 放入返回栈中");
            destMap.put(path, value);
            continue;
        }
        // Nested map: keep descending with the longer path.
        afterRootMap((Map) value, destMap, path);
    }
}
/**
 * Resolves a location string to a URL.
 *
 * <p>Locations starting with {@code ResourceHelper.CLASSPATH_URL_PREFIX} are
 * delegated to {@code ResourceHelper.getURL}. Locations starting with
 * {@code ResourceHelper.FILE_URL_PREFIX} have that leading prefix removed and
 * the remainder is treated as a file-system path.
 *
 * @param resourceLocation the prefixed location to resolve
 * @return the resolved URL, or {@code null} when the location has neither
 *         prefix or cannot be resolved (the reason is logged)
 */
private static URL getURL(String resourceLocation){
    try {
        if (resourceLocation.startsWith(ResourceHelper.CLASSPATH_URL_PREFIX)) {
            return ResourceHelper.getURL(resourceLocation);
        }
        if (resourceLocation.startsWith(ResourceHelper.FILE_URL_PREFIX)) {
            // Strip only the leading prefix; a replace-all would also mangle
            // any later occurrence of the same text inside the path.
            String path = resourceLocation.substring(ResourceHelper.FILE_URL_PREFIX.length());
            return new File(path).toURI().toURL();
        }
        logger.error( "Unsupported resource location (no classpath or file prefix): " + resourceLocation);
    } catch (Exception e) {
        // A null location also ends up here, as it did before.
        logger.error( "解析XML路径时,出错", e);
    }
    return null;
}
/**
 * Recursively registers Digester rules for a rule-tree node and its children.
 *
 * <p>A node with children gets a {@code HashMap} created for its element,
 * one two-argument {@code put} call per configured attribute (attribute name
 * as first argument, attribute value as second) and, unless it is the root,
 * the supplied {@code rule}. A node without children gets a single
 * two-argument {@code put} call whose first argument is the node name and
 * whose second argument is registered through
 * {@code addCallParam(pattern, 1)}.
 *
 * <p>The order in which rules are registered is kept deliberately; do not
 * reorder the Digester calls.
 *
 * @param digester the Digester receiving the rules
 * @param node     the current node
 * @param pattern  match pattern of the parent; blank for the root
 * @param rule     rule registered on every non-root node that has children
 * @param isRoot   whether {@code node} is the root node
 */
private static void addRule2Dister(Digester digester,Node node,String pattern,MapSetNextRule rule, boolean isRoot){
    String fullPattern = StringUtils.isNotBlank(pattern)
            ? pattern + "/" + node.getNodeName()
            : node.getNodeName();

    if (node.getChildren().isEmpty()) {
        // Leaf node
        logger.debug( "add rules >>> digester.addCallMethod("+fullPattern+ ", \"put\", 2)");
        digester.addCallMethod(fullPattern, "put", 2);
        logger.debug( "add rules >>> digester.addObjectParam("+fullPattern+ ",0, "+node.getNodeName()+ ")");
        digester.addObjectParam(fullPattern, 0, node.getNodeName());
        logger.debug( "add rules >>> digester.addCallParam("+fullPattern+ ",1)");
        digester.addCallParam(fullPattern, 1);
        return;
    }

    logger.debug( " add rules >>> digester.addObjectCreate("+fullPattern+ ", HashMap.class);");
    digester.addObjectCreate(fullPattern, HashMap. class);

    if (StringUtils.isNotBlank(node.getAttrs())) {
        String[] attrs = StringUtils.split(node.getAttrs(), ",");
        logger.debug(fullPattern+ "有属性:"+ToStringBuilder.reflectionToString(attrs));
        for (String attr : attrs) {
            logger.debug( " add rules >>> digester.addCallMethod("+fullPattern+ ",\"put\", 2)");
            logger.debug( " add rules >>> digester.addObjectParam("+fullPattern+ ",0, "+attr+ ")");
            logger.debug( " add rules >>> digester.addCallParam("+fullPattern+ ",1, "+attr+ ")");
            digester.addCallMethod(fullPattern, "put", 2);
            digester.addObjectParam(fullPattern, 0, attr);
            digester.addCallParam(fullPattern, 1, attr);
        }
    }

    if (!isRoot) { // not the root node
        logger.debug( " add rules >>> digester.addRule("+fullPattern+ ", rule);");
        digester.addRule(fullPattern, rule);
    }

    for (Object child : node.getChildren()) {
        addRule2Dister(digester, (Node) child, fullPattern, rule, false);
    }
}
...
}
XML阅读器
public class XmlReader {
private static final Log logger = LogFactory.getLog(XmlReader. class);
private String rulePath;
private String filePath;
private RuleLoader rule;
/**
 * Creates a reader bound to one rule definition.
 *
 * @param rulePath location of the rule definition, passed on to
 *                 {@code RuleLoader} on every {@code read} call
 */
public XmlReader(String rulePath){
this.rulePath = rulePath;
}
/**
 * Applies this reader's rule definition to the given file.
 *
 * <p>A new {@code RuleLoader} is created on each call and kept in the
 * {@code rule} field.
 *
 * @param filePath location of the XML file to read
 * @return whatever {@code RuleLoader.parseRules()} returns for that file
 */
public List read(String filePath){
    RuleLoader loader = new RuleLoader( this.rulePath, filePath);
    this.rule = loader;
    return loader.parseRules();
}
...
}
测试类
/**
 * Demo entry point: parses the sample data file with the sample rule file
 * and prints three of the extracted values.
 *
 * @param args unused
 * @throws Exception if the sample data is missing from the classpath or the
 *                   stream cannot be closed
 */
public static void main(String args[]) throws Exception{
    InputStream resourceAsStream = Thread.currentThread().getContextClassLoader()
            .getResourceAsStream( "com/aisino/common/parse/101R.xml");
    if (resourceAsStream == null) {
        // getResourceAsStream returns null when the resource does not exist.
        throw new IllegalStateException(
                "Sample data not found on the classpath: com/aisino/common/parse/101R.xml");
    }
    try {
        Map m = RuleLoader.reParseRules(
                "classpath:com/aisino/common/parse/xml-value-example.xml", resourceAsStream);
        if (m == null) {
            // reParseRules returns null on failure and logs the reason.
            System.err.println("Parsing failed; see the log for details.");
            return;
        }
        System.out.println(m.get( "/dataExchangePackage/envelopeInfo/destinationID"));
        System.out.println(m.get( "/dataExchangePackage/transferInfo/age"));
        System.out.println(m.get( "/dataExchangePackage/transferInfo/sex"));
    } finally {
        // Release the stream even when parsing fails.
        resourceAsStream.close();
    }
}
测试规则文件(定义XML解析规则)
暂时只支持5级。当然可以无限增加至6级别,8级...
Digester 好像不支持通配符吧。在这一点上确实没有做到充分的可扩展,但是5级已经可以满足应用了。
<?xml version="1.0"?>
<root nodeName="dataExchangePackage">
  <level1 nodeName="envelopeInfo">
    <level2 nodeName="sourceID"/>
    <level2 nodeName="destinationID"/>
    <level2 nodeName="destinationAppID"/>
    <level2 nodeName="businessType"/>
    <level2 nodeName="globalBusinessID"/>
  </level1>
  <level1 nodeName="transferInfo" attrs="age,sex">
    <level2 nodeName="senderID"/>
    <level2 nodeName="receiverID"/>
    <level2 nodeName="isRetry"/>
    <level2 nodeName="sendTime"/>
    <level2 nodeName="messageID"/>
    <level2 nodeName="sourceMessageID"/>
  </level1>
  <level1 nodeName="contentControl">
    <level2 nodeName="zip">
      <level3 nodeName="isZip"/>
    </level2>
    <level2 nodeName="encrypt">
      <level3 nodeName="isEncrypt"/>
    </level2>
    <level2 nodeName="code">
      <level3 nodeName="isCode"/>
    </level2>
  </level1>
  <level1 nodeName="packageInfo">
    <level2 nodeName="subPackage">
      <level3 nodeName="sequence"/>
      <level3 nodeName="content">
        <level4 nodeName="dj_nsrxx">
          <level5 nodeName="nsrsbh"/>
          <level5 nodeName="swjg_dm"/>
          <level5 nodeName="nsrdzdah"/>
          <level5 nodeName="nsrmc"/>
          <level5 nodeName="zjhm"/>
          <level5 nodeName="scjydz"/>
          <level5 nodeName="bsrmc"/>
          <level5 nodeName="dhhm"/>
          <level5 nodeName="djzclx_dm"/>
          <level5 nodeName="nsrzt_dm"/>
          <level5 nodeName="nsr_swjg_dm"/>
        </level4>
      </level3>
    </level2>
  </level1>
  <level1 nodeName="returnState">
    <level2 nodeName="returnCode"></level2>
    <level2 nodeName="returnMessageID"></level2>
    <level2 nodeName="returnMessage"></level2>
  </level1>
</root>
测试数据
<?xml version="1.0" encoding="GBK"?>
<dataExchangePackage xmlns="http://www.chinatax.gov.cn/tirip/dataspec" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.chinatax.gov.cn/tirip/dataspec/dataExchangePackage.xsd" version="SW5001">
  <envelopeInfo>
    <sourceID>SJ</sourceID>
    <destinationID>YD</destinationID>
    <destinationAppID>SJQZ</destinationAppID>
    <businessType>WLFP101</businessType>
    <globalBusinessID>SJWLFP1012012011100184241</globalBusinessID>
  </envelopeInfo>
  <transferInfo age="88777" sex="男">
    <senderID>SJ</senderID>
    <receiverID>YD</receiverID>
    <isRetry/>
    <sendTime>2012-01-11 13:14:57:759</sendTime>
    <messageID>SJWLFP1012012011100184241</messageID>
    <sourceMessageID/>
  </transferInfo>
  <contentControl>
    <zip>
      <isZip>false</isZip>
    </zip>
    <encrypt>
      <isEncrypt>false</isEncrypt>
    </encrypt>
    <code>
      <isCode>false</isCode>
    </code>
  </contentControl>
  <packageInfo>
    <subPackage>
      <sequence>1</sequence>
      <content>
        <dj_nsrxx>
          <nsrsbh>350583729702365</nsrsbh>
          <swjg_dm>13505833100</swjg_dm> <xgrq/>
          <nsrdzdah>350502001008324</nsrdzdah>
          <nsrmc>南安市水头康利石材有限公司</nsrmc>
          <zjhm>130302610511351</zjhm>
          <scjydz>南安市水头镇西锦村</scjydz>
          <bsrmc>陈姿颖</bsrmc>
          <dhhm>6981988</dhhm>
          <djzclx_dm>230</djzclx_dm>
          <nsrzt_dm>21</nsrzt_dm>
          <nsr_swjg_dm>13505833100</nsr_swjg_dm>
        </dj_nsrxx>
      </content>
    </subPackage>
  </packageInfo>
  <returnState>
    <returnCode>00</returnCode>
    <returnMessageID>YDWLFP1012012011100000001</returnMessageID>
    <returnMessage>成功</returnMessage>
  </returnState>
</dataExchangePackage>
测试结果
/dataExchangePackage/envelopeInfo/destinationID>>>>YD
/dataExchangePackage/transferInfo/age>>>88777
/dataExchangePackage/transferInfo/sex>>>男