Java知识积累——Sax解析xml文档

整体思路:Sax解析以流(Stream)的方式读入xml文档,边读取边解析,不像Dom解析那样需要把整个文档一次性加载到内存中,因此内存占用更小,更适合解析较大的文档。Sax解析需要继承DefaultHandler,并重写其4个方法:startDocument(文档开始时调用)、startElement 和 endElement(分别处理开始标签<>和结束标签</>,即ElementNode)、characters(处理标签之间的文本,即TextNode)。下面看一个例子:此程序将xml文件中的各个元素标签解析成对应的TagBean实例,每对标签对应一个实例,并按树状结构保存标签之间的父子关系。因为我的目的是解析Spring IoC的配置信息,所以这4个方法的处理都是基于Spring IoC的配置规则实现的;当你需要解析其他特定的xml文档时,只要修改这4个方法中的解析规则即可。

TagBean的代码:

 /**
  * One XML element captured during parsing: its name, its attributes and
  * its child elements, forming a tree.
  */
 public class TagBean {

     /** Element name as set through {@link #setName(String)}. */
     private String name;

     /** Attribute name to value; stays {@code null} until set or first requested. */
     private Map<String, String> attrList = null;

     /** Direct children, in the order they were added. */
     private ArrayList<TagBean> childList = new ArrayList<TagBean>();

     /**
      * Prints this element and, recursively, all of its descendants to
      * {@code System.out}, one element per line.
      *
      * <p>Each line is {@code count} spaces, the name, a colon, and then every
      * attribute as {@code key--value} followed by a tab. Children are printed
      * with an indentation of {@code count + 1}.
      *
      * @param count number of leading spaces for this element's line
      */
     public void print(int count) {
         StringBuilder line = new StringBuilder();
         for (int pad = count; pad > 0; pad--) {
             line.append(' ');
         }
         line.append(name).append(':');

         if (attrList != null) {
             for (Map.Entry<String, String> attr : attrList.entrySet()) {
                 line.append(attr.getKey())
                     .append("--")
                     .append(attr.getValue())
                     .append("\t");
             }
         }
         System.out.println(line);

         if (childList.isEmpty()) {
             return;
         }
         int childDepth = count + 1;
         for (TagBean child : childList) {
             child.print(childDepth);
         }
     }

     /** @return the element name, or {@code null} if none has been set */
     public String getName() {
         return this.name;
     }

     /** @param name the element name to store */
     public void setName(String name) {
         this.name = name;
     }

     /**
      * Returns the attribute map, creating an empty one on first access so the
      * caller can always add entries to the result.
      *
      * @return the attribute map, never {@code null}
      */
     public Map<String, String> getAttrList() {
         if (this.attrList != null) {
             return this.attrList;
         }
         this.attrList = new HashMap<String, String>();
         return this.attrList;
     }

     /** @param attrList the map to use as this element's attributes */
     public void setAttrList(Map<String, String> attrList) {
         this.attrList = attrList;
     }

     /** @return the list holding this element's direct children */
     public ArrayList<TagBean> getChildList() {
         return this.childList;
     }

     /**
      * Appends one child element; despite the setter-style name the existing
      * children are kept.
      *
      * @param childTag the child to append
      */
     public void setChildList(TagBean childTag) {
         this.childList.add(childTag);
     }

 }

解析服务的代码:

 1 public class SaxParsingService extends DefaultHandler{

 2     private ArrayList<TagBean> tagList = null;  

 3     private TagBean tag = null;

 5     private Stack<String> tagNameStack = null;//标记当前在处理的tag的隶属关系,遇到<>时入栈,遇到 </>时出栈

 6     private Stack<TagBean> tagBeanStack = null;

 7     private TagBean rootTag = null;

 8       

 9     public TagBean getRootTag(InputStream xmlStream) throws Exception{  

10         //获取sax分析器

11         SAXParserFactory factory = SAXParserFactory.newInstance();  

12         SAXParser parser = factory.newSAXParser();

13         //根据具体程序要求进行分析

14         SaxParsingService handler = new SaxParsingService();  

15         parser.parse(xmlStream, handler);  //给分析器传入文件路径和分析规则

16         return handler.getRootTag(); //分析结束,获得信息 

17     }  

18       

19     public ArrayList<TagBean> getTagList(){  

20         return tagList;

21     }  

22       

23     @Override  //当文件解析开始时,会调用此函数

24     public void startDocument() throws SAXException {  

25         tagList = new ArrayList<TagBean>();

26         tagNameStack = new Stack<String>();

27         tagBeanStack = new Stack<TagBean>();

28     }  

29   

30     @Override  //当碰到ElementNode时(如<books>),会调用此方法

31     public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {  

32         //System.out.println(qName+" start");

33         tagNameStack.push(qName);

34         

35         tag = new TagBean();

36         tag.setName(qName);

37         

38         //存储标签的属性信息

39         int index = attributes.getLength();

40         if(index > 0){

41             Map<String,String> attrList = new HashMap<String,String>();

42             for(int i = 0 ; i < index ; i++ ){

43                 attrList.put(attributes.getQName(i),attributes.getValue(i));

44             }

45             tag.setAttrList(attrList);

46         }

47         

48         //将正在解析的节点名称赋给preTag

49         //preTag = qName;

50         

51         //处理子标签问题

52         tagBeanStack.push(tag);

53         

54         //保存beans的引用

55         if("beans".equals(qName)){

56             rootTag = tag;

57         }

58     }  

59   

60     @Override  //当遇到</>时调用此方法

61     public void endElement(String uri, String localName, String qName)  throws SAXException {  

62         //System.out.println(qName+" end");

63         

64         //处理了</>后,需要弹栈,并且构建子标签与父标签的关系

65         tagNameStack.pop();

66         tag = tagBeanStack.pop();

67         if(!tag.getName().equals("beans")){

68             tagBeanStack.peek().setChildList(tag);

69         }

70         

71         //preTag = null;

72     }  

73       

74     @Override  //当遇到TextNode时调用,比如空格、值

75     public void characters(char[] ch, int start, int length) throws SAXException {  

76         //System.out.println("char here");

77         String content = new String(ch,start,length);  

78         content = content.trim();

79         

80         if(content.length() > 0){

81             tagNameStack.peek();

82             tag = tagBeanStack.peek();

83             tag.getAttrList().put(tagNameStack.peek(), content);

84         }

85         

86     }

87     

88     public TagBean getRootTag(){

89         return this.rootTag;

90     }

91 }

程序入口,Main类:

 1 public class Main {

 2       public void testSAX() throws Throwable{  

 3           SaxParsingService sax;  

 4           InputStream input = null;

 5           try {

 6              sax = new SaxParsingService();

 7              input = this.getClass().getClassLoader().getResourceAsStream(“test.xml”);

 8              

 9              //解析input的xml文件,存储成树结构的tag链

10              TagBean rootTag = sax.getRootTag(input);

11              

12              //输出解析后存储的标签树结构

13              rootTag.print(0);

14          } catch (Exception e) {

15              e.printStackTrace();

16          } finally{

17              input.close();

18          }

19       }

20       public static void main(String[] args) throws Throwable {

21         new Main().testSAX();

22       }

23 }

 

需要解析的文档:

 1 <?xml version="1.0" encoding="UTF-8"?>

 2 <beans 

 3     xmlns="http://www.springframework.org/schema/beans"

 4     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"

 5     xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.2.xsd">

 6     <bean id = "beanB" class = "org.beans.BeanB">

 7         <property name = "ba">

 8             <ref bean = "beanA"/>

 9         </property>

10         <property name = "content">

11             <value>There is no problem!</value>

12         </property>

13         <property name="bc" ref="beanC"/>

14         <property name="listRef">

15             <list>

16                 <value>First</value>

17                 <value>Second</value>

18             </list>

19         </property>

20         <property name="listBeanRef">

21             <list>

22                 <ref bean = "beanA1"/>

23                 <ref bean = "beanA2"/>

24             </list>

25         </property>

26         <property name="max" value="15" />

27         <property name = "valueProps">

28             <props>

29                 <prop key = "strValueKey1">something1</prop>

30                 <prop key = "strValueKey2">something2</prop>

31             </props>

32         </property>

33         <property name="mapRef">

34             <map>

35                 <entry>

36                     <key>

37                         <value>key1</value>

38                     </key>

39                     <value>v1</value>

40                 </entry>

41                 <entry key="key2">

42                     <value>v2</value>

43                 </entry>

44                 <entry key="key3" value="v3"/>

45             </map>

46         </property>

47     </bean>

48     <bean id = "beanA" class = "org.beans.BeanA"/>

49     <bean id = "beanC" class = "org.beans.BeanC">

50         <constructor-arg >

51             <value>11</value>

52         </constructor-arg>

53         <constructor-arg>

54             <ref bean = "beanA3"/>

55         </constructor-arg>

56     </bean>

57     <bean id = "beanA1" class = "org.beans.BeanA">

58         <constructor-arg index = "1" value = "22"/>

59         <constructor-arg index = "0" ref = "beanA4"/>    

60     </bean>

61     <bean id = "beanA2" class = "org.beans.BeanA">

62         <constructor-arg index = "1" >

63             <value>33</value>

64         </constructor-arg>

65         <constructor-arg  ref = "beanA4"/>

66         <constructor-arg>

67             <value>44</value>

68         </constructor-arg>

69         <constructor-arg index = "0" value = "55"/>

70     </bean>

71 </beans>

解析完成后,调用rootTag的print方法,打印出的树状数据结构如下(属性保存在HashMap中,因此同一标签内各属性的输出顺序不固定,可能与xml中的书写顺序不同):

beans:xmlns:xsi--http://www.w3.org/2001/XMLSchema-instance    xmlns--http://www.springframework.org/schema/beans    xsi:schemaLocation--http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.2.xsd    

 bean:id--beanB    class--org.beans.BeanB    

  property:name--ba    

   ref:bean--beanA    

  property:name--content    

   value:value--There is no problem!    

  property:ref--beanC    name--bc    

  property:name--listRef    

   list:

    value:value--First    

    value:value--Second    

  property:name--listBeanRef    

   list:

    ref:bean--beanA1    

    ref:bean--beanA2    

  property:name--max    value--15    

  property:name--valueProps    

   props:

    prop:prop--something1    key--strValueKey1    

    prop:prop--something2    key--strValueKey2    

  property:name--mapRef    

   map:

    entry:

     key:

      value:value--key1    

     value:value--v1    

    entry:key--key2    

     value:value--v2    

    entry:value--v3    key--key3    

 bean:id--beanA    class--org.beans.BeanA    

 bean:id--beanC    class--org.beans.BeanC    

  constructor-arg:

   value:value--11    

  constructor-arg:

   ref:bean--beanA3    

 bean:id--beanA1    class--org.beans.BeanA    

  constructor-arg:index--1    value--22    

  constructor-arg:ref--beanA4    index--0    

 bean:id--beanA2    class--org.beans.BeanA    

  constructor-arg:index--1    

   value:value--33    

  constructor-arg:ref--beanA4    

  constructor-arg:

   value:value--44    

  constructor-arg:index--0    value--55

如有疑问,可留言沟通~

你可能感兴趣的:(解析xml)