使用 SAX 解析 XML 时如何处理文本中的特殊字符

import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that turns a document of {@code <book>} elements into a list of
 * {@link Book} objects.
 *
 * <p>Each {@code <book>} element supplies the id through an attribute, and its
 * {@code <name>} and {@code <price>} children supply the remaining fields.
 * Elements are matched on their qualified name.
 */
public class SaxParseService extends DefaultHandler{
    /** Books collected so far; {@code null} until {@link #startDocument()} has run. */
    private List<Book> books = null;
    /** The book currently being populated, or {@code null} when outside a {@code <book>}. */
    private Book book = null;
    /**
     * Name of the element whose text is currently being collected, or
     * {@code null} when the text that follows belongs to no open leaf element.
     */
    private String preTag = null;
    /**
     * Accumulates the text of the current element.
     *
     * <p>Special characters in XML text are written as entity references:
     * {@code &lt;} for {@code <}, {@code &gt;} for {@code >}, {@code &amp;} for
     * {@code &}, {@code &apos;} for {@code '} and {@code &quot;} for {@code "}.
     * Text containing such references is not delivered in one piece: for
     * {@code <name>&lt;thinking in java&gt;</name>} the parser reports
     * {@code <}, then {@code thinking in java}, then {@code >} in separate
     * {@code characters} callbacks. The pieces therefore have to be joined
     * in this buffer before the value is used.
     */
    StringBuffer sb = new StringBuffer();

    /**
     * Parses the given XML stream and returns the books it contains.
     *
     * <p>A fresh handler instance is used for the parse, so the state of this
     * instance (and the result of {@link #getBooks()} on it) is not affected.
     *
     * @param xmlStream the XML document to read
     * @return the books found in the document, in document order
     * @throws Exception if the parser cannot be created or the document cannot
     *         be parsed or converted
     */
    public List<Book> getBooks(InputStream xmlStream) throws Exception{
        // NOTE(review): the factory is used with its default configuration; if
        // documents can come from untrusted sources, consider disabling DTDs /
        // external entities here.
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser parser = factory.newSAXParser();
        SaxParseService handler = new SaxParseService();
        parser.parse(xmlStream, handler);
        return handler.getBooks();
    }

    /**
     * Returns the books collected by this handler.
     *
     * @return the collected books, or {@code null} if this handler has not
     *         started parsing a document yet
     */
    public List<Book> getBooks(){
        return books;
    }

    /** Resets all parsing state so that the handler starts from a clean slate. */
    @Override
    public void startDocument() throws SAXException {
        books = new ArrayList<Book>();
        book = null;
        preTag = null;
        sb.setLength(0);
    }

    /**
     * Starts collecting text for the opened element and, for {@code <book>},
     * creates a new {@link Book} carrying the id from the element's attributes.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        // Text seen before a nested child belongs to the enclosing element;
        // store it before the buffer is reused for the child.
        commitText();
        sb.setLength(0);
        if("book".equals(qName)){
            book = new Book();
            // Look the id up by name because attribute order is not guaranteed;
            // fall back to the first attribute for inputs that name it differently.
            String id = attributes.getValue("id");
            if(id == null){
                id = attributes.getValue(0);
            }
            book.setId(Integer.parseInt(id));
        }
        preTag = qName;
    }

    /**
     * Stores the text collected for the closed element and, for {@code <book>},
     * adds the finished book to the result list.
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        commitText();
        if("book".equals(qName)){
            books.add(book);
            book = null;
        }
        // Clearing preTag is essential: the whitespace that follows a closing
        // tag is also reported through characters(), and without this reset it
        // would be treated as the content of the element that was just closed.
        preTag = null;
    }

    /**
     * Appends a chunk of character data to the buffer of the current element.
     *
     * <p>The parser may report one text run in several chunks, so the value is
     * only converted and stored once the run is complete (see
     * {@link #commitText()}), never on a partial chunk.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if(preTag!=null){
            sb.append(ch,start,length);
        }
    }

    /**
     * Writes the buffered text into the current book when the element being
     * collected is {@code name} or {@code price}; does nothing when there is
     * no current element, no current book, or no text.
     */
    private void commitText(){
        if(preTag == null || book == null || sb.length() == 0){
            return;
        }
        String content = sb.toString();
        if("name".equals(preTag)){
            book.setName(content);
        }else if("price".equals(preTag)){
            book.setPrice(Float.parseFloat(content));
        }
    }

}


/**
 * Simple mutable value holder for a book: a numeric id, a name and a price.
 */
public class Book {
    /** Numeric identifier of the book. */
    private int id;
    /** Display name of the book; {@code null} until set. */
    private String name;
    /** Price of the book. */
    private float price;

    /** @return the identifier of this book */
    public int getId() {
        return this.id;
    }

    /** @param id the identifier to store */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the name of this book, or {@code null} if none was set */
    public String getName() {
        return this.name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the price of this book */
    public float getPrice() {
        return this.price;
    }

    /** @param price the price to store */
    public void setPrice(float price) {
        this.price = price;
    }

    /**
     * Renders the book as {@code id:name:price}.
     *
     * @return the colon-separated representation of this book
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(id).append(":").append(name).append(":").append(price);
        return text.toString();
    }
}

<?xml version="1.0" encoding="UTF-8"?>
<books>
    <!-- Sample input: each book has an id attribute plus a name and a price child element. -->
    <book id="12">
        <!-- The name begins with the entity reference &lt; (an escaped less-than sign); the closing > is written literally. -->
        <name>&lt;thing in java ></name>
        <price>45.0</price> 
    </book>
    <book id="15">
        <name>Spring in Action</name>
        <price>39.0</price>
    </book>
</books>

使用 JUnit 4 进行测试:
/**
 * JUnit 4 test that parses {@code book.xml} from the classpath and prints the
 * name of every book found.
 */
public class Test {
    /**
     * Loads {@code book.xml} through the class loader, parses it with
     * {@link SaxParseService} and prints each book name to standard output.
     *
     * @throws Throwable if the resource is missing or parsing fails
     */
    @org.junit.Test
    public void testSAX() throws Throwable {
        InputStream input = this.getClass().getClassLoader().getResourceAsStream("book.xml");
        // getResourceAsStream returns null when the resource is absent; fail
        // with a clear message instead of handing null to the parser.
        if (input == null) {
            throw new IllegalStateException("book.xml not found on the classpath");
        }
        try {
            SaxParseService sax = new SaxParseService();
            List<Book> books = sax.getBooks(input);
            for (Book book : books) {
                System.out.println(book.getName());
            }
        } finally {
            // Always release the stream, even when parsing throws.
            input.close();
        }
    }
}

你可能感兴趣的:(SAX 特殊字符)