SAX 解析 XML

import java.io.FileInputStream;
import java.io.OutputStreamWriter;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * SAX content handler that prints the table of contents of a book XML file.
 *
 * <p>Chapter titles and topic names are read from element attributes and
 * printed with running numbers. Non-blank character data is printed and
 * collected in a bounded in-memory buffer. The collected text can be processed
 * further (for example inserted into a database); for large files it should be
 * persisted segment by segment so that memory use stays bounded.
 */
public class MySaxXml extends DefaultHandler {

	/** Upper bound, in characters, for the text kept in {@link #content}. */
	private static final int MAX_BUFFERED_CHARS = 30 * 1000;

	/** Locator supplied by the parser; stored but not otherwise used by this class. */
	private Locator locator;
	/** Running chapter number (1-based once the first chapter has been seen). */
	private int index1 = 0;
	/** Running topic number inside the current chapter; reset when a chapter ends. */
	private int index2 = 0;
	/** Demo storage for the character data collected so far. */
	private final StringBuilder content = new StringBuilder();

	/**
	 * Handles the text between tags, e.g. the {@code tttt} in
	 * {@code <xxx>tttt</xxx>}.
	 *
	 * <p>Chunks consisting only of whitespace are ignored. Any other chunk is
	 * printed (untrimmed) between the fixed prefix and suffix and appended to
	 * the internal buffer. If appending would push the buffer past
	 * {@link #MAX_BUFFERED_CHARS}, the buffer is flushed first, so the current
	 * chunk is never lost.
	 *
	 * @param ch     character array handed over by the parser
	 * @param start  index of the first character of this chunk in {@code ch}
	 * @param length number of characters in this chunk
	 * @throws SAXException declared by the SAX contract; not thrown here
	 */
	@Override
	public void characters(char[] ch, int start, int length)
			throws SAXException {
		String text = new String(ch, start, length);
		if (text.trim().length() == 0) {
			return;
		}
		System.out.print("本书<<");
		// Print through System.out directly so the text goes through the same
		// stream (and encoding) as the surrounding prefix and suffix.
		System.out.print(text);
		// Bound on length(), not capacity(): capacity never shrinks after a
		// clear, so a capacity check would keep discarding every later chunk.
		if (content.length() > 0
				&& content.length() + length > MAX_BUFFERED_CHARS) {
			flushContent();
		}
		content.append(ch, start, length);
		System.out.println(">>的目录");
	}

	/**
	 * Empties the text buffer. This is the place to persist the collected
	 * segment (e.g. write it to a database) before it is discarded.
	 */
	private void flushContent() {
		content.setLength(0);
	}

	/**
	 * Returns the character data currently held in the buffer.
	 *
	 * @return the text collected since the last flush, possibly empty
	 */
	public String getBufferedContent() {
		return content.toString();
	}

	/**
	 * Prints a marker when the parser reaches the end of the document.
	 *
	 * @throws SAXException declared by the SAX contract; not thrown here
	 */
	@Override
	public void endDocument() throws SAXException {
		System.out.println("解析结束:");
	}

	/**
	 * Called on every end tag. An end tag marks one complete record, so other
	 * business processing can be hooked in here. Currently it only resets the
	 * topic counter when a {@code chapter} element closes.
	 *
	 * @param uri       namespace URI of the element
	 * @param localName local name of the element
	 * @param qName     qualified name of the element (compared case-insensitively)
	 * @throws SAXException declared by the SAX contract; not thrown here
	 */
	@Override
	public void endElement(String uri, String localName, String qName)
			throws SAXException {
		if (qName.equalsIgnoreCase("chapter")) {
			this.index2 = 0;
		}
	}

	/**
	 * Stores the locator handed over by the parser.
	 *
	 * @param locator document locator supplied by the parser
	 */
	@Override
	public void setDocumentLocator(Locator locator) {
		this.locator = locator;
	}

	/**
	 * Prints a marker when the parser starts reading the document.
	 *
	 * @throws SAXException declared by the SAX contract; not thrown here
	 */
	@Override
	public void startDocument() throws SAXException {
		System.out.println("解析开始: ");
	}

	/**
	 * Called on every start tag; inspects the element name and its attributes.
	 *
	 * <p>For {@code chapter} elements the chapter counter is incremented and
	 * the {@code title} attribute is printed. For {@code topic} elements the
	 * topic counter is incremented and the {@code name} attribute is printed.
	 * Element and attribute names are compared case-insensitively.
	 *
	 * @param uri       namespace URI of the element
	 * @param localName local name of the element
	 * @param qName     qualified name of the element
	 * @param atts      attributes attached to the element
	 * @throws SAXException declared by the SAX contract; not thrown here
	 */
	@Override
	public void startElement(String uri, String localName, String qName,
			Attributes atts) throws SAXException {
		if (qName.equalsIgnoreCase("chapter")) {
			index1++;
			for (int i = 0; i < atts.getLength(); i++) {
				String attName = atts.getQName(i);
				if (attName.equalsIgnoreCase("title")) {
					System.out.println("第" + index1 + "章:" + atts.getValue(i));
				}
			}
		}

		if (qName.equalsIgnoreCase("topic")) {
			index2++;
			for (int i = 0; i < atts.getLength(); i++) {
				String attName = atts.getQName(i);
				if (attName.equalsIgnoreCase("name")) {
					System.out.println("     第" + index2 + "部分:"
							+ atts.getValue(i));
				}
			}
		}
	}

	/**
	 * Creates an {@link XMLReader}, preferring the given parser class and
	 * falling back to the platform default when that class is unavailable.
	 *
	 * @param vendorParserClass fully qualified name of the preferred parser
	 * @return a usable XML reader
	 * @throws SAXException if no reader can be created at all
	 */
	private static XMLReader createReader(String vendorParserClass)
			throws SAXException {
		try {
			return XMLReaderFactory.createXMLReader(vendorParserClass);
		} catch (SAXException e) {
			// The preferred parser is optional; report it and use the default.
			System.err.println("Parser " + vendorParserClass
					+ " unavailable (" + e.getMessage()
					+ "), using the default XMLReader");
			return XMLReaderFactory.createXMLReader();
		}
	}

	/**
	 * Parses an XML file with this handler and prints its table of contents.
	 *
	 * @param args optional; {@code args[0]} is the path of the XML file,
	 *             defaulting to {@code D:\book2.xml} when absent
	 * @throws SAXException if the document cannot be parsed
	 * @throws Exception    if the file cannot be opened or read
	 */
	public static void main(String[] args) throws Exception {
		String vendorParserClass = "org.apache.xerces.parsers.SAXParser";
		String xmlURI = args.length > 0 ? args[0] : "D:\\book2.xml";
		XMLReader reader = createReader(vendorParserClass);
		reader.setContentHandler(new MySaxXml());
		FileInputStream in = new FileInputStream(xmlURI);
		try {
			// Parsing calls back into the overridden handler methods above.
			reader.parse(new InputSource(in));
		} finally {
			// Always release the file handle, even when parsing fails.
			in.close();
		}
	}
}

 下面是对应的 XML 文件(book2.xml):

<?xml version="1.0"?>
<book>
    <title>Java and XML</title> 
    <contents>
        <chapter title="Introduction" number="1">
            <topic name="XML Matters"/> 
            <topic name="What's Important"/>
            <topic name="The Essentials"/>
            <topic name="What&apos;s Next?"/>
        </chapter>
        <chapter title="Nuts and Bolts" number="2">
            <topic name="The Basics"/>
            <topic name="Constraints"/>
            <topic name="Transformations"/>
            <topic name="And More..."/>
            <topic name="What&apos;s Next?"/>
        </chapter>
        <chapter title="SAX" number="3">
            <topic name="Getting Prepared"/>
            <topic name="SAX Readers"/>
            <topic name="Content Handlers"/>
            <topic name="Gotcha!"/>
            <topic name="What&apos;s Next?"/>
        </chapter>       
    </contents>
</book>

 

你可能感兴趣的:(apache,xml)