sax解析大xml文件

工作中需要处理 30 MB 左右的 XML 文件。测试发现 dom4j 最多只能解析 10 MB 多一点的文件,到 11 MB 就会报 out of memory(内存溢出),于是最后选择直接用 SAX 进行解析:

 

ReadXMLFileSAX类:

package xml;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Demonstrates streaming an XML document with SAX instead of loading the
 * whole document into memory.
 *
 * <p>The handler prints every element start/end and every attribute, plus the
 * text content of the {@code firstname}, {@code lastname}, {@code nickname}
 * and {@code salary} elements (matched case-insensitively).
 */
public class ReadXMLFileSAX {

	/**
	 * Parses {@code test.xml} (resolved by the parser relative to the working
	 * directory) and prints the events described on the class to standard
	 * output. Any failure is reported via its stack trace.
	 *
	 * @param argv command-line arguments; not used
	 */
	public static void main(String argv[]) {

		try {
			SAXParserFactory factory = SAXParserFactory.newInstance();
			SAXParser saxParser = factory.newSAXParser();
			DefaultHandler handler = new DefaultHandler() {
				/** True while inside an element whose text should be printed. */
				private boolean capturing = false;

				/**
				 * Accumulates the text of the tracked element. The parser is
				 * allowed to deliver one text node through several
				 * characters() calls, so the text is only complete once
				 * endElement() is reached.
				 */
				private final StringBuilder text = new StringBuilder();

				@Override
				public void startElement(String uri, String localName,
						String qName, Attributes attributes)
						throws SAXException {
					System.out.println("Start Element :" + qName);

					if (labelFor(qName) != null) {
						capturing = true;
						text.setLength(0);
					}
					for (int i = 0; i < attributes.getLength(); i++) {
						System.out.println("attribute name:"
								+ attributes.getQName(i));
						System.out.println("attribute value:"
								+ attributes.getValue(i));
					}
				}

				@Override
				public void endElement(String uri, String localName, String qName) throws SAXException {
					String label = labelFor(qName);
					if (label != null && capturing) {
						// Print the complete text before the end marker so the
						// output order stays start -> value -> end.
						System.out.println(label + text);
						capturing = false;
						text.setLength(0);
					}
					System.out.println("End Element :" + qName);
				}

				@Override
				public void characters(char ch[], int start, int length)
						throws SAXException {
					if (capturing) {
						text.append(ch, start, length);
					}
				}

				/**
				 * Returns the output prefix for a tracked element name, or
				 * null when the element's text is not printed.
				 */
				private String labelFor(String qName) {
					if (qName.equalsIgnoreCase("firstname")) {
						return "description : ";
					}
					if (qName.equalsIgnoreCase("lastname")) {
						return "orderContent : ";
					}
					if (qName.equalsIgnoreCase("nickname")) {
						return "Nick Name : ";
					}
					if (qName.equalsIgnoreCase("salary")) {
						return "Salary : ";
					}
					return null;
				}
			};
			saxParser.parse("test.xml", handler);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}
 

 

 

xml文件:

<?xml version="1.0"?>
<company>
	<staff name="aaaaa">
		<firstname>yong</firstname>
		<lastname>mook kim</lastname>
		<nickname>mkyong</nickname>
		<salary>100000</salary>
	</staff>
	<staff name="bbbbbb">
		<firstname>low</firstname>
		<lastname>yin fong</lastname>
		<nickname>fong fong</nickname>
		<salary>200000</salary>
	</staff>
</company>
 

 

注:可以在 startElement 方法的 attributes 参数中获取 element 的属性信息,然后在 endElement 中让 element 与它的 attribute 对应起来。由于 XML 文件很大,尽量不要把解析出的所有信息都放在内存中;譬如可以在集合中每累积一定数量的信息后就插入数据库,再清空集合,然后继续后面的解析。

你可能感兴趣的:(xml)