直接贴代码：下面的程序使用 SAX 解析 XML 文档，并把每个 Row 元素输出为 CSV 文件中的一行。
package org.autumn.kettle;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.Date;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Command-line entry point that runs a SAX parse of a fixed XML file through
 * {@code MyHandler} and reports how long the parse took.
 */
public class LearnSAX {
    /**
     * Parses {@code D:/hotel.xml} with a new {@code MyHandler} and prints the
     * elapsed time in milliseconds to standard output.
     *
     * @param args ignored
     * @throws Exception if the parser cannot be created or the parse fails
     */
    public static void main(String[] args) throws Exception, SAXException {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser parser = factory.newSAXParser();

        // nanoTime() is monotonic, so the measurement is not skewed by
        // wall-clock adjustments that happen while the parse is running.
        long startNanos = System.nanoTime();
        parser.parse(new File("D:/hotel.xml"), new MyHandler());
        long elapsedMillis = (System.nanoTime() - startNanos) / 1000000L;

        System.out.println("it takes " + elapsedMillis + "ms");
    }
}
/**
 * SAX handler that flattens an XML document into the CSV file
 * {@code D:/xml2csv.csv} (GBK encoded).
 *
 * <p>Every non-blank run of character data becomes one field followed by a
 * comma, and the end of every {@code row} element (case-insensitive) ends the
 * current CSV line with CRLF. Elements without text produce no field.
 *
 * <p>I/O failures are reported to the parser as {@link SAXException}s that
 * carry the original exception as their cause.
 */
class MyHandler extends DefaultHandler {
    /** Destination writer; opened in {@link #startDocument()}, closed in {@link #endDocument()}. */
    OutputStreamWriter out = null;

    /**
     * Text collected since the last element boundary. The parser is allowed to
     * deliver a single text node through several characters() calls, so the
     * pieces are joined here and written as one field.
     */
    private final StringBuilder text = new StringBuilder();

    /**
     * Opens the CSV output file.
     *
     * @throws SAXException if the file cannot be opened or GBK is unsupported
     */
    @Override
    public void startDocument() throws SAXException {
        text.setLength(0);
        FileOutputStream stream = null;
        try {
            stream = new FileOutputStream("D:/xml2csv.csv");
            out = new OutputStreamWriter(stream, "GBK");
        } catch (IOException e) {
            // Do not leak the file handle when only the writer creation failed.
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // The open failure reported below is the relevant error.
                }
            }
            throw new SAXException("Cannot open D:/xml2csv.csv for writing", e);
        }
    }

    /**
     * Closes the CSV output file, if it was opened.
     *
     * @throws SAXException if closing (and thereby flushing) the file fails
     */
    @Override
    public void endDocument() throws SAXException {
        if (out == null) {
            return;
        }
        try {
            out.close();
        } catch (IOException e) {
            throw new SAXException("Cannot close D:/xml2csv.csv", e);
        }
    }

    /**
     * Writes out any text that preceded this start tag, so text before a child
     * element is kept as its own field and inter-element whitespace is dropped.
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        flushField();
    }

    /**
     * Writes the pending field, then terminates the CSV line when the closed
     * element is a {@code row} element.
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        flushField();
        if (qName.equalsIgnoreCase("row")) {
            write("\r\n");
        }
    }

    /**
     * Buffers a chunk of character data; nothing is written until the next
     * element boundary.
     */
    @Override
    public void characters(char ch[], int start, int length)
            throws SAXException {
        text.append(ch, start, length);
    }

    /** Writes the buffered text as one field unless it is blank, then clears the buffer. */
    private void flushField() throws SAXException {
        if (text.length() == 0) {
            return;
        }
        String field = text.toString();
        text.setLength(0);
        if (!field.trim().isEmpty()) {
            write(escape(field) + ",");
        }
    }

    /**
     * Quotes a field that contains a comma, double quote or line break so it
     * stays a single CSV value; other fields are returned unchanged.
     */
    private static String escape(String field) {
        boolean needsQuoting = field.indexOf(',') >= 0
                || field.indexOf('"') >= 0
                || field.indexOf('\r') >= 0
                || field.indexOf('\n') >= 0;
        if (!needsQuoting) {
            return field;
        }
        return "\"" + field.replace("\"", "\"\"") + "\"";
    }

    /** Writes to the output, converting I/O failures into SAXExceptions. */
    private void write(String s) throws SAXException {
        if (out == null) {
            throw new SAXException("CSV output is not open");
        }
        try {
            out.write(s);
        } catch (IOException e) {
            throw new SAXException("Cannot write to D:/xml2csv.csv", e);
        }
    }
}
输入的XML文档:
<?xml version="1.0" encoding="UTF-8"?>
<Rows>
<Row><domain>YJ</domain> <hotelname>阳江猾令新另赂宾馆</hotelname> <hotelno>230016</hotelno> <lxr>邹垮龚</lxr> <tel>23238687</tel> <address>阳江市铁核估金伎区0号</address> <ssq>2</ssq> <bz/> </Row>
<Row><domain>QY</domain> <hotelname>清远埔沃睬储酒店</hotelname> <hotelno>443171</hotelno> <lxr>柏弗蚊</lxr> <tel>21289491</tel> <address>清远市陨促醛映区49号</address> <ssq>3</ssq> <bz/> </Row>
</Rows>
输出的CSV文件:
YJ,阳江猾令新另赂宾馆,230016,邹垮龚,23238687,阳江市铁核估金伎区0号,2,
QY,清远埔沃睬储酒店,443171,柏弗蚊,21289491,清远市陨促醛映区49号,3,