自定义类 ParseGoldRDF 继承 DefaultHandler 类,重写其中的 startDocument()、endDocument()、startElement()、endElement()、characters() 方法,用 SAX 方式解析 RDF 文件,解析完成后将结果直接导入 MongoDB 数据库。
package process; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; /* * @date 2015-11-08 */ public class ParseGoldRDF extends DefaultHandler { private OperateDB db = null; private List<Record> rcdLst = null; private Record rcd = null; private String nodeName = null; private String value = null; private String content = null; private boolean flag = false; Map<String, Object> mapKeyValue = new HashMap<String, Object>(); public ParseGoldRDF(OperateDB db) { this.db = db; } // 开始解析文档 public void startDocument() throws SAXException { super.startDocument(); // 调用父类的函数 rcdLst = new ArrayList<Record>(); } // 结束文档解析 public void endDocument() throws SAXException { super.endDocument(); } // 开始解析节点 // qName: 当前节点的名字 // attributes: 当前节点的属性 public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { super.startElement(uri, localName, qName, attributes); nodeName = qName; // 记录当前节点的名字 if (qName.equals("Cell")) { flag = true; rcd = new Record(); } if (flag) value = attributes.getValue(0); } // ch: 存储元素的内容 // start: 内容的开始位置 // length: 内容的长度 public void characters(char[] ch, int start, int length) throws SAXException { super.characters(ch, start, length); if (!flag) return; content = new String(ch, start, length); } // 结束节点解析 public void endElement(String uri, String localName, String qName) throws SAXException { super.endElement(uri, localName, qName); // 结束一个cell的解析 if (qName.equals("Cell")) { String name = (String) mapKeyValue.get("entity1"); if (java.lang.Character.isUpperCase(name.charAt(0))) //首字母大写,class mapKeyValue.put("type", "class"); else //否则是prop mapKeyValue.put("type", "prop"); 
rcd.setAttrKeyValue(mapKeyValue); rcdLst.add(rcd); mapKeyValue.clear(); flag = false; return; } switch (nodeName) { case "entity1": case "entity2": String[] ele = value.split("#"); value = ele[1]; mapKeyValue.put(nodeName, value); break; case "relation": mapKeyValue.put(nodeName, content); break; default: break; } } //解析GD.rdf,返回map public void parseGD(String fileName, Map<String, String> alignClass, Map<String, String> alignProp) throws Exception{ alignClass.clear(); alignProp.clear(); SAXParserFactory factory = SAXParserFactory.newInstance(); try { // 创建解析器 SAXParser parser = factory.newSAXParser(); parser.parse(fileName, this); } catch (ParserConfigurationException e) { System.out.println("ParserConfig error"); } catch (SAXException e) { System.out.println("SAXException: xml not well formed"); } catch (IOException e) { System.out.println("IO error"); } finally{ for(Record rcd:rcdLst){ Map<String, Object> map = rcd.getAttrKeyValue(); String type = map.get("type").toString(); String entity1 = map.get("entity1").toString(); String entity2 = map.get("entity2").toString(); if(type.equals("class")) alignClass.put(entity1, entity2); else alignProp.put(entity1, entity2); } } } //解析GD.rdf,写入数据库 public void parseDocument(String fileName) { // 实例化SAXParserFactory对象 SAXParserFactory factory = SAXParserFactory.newInstance(); try { // 创建解析器 SAXParser parser = factory.newSAXParser(); parser.parse(fileName, this); } catch (ParserConfigurationException e) { System.out.println("ParserConfig error"); } catch (SAXException e) { System.out.println("SAXException: xml not well formed"); } catch (IOException e) { System.out.println("IO error"); } finally{ // 写入数据库 db.WriteDB(rcdLst, false); } } /** * @param args */ public static void main(String[] args) throws Exception { // TODO Auto-generated method stub String rootPath = "E:\\01-My Papers\\08Alignment of Graphical Linked Data in Semantic Web\\data\\OAEI2010\\benchmarks\\"; String objName="304"; String nameColl = "C"+objName+"GD"; 
OperateDB db = new OperateDB("OAEI2010", nameColl); String fileGD = rootPath + objName+"\\refalign.rdf"; ParseGoldRDF handler = new ParseGoldRDF(db); handler.parseDocument(fileGD); System.out.println("finish parsing "+objName+"GD.rdf"); } }