解析 RDF 格式的数据

自定义类 ParseGoldRDF 继承 DefaultHandler,重写其中的 startDocument()、endDocument()、startElement()、endElement()、characters() 方法,以 SAX 方式解析文件。parseDocument() 在解析完成后把结果直接导入到 MongoDB 数据库中;parseGD() 则把解析出的对应关系填入调用者传入的两个 Map。

package process;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler that extracts entity correspondences from an alignment file in
 * RDF/XML form (the {@code main} method feeds it an OAEI {@code refalign.rdf}).
 *
 * <p>Every {@code <Cell>} element becomes one {@code Record} whose attribute
 * map holds:
 * <ul>
 * <li>{@code entity1} / {@code entity2}: the part after {@code #} of the
 * element's resource attribute;</li>
 * <li>{@code relation}: the text content of the {@code <relation>} element;</li>
 * <li>{@code type}: {@code "class"} when {@code entity1} starts with an
 * upper-case letter, {@code "prop"} otherwise.</li>
 * </ul>
 *
 * <p>Per-parse state lives in instance fields, so one handler instance must not
 * run two parses at the same time.
 *
 * <p>Created 2015-11-08.
 */
public class ParseGoldRDF extends DefaultHandler {

	private static final String CELL = "Cell";
	private static final String ENTITY1 = "entity1";
	private static final String ENTITY2 = "entity2";
	private static final String RELATION = "relation";
	private static final String TYPE = "type";

	private OperateDB 			db = null;

	// Never null: a parse that fails before startDocument() still leaves an empty list.
	private List<Record> 		rcdLst = new ArrayList<Record>();
	private Record 				rcd = null;
	// Text of the element currently being read; SAX may deliver it in several chunks.
	private final StringBuilder content = new StringBuilder();
	// True while the parser is inside a <Cell> element.
	private boolean 			flag = false;
	Map<String, Object> 		mapKeyValue = new HashMap<String, Object>();

	/**
	 * @param db database wrapper used by {@link #parseDocument(String)}; it is
	 *           not touched by {@link #parseGD(String, Map, Map)}
	 */
	public ParseGoldRDF(OperateDB db) {
		this.db = db;
	}

	/** Resets all per-parse state so a handler can be reused after a failed parse. */
	@Override
	public void startDocument() throws SAXException {
		super.startDocument();
		rcdLst = new ArrayList<Record>();
		rcd = null;
		flag = false;
		content.setLength(0);
		mapKeyValue.clear();
	}

	/** Nothing to finalize; the collected records are consumed by the parse methods. */
	@Override
	public void endDocument() throws SAXException {
		super.endDocument();
	}

	/**
	 * Opens a new record on {@code <Cell>} and captures the entity identifiers.
	 *
	 * @param qName      qualified name of the element being opened
	 * @param attributes attributes of that element
	 */
	@Override
	public void startElement(String uri, String localName, String qName,
			Attributes attributes) throws SAXException {
		super.startElement(uri, localName, qName, attributes);

		if (CELL.equals(qName)) {
			flag = true;
			rcd = new Record();
		}
		if (!flag)
			return;

		// Each element starts with an empty text buffer so that whitespace
		// between siblings never leaks into the next element's content.
		content.setLength(0);

		// The identifier is read here, while the attributes of this very element
		// are at hand, instead of being looked up again when the element closes.
		if (ENTITY1.equals(qName) || ENTITY2.equals(qName)) {
			String fragment = fragmentOf(resourceOf(attributes));
			if (fragment != null)
				mapKeyValue.put(qName, fragment);
		}
	}

	/**
	 * Accumulates element text. The parser is allowed to report one text node
	 * through several calls, so the chunks are appended rather than replaced.
	 *
	 * @param ch     buffer holding the characters
	 * @param start  offset of the first character
	 * @param length number of characters
	 */
	@Override
	public void characters(char[] ch, int start, int length)
			throws SAXException {
		super.characters(ch, start, length);
		if (flag)
			content.append(ch, start, length);
	}

	/**
	 * Stores the relation text and, on {@code </Cell>}, finishes the record.
	 * Elements closed outside a {@code <Cell>} are ignored.
	 */
	@Override
	public void endElement(String uri, String localName, String qName)
			throws SAXException {
		super.endElement(uri, localName, qName);
		if (!flag)
			return;

		if (CELL.equals(qName)) {
			finishCell();
			return;
		}

		// Dispatch on the element that is actually closing (qName), not on the
		// element that was opened last.
		if (RELATION.equals(qName))
			mapKeyValue.put(qName, content.toString());
		content.setLength(0);
	}

	/** Classifies the finished cell, stores it as a record and resets the cell state. */
	private void finishCell() {
		flag = false;
		Object entity1 = mapKeyValue.get(ENTITY1);
		String name = (entity1 == null) ? "" : entity1.toString();

		if (name.isEmpty()) {
			// Without entity1 the cell can be neither classified nor used as a key.
			System.out.println("skip Cell without entity1");
		} else {
			// Upper-case initial means class, anything else means property.
			mapKeyValue.put(TYPE, Character.isUpperCase(name.charAt(0)) ? "class" : "prop");
			// Hand over a copy: the working map is cleared right below, and it is
			// not visible here whether Record keeps the reference or copies it.
			rcd.setAttrKeyValue(new HashMap<String, Object>(mapKeyValue));
			rcdLst.add(rcd);
		}
		rcd = null;
		mapKeyValue.clear();
	}

	/**
	 * Returns the resource reference of an element: the {@code rdf:resource}
	 * attribute when present, otherwise the first attribute, otherwise null.
	 */
	private static String resourceOf(Attributes attributes) {
		String resource = attributes.getValue("rdf:resource");
		// getValue(int) yields null when the element has no attributes at all.
		return (resource != null) ? resource : attributes.getValue(0);
	}

	/**
	 * Returns the text between the first {@code #} and the next {@code #} (or the
	 * end of the string). A value without {@code #} is returned unchanged; null
	 * stays null.
	 */
	private static String fragmentOf(String resource) {
		if (resource == null)
			return null;
		int hash = resource.indexOf('#');
		if (hash < 0)
			return resource;
		int next = resource.indexOf('#', hash + 1);
		return (next < 0) ? resource.substring(hash + 1)
				: resource.substring(hash + 1, next);
	}

	/**
	 * Runs the SAX parser over {@code fileName} with this object as handler.
	 * Failures are reported on standard output and not rethrown; records read
	 * before the failure stay in {@code rcdLst}.
	 *
	 * @return true when the whole document was parsed
	 */
	private boolean runParser(String fileName) {
		// Start from an empty list even if the parser fails before startDocument().
		rcdLst = new ArrayList<Record>();
		try {
			SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
			parser.parse(fileName, this);
			return true;
		} catch (ParserConfigurationException e) {
			System.out.println("ParserConfig error: " + e.getMessage());
		} catch (SAXException e) {
			System.out.println("SAXException: xml not well formed: " + e.getMessage());
		} catch (IOException e) {
			System.out.println("IO error: " + e.getMessage());
		}
		return false;
	}

	/**
	 * Parses the alignment file and fills the two maps with
	 * {@code entity1 -> entity2} pairs.
	 *
	 * @param fileName   location of the alignment file, as accepted by
	 *                   {@code SAXParser.parse(String, DefaultHandler)}
	 * @param alignClass cleared, then filled with the pairs of type "class"
	 * @param alignProp  cleared, then filled with all remaining pairs
	 * @throws Exception kept for compatibility with existing callers
	 */
	public void parseGD(String fileName, 
			Map<String, String> alignClass, Map<String, String> alignProp) throws Exception{

		alignClass.clear();
		alignProp.clear();
		try {
			runParser(fileName);
		} finally {
			// Best effort: whatever was parsed is returned, even after a failure.
			for (Record record : rcdLst) {
				Map<String, Object> attrs = record.getAttrKeyValue();
				if (attrs == null)
					continue;
				Object type = attrs.get(TYPE);
				Object entity1 = attrs.get(ENTITY1);
				Object entity2 = attrs.get(ENTITY2);
				// A pair needs both ends; incomplete records are left out.
				if (type == null || entity1 == null || entity2 == null)
					continue;

				if ("class".equals(type.toString()))
					alignClass.put(entity1.toString(), entity2.toString());
				else
					alignProp.put(entity1.toString(), entity2.toString());
			}
		}
	}

	/**
	 * Parses the alignment file and passes the collected records to
	 * {@code db.WriteDB(records, false)}.
	 *
	 * @param fileName location of the alignment file
	 * @throws IllegalStateException when the handler was created without a database
	 */
	public void parseDocument(String fileName) {
		if (db == null)
			throw new IllegalStateException("no OperateDB supplied to ParseGoldRDF");
		try {
			runParser(fileName);
		} finally {
			// Best effort: records read before a failure are still written.
			db.WriteDB(rcdLst, false);
		}
	}

	/**
	 * Loads one benchmark's reference alignment into the database.
	 *
	 * @param args optional: {@code args[0]} benchmark root directory (including the
	 *             trailing separator), {@code args[1]} benchmark id; the original
	 *             hard-coded values are used for whatever is omitted
	 */
	public static void main(String[] args) throws Exception {
		String rootPath = (args.length > 0) ? args[0]
				: "E:\\01-My Papers\\08Alignment of Graphical Linked Data in Semantic Web\\data\\OAEI2010\\benchmarks\\";
		String objName = (args.length > 1) ? args[1] : "304";
		String nameColl = "C"+objName+"GD";
		OperateDB db = new OperateDB("OAEI2010", nameColl);
		String fileGD    = rootPath + objName+"\\refalign.rdf";
		
		ParseGoldRDF handler = new ParseGoldRDF(db);
		handler.parseDocument(fileGD);
		System.out.println("finish parsing "+objName+"GD.rdf");
	}

}


你可能感兴趣的:(mongodb,xml,RDF)