lucene-使用Digester分析XML索引

1、Digester是隶属于Jakarta Commons项目的一个子项目,它提供了一个简单的上层接口,用来将XML类型的文档映射为Java对象。

2、DigesterXMLHandler类用来解析XML格式的文档。

public class DigesterXMLHandler implements DocumentHandler{

private Digester dig;

private static Documentdoc;

publicDigesterXMLHandler(){

dig=new Digester();

dig.setValidating(false);

//首先创建DigesterXMLHandler实例

dig.addObjectCreate("address-book",DigesterXMLHandler.class);

//找到contact子元素,创建一个contact类实例

dig.addObjectCreate("address-book/contact",Contact.class);

//当Digester找到<contact>参数的type属性时我们就需要设置Contact实例的type属性。

dig.adSetProperties("address-book","type","type");

//设置几个规则,用来设置contact属性。

dig.addCallMethod("address-book/contact/name","setName",0);

dig.addCallMethod("address-book/contact/address","setAddress",0);

dig.addCallMethod("address-book/contact/city","setCity",0);

dig.addCallMethod("address-book/contact/province","setProvince",0);

dig.addCallMethod("address-book/contact/postalcode","setPostalcode",0);

dig.addCallMethod("address-book/contact/country","setCountry",0);

dig.addCallMethod("address-book/contact/telephone","setTelephone",0);

dig.addSetNext("address-book/contact","populateDocument");

}

public sychronized DocumentgetDocument(InputStream is) throws DocumentHandlerException{

try{

dig.parse(is);//开始解析XML格式的InputStream 输入流
}

catch (IOException e){

throw new DocumentHandlerException("cannot parse XMLdocument",e);

}

catch (SAXExceptoin e){

throw new DocumentHandlerException("cannot parse XMLdocument",e);

}

return doc;

}

 

public voidpopulateDocument(Contact contact){

//将已经取出的各Field组装到Lucene的Document对象里

doc=new Document();

doc.add(Field.Keyword("type",contact.getType()));

doc.add(Field.Keyword("name",contact.getName()));

doc.add(Field.Keyword("address",contact.getAddress()));

doc.add(Field.Keyword("city",contact.getCity()));

doc.add(Field.Keyword("province",contact.getProvince()));

doc.add(Field.Keyword("postalcode",contact.getPostalcode()));

doc.add(Field.Keyword("country",contact.getCountry()));

doc.add(Field.Keyword("telephone",contact.getTelephone()));

}

//重载了每个contact实例入口的JAVABEAN类

public static class Contact{

private String type;

private String name;

private String address;

private String city;

private String province;

private String postalcode;

private String country;

private String telephone;

public void setType(String newType){

type=newType();

}

public String getType(){

return type;

}

public String setName(String newName){

name=newName;

}

public String getName(){

returnname;

}

..........//依次设置city、province、postalcode、country、telephone

..........

}

public static voidmain(String[] args) throws Exception{

DigesterXMLHandler handler=new DigesterXMLHandler();

Document doc=

handler.getDocument(new FileInputStream(new File(args[0]));

System.out.println(doc);

}

}

你可能感兴趣的:(xml,Lucene)