




关于 Lucene 的原理性知识,可以参看网络上的一些文章,Google 一下就能找到很多写得不错的资料。个人不太擅长写这种原理性的东西,所以下面做一个小实验,一步一步(step by step)地熟悉 Lucene 的具体用法。









package com.normandy.position.common;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.LockObtainFailedException;
import org.springframework.jdbc.core.JdbcTemplate;

/**
 * Full-text search service backed by a local Lucene index.
 * <p>
 * Intended for single-machine search over a modest amount of data; for large
 * data sets a distributed search solution is recommended.
 * </p>
 * <p>
 * The index is built from database rows of the configured {@code className},
 * restricted to the columns listed in {@code fieldsStr}. The instance is also
 * a {@link Runnable} so that a full rebuild can be executed on a background
 * thread (see {@link #refreshIndex()}).
 * </p>
 *
 * @author quzishen
 * @version 1.0
 */
public class LuceneSearcher implements Runnable {
    protected final Logger logger = Logger.getLogger(LuceneSearcher.class);

    /** Separator used between field names in {@link #fieldsStr}. */
    private static final String DEFAULT_KEY_FIELD_SPLIT = ",";
    /** Key field name used when the {@code Table} annotation does not provide one. */
    private static final String DEFAULT_ID = "id";
    /** Whether {@link #initIndex()} recreates the index from scratch. */
    private static final boolean IS_REBUILD_INDEX = true;
    /** Upper bound on the number of rows that are indexed. */
    private static final int DEFAULT_MAX_INDEX_NUMS = Integer.MAX_VALUE;
    /** Number of rows requested from the database per page. */
    private static final int PAGE_SIZE = 1000;

    /** Fully qualified name of the domain class whose rows are indexed. */
    private String className;
    /** Names of the fields to index, separated by ",". */
    private String fieldsStr;
    /** Directory in which the index files are stored. */
    private String defaultIndexDir = "c:/index/";
    /** JDBC template handed to the anonymous DAO used while building the index. */
    private JdbcTemplate jdbcTemplate;

    /**
     * Builds the index from the database.
     *
     * @throws RuntimeException if {@code fieldsStr} or {@code className} is blank,
     *         if the configured class cannot be loaded, or if the index cannot be written
     */
    public void initIndex() throws RuntimeException {
        if (StringUtils.isBlank(fieldsStr) || StringUtils.isBlank(className)) {
            throw new RuntimeException("can not build the index by null value of field and className.");
        }

        long beginTime = System.currentTimeMillis();
        if (logger.isInfoEnabled()) {
            logger.info("begin to build the lucene index...");
        }

        Analyzer analyzer = new CJKAnalyzer();
        IndexWriter indexWriter = null;
        try {
            // Fields that have to be indexed (the key field is always included).
            List<String> fieldList = getKeyWordsList();

            indexWriter = new IndexWriter(defaultIndexDir, analyzer, IS_REBUILD_INDEX);
            // Maximum number of documents buffered in memory before a new segment is written.
            indexWriter.setMaxBufferedDocs(500);
            // Largest number of documents a segment may hold and still be merged.
            // NOTE(review): the original comment described this as the merge frequency,
            // which is what setMergeFactor controls - confirm which setting was intended.
            indexWriter.setMaxMergeDocs(100);

            buildIndex(fieldList, indexWriter);

            indexWriter.optimize();
            indexWriter.close();
            indexWriter = null;

            long endTime = System.currentTimeMillis();
            if (logger.isInfoEnabled()) {
                logger.info("end to build the lucene index...,use time :" + (endTime - beginTime) + "ms.");
            }
        } catch (IOException e) {
            logger.error("create index failed!check the authentation!", e);
            throw new RuntimeException("create index failed!check the authentation!", e);
        } catch (ClassNotFoundException e) {
            logger.error("class not found : " + className, e);
            throw new RuntimeException("class not found : " + className, e);
        } finally {
            // Only non-null when something failed: release the index write lock.
            closeQuietly(indexWriter);
        }
    }

    /**
     * Rebuilds the index: removes every indexed document, then indexes the
     * database content again.
     */
    public void run() {
        if (logger.isDebugEnabled()) {
            logger.debug("rebuild the index for lucene start...");
        }
        long begin = System.currentTimeMillis();

        removeAllIndex();
        initIndex();

        long end = System.currentTimeMillis();
        if (logger.isDebugEnabled()) {
            logger.debug("rebuild the index for lucene end..." + (end - begin) + "ms.");
        }
    }

    /**
     * Starts a rebuild of the index on a new thread and returns immediately.
     *
     * @throws RuntimeException declared for compatibility; failures of the rebuild
     *         itself occur on the background thread
     */
    public void refreshIndex() throws RuntimeException {
        new Thread(this).start();
    }

    /**
     * Deletes every document from the index, addressing each one by the value
     * of its key field. Does nothing when no index exists yet or when the key
     * field is not indexed.
     *
     * @throws RuntimeException if the index cannot be read or modified, or if the
     *         configured class cannot be loaded
     */
    @SuppressWarnings("unchecked")
    public void removeAllIndex() throws RuntimeException {
        IndexReader indexReader = null;
        IndexWriter indexWriter = null;
        try {
            // Nothing to remove before the very first build.
            if (!IndexReader.indexExists(defaultIndexDir)) {
                if (logger.isDebugEnabled()) {
                    logger.debug("no index found in [" + defaultIndexDir + "], nothing to remove.");
                }
                return;
            }

            indexReader = IndexReader.open(defaultIndexDir);

            // The key field must be indexed, otherwise documents cannot be addressed.
            String keyName = getKeyName();
            Collection<String> indexedFields = indexReader.getFieldNames(FieldOption.INDEXED);
            if (!indexedFields.contains(keyName)) {
                return;
            }

            // Opened only now so the write lock is not taken when there is nothing to do.
            indexWriter = new IndexWriter(defaultIndexDir, new CJKAnalyzer());

            int maxDocNum = indexReader.maxDoc();
            for (int k = 0; k < maxDocNum; k++) {
                // Slots of already deleted documents cannot be loaded.
                if (indexReader.isDeleted(k)) {
                    continue;
                }
                String value = indexReader.document(k).get(keyName);
                if (value == null) {
                    continue;
                }
                indexWriter.deleteDocuments(new Term(keyName, value));
                if (logger.isDebugEnabled()) {
                    logger.debug("delete the index of [" + keyName + "," + value + "]");
                }
            }

            indexWriter.optimize();
            indexWriter.close();
            indexWriter = null;

            indexReader.flush();
            indexReader.close();
            indexReader = null;
        } catch (CorruptIndexException e) {
            logger.error("remove index failed!", e);
            throw new RuntimeException("remove index failed!", e);
        } catch (LockObtainFailedException e) {
            logger.error("remove index failed!", e);
            throw new RuntimeException("remove index failed!", e);
        } catch (IOException e) {
            logger.error("remove index failed!check the authentation!", e);
            throw new RuntimeException("remove index failed!", e);
        } catch (ClassNotFoundException e) {
            logger.error("class not found!", e);
            throw new RuntimeException("remove index failed!", e);
        } finally {
            // Release whatever is still open after an early return or a failure.
            closeQuietly(indexWriter);
            closeQuietly(indexReader);
        }
    }

    /**
     * Reads the rows page by page from the database and adds one document per
     * row to the index.
     *
     * @param fieldList   names of the fields to index
     * @param indexWriter open writer the documents are added to; not closed here
     * @throws RuntimeException if the configured class cannot be loaded or the
     *         documents cannot be written
     */
    @SuppressWarnings("unchecked")
    private void buildIndex(List<String> fieldList, IndexWriter indexWriter) throws RuntimeException {
        try {
            Class<? extends Object> objectClass = Class.forName(className);

            // Anonymous subclass, used only to reach the generic paging query.
            AbstractBaseDAO abstractBaseDAO = new AbstractBaseDAO() {
            };
            abstractBaseDAO.setJdbcTemplate(jdbcTemplate);

            // First page; it also carries the total row count.
            Paginal<? extends Object> paginal =
                    abstractBaseDAO.queryFieldsListForPaging(objectClass, null, fieldList, 1, PAGE_SIZE);

            // Never index more rows than the configured maximum.
            int totalCount = Math.min(paginal.getTotalCount(), DEFAULT_MAX_INDEX_NUMS);
            paginal.setTotalCount(totalCount);

            int pageNum = paginal.getPageNum();
            int indexedCount = 0;

            for (int i = 0; i < pageNum && indexedCount < totalCount; i++) {
                if (i > 0) {
                    // Fetch the next page; the first one is already loaded.
                    // NOTE(review): assumes the 4th argument is a 1-based page number - confirm
                    // against AbstractBaseDAO.queryFieldsListForPaging.
                    paginal = abstractBaseDAO.queryFieldsListForPaging(
                            objectClass, null, fieldList, i + 1, PAGE_SIZE);
                }

                List<? extends Object> resultList = paginal.getResultList();
                if (resultList == null) {
                    continue;
                }

                for (Object row : resultList) {
                    if (indexedCount >= totalCount) {
                        break;
                    }
                    Map<String, Object> rowValues = (Map<String, Object>) row;

                    // One fresh document per row, so fields of different rows never mix.
                    Document document = new Document();
                    for (String field : fieldList) {
                        if (StringUtils.isBlank(field)) {
                            continue;
                        }
                        Object value = rowValues.get(field);
                        // Columns without a value are simply not indexed.
                        if (value == null) {
                            continue;
                        }
                        document.add(new Field(field, value.toString(),
                                Field.Store.YES, Field.Index.TOKENIZED));
                    }

                    indexWriter.addDocument(document);
                    indexedCount++;
                }
            }
        } catch (CorruptIndexException e) {
            logger.error("create index failed!", e);
            throw new RuntimeException("create index failed!", e);
        } catch (LockObtainFailedException e) {
            logger.error("create index failed!", e);
            throw new RuntimeException("create index failed!", e);
        } catch (IOException e) {
            logger.error("create index failed!check the authentation!", e);
            throw new RuntimeException("create index failed!", e);
        } catch (ClassNotFoundException e) {
            logger.error("class not found!", e);
            throw new RuntimeException("create index failed!", e);
        }
    }

    /**
     * Searches every configured field for the given keywords.
     * <p>
     * The query is run once per configured field and all hits are appended to
     * the result, so a document matching in several fields appears several times.
     * </p>
     *
     * @param keywords query text in Lucene query-parser syntax
     * @return one map (field name to stored value) per hit; empty when
     *         {@code keywords} is blank or nothing matches, never {@code null}
     * @throws RuntimeException if the index cannot be read, the configured class
     *         cannot be loaded, or the keywords cannot be parsed
     */
    public List<Map<String, String>> search(String keywords) {
        List<Map<String, String>> result = new ArrayList<Map<String, String>>();
        if (StringUtils.isBlank(keywords)) {
            return result;
        }

        IndexSearcher indexSearcher = null;
        try {
            indexSearcher = new IndexSearcher(defaultIndexDir);
            Analyzer analyzer = new CJKAnalyzer();
            List<String> keyWordsList = getKeyWordsList();

            for (String indexName : keyWordsList) {
                QueryParser queryParser = new QueryParser(indexName, analyzer);
                Query query = queryParser.parse(keywords);

                Hits hits = indexSearcher.search(query);
                if (logger.isDebugEnabled()) {
                    logger.debug("search result count:" + hits.length());
                }

                for (int i = 0; i < hits.length(); i++) {
                    Document document = hits.doc(i);
                    Map<String, String> resultMap = new HashMap<String, String>();
                    for (String field : keyWordsList) {
                        if (StringUtils.isBlank(field)) {
                            continue;
                        }
                        resultMap.put(field, document.get(field));
                    }
                    result.add(resultMap);
                }
            }
        } catch (CorruptIndexException e) {
            logger.error("search index failed!", e);
            throw new RuntimeException("search index failed!", e);
        } catch (LockObtainFailedException e) {
            logger.error("search index failed!", e);
            throw new RuntimeException("search index failed!", e);
        } catch (IOException e) {
            logger.error("search index failed!check the authentation!", e);
            throw new RuntimeException("search index failed!", e);
        } catch (ClassNotFoundException e) {
            logger.error("class not found!", e);
            throw new RuntimeException("search index failed!", e);
        } catch (ParseException e) {
            logger.error("parse keyword exception!", e);
            throw new RuntimeException("parse keyword exception!", e);
        } finally {
            // All stored values were copied above, so the searcher can be released.
            closeQuietly(indexSearcher);
        }
        return result;
    }

    /**
     * Returns the lower-cased key field name configured through the {@code Table}
     * annotation of the domain class, or {@link #DEFAULT_ID} when the class is not
     * annotated or the configured name is blank.
     *
     * @throws ClassNotFoundException if {@code className} cannot be loaded
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    private String getKeyName() throws ClassNotFoundException {
        Class objectClass = Class.forName(className);
        Table table = (Table) objectClass.getAnnotation(Table.class);
        if (table == null) {
            return DEFAULT_ID;
        }
        String keyName = table.keyField();
        return StringUtils.isBlank(keyName) ? DEFAULT_ID : keyName.toLowerCase();
    }

    /**
     * Splits the configured field string into a list of lower-cased, trimmed
     * field names and appends the key field when it is missing, because the key
     * must be indexed to identify a document.
     *
     * @return a new, modifiable list of field names
     * @throws ClassNotFoundException if {@code className} cannot be loaded
     */
    private List<String> getKeyWordsList() throws ClassNotFoundException {
        String[] fields = StringUtils.split(StringUtils.lowerCase(fieldsStr), DEFAULT_KEY_FIELD_SPLIT);

        // A real ArrayList: the list returned by Arrays.asList is fixed-size and
        // rejects add(), which is needed for the key field below.
        List<String> fieldList = new ArrayList<String>();
        if (fields != null) {
            for (String field : fields) {
                String name = field.trim();
                if (name.length() > 0 && !fieldList.contains(name)) {
                    fieldList.add(name);
                }
            }
        }

        String keyName = getKeyName();
        if (!fieldList.contains(keyName)) {
            fieldList.add(keyName);
        }
        return fieldList;
    }

    /** Closes the writer if it is still open; a failure is logged, not propagated. */
    private void closeQuietly(IndexWriter indexWriter) {
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.close();
        } catch (IOException e) {
            logger.warn("close index writer failed!", e);
        }
    }

    /** Closes the reader if it is still open; a failure is logged, not propagated. */
    private void closeQuietly(IndexReader indexReader) {
        if (indexReader == null) {
            return;
        }
        try {
            indexReader.close();
        } catch (IOException e) {
            logger.warn("close index reader failed!", e);
        }
    }

    /** Closes the searcher if it is still open; a failure is logged, not propagated. */
    private void closeQuietly(IndexSearcher indexSearcher) {
        if (indexSearcher == null) {
            return;
        }
        try {
            indexSearcher.close();
        } catch (IOException e) {
            logger.warn("close index searcher failed!", e);
        }
    }

    // ~~~~~~~~~~~~~~~~~~~~~ setters ~~~~~~~~~~~~~~~~~~~~~ //

    public void setClassName(String className) {
        this.className = className;
    }

    public void setFieldsStr(String fieldsStr) {
        this.fieldsStr = fieldsStr;
    }

    public void setJdbcTemplate(JdbcTemplate jdbcTemplate) {
        this.jdbcTemplate = jdbcTemplate;
    }

    public void setDefaultIndexDir(String defaultIndexDir) {
        this.defaultIndexDir = defaultIndexDir;
    }
}





package com.normandy.position.common;

import java.lang.annotation.Documented;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

/**
 * Annotation carrying database-table configuration for a class.
 *
 * <p>Currently used mainly to configure the table name and the primary-key name.
 * It is retained at runtime and may be placed on types only.</p>
 *
 * <ul>
 *   <li>Project: NormandyPosition</li>
 *   <li>Type: Table</li>
 *   <li>Created: 2010-7-28 10:40:42 by quzishen</li>
 *   <li>Last modified: 2010-7-28 10:40:42</li>
 * </ul>
 *
 * @version 1.0
 */
@Target(ElementType.TYPE)
@Retention(RetentionPolicy.RUNTIME)
@Documented
public @interface Table {

    /** Table name. */
    String tableName() default "table";

    /** Primary-key name. */
    String keyField() default "id";
}


package com.normandy.position.domain;

import java.io.Serializable;
import java.util.Date;

import com.normandy.position.common.Table;

/**
 * Serializable bean annotated as mapping to table {@code NOR_QUICK_NEWS} with
 * key field {@code id}. It only holds values and exposes them through plain
 * getters and setters.
 */
@Table(tableName = "NOR_QUICK_NEWS", keyField = "id")
public class NorQuickNews implements Serializable {

    private static final long serialVersionUID = -4777096683339361256L;

    /** Key value, matching the {@code keyField} of the {@code Table} annotation. */
    private long id;

    private String prop1;
    private String prop2;
    private String prop3;
    private String prop4;
    private String prop5;
    private String prop6;
    private String prop7;
    private String prop8;
    private String prop9;

    private String name;
    private Date gmt_Create;

    public long getId() {
        return id;
    }

    public void setId(long id) {
        this.id = id;
    }

    public String getProp1() {
        return prop1;
    }

    public void setProp1(String prop1) {
        this.prop1 = prop1;
    }

    public String getProp2() {
        return prop2;
    }

    public void setProp2(String prop2) {
        this.prop2 = prop2;
    }

    public String getProp3() {
        return prop3;
    }

    public void setProp3(String prop3) {
        this.prop3 = prop3;
    }

    public String getProp4() {
        return prop4;
    }

    public void setProp4(String prop4) {
        this.prop4 = prop4;
    }

    public String getProp5() {
        return prop5;
    }

    public void setProp5(String prop5) {
        this.prop5 = prop5;
    }

    public String getProp6() {
        return prop6;
    }

    public void setProp6(String prop6) {
        this.prop6 = prop6;
    }

    public String getProp7() {
        return prop7;
    }

    public void setProp7(String prop7) {
        this.prop7 = prop7;
    }

    public String getProp8() {
        return prop8;
    }

    public void setProp8(String prop8) {
        this.prop8 = prop8;
    }

    public String getProp9() {
        return prop9;
    }

    public void setProp9(String prop9) {
        this.prop9 = prop9;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Date getGmt_Create() {
        return gmt_Create;
    }

    public void setGmt_Create(Date gmt_Create) {
        this.gmt_Create = gmt_Create;
    }
}


<!-- Lucene search service: indexes the configured fields of NorQuickNews into ${lucene.index.dir}. -->
<bean id="luceneSearcher" class="com.normandy.position.common.LuceneSearcher" depends-on="jdbcTemplate">
    <property name="defaultIndexDir">
        <value>${lucene.index.dir}</value>
    </property>
    <property name="className" value="com.normandy.position.domain.NorQuickNews" />
    <property name="fieldsStr" value="id,prop1,prop2" />
    <!-- depends-on only orders bean creation; the template must be injected explicitly,
         otherwise building the index runs without a JdbcTemplate. -->
    <property name="jdbcTemplate" ref="jdbcTemplate" />
</bean>


<!-- Quartz scheduler that is started automatically and runs the triggers listed below. -->
<bean id="timetaskScheduler" class="org.springframework.scheduling.quartz.SchedulerFactoryBean">
    <property name="triggers">
        <list>
            <ref local="luceneTrigger" />
        </list>
    </property>
    <property name="autoStartup" value="true" />
    <property name="schedulerName" value="timetaskScheduler" />
</bean>

<!-- Cron trigger: calls luceneSearcher.refreshIndex() at second 0 of every 15th minute. -->
<bean id="luceneTrigger" class="org.springframework.scheduling.quartz.CronTriggerBean">
    <property name="cronExpression" value="0 */15 * * * ?" />
    <property name="jobDetail">
        <bean class="org.springframework.scheduling.quartz.MethodInvokingJobDetailFactoryBean">
            <property name="targetMethod" value="refreshIndex" />
            <property name="targetObject" ref="luceneSearcher" />
        </bean>
    </property>
</bean>




package com.normandy.positiontest;

import java.util.List;
import java.util.Map;

import junit.framework.TestCase;

import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.jdbc.core.JdbcTemplate;

import com.normandy.position.common.LuceneSearcher;

/**
 * Integration tests for {@link LuceneSearcher}. Every test loads the Spring
 * context from {@code com/normandy/positiontest/applicationContext.xml} and
 * therefore needs the beans {@code luceneSearcher} and {@code jdbcTemplate}
 * defined there.
 */
public class LuceneSearcherTest extends TestCase {

    private LuceneSearcher luceneSearcher;
    private JdbcTemplate jdbcTemplate;

    /** Loads the context and hands the JDBC template to the searcher. */
    @Override
    protected void setUp() throws Exception {
        super.setUp();
        ApplicationContext applicationContext = new ClassPathXmlApplicationContext(
                "com/normandy/positiontest/applicationContext.xml");
        luceneSearcher = (LuceneSearcher) applicationContext.getBean("luceneSearcher");
        jdbcTemplate = (JdbcTemplate) applicationContext.getBean("jdbcTemplate");
        luceneSearcher.setJdbcTemplate(jdbcTemplate);
    }

    /** Building the index must complete without throwing. */
    public void testInit() {
        luceneSearcher.initIndex();
    }

    /** Removing all indexed documents must complete without throwing. */
    public void testRemoveAllIndex() {
        luceneSearcher.removeAllIndex();
    }

    /**
     * Only checks that the rebuild can be started; the rebuild itself runs on a
     * separate thread and its outcome is not observed here.
     */
    public void testRefreshIndex() {
        luceneSearcher.refreshIndex();
    }

    /** A search must always yield a list, possibly an empty one. */
    public void testSearch() {
        List<Map<String, String>> list = luceneSearcher.search("prop1");
        assertNotNull("search must never return null", list);
        System.out.println(list.size());
    }
}



