这几天闲着没什么事,就鼓捣了一下Lucene的全文检索方面的内容,首先在Apache的Lucene站点上大致浏览了一下Lucene的相关文档及知识,由于英文不怎么好,干脆直接找中文相关的内容看了,弄了两天,自己整了个Lucene的大致的实现程序,并且添加进自己的站点项目中,做了个简单的测试,出了一点效果,在这里将大致过程做个简单的记录:
1、在原有的Maven项目上,增加了一个test-lucene的子模块,原有的与其相关的模块为test-persist(主要做持久化操作),test-model(主要定义模型的模块), 大致的POM文件如下:
<?xml version="1.0" encoding="UTF-8"?>
<!-- POM of the test-lucene child module; it inherits from the org.clzps:test parent. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <artifactId>test</artifactId>
    <groupId>org.clzps</groupId>
    <version>0.0.1-SNAPSHOT</version>
  </parent>
  <groupId>org.clzps</groupId>
  <artifactId>test-lucene</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>
  <name>test-lucene</name>
  <url>http://maven.apache.org</url>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Single place to set the version shared by every Lucene artifact below. -->
    <lucene.version>3.1.0</lucene.version>
  </properties>
  <dependencies>
    <!-- Dependencies on sibling project modules -->
    <dependency>
      <groupId>org.clzps</groupId>
      <artifactId>test-model</artifactId>
      <version>${project.version}</version>
      <type>jar</type>
    </dependency>
    <dependency>
      <groupId>org.clzps</groupId>
      <artifactId>test-persist</artifactId>
      <version>${project.version}</version>
      <type>jar</type>
    </dependency>
    <!-- Lucene artifacts, all resolved at ${lucene.version} -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers</artifactId>
      <version>${lucene.version}</version>
      <type>jar</type>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>${lucene.version}</version>
      <type>jar</type>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>${lucene.version}</version>
      <type>jar</type>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-smartcn</artifactId>
      <version>${lucene.version}</version>
      <type>jar</type>
      <scope>compile</scope>
    </dependency>
  </dependencies>
</project>
2、主要的Java文件内容,实现外部应用的直接调用,包括创建index过程,加载index过程(索引已经创建好),搜索过程,具体就不详细描述了,主要自己做记录,看代码:
public class LuceneService { /** * Logger */ // private final Logger logger = Logger.getLogger(getClass()); /** * 分词器 */ private static Analyzer analyzer = Constants.analyzer; public static void setAnalyzer(Analyzer analyzer) { LuceneServiceImpl.analyzer = analyzer; } /** * 索引目录路径 */ private static String indexDirPath = Constants.indexDirectoryPath; public static void setIndexDirPath(String indexDirPath) { LuceneServiceImpl.indexDirPath = indexDirPath; } /** * Directory变量 */ private static FSDirectory fsdirectory = null; private static RAMDirectory ramDirectory = null; private static IndexReader fsIndexReader = null; private static IndexReader ramIndexReader = null; private void IndexRamToDisk() { IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_31, analyzer); iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter fsIndexWriter = null; try { fsdirectory = FSDirectory.open(new File(indexDirPath)); fsIndexWriter = new IndexWriter(fsdirectory, iwConfig); fsIndexWriter.addIndexes(new Directory[]{ramDirectory}); fsIndexWriter.optimize(); fsIndexWriter.close(); //关闭ramDirectory ramDirectory.close(); ramDirectory = null; } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } loadIndex(); } public void createIndex(List<ArticleData> objList) { IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_31, analyzer); iwConfig.setOpenMode(OpenMode.CREATE); IndexWriter ramIndexWriter = null; ramDirectory = new RAMDirectory(); try { ramIndexWriter = new IndexWriter(ramDirectory, iwConfig); ramIndexWriter.deleteAll(); ramIndexWriter.commit(); for(int i = 0; i < objList.size(); i++) { Document doc = AnyObjects2DocumentUtils.Persist2Document(objList.get(i)); ramIndexWriter.addDocument(doc); System.out.println(i); } ramIndexWriter.close(); IndexRamToDisk(); } catch (IOException e) { e.printStackTrace(); } } public void loadIndex() { try { 
if(fsdirectory == null) { fsdirectory = FSDirectory.open(new File(indexDirPath)); } fsIndexReader = IndexReader.open(fsdirectory); } catch (IOException e) { e.printStackTrace(); } } public void search(String queryStr) { Long start = new Date().getTime(); String[] fields = new String[]{"title","content"}; try { Query query = new MultiFieldQueryParser(Version.LUCENE_31, fields, analyzer).parse(queryStr); MultiReader multiReader = null; if(fsIndexReader == null) return; if(ramIndexReader == null) { multiReader = new MultiReader(new IndexReader[]{fsIndexReader}); } else { multiReader = new MultiReader(new IndexReader[]{fsIndexReader, ramIndexReader}); } IndexSearcher indexSearcher = new IndexSearcher(multiReader); TopDocs topDocs = indexSearcher.search(query, null, 100); System.out.println("检索到【" + topDocs.totalHits + "】条匹配的结果!"); System.out.println("--------------------------------------------------------"); ScoreDoc[] scoreDoc = topDocs.scoreDocs; for(int i = 0; i < scoreDoc.length; i++) { Document document = indexSearcher.doc(scoreDoc[i].doc); Object2DocumentUtils.PrintDocument(document); } Long end = new Date().getTime(); System.out.println("搜索时间:" + (end-start)); } catch (ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }
3、通过下面的测试代码,即可进行创建Index,加载Index和搜索过程了:
/**
 * Loads the index when the index directory path already exists, otherwise
 * builds it first, then runs a sample search.
 */
@Test
public void testCreateOrLoadIndex() {
    boolean indexPathExists = new File(indexDirectoryPath).exists();
    if (!indexPathExists) {
        // Nothing at the index path yet: build the index.
        createIndex();
    } else {
        luceneService.loadIndex();
    }
    luceneService.search("事件");
}