lucene4入门(2)搜索

欢迎转载http://www.cnblogs.com/shizhongtao/p/3440479.html

接着上一篇,这里继续讲搜索。搜索和创建索引一样,首先要确定索引所在的位置,然后用相应的类来读取。还要注意一点:搜索时要确定分词器,并且应与创建索引时使用的分词器保持一致,因为不同的分词器的分词规则不同。上篇我使用的是默认的分词器,这里也先不深究分词器。为了方便阅读,代码就全部贴上。

  1 package com.bing.test;

  2 

  3 import java.io.File;

  4 import java.io.FileNotFoundException;

  5 import java.io.FileReader;

  6 import java.io.IOException;

  7 

  8 import org.apache.lucene.analysis.Analyzer;

  9 import org.apache.lucene.analysis.standard.StandardAnalyzer;

 10 import org.apache.lucene.document.Document;

 11 import org.apache.lucene.document.Field.Store;

 12 import org.apache.lucene.document.FieldType;

 13 import org.apache.lucene.document.StringField;

 14 import org.apache.lucene.document.TextField;

 15 import org.apache.lucene.index.DirectoryReader;

 16 import org.apache.lucene.index.IndexReader;

 17 import org.apache.lucene.index.IndexWriter;

 18 import org.apache.lucene.index.IndexWriterConfig;

 19 import org.apache.lucene.queryparser.classic.ParseException;

 20 import org.apache.lucene.queryparser.classic.QueryParser;

 21 import org.apache.lucene.search.IndexSearcher;

 22 import org.apache.lucene.search.Query;

 23 import org.apache.lucene.search.ScoreDoc;

 24 import org.apache.lucene.search.TopDocs;

 25 import org.apache.lucene.store.Directory;

 26 import org.apache.lucene.store.FSDirectory;

 27 import org.apache.lucene.util.Version;

 28 

 29 /**

 30  * @author bingyulei

 31  * 

 32  */

 33 public class HelloLucene

 34 {

 35 

 36     Directory directory = null;

 37     Document doc;

 38     IndexWriter writer = null;

 39 

 40     /**

 41      * 

 42      * @param indexWriterPath

 43      *            索引创建路径

 44      * @param filePath

 45      *            读取文件路径

 46      */

 47     public void createIndex(String indexWriterPath, String filePath)

 48     {

 49 

 50         // 创建indexwriter

 51         Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);// 设置标准分词器

 52                                                                     // ,默认是一元分词

 53         IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_45,

 54                 analyzer);// 设置IndexWriterConfig

 55 

 56         try

 57         {

 58             // 创建directory

 59             // directory=RAMDirectory();//创建在内存中

 60             // 创建在硬盘上

 61             directory = FSDirectory.open(new File(indexWriterPath));// 打开存放索引的路径

 62             writer = new IndexWriter(directory, iwc);

 63 

 64             // 为document添加field

 65             addFile(writer, filePath);

 66 

 67             System.out.println("添加成功");

 68         } catch (IOException e)

 69         {

 70             // TODO Auto-generated catch block

 71             e.printStackTrace();

 72         }

 73 

 74     }

 75 

 76     private void addFile(IndexWriter writer, String filePath)

 77     {

 78         File f = new File(filePath);

 79         FieldType ft = new FieldType();

 80         ft.setIndexed(true);// 索引

 81         ft.setStored(true);// 存储,数据量比较大,一般都是不鼓励存储,放在索引文件中会把索引文件撑大

 82         ft.setTokenized(true);

 83         for (File file : f.listFiles())

 84         {

 85             try

 86             {

 87                 // 创建Document对象

 88                 doc = new Document();

 89                 // doc.add(new Field("content", new FileReader(file), ft));

 90                 doc.add(new TextField("content", new FileReader(file)));

 91                 doc.add(new TextField("filename", file.getName(), Store.YES));

 92                 doc.add(new StringField("path", file.getPath(), Store.YES));

 93                 // 添加文档

 94                 writer.addDocument(doc);

 95                 writer.commit();// 提交数据

 96             } catch (FileNotFoundException e)

 97             {

 98                 // TODO Auto-generated catch block

 99                 e.printStackTrace();

100             } catch (IOException e)

101             {

102                 // TODO Auto-generated catch block

103                 e.printStackTrace();

104             }

105 

106         }

107     }

108 

109     /**

110      * 搜索

111      * 

112      * @param path

113      *            搜索路径

114      * @param indexReaderPath

115      *            索引存放路径

116      */

117     public void seacher(String indexReaderPath, String searthText)

118     {

119         IndexReader reader=null;

120         try

121         {

122             directory = FSDirectory.open(new File(indexReaderPath));

123             // 创建读取索引的reader

124              reader = DirectoryReader.open(directory);

125             // 根据reader创建search

126             IndexSearcher searcher = new IndexSearcher(reader);

127             // 创建查询,第二个参数表示查询的字段名,第三个是分词器

128             QueryParser parser = new QueryParser(Version.LUCENE_45, "content",

129                     new StandardAnalyzer(Version.LUCENE_45));

130             // 搜索包含searthText的内容

131             Query query = parser.parse(searthText);

132             // 搜索返回10条记录

133             TopDocs tds = searcher.search(query, 10);

134             

135              //获取scoredoc对象组,

136              ScoreDoc[] sds=tds.scoreDocs;

137              for(ScoreDoc sd:sds){

138                  //获取具体的doc

139                  Document doc=searcher.doc(sd.doc);

140                  System.out.println(doc.get("filename")+":"+doc.get("path"));

141              }

142         } catch (IOException e)

143         {

144             // TODO Auto-generated catch block

145             e.printStackTrace();

146         }// 打开存放索引的路径

147         catch (ParseException e)

148         {

149             // TODO Auto-generated catch block

150             e.printStackTrace();

151         }finally{

152             if (reader!=null)

153             {

154                 try

155                 {

156                     reader.close();

157                 } catch (IOException e)

158                 {

159                     // TODO Auto-generated catch block

160                     e.printStackTrace();

161                 }

162             }

163         }

164     }

165 }
View Code

说明:"D:\\lucene\\file"目录下放的是我从lucene官方文档上复制的两段话。不过要注意,创建完索引之后再修改文件内容,新加的内容并不能搜索出来,因为索引只反映创建时的文件内容,修改文件后需要重新建立索引。

然后进行测试:先运行writertest创建索引,再运行searchTest,就可以得到哪个文本文件中含有"Changing Similarity"这段字符。

package com.bing.test;



import org.junit.Test;



/**
 * Manual smoke tests for {@link HelloLucene}. Neither test asserts anything;
 * the results are inspected on the console.
 */
public class HelloLuceneTest
{
    /** Directory that holds the Lucene index. */
    private static final String INDEX_DIR = "D:\\lucene\\index";

    /** Directory that holds the text files to be indexed. */
    private static final String SOURCE_DIR = "D:\\lucene\\file";

    /** Indexes the files in the source directory into the index directory. */
    @Test
    public void writertest()
    {
        new HelloLucene().createIndex(INDEX_DIR, SOURCE_DIR);
    }

    /** Searches the index directory for the text "Changing Similarity". */
    @Test
    public void searchTest()
    {
        new HelloLucene().seacher(INDEX_DIR, "Changing Similarity");
    }
}

 

 

你可能感兴趣的:(Lucene)