最近在用 Lucene,发现网上的 Lucene 示例都不太令人满意,所以自己写了一个。如果哪里有问题,欢迎指出。
建立索引
import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.wltea.analyzer.lucene.IKAnalyzer; public class AddIndex { //path为索引存放地址 public void addIndex(String path) { try{ Directory fsDir = FSDirectory.open(new File(path)); //记住,此处的分词器一定要和下面查询的分词器一致,否则会查不到数据 Analyzer analyzer = new IKAnalyzer(); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, analyzer); IndexWriter writer = new IndexWriter(fsDir, conf); System.out.println("~~~建立索引~~~"); Document document1 = new Document(); document1.add(new Field("id", "1", Field.Store.YES, Field.Index.NO)); document1.add(new Field("address", "中国四川省成都市金牛区青羊东二路", Field.Store.YES, Field.Index.ANALYZED)); Document document2 = new Document(); document2.add(new Field("id", "2", Field.Store.YES, Field.Index.NO)); document2.add(new Field("address", "中国四川省成都市金牛区永陵路", Field.Store.YES, Field.Index.ANALYZED)); Document document3 = new Document(); document3.add(new Field("id", "3", Field.Store.YES, Field.Index.NO)); document3.add(new Field("address", "中国四川省成都市金牛区一环路西三段", Field.Store.YES, Field.Index.ANALYZED)); Document document4 = new Document(); document4.add(new Field("id", "4", Field.Store.YES, Field.Index.NO)); document4.add(new Field("address", "中国四川省成都市金牛区营门口路", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(document1); writer.addDocument(document2); writer.addDocument(document3); writer.addDocument(document4); writer.forceMerge(1); writer.close(); System.out.println("~~~索引建立完成~~~"); }catch (IOException e) { System.out.println(e.toString()); } } }
查询数据
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Demo driver: builds the address index with {@code AddIndex} and then runs one
 * query against it, printing every hit together with its score.
 */
public class TestLucene {

    /** Location of the index on disk. */
    private static final String INDEX_PATH = "e:\\lucene\\addressStore";

    /** Same location as a {@link File}, as required by {@link FSDirectory#open(File)}. */
    private static final File INDEX_DIR = new File(INDEX_PATH);

    /** Query text used by {@link #main(String[])}. */
    private static final String KEYWORD = "中国四川省成都市金牛区营门口路";

    /** Name of the document field that is searched and printed. */
    private static final String FIELD = "address";

    /** Query-time analyzer; it has to be the same analyzer that was used to build the index. */
    Analyzer analyzer = new IKAnalyzer();

    /**
     * Rebuilds the demo index and searches it for {@link #KEYWORD}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new AddIndex().addIndex(INDEX_PATH);
        try {
            new TestLucene().search(KEYWORD);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses {@code keyword} with the query parser, runs it against the {@code address}
     * field and prints the parsed query followed by the top 10 hits.
     *
     * <p>The searcher, the reader and the directory are all closed before returning,
     * including when parsing or searching fails.
     *
     * @param keyword query text; it is handed to {@link QueryParser#parse(String)} as is
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code keyword} cannot be parsed as a query
     */
    public void search(String keyword) throws IOException, ParseException {
        FSDirectory directory = FSDirectory.open(INDEX_DIR);
        try {
            IndexReader reader = IndexReader.open(directory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    // Only the analyzer is passed in; the parser tokenizes the keyword itself.
                    QueryParser parser = new QueryParser(Version.LUCENE_35, FIELD, analyzer);
                    Query query = parser.parse(keyword);
                    System.out.println(query.toString());
                    printHits(searcher, query);
                } finally {
                    searcher.close();
                }
            } finally {
                // A searcher built from an existing reader does not close that reader,
                // so it has to be closed here explicitly.
                reader.close();
            }
        } finally {
            directory.close();
        }
    }

    /** Runs {@code query} and prints the address and score of each of the first 10 hits. */
    private void printHits(IndexSearcher searcher, Query query) throws IOException {
        // 10 = maximum number of hits to fetch.
        TopDocs topDocs = searcher.search(query, 10);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            System.out.println("----------------分割线----------------------");
            Document hitDoc = searcher.doc(scoreDoc.doc);
            String address = hitDoc.get(FIELD);
            System.out.println("address:" + address + "\nsocre:" + scoreDoc.score);
            // To print the scoring details for a hit, use:
            //   searcher.explain(query, scoreDoc.doc).toString()
        }
    }
}
查询结果
~~~建立索引~~~
~~~索引建立完成~~~
address:中国 address:国四 address:四川省 address:四川 address:四 address:省成 address:成都市 address:成都 address:都市 address:金牛区 address:金牛 address:营 address:门口 address:路
----------------分割线----------------------
address:中国四川省成都市金牛区营门口路
socre:0.9141956
----------------分割线----------------------
address:中国四川省成都市金牛区永陵路
socre:0.44761625
----------------分割线----------------------
address:中国四川省成都市金牛区青羊东二路
socre:0.39166427
----------------分割线----------------------
address:中国四川省成都市金牛区一环路西三段
socre:0.31202385