/*
 * Required dependencies (Gradle):
 * compile group: 'org.apache.lucene', name: 'lucene-core', version: '8.3.0'
 * compile group: 'org.apache.lucene', name: 'lucene-queryparser', version: '8.3.0'
 * compile group: 'org.apache.lucene', name: 'lucene-highlighter', version: '8.3.0'
 * compile group: 'org.apache.lucene', name: 'lucene-backward-codecs', version: '8.3.0'
 * compile group: 'com.github.magese', name: 'ik-analyzer', version: '8.3.0'
 */
package lucene_test;
import java.nio.file.FileSystems;
import java.util.UUID;
import lombok.Cleanup;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
* @Title:EntitySearchDemo.java Created by zhangdapeng on 2019/11/27上午11:01
* @Description:基于Lucene 8.3
* @Author:zhangdapeng
* @Verson:1.0
*/
public class EntitySearchDemo {
/**
* 索引存储路径
*/
public static final String INDEX_PATH = "./knowledge_graph-index";
/**
* 创建索引
*/
public void creatIndex() throws Exception {
@Cleanup IndexWriter indexWriter = null;
Directory directory = FSDirectory.open(FileSystems.getDefault().getPath(INDEX_PATH));
//Analyzer analyzer = new StandardAnalyzer();
Analyzer analyzer = new IKAnalyzer(true);
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
indexWriter = new IndexWriter(directory, indexWriterConfig);
indexWriter.deleteAll();// 清除以前的index
for (int i = 0; i < 10; i++) {
Document document=null;
if(i==0) {
UUID uuid = UUID.randomUUID();
String id = uuid.toString().replace("-", "");
document = new Document();
document.add(new Field("id", id, TextField.TYPE_STORED));
document.add(new Field("title", "tile" + '"' + i + '"', TextField.TYPE_STORED));
document.add(new Field("content", "huawei牛掰啊", TextField.TYPE_STORED));
document.add(new Field("tag", "tag" + id, TextField.TYPE_STORED));
document.add(new Field("url", "url" + id, TextField.TYPE_STORED));
}else{
if(i==2){
UUID uuid = UUID.randomUUID();
String id = uuid.toString().replace("-", "");
document = new Document();
document.add(new Field("id", id, TextField.TYPE_STORED));
document.add(new Field("title", "tile" + '"' + i + '"', TextField.TYPE_STORED));
document.add(new Field("content", "上海的天空?", TextField.TYPE_STORED));
document.add(new Field("tag", "tag" + id, TextField.TYPE_STORED));
document.add(new Field("url", "url" + id, TextField.TYPE_STORED));
}else {
UUID uuid = UUID.randomUUID();
String id = uuid.toString().replace("-", "");
document = new Document();
document.add(new Field("id", id, TextField.TYPE_STORED));
document.add(new Field("title", "tile" + '"' + i + '"', TextField.TYPE_STORED));
document.add(new Field("content", "C" + '"' + i + '"', TextField.TYPE_STORED));
document.add(new Field("tag", "tag" + id, TextField.TYPE_STORED));
document.add(new Field("url", "url" + id, TextField.TYPE_STORED));
}
}
indexWriter.addDocument(document);
}
}
/**
* 搜索
*/
public void search(String keyWord) throws Exception {
@Cleanup DirectoryReader directoryReader = null;
// 1、创建Directory
@Cleanup Directory directory = FSDirectory.open(FileSystems.getDefault().getPath(INDEX_PATH));
// 2、创建IndexReader
directoryReader = DirectoryReader.open(directory);
// 3、根据IndexReader创建IndexSearch
IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
// 4、创建搜索的Query
// Analyzer analyzer = new StandardAnalyzer();
Analyzer analyzer = new IKAnalyzer(true); // 使用IK分词
// 简单的查询,创建Query表示搜索域为content包含keyWord的文档
//Query query = new QueryParser("content", analyzer).parse(keyWord);
String[] fields = {"title", "content", "tag"};
// MUST 表示and,MUST_NOT 表示not ,SHOULD表示or
BooleanClause.Occur[] clauses = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
// MultiFieldQueryParser表示多个域解析, 同时可以解析含空格的字符串,如果我们搜索"上海 中国"
Query multiFieldQuery = MultiFieldQueryParser.parse(keyWord, fields, clauses, analyzer);
// 5、根据searcher搜索并且返回TopDocs
TopDocs topDocs = indexSearcher.search(multiFieldQuery, 3); // 搜索前100条结果
System.out.println("共找到匹配处:" + topDocs.totalHits);
// 6、根据TopDocs获取ScoreDoc对象
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
System.out.println("共找到匹配文档数:" + scoreDocs.length);
QueryScorer scorer = new QueryScorer(multiFieldQuery, "content");
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("", "");
Highlighter highlighter = new Highlighter(htmlFormatter, scorer);
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
for (ScoreDoc scoreDoc : scoreDocs) {
// 7、根据searcher和ScoreDoc对象获取具体的Document对象
Document document = indexSearcher.doc(scoreDoc.doc);
String content = document.get("content");
System.out.println("--------------------"+keyWord+"---------------------");
System.out.println("文章标题:" + document.get("title"));
System.out.println("文章地址:" + document.get("url"));
System.out.println("文章内容:"+document.get("content"));
System.out.println(highlighter.getBestFragment(analyzer, "content", content));
System.out.println("");
// 8、根据Document对象获取需要的值
}
}
public static void main(String args[]) throws Exception {
EntitySearchDemo demo = new EntitySearchDemo();
demo.creatIndex();
demo.search("huawei");
demo.search("上海");
demo.search("上海 天空");
demo.search("上海 南方");
}
}