1. Introduction to Lucene
Lucene's main purpose is full-text search: it can index and search unstructured data, such as Word documents.
2. Using Lucene
1) Add the dependencies to the pom file
```xml
<properties>
    <lucene.version>4.7.2</lucene.version>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties>
<dependencies>
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-core</artifactId>
        <version>${lucene.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-queryparser</artifactId>
        <version>${lucene.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-analyzers-common</artifactId>
        <version>${lucene.version}</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>RELEASE</version>
        <scope>compile</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-highlighter</artifactId>
        <version>4.7.2</version>
    </dependency>
</dependencies>
```
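The test class in step 4 also uses the IK analyzer (org.wltea.analyzer.lucene.IKAnalyzer), which none of the dependencies above provide. One commonly used Maven coordinate for the Lucene 4.x-compatible build is shown below; this coordinate is an assumption, so verify it against your repository before use:

```xml
<!-- Assumed coordinate for the IK analyzer used by the test class in step 4; verify before use. -->
<dependency>
    <groupId>com.janeluo</groupId>
    <artifactId>ikanalyzer</artifactId>
    <version>2012_u6</version>
</dependency>
```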
2) Create a Lucene utility class

```java
package com.show.lucence.lucence.utils;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class AnalyzerUtil {

    public static final Class<Integer> INTEGER_TYPE = Integer.class;
    public static final Class<Double> DOUBLE_TYPE = Double.class;
    public static final Class<Float> FLOAT_TYPE = Float.class;
    public static final Class<Long> LONG_TYPE = Long.class;
    public static final Class<BigDecimal> BIGDECIMAL_TYPE = BigDecimal.class;
    public static final Class<String> STRING_TYPE = String.class;

    public static final Map<Class<?>, Integer> CLASS_MAP = new HashMap<Class<?>, Integer>() {
        {
            put(INTEGER_TYPE, 0);
            put(DOUBLE_TYPE, 1);
            put(FLOAT_TYPE, 2);
            put(LONG_TYPE, 3);
            put(BIGDECIMAL_TYPE, 4);
            put(STRING_TYPE, 5);
        }
    };

    // write a new document into the index
    public static void writerDoc(Directory directory, Analyzer analyzer, Object entity) throws Exception {
        Document doc = newDocument(entity);
        // create the index writer
        IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_47, analyzer));
        indexWriter.addDocument(doc);
        indexWriter.close();
    }

    // update the document matching the given term
    public static void updateDoc(Directory directory, Analyzer analyzer, Object entity, String index0, String value0) throws Exception {
        IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_47, analyzer));
        Document doc = newDocument(entity);
        Term term = new Term(index0, value0);
        indexWriter.updateDocument(term, doc);
        indexWriter.close();
    }

    // delete documents matching the given term; delete everything when both arguments are null
    public static void deleteDoc(Directory directory, Analyzer analyzer, String index0, String value0) throws Exception {
        IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_47, analyzer));
        if (index0 == null && value0 == null) {
            indexWriter.deleteAll();
        } else {
            indexWriter.deleteDocuments(new Term(index0, value0));
        }
        indexWriter.close();
    }

    // search the index and map the hits back onto instances of clazz
    public static <T> List<T> searchDoc(Directory directory, Analyzer analyzer, Class<T> clazz, String searchInfo) throws Exception {
        Map<String, Class<?>> fieldType = new HashMap<String, Class<?>>();
        String[] fields = getFields(clazz, fieldType);
        DirectoryReader reader = DirectoryReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(reader);
        MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_47, fields, analyzer);
        List<T> objList = new ArrayList<T>();
        searchDoc(searchInfo, clazz, fieldType, fields, searcher, parser, objList);
        reader.close();
        directory.close();
        return objList;
    }

    private static <T> void searchDoc(String searchInfo, Class<T> clazz, Map<String, Class<?>> fieldType, String[] fields, IndexSearcher searcher, MultiFieldQueryParser parser, List<T> objList) throws Exception {
        Query query = parser.parse(searchInfo);
        TopDocs search = searcher.search(query, null, 10);
        // print the total hit count
        System.out.println(search.totalHits);
        ScoreDoc[] scoreDocs = search.scoreDocs;
        // read the hits back into entity objects
        for (ScoreDoc scoreDoc : scoreDocs) {
            Document document = searcher.doc(scoreDoc.doc);
            T obj = clazz.newInstance();
            for (String field : fields) {
                Class<?> aClass = fieldType.get(field);
                String fieldValue = document.get(field);
                Method declaredMethod = clazz.getDeclaredMethod("set" + toUpperCaseFirst(field), aClass);
                // String fields get highlighting applied
                if (CLASS_MAP.get(aClass) == 5) {
                    Highlighter highlighter = highlighter(query);
                    String bestFragment = highlighter.getBestFragment(parser.getAnalyzer(), field, fieldValue);
                    if (bestFragment != null) {
                        fieldValue = bestFragment;
                    }
                }
                transToReal(declaredMethod, obj, aClass, fieldValue);
            }
            objList.add(obj);
        }
    }

    // build a highlighter that wraps matched terms in HTML tags (any markup works here)
    private static Highlighter highlighter(Query query) {
        Formatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
        Fragmenter fragmenter = new SimpleFragmenter(100);
        highlighter.setTextFragmenter(fragmenter);
        return highlighter;
    }

    private static Document newDocument(Object entity) throws IllegalAccessException, InvocationTargetException, NoSuchMethodException {
        // create the document object and add every bean property as a field
        Document doc = new Document();
        Class<?> clazz = entity.getClass();
        java.lang.reflect.Field[] declaredFields = clazz.getDeclaredFields();
        for (java.lang.reflect.Field declaredField : declaredFields) {
            declaredField.setAccessible(true);
            String fieldName = declaredField.getName();
            Object invoke = clazz.getDeclaredMethod("get" + toUpperCaseFirst(fieldName)).invoke(entity);
            doc.add(new Field(fieldName, invoke.toString(), Field.Store.YES, Field.Index.ANALYZED));
        }
        return doc;
    }

    private static String[] getFields(Class<?> clazz, Map<String, Class<?>> fieldType) {
        java.lang.reflect.Field[] declaredFields = clazz.getDeclaredFields();
        String[] fields = new String[declaredFields.length];
        for (int i = 0; i < fields.length; i++) {
            java.lang.reflect.Field declaredField = declaredFields[i];
            declaredField.setAccessible(true);
            String fieldName = declaredField.getName();
            fields[i] = fieldName;
            fieldType.put(fieldName, declaredField.getType());
        }
        return fields;
    }

    // convert the stored string back to the setter's parameter type
    private static void transToReal(Method declaredMethod, Object obj, Class<?> aClass, String fieldValue) throws Exception {
        switch (CLASS_MAP.get(aClass)) {
            case 0:
                declaredMethod.invoke(obj, Integer.valueOf(fieldValue));
                break;
            case 1:
                declaredMethod.invoke(obj, Double.valueOf(fieldValue));
                break;
            case 2:
                declaredMethod.invoke(obj, Float.valueOf(fieldValue));
                break;
            case 3:
                declaredMethod.invoke(obj, Long.valueOf(fieldValue));
                break;
            case 4:
                declaredMethod.invoke(obj, new BigDecimal(fieldValue));
                break;
            default:
                declaredMethod.invoke(obj, fieldValue);
        }
    }

    private static String toUpperCaseFirst(String fieldName) {
        return fieldName.substring(0, 1).toUpperCase() + fieldName.substring(1);
    }
}
```
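As a quick sanity check, the utility can be exercised against an in-memory index. This is a minimal sketch, assuming the Goods entity defined in step 3 below; the QuickCheck class name is hypothetical, while RAMDirectory and StandardAnalyzer are stock Lucene 4.7 classes:

```java
import com.show.lucence.lucence.domain.Goods;
import com.show.lucence.lucence.utils.AnalyzerUtil;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import java.util.List;

public class QuickCheck {
    public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
        AnalyzerUtil.writerDoc(dir, analyzer, new Goods(1, "Pin", 11.0, "just a remark"));
        // note: searchDoc closes the directory, so it must run last
        List<Goods> hits = AnalyzerUtil.searchDoc(dir, analyzer, Goods.class, "remark");
        System.out.println(hits);
    }
}
```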
3) Define the test entity

```java
package com.show.lucence.lucence.domain;

import java.io.Serializable;

public class Goods implements Serializable {

    private Integer goodsId;
    private String goodsName;
    private Double goodsPrice;
    private String goodsRemark;

    public Goods() {
        super();
    }

    public Goods(Integer goodsId, String goodsName, Double goodsPrice, String goodsRemark) {
        super();
        this.goodsId = goodsId;         // goods ID
        this.goodsName = goodsName;     // goods name
        this.goodsPrice = goodsPrice;   // goods price
        this.goodsRemark = goodsRemark; // goods remark/description
    }

    public Integer getGoodsId() {
        return goodsId;
    }

    public void setGoodsId(Integer goodsId) {
        this.goodsId = goodsId;
    }

    public String getGoodsName() {
        return goodsName;
    }

    public void setGoodsName(String goodsName) {
        this.goodsName = goodsName;
    }

    public Double getGoodsPrice() {
        return goodsPrice;
    }

    public void setGoodsPrice(Double goodsPrice) {
        this.goodsPrice = goodsPrice;
    }

    public String getGoodsRemark() {
        return goodsRemark;
    }

    public void setGoodsRemark(String goodsRemark) {
        this.goodsRemark = goodsRemark;
    }

    @Override
    public String toString() {
        return "Goods{" +
                "goodsId=" + goodsId +
                ", goodsName='" + goodsName + '\'' +
                ", goodsPrice=" + goodsPrice +
                ", goodsRemark='" + goodsRemark + '\'' +
                '}';
    }
}
```
4) Create a test class (this example uses the IK analyzer)

```java
package com.show.lucence.lucence.analyzers;

import com.show.lucence.lucence.domain.Goods;
import com.show.lucence.lucence.utils.AnalyzerUtil;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.BeforeClass;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.List;

public class AnalyzerTest {

    private static Directory directory = null;
    private static Analyzer analyzer = null;
    private static Goods goods = null;

    @BeforeClass
    public static void init() throws IOException {
        // index directory
        directory = getDirectory();
        // analyzer
        analyzer = getAnalyzer();
        // custom entity under test
        goods = getGoods();
    }

    @Test
    public void writerDoc() throws Exception {
        AnalyzerUtil.writerDoc(directory, analyzer, goods);
    }

    @Test
    public void updateDoc() throws Exception {
        AnalyzerUtil.updateDoc(directory, analyzer, goods, "goodsId", String.valueOf(goods.getGoodsId()));
    }

    @Test
    public void deleteDoc() throws Exception {
        AnalyzerUtil.deleteDoc(directory, analyzer, "goodsId", String.valueOf(goods.getGoodsId()));
    }

    @Test
    public void searchDoc() throws Exception {
        List<Goods> goodsList = AnalyzerUtil.searchDoc(directory, analyzer, Goods.class, "力实现自");
        System.out.println(goodsList);
    }

    private static Goods getGoods() {
        String goodsRemark = "个性独特 努力实现自我";
        //goodsRemark = "我是超级王牌";
        return new Goods(1, "Pin", 11.0, goodsRemark);
    }

    private static Analyzer getAnalyzer() {
        // choose the analyzer; the version is usually set to the highest available
        Analyzer analyzer =
                //new StandardAnalyzer(Version.LUCENE_47);
                new IKAnalyzer(false);
        return analyzer;
    }

    // the index directory -- fixed for now
    private static Directory getDirectory() throws IOException {
        // build the index directory
        return FSDirectory.open(new File("D:\\pragramFIle\\lucence\\src\\main\\resources\\com\\show\\lucence"));
    }

    @Test
    public void testTemplate1() throws Exception {
        Class<? extends Goods> aClass = goods.getClass();
        Field goodsName = aClass.getDeclaredField("goodsName");
        goodsName.setAccessible(true);
        System.out.println(goodsName.getName());
    }
}
```
5) While working with Lucene you can use Luke (a Lucene index inspection tool) to browse the generated index. The Luke version must match the Lucene version: Lucene 4.7.2 is used here, and lukeall-4.7.1 can read its indexes. After downloading the jar, create a .bat file next to it containing `start javaw -jar lukeall-4.7.1.jar`. Once the index has been built, double-click the .bat file to launch the GUI and inspect the index.
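The launcher script is just the one line quoted above, saved next to the jar under any name, e.g. luke.bat (the file name is arbitrary):

```bat
start javaw -jar lukeall-4.7.1.jar
```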
6) Using IK extension dictionaries:
Create an IKAnalyzer.cfg.xml file under the project's resources path with the following content:
```xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer extension configuration</comment>
    <!-- extension dictionaries; multiple paths are separated by semicolons -->
    <entry key="ext_dict">/dicdata/use.dic.dic;/dicdata/googlepy.dic</entry>
    <!-- extension stop-word dictionary -->
    <entry key="ext_stopwords">/dicdata/ext_stopword.dic</entry>
</properties>
```
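Each dictionary referenced above is assumed to be a plain UTF-8 text file with one term per line; for example, an extension dictionary carrying the new words mentioned in the appendix below might contain:

```
白富美
高富帅
```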
Appendix:
####Lucene build steps:
##Creating the index:
#####1. Build the index directory
Purpose: specifies where the generated index files will be stored.
#####2. Choose an analyzer
Common analyzers (a token-inspection sketch follows this list):
######>>1) StandardAnalyzer: single-character segmentation, splitting Chinese text one character at a time, e.g. "我爱中国"
Result: 我, 爱, 中, 国
######>>2) CJKAnalyzer: bigram segmentation, splitting the text into overlapping two-character tokens, e.g. "我是中国人"
Result: 我是, 是中, 中国, 国人
######>>3) IK analyzer: supports a custom lexicon
Extension dictionary (adds new words): some words are not recognized by IK out of the box, e.g. "白富美", "高富帅".
Stop-word dictionary (suppresses words): some words should not be indexed, e.g. "哦", "啊", "的".
IK's built-in lexicon is limited; new terms can be added to it, and unwanted ones removed, via the configuration file shown in step 6 above.
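To compare these analyzers concretely, the following sketch prints the tokens an analyzer emits. The TokenDump class name is hypothetical; tokenStream and CharTermAttribute are stock Lucene 4.7 APIs, and the sample text is the example above:

```java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class TokenDump {
    public static void main(String[] args) throws Exception {
        // swap in new CJKAnalyzer(Version.LUCENE_47) or new IKAnalyzer(false) to compare
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
        TokenStream ts = analyzer.tokenStream("f", "我爱中国");
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.print(term.toString() + ","); // prints: 我,爱,中,国,
        }
        ts.end();
        ts.close();
    }
}
```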
#####3. Create the document object
Document object:
######>carries the content to be indexed
#####4. Create the index writer
######>uses the chosen analyzer to tokenize the documents and writes them into the index directory (see the sketch below).
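Put together, the four indexing steps map onto the Lucene 4.7 API roughly as follows. This is a minimal sketch: the IndexSteps class name and the index path are placeholders, while the field name and text reuse the earlier examples:

```java
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.io.File;

public class IndexSteps {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(new File("/path/to/index"));   // 1. index directory
        IKAnalyzer analyzer = new IKAnalyzer(false);                     // 2. analyzer
        Document doc = new Document();                                   // 3. document
        doc.add(new Field("goodsRemark", "个性独特 努力实现自我", Field.Store.YES, Field.Index.ANALYZED));
        IndexWriter writer = new IndexWriter(dir,                        // 4. writer
                new IndexWriterConfig(Version.LUCENE_47, analyzer));
        writer.addDocument(doc);
        writer.close();
    }
}
```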
##Searching the index:
#####1. Set the search directory
#####2. Create a searcher over the chosen directory
#####3. Pick the matching query parser
#####4. Run the query through the searcher (this returns the matching documents as well as the total hit count); see the sketch below.
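The four search steps as a minimal sketch. The SearchSteps class name and index path are placeholders, and the field name and query text reuse the earlier examples; note that QueryParser here parses a single field, whereas the utility class above uses MultiFieldQueryParser to search all fields:

```java
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.io.File;

public class SearchSteps {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(new File("/path/to/index"));          // 1. search directory
        IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir)); // 2. searcher
        QueryParser parser = new QueryParser(Version.LUCENE_47, "goodsRemark", // 3. query parser
                new IKAnalyzer(false));
        Query query = parser.parse("力实现自");
        TopDocs top = searcher.search(query, 10);                              // 4. execute the query
        System.out.println("total hits: " + top.totalHits);
        for (ScoreDoc sd : top.scoreDocs) {
            System.out.println(searcher.doc(sd.doc).get("goodsRemark"));
        }
    }
}
```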