这两天学习了一下Lucene,并仿照项目中经常使用的BaseDao的封装方式,对Lucene的一些简单操作(创建、更新、删除索引以及查询)做了一层应用上的封装,在项目中使用起来比较方便。下面介绍一下封装代码。
1.基础类
该类主要提供以下方法:
- createIndex——创建索引
- update——更新索引信息
- delete——删除索引信息
- searchAll——查询所有
- pageSearch——支持分页查询
import net.sf.json.JSONArray; import net.sf.json.JSONObject; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.*; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.wltea.analyzer.lucene.IKAnalyzer; import java.beans.PropertyDescriptor; import java.io.File; import java.io.IOException; import java.io.StringReader; import java.lang.reflect.Method; import java.lang.reflect.ParameterizedType; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * 全文检索基础类 * * @author: alex * @time: 14-4-1 下午2:24 * @version: 1.0 */ public abstract class BaseLucene<T extends Object> { protected static Analyzer analyzer = new IKAnalyzer(); //分词器 protected static File indexDir = null; static { indexDir = new File(LuceneContants.INDEX_FILE_PATH); //存放索引文件的目录 } /** * 获取文件目录 * @return 文件目录对象 */ private Directory getDirectory() { Directory directory = null; try { directory = FSDirectory.open(indexDir); } catch (IOException e) { e.printStackTrace(); } return directory; } /** * 获取索引创建器 * @param directory 文件目录 * @return 索引创建器对象 */ private IndexWriter getIndexWriter(Directory directory) { IndexWriter indexWriter = null; try { //索引创建器配置 IndexWriterConfig config = new 
IndexWriterConfig(Version.LUCENE_46, analyzer); //设置打开索引模式为创建或追加 config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); //索引创建器对象 indexWriter = new IndexWriter(directory, config); } catch (IOException e) { e.printStackTrace(); } return indexWriter; } /** * 关闭文件目录和索引创建器对象 * @param directory 文件目录 * @param indexWriter 索引创建器 */ private void closeDirectoryAndIndexWriter(Directory directory,IndexWriter indexWriter) { if (indexWriter != null) { try { indexWriter.close(); } catch (IOException e) { e.printStackTrace(); } } if (directory != null) { try { directory.close(); } catch (IOException e) { e.printStackTrace(); } } } /** * 创建索引 * @param entity 泛型实体 * @param keyWordFields 关键字索引字段名集合 */ public void createIndex(T entity,List<String> keyWordFields) { Directory directory = null; IndexWriter indexWriter = null; try { directory = getDirectory(); indexWriter = getIndexWriter(directory); //装配成document Document doc = getDoc(entity,keyWordFields); indexWriter.addDocument(doc); //写入索引文件 } catch (IOException e) { e.printStackTrace(); } finally { closeDirectoryAndIndexWriter(directory,indexWriter); } } /** * 装配document对象方法 * @param entity 泛型实体 * @param keyWordFields 关键字索引字段名集合 * @return Document对象 */ private Document getDoc(T entity,List<String> keyWordFields){ FieldType ftIndex = new FieldType(); // 索引类型 ftIndex.setIndexed(true); //设置索引为true ftIndex.setStored(true); //设置保存为true ftIndex.setTokenized(true); //设置分词为true Document doc = new Document(); Map<String,Integer> fieldsMap = getAllFields(entity, keyWordFields); for (Map.Entry<String,Integer> fieldMap : fieldsMap.entrySet()) { if(fieldMap.getValue() == LuceneContants.IS_KEY_WORD) { //索引字段 Field field = new Field(fieldMap.getKey(), getterMethod(entity,fieldMap.getKey()).toString(),ftIndex); doc.add(field); } else { //存储字段 StoredField storedField = new StoredField(fieldMap.getKey(), getterMethod(entity,fieldMap.getKey()).toString()); doc.add(storedField); } } return doc; } /** * 通过反射获取所有实体字段 * @param entity 泛型实体 * 
@param keyWordFields 关键字索引字段名集合 * @return 实体的所有字段 */ private Map<String,Integer> getAllFields(T entity,List<String> keyWordFields){ Map<String,Integer> fieldsMap = new HashMap<String,Integer>(); java.lang.reflect.Field[] fields = entity.getClass().getDeclaredFields(); for(int i = 0; i < fields.length; i++) { Integer keyWordFlag = LuceneContants.NO_KEY_WORD; String fieldName = fields[i].getName(); for(String keyWordField : keyWordFields) { if(keyWordField.equals(fieldName)) { keyWordFlag = LuceneContants.IS_KEY_WORD; } } fieldsMap.put(fieldName,keyWordFlag); } return fieldsMap; } /** * 通过反射获取字段值 * @param obj 实体 * @param filedName 字段名 * @return 字段值 */ private Object getterMethod(Object obj, String filedName) { Object object = null; try { Class clazz = obj.getClass(); PropertyDescriptor propertyDescriptor = new PropertyDescriptor(filedName, clazz); Method getMethod = propertyDescriptor.getReadMethod();//获得get方法 if (propertyDescriptor != null) { object = getMethod.invoke(obj); //执行get方法返回一个Object } } catch (Exception e) { e.printStackTrace(); } return object; } /** * 查询所以索引匹配到的数据 * @param queryWhere 查询条件 * @param defaultQueryField 默认查询的关键字字段 * @param keyWordFields 是关键字且需高亮显示的字段集合 * @return json格式的字符串 */ public String searchAll(String queryWhere,String defaultQueryField,List<String> keyWordFields) { if("".equals(queryWhere)){ queryWhere = "(*:*)"; keyWordFields = new ArrayList<String>(); } String result = null; IndexReader reader = null; IndexSearcher indexSearcher = null; try { reader = DirectoryReader.open(getDirectory()); indexSearcher = new IndexSearcher(reader); Query query = new QueryParser(Version.LUCENE_46, defaultQueryField, analyzer).parse(queryWhere); ScoreDoc[] hits = indexSearcher.search(query, LuceneContants.QUERY_MAX_COUNT).scoreDocs; result = resultToJson(indexSearcher,query,hits,keyWordFields,0,hits.length); } catch (Exception e) { e.printStackTrace(); } finally { if (reader != null){ try { reader.close(); } catch (IOException e) { e.printStackTrace(); 
} } } return result; } /** * 分页查询所以索引匹配到的数据 * @param queryWhere 查询条件 * @param defaultQueryField 默认查询的关键字字段 * @param keyWordFields 是关键字且需高亮显示的字段集合 * @param currPage 当前页 * @param pageSize 每页显示的条数 * @return json格式的字符串 */ public String pageSearch(String queryWhere,String defaultQueryField,List<String> keyWordFields,int currPage,int pageSize) { if("".equals(queryWhere)){ queryWhere = "(*:*)"; keyWordFields = new ArrayList<String>(); } String result = null; IndexReader reader = null; IndexSearcher indexSearcher = null; try { reader = DirectoryReader.open(getDirectory()); indexSearcher = new IndexSearcher(reader); Query query = new QueryParser(Version.LUCENE_46, defaultQueryField, analyzer).parse(queryWhere); ScoreDoc[] hits = indexSearcher.search(query, LuceneContants.QUERY_MAX_COUNT).scoreDocs; //分页计算 int start = (currPage - 1) * pageSize; int totalCount = hits.length; int end = Math.min(currPage * pageSize,totalCount); result = resultToJson(indexSearcher,query,hits,keyWordFields,start,end); } catch (Exception e) { e.printStackTrace(); } finally { if (reader != null){ try { reader.close(); } catch (IOException e) { e.printStackTrace(); } } } return result; } /** * 将查询到的数据转成json格式数据返回 * @param indexSearcher 查询索引实体 * @param query 查询实体 * @param hits 匹配的结果对象 * @param keyWordFields 是关键字且需高亮显示的字段集合 * @param start 开始索引 * @param end 结束索引 * @return json格式的字符串 */ private String resultToJson(IndexSearcher indexSearcher, Query query, ScoreDoc[] hits,List<String> keyWordFields,int start,int end){ JSONArray jsonArray = new JSONArray(); try { Class<T> entityClass = (Class<T>)((ParameterizedType) this.getClass().getGenericSuperclass()).getActualTypeArguments()[0]; T entity = entityClass.newInstance(); Map<String,Integer> fieldsMap = getAllFields(entity,keyWordFields); for (int i = start; i < end; i++) { JSONObject jsonObject = new JSONObject(); Document hitDoc = indexSearcher.doc(hits[i].doc); for(Map.Entry<String, Integer> field : fieldsMap.entrySet()){ if(field.getValue() == 
LuceneContants.IS_KEY_WORD) { //是关键字,且需要高亮显示 String value = toHighlighter(query, hitDoc, field.getKey()); jsonObject.put(field.getKey(),value); } else { jsonObject.put(field.getKey(),hitDoc.get(field.getKey())); } } jsonArray.add(jsonObject); } } catch (Exception e) { e.printStackTrace(); } return jsonArray.toString(); } /** * 高亮显示设置 * @param query 查询实体 * @param doc 文档对象 * @param field 字段 * @return 高亮设置 */ private String toHighlighter(Query query, Document doc, String field) { try { SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"blue\">", "</font>"); Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query)); TokenStream tokenStream1 = analyzer.tokenStream("text", new StringReader(doc.get(field))); String highlighterStr = highlighter.getBestFragment(tokenStream1, doc.get(field)); return highlighterStr == null ? doc.get(field) : highlighterStr; } catch (IOException e) { e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } return null; } /** * 修改索引 * @param entity 泛型实体 * @param keyWordFields 索引字段集合 * @return true成功,false失败 */ public boolean update(T entity,List<String> keyWordFields) { boolean flag = false; IndexWriter indexWriter = null; Directory directory = null; try{ directory = getDirectory(); indexWriter = getIndexWriter(directory); Document doc = getDoc(entity,keyWordFields); //根据Id进行更新索引 Term term = new Term("id", String.valueOf(getterMethod(entity,"id"))); indexWriter.updateDocument(term, doc); flag = true; } catch (Exception e){ flag = false; e.printStackTrace(); } finally { closeDirectoryAndIndexWriter(directory,indexWriter); } return flag; } /** * 删除索引 * @param id 删除的索引Id * @return true成功,false失败 */ public boolean delete(Integer id) { if(id == null || id.intValue() < 0) { throw new RuntimeException("参数不正确!"); } boolean flag = false; IndexWriter indexWriter = null; Directory directory = null; try{ //索引所放目录 directory = getDirectory(); indexWriter = 
getIndexWriter(directory); Term term = new Term("id", String.valueOf(id)); indexWriter.deleteDocuments(term); flag = true; }catch (Exception e) { flag = false; e.printStackTrace(); } finally { closeDirectoryAndIndexWriter(directory,indexWriter); } return flag; } }
常量类:
/**
 * Constants shared by the Lucene helper classes.
 *
 * <p>This class only holds constants, so it is final and cannot be instantiated.
 *
 * @author: alex
 * @time: 14-4-1 下午2:24
 * @version: 1.0
 */
public final class LuceneContants {

    /** Directory in which the index files are stored. */
    public static final String INDEX_FILE_PATH = "D://indexFile";

    /** Maximum number of hits requested from a single search. */
    public static final int QUERY_MAX_COUNT = 100000;

    /** Flag value: the field is a keyword (indexed) field. */
    public static final int IS_KEY_WORD = 1;

    /** Flag value: the field is not a keyword field (stored only). */
    public static final int NO_KEY_WORD = 0;

    private LuceneContants() {
        // Constants holder; not meant to be instantiated.
    }
}
2.service层应用
import java.util.ArrayList; import java.util.List; /** * service层 * * @author: alex * @time: 14-4-2 下午1:35 * @version: 1.0 */ public class PersonSearchService extends BaseLucene<Person> { private static List<String> keyWordFields = null; static { keyWordFields = new ArrayList<String>(); //给名字和介绍做全文检索 keyWordFields.add("id"); keyWordFields.add("name"); keyWordFields.add("introduce"); } /** * 保存用户信息 * @param person 用户 */ public void savePerson(Person person) { this.createIndex(person,keyWordFields); } /** * 更新用户信息 * @param person 用户实体 * @return true成功,false失败 */ public boolean updatePerson(Person person) { return this.update(person,keyWordFields); } /** * 删除用户信息 * @param id 用户ID * @return true成功,false失败 */ public boolean deletePerson(Integer id) { return this.delete(id); } /** * 根据条件查询所有 * @param queryWhere 查询条件 * @param defaultQueryField 默认检索字段 * @return json格式数据 */ public String queryAll(String queryWhere,String defaultQueryField) { return this.searchAll(queryWhere,defaultQueryField,keyWordFields); } /** * 根据条件分页查询 * @param queryWhere 查询条件 * @param defaultQueryField 默认检索字段 * @param currPage 当前页 * @param pageSize 每页条数 * @return json格式数据 */ public String pageQuery(String queryWhere,String defaultQueryField,int currPage,int pageSize) { return this.pageSearch(queryWhere,defaultQueryField,keyWordFields,currPage,pageSize); } }
实体类:
/**
 * Person entity used by the search example.
 *
 * @author: alex
 * @time: 14-4-2 下午1:32
 * @version: 1.0
 */
public class Person {

    /** Identifier. */
    private int id;

    /** @return the identifier */
    public int getId() {
        return this.id;
    }

    /** @param id the identifier */
    public void setId(int id) {
        this.id = id;
    }

    /** Name. */
    private String name;

    /** @return the name */
    public String getName() {
        return this.name;
    }

    /** @param name the name */
    public void setName(String name) {
        this.name = name;
    }

    /** Age. */
    private int age;

    /** @return the age */
    public int getAge() {
        return this.age;
    }

    /** @param age the age */
    public void setAge(int age) {
        this.age = age;
    }

    /** Introduction text. */
    private String introduce;

    /** @return the introduction text */
    public String getIntroduce() {
        return this.introduce;
    }

    /** @param introduce the introduction text */
    public void setIntroduce(String introduce) {
        this.introduce = introduce;
    }
}
3.测试类
import junit.framework.Assert; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; /** * 测试类 * * @author: alex * @time: 14-4-1 上午10:54 * @version: 1.0 */ public class PersonSearchServiceTest { static PersonSearchService personSearchService = null; @BeforeClass public static void setUpBeforeClass() throws Exception { personSearchService = new PersonSearchService(); } @AfterClass public static void tearDownAfterClass() throws Exception { } @Test public void testSavePerson() { Person person = new Person(); person.setId(1); person.setName("张三"); person.setAge(21); person.setIntroduce("张三是中国好演员!"); personSearchService.savePerson(person); person.setId(2); person.setName("张一三"); person.setAge(22); person.setIntroduce("张三是中国好替身!"); personSearchService.savePerson(person); person.setId(3); person.setName("张三疯"); person.setAge(23); person.setIntroduce("张三是中国好程序员!"); personSearchService.savePerson(person); person.setId(4); person.setName("啊张三"); person.setAge(24); person.setIntroduce("张三是中国好声音!"); personSearchService.savePerson(person); person.setId(5); person.setName("李三"); person.setAge(25); person.setIntroduce("啊啊啊啊啊啊啊啊啊啊啊啊啊!"); personSearchService.savePerson(person); } @Test public void testUpdatePerson() { Person person = new Person(); person.setId(4); person.setName("张三啊"); person.setAge(24); person.setIntroduce("把啊张三改成了张三啊!"); boolean result = personSearchService.updatePerson(person); Assert.assertTrue(result); } @Test public void testDeletePerson() { //删除了第二条 boolean result = personSearchService.deletePerson(2); Assert.assertTrue(result); } @Test public void testQuery() { String queryWhere = "name:张三 introduce:中国"; String defaultQueryField = "name"; String result = personSearchService.queryAll(queryWhere, defaultQueryField); System.out.println(result); } @Test public void testPageQuery() { String queryWhere = "name:张三 introduce:中国"; String defaultQueryField = "name"; String result = personSearchService.pageQuery(queryWhere, 
defaultQueryField,1,2); System.out.println(result); } }
4.说明
本示例没有对查询出来的结果进行排序,另外还有一些其他的地方有待完善。代码发出来,给大家参考一下。
示例中用到的jar依赖(Maven的pom.xml)如下:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor listing the libraries used by the Lucene wrapper example. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>lucene</groupId>
    <artifactId>lucene</artifactId>
    <version>1.0</version>
    <dependencies>
        <!-- Lucene modules, all at 4.6.0 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>4.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>4.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queries</artifactId>
            <version>4.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>4.6.0</version>
        </dependency>
        <!-- NOTE(review): no <scope> is declared, so this test helper is on the compile classpath; confirm whether test scope was intended. -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-test-framework</artifactId>
            <version>4.6.0</version>
        </dependency>
        <!-- NOTE(review): version 3.6.2 differs from the 4.6.0 used by the other Lucene artifacts; confirm it is needed and compatible. -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-smartcn</artifactId>
            <version>3.6.2</version>
        </dependency>
        <!-- IK analyzer -->
        <dependency>
            <groupId>IKAnalyzer</groupId>
            <artifactId>IKAnalyzer</artifactId>
            <version>2012FF_u1</version>
        </dependency>
        <!-- json-lib; presumably the commons-* and ezmorph entries below are its runtime dependencies (TODO confirm) -->
        <dependency>
            <groupId>net.sf.json-lib</groupId>
            <artifactId>json-lib</artifactId>
            <version>2.4</version>
        </dependency>
        <dependency>
            <groupId>commons-lang</groupId>
            <artifactId>commons-lang</artifactId>
            <version>2.5</version>
        </dependency>
        <dependency>
            <groupId>net.sf.ezmorph</groupId>
            <artifactId>ezmorph</artifactId>
            <version>1.0.6</version>
        </dependency>
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.1</version>
        </dependency>
        <dependency>
            <groupId>commons-beanutils</groupId>
            <artifactId>commons-beanutils</artifactId>
            <version>1.8.3</version>
        </dependency>
        <dependency>
            <groupId>commons-collections</groupId>
            <artifactId>commons-collections</artifactId>
            <version>3.2.1</version>
        </dependency>
    </dependencies>
</project>
至于运行结果没有贴出来,有兴趣的同学可以自己运行一下。