package com.zjr.service.impl; import java.io.File; import java.io.IOException; import java.io.StringReader; import java.lang.reflect.InvocationTargetException; import java.util.ArrayList; import java.util.List; import org.apache.commons.beanutils.BeanUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.wltea.analyzer.lucene.IKAnalyzer; import org.wltea.analyzer.lucene.IKQueryParser; import org.wltea.analyzer.lucene.IKSimilarity; import com.zjr.model.User; public class UserIndexService { private final Log logger = LogFactory.getLog(UserIndexService.class); private final String dirPath = "d:/temp/user"; Analyzer analyzer = new IKAnalyzer(); Directory directory = null; IndexWriter writer = null; IndexSearcher indexSearcher = null; private void confirmDirs() { File indexFile = new File(dirPath); if (!indexFile.exists()) { indexFile.mkdirs(); } if (!indexFile.exists() || !indexFile.canWrite()) { if (logger.isDebugEnabled()) logger.error("索引文件目录创建失败或不可写入!"); } } public void init() { confirmDirs(); try { File f = new File(dirPath); directory = FSDirectory.open(f); } catch (Exception e) { if (logger.isDebugEnabled()) { logger.error("解除索引文件锁定失败!" + e.getCause()); } } } public void createIndex(List<User> userList) { init(); try { // 第一个参数是存放索引目录有FSDirectory(存储到磁盘上)和RAMDirectory(存储到内存中), // 第二个参数是使用的分词器, 第三个:true,建立全新的索引,false,建立增量索引,第四个是建立的索引的最大长度 writer = new IndexWriter(directory, analyzer, true,IndexWriter.MaxFieldLength.LIMITED); writer.setMergeFactor(500); writer.setMaxBufferedDocs(155); writer.setMaxFieldLength(Integer.MAX_VALUE); writeIndex(writer, userList); writer.optimize(); writer.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public List<User> search(String keyword) { File indexFile = new File(dirPath); if (!indexFile.exists()) { return null; } Directory dir; try { dir = FSDirectory.open(indexFile); indexSearcher = new IndexSearcher(dir); indexSearcher.setSimilarity(new IKSimilarity()); // 单字段查询,单条件查询 // Query query = IKQueryParser.parse("userInfo", keyword); // 多字段,单条件查询 String[] fields = new String[] { "userInfo", "parameter1" }; Query query = IKQueryParser.parseMultiField(fields, keyword); // 多字体,单条件,多BooleanClause.Occur[] flags , 查询条件的组合方式(Or/And) // BooleanClause.Occur[]数组,它表示多个条件之间的关系, // BooleanClause.Occur.MUST表示 and, // BooleanClause.Occur.MUST_NOT表示not, // BooleanClause.Occur.SHOULD表示or. // String[] fields =new String[]{"userInfo","parameter1"}; // BooleanClause.Occur[] flags=new // BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD}; // Query query = IKQueryParser.parseMultiField(fields, // keyword,flags); // //多Field,多条件查询分析 // String[] fields =new String[]{"userInfo","parameter1"}; // String[] queries = new String[]{keyword,keyword}; // Query query = IKQueryParser.parseMultiField(fields,queries); // 多Field,多条件,多Occur 查询 // String[] fields =new String[]{"userInfo","parameter1"}; // String[] queries = new String[]{keyword,keyword}; // BooleanClause.Occur[] flags=new // BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD}; // Query query = // IKQueryParser.parseMultiField(fields,queries,flags); // 搜索相似度最高的20条记录 TopDocs topDocs = indexSearcher.search(query, 20); ScoreDoc[] hits = topDocs.scoreDocs; return hitsToQuery(hits, query); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; } private List<User> hitsToQuery(ScoreDoc[] hits, Query query) { List<User> list = new ArrayList<User>(); try { for (int i = 0; i < hits.length; i++) { User u = new User(); Document doc = indexSearcher.doc(hits[i].doc); u.setUserId(Integer.parseInt(doc.get("userId"))); u.setUserName(doc.get("userName")); u.setUserAge(Integer.parseInt(doc.get("userAge"))); // 高亮设置 SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter( "<font color=\"red\">", "</font>"); Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query)); TokenStream tokenStream = analyzer.tokenStream("text", new StringReader(doc.get("userInfo"))); String userInfo = highlighter.getBestFragment(tokenStream, doc .get("userInfo")); if (userInfo != null) { u.setUserInfo(userInfo); } else { u.setUserInfo(doc.get("userInfo")); } SimpleHTMLFormatter simpleHtmlFormatter1 = new SimpleHTMLFormatter( "<font color=\"red\">", "</font>"); Highlighter highlighter1 = new Highlighter( simpleHtmlFormatter1, new QueryScorer(query)); TokenStream tokenStream1 = analyzer.tokenStream("text1", new StringReader(doc.get("parameter1"))); String p1 = highlighter1.getBestFragment(tokenStream1, doc .get("parameter1")); if (p1 != null) { u.setParameter1(p1); } else { u.setParameter1(doc.get("parameter1")); } u.setParameter2(doc.get("parameter2")); u.setParameter3(doc.get("parameter3")); u.setParameter4(doc.get("parameter4")); list.add(u); } indexSearcher.close(); return list; } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; } public void writeIndex(IndexWriter writer, List<User> userList) { try { for (User u : userList) { Document doc = getDoc(u); writer.addDocument(doc); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } private Document getDoc(User user) { System.out.println("用户ID 为" + user.getUserId() + " 索引被创建"); Document doc = new Document(); addField2Doc(doc, user, "userId", Store.YES, Index.NOT_ANALYZED); addField2Doc(doc, user, "userName", Store.YES, Index.NOT_ANALYZED);// Index.NOT_ANALYZED // 不分词,但建立索引 addField2Doc(doc, user, "userAge", Store.YES, Index.NOT_ANALYZED);// Index.ANALYZED // 分词并且建立索引 addField2Doc(doc, user, "userInfo", Store.YES, Index.ANALYZED); addField2Doc(doc, user, "parameter1", Store.YES, Index.ANALYZED); addField2Doc(doc, user, "parameter2", Store.YES, Index.ANALYZED); addField2Doc(doc, user, "parameter3", Store.YES, Index.ANALYZED); addField2Doc(doc, user, "parameter4", Store.YES, Index.ANALYZED); return doc; } private void addField2Doc(Document doc, Object bean, String name, Store s, Index i) { String value; try { value = BeanUtils.getProperty(bean, name); if (value != null) { doc.add(new Field(name, value, s, i, Field.TermVector.WITH_POSITIONS_OFFSETS)); } } catch (IllegalAccessException e) { logger.error("get bean property error", e); } catch (InvocationTargetException e) { logger.error("get bean property error", e); } catch (NoSuchMethodException e) { logger.error("get bean property error", e); } } /** * 没有排序,有高亮,有分页 * * @param pageNo * @param pageSize * @param keyword * @return */ public PageBean getPageQuery(int pageNo, int pageSize, String keyword) { List result = new ArrayList(); File indexFile = new File(dirPath); if (!indexFile.exists()) { return null; } Directory dir; try { dir = FSDirectory.open(indexFile); indexSearcher = new IndexSearcher(dir); indexSearcher.setSimilarity(new IKSimilarity()); String[] fields = new String[] { "userInfo", "parameter1" }; BooleanClause.Occur[] flags = new BooleanClause.Occur[] { BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD }; Query query = IKQueryParser.parseMultiField(fields, keyword, flags); TopScoreDocCollector topCollector = TopScoreDocCollector.create( indexSearcher.maxDoc(), true); indexSearcher.search(query, topCollector); // 查询当页的记录 ScoreDoc[] docs = topCollector.topDocs((pageNo - 1) * pageSize, pageSize).scoreDocs; // String[] highlightCol = {"userInfo", "parameter1"}; // 高亮设置 SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter( "<font color=\"red\">", "</font>"); Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query)); for (ScoreDoc scdoc : docs) { User u = new User(); Document doc = indexSearcher.doc(scdoc.doc); // // for (Fieldable fa : doc.getFields()) { // System.out.println(fa.name()); // String value = doc.get(fa.name()); // for (String col : highlightCol) { // if(fa.name().equals(col)) { // //设置高显内容 // TokenStream tokenStream = analyzer.tokenStream("text",new // StringReader(value)); // value = highlighter.getBestFragment(tokenStream, value); // } // } // // } u.setUserId(Integer.parseInt(doc.get("userId"))); u.setUserName(doc.get("userName")); u.setUserAge(Integer.parseInt(doc.get("userAge"))); TokenStream tokenStream = analyzer.tokenStream("text", new StringReader(doc.get("userInfo"))); String userInfo = highlighter.getBestFragment(tokenStream, doc .get("userInfo")); if (userInfo != null) { u.setUserInfo(userInfo); } else { u.setUserInfo(doc.get("userInfo")); } TokenStream tokenStream1 = analyzer.tokenStream("text1", new StringReader(doc.get("parameter1"))); String p1 = highlighter.getBestFragment(tokenStream1, doc .get("parameter1")); if (p1 != null) { u.setParameter1(p1); } else { u.setParameter1(doc.get("parameter1")); } u.setParameter2(doc.get("parameter2")); u.setParameter3(doc.get("parameter3")); u.setParameter4(doc.get("parameter4")); result.add(u); } PageBean pb = new PageBean(); pb.setCurrentPage(pageNo);// 当前页 pb.setPageSize(pageSize); pb.setAllRow(topCollector.getTotalHits());// hit中的记录数目 pb.setList(result); return pb; } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; } /** * 排序,有高亮,有分页 * * @param pageNo * @param pageSize * @param keyword * @return */ public PageBean getPageQuery2(int pageNo, int pageSize, String keyword) { List result = new ArrayList(); File indexFile = new File(dirPath); if (!indexFile.exists()) { return null; } Directory dir; try { dir = FSDirectory.open(indexFile); indexSearcher = new IndexSearcher(dir); indexSearcher.setSimilarity(new IKSimilarity()); String[] fields = new String[] { "userInfo", "parameter1" }; BooleanClause.Occur[] flags = new BooleanClause.Occur[] { BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD }; Query query = IKQueryParser.parseMultiField(fields, keyword, flags); // 多字段排序,设置在前面的会优先排序 SortField[] sortFields = new SortField[2]; SortField sortField = new SortField("userId", SortField.INT, false);//false升序,true降序 SortField FIELD_SEX = new SortField("userAge", SortField.INT, true); sortFields[0] = sortField; sortFields[1] = FIELD_SEX; Sort sort = new Sort(sortFields); TopDocs topDocs = indexSearcher.search(query, null, 50, sort); if (topDocs.totalHits != 0) { // for(ScoreDoc sd : topDocs.scoreDocs) { // // } // 高亮设置 SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>"); Highlighter highlighter = new Highlighter(simpleHtmlFormatter,new QueryScorer(query)); for (int i = (pageNo - 1) * pageSize; i < pageSize * pageNo; i++) { ScoreDoc scdoc = topDocs.scoreDocs[i]; User u = new User(); Document doc = indexSearcher.doc(scdoc.doc); u.setUserId(Integer.parseInt(doc.get("userId"))); u.setUserName(doc.get("userName")); u.setUserAge(Integer.parseInt(doc.get("userAge"))); TokenStream tokenStream = analyzer.tokenStream("text",new StringReader(doc.get("userInfo"))); String userInfo = highlighter.getBestFragment(tokenStream,doc.get("userInfo")); if (userInfo != null) { u.setUserInfo(userInfo); } else { u.setUserInfo(doc.get("userInfo")); } TokenStream tokenStream1 = analyzer.tokenStream("text1",new StringReader(doc.get("parameter1"))); String p1 = highlighter.getBestFragment(tokenStream1, doc.get("parameter1")); if (p1 != null) { u.setParameter1(p1); } else { u.setParameter1(doc.get("parameter1")); } u.setParameter2(doc.get("parameter2")); u.setParameter3(doc.get("parameter3")); u.setParameter4(doc.get("parameter4")); result.add(u); } PageBean pb = new PageBean(); pb.setCurrentPage(pageNo);// 当前页 pb.setPageSize(pageSize); pb.setAllRow(topDocs.totalHits);// hit中的记录数目 pb.setList(result); return pb; } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; } /** * 删除索引 * @param userId */ public void deleIndex(String userId){ try { File f = new File(dirPath); directory = FSDirectory.open(f); IndexReader reader = IndexReader.open(directory,false); Term term = new Term("userId", userId); reader.deleteDocuments(term); reader.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }