方法主体
package cn.vincent; import java.io.File; import java.io.IOException; import java.util.logging.SimpleFormatter; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.cn.ChineseAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.NumberTools; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.Scorer; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.junit.Test; import org.wltea.analyzer.lucene.IKAnalyzer; import org.wltea.analyzer.lucene.IKSimilarity; public class LuceneSearch { File filePath = new File("E:\\Workspaces\\MyEclipse for Spring\\luceneDemo\\luceneIndex"); File file = new File( "E:\\Workspaces\\MyEclipse for Spring\\luceneDemo\\luceneDatasource\\IndexWriter addDocument's a javadoc .txt"); File file2 = new File("E:\\Workspaces\\MyEclipse for Spring\\luceneDemo\\luceneDatasource\\小笑话_总统的房间 Room .txt"); private Analyzer analyzer = new IKAnalyzer(); @Test public void createIndex() throws Exception { buildIndex(file2); } private void buildIndex(File myfile) throws Exception { IndexWriterConfig iwc = new 
IndexWriterConfig(Version.LUCENE_35, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter indexWriter = null; ; try { indexWriter = new IndexWriter(FSDirectory.open(filePath), iwc); Document doc = FileToDocument.fileToDocument(myfile); indexWriter.addDocument(doc); } finally { indexWriter.close(); // 必须关闭,否则Lucene无法保存索引 } } @Test public void luceneQuery() { String queryStr = "room"; String f = "content"; String[] fields = { "name", "content" }; try { // 构造查询器 QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer); // QueryParser queryParser = new QueryParser(Version.LUCENE_35, f, analyzer); Query query = queryParser.parse(queryStr); search(query); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { } } private void search(Query query) throws Exception { IndexReader r = IndexReader.open(FSDirectory.open(filePath)); IndexSearcher indexSearcher = new IndexSearcher(r); indexSearcher.setSimilarity(new IKSimilarity()); TopDocs docs = indexSearcher.search(query, 10); int totalHits = docs.totalHits; System.out.println("total : " + totalHits); //高亮 Formatter formatter = new SimpleHTMLFormatter("<font color='red'>","</font>"); Scorer scorer = new QueryScorer(query); Highlighter highlighter = new Highlighter(formatter, scorer); Fragmenter fragmenter = new SimpleFragmenter(50); highlighter.setTextFragmenter(fragmenter); //取出当前页数据 for (ScoreDoc doc : docs.scoreDocs) { Document document = indexSearcher.doc(doc.doc); MyFile myFile = new MyFile(); myFile.setName(document.get("name")); myFile.setSize(NumberTools.stringToLong(document.get("size"))); myFile.setPath(document.get("path")); //返回高亮后的结果,如果当前属性值没有出现关键字则出现null String hc = highlighter.getBestFragment(analyzer, "content", document.get("content")); if (hc == null) { String content = document.get("content"); int endIndex = Math.min(50, content.length()); hc = content.substring(0, endIndex);// 最多前50个字符 } myFile.setContent(hc); 
System.out.println(myFile); } // 使用完毕需要关闭! r.close(); indexSearcher.close(); } }
实体类
package cn.vincent; public class MyFile { private String name; private String content; private Long size; private String path; public String getName() { return name; } public void setName(String name) { this.name = name; } public String getContent() { return content; } public void setContent(String content) { this.content = content; } public Long getSize() { return size; } public void setSize(Long size) { this.size = size; } public String getPath() { return path; } public void setPath(String path) { this.path = path; } public String toString(){ System.out.println("name:" + this.name); System.out.println("content:" + this.content); System.out.println("size:" + this.size); System.out.println("path:" + this.path); return ""; } }
操作类:
package cn.vincent; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.NumberTools; @SuppressWarnings("deprecation") public class FileToDocument { public static Document fileToDocument(File path){ MyFile file = new MyFile(); file.setName(path.getName()); file.setContent(readFileContent(path)); file.setSize(path.length()); file.setPath(path.getPath()); Document doc = new Document(); doc.add(new Field("name",file.getName(),Store.YES,Index.ANALYZED)); doc.add(new Field("content",file.getContent(),Store.YES,Index.ANALYZED)); doc.add(new Field("size",NumberTools.longToString(file.getSize()),Store.YES,Index.NOT_ANALYZED)); doc.add(new Field("path",file.getPath(),Store.YES,Index.NOT_ANALYZED)); return doc; } private static String readFileContent(File path){ StringBuffer content = new StringBuffer(); try { BufferedReader bfReader = new BufferedReader(new InputStreamReader(new FileInputStream(path))); for(String line = null; (line = bfReader.readLine()) != null;){ content.append(line); } } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return content.toString(); } }
写的时候发现,用于封装 Long → String 转换的类 NumberTools 已经被标记为过时(deprecated)。代码中调用的 NumberTools.longToString / NumberTools.stringToLong 方法使用 36 进制来缩短 Long 的字符串长度,
找了半天似乎也没有找到替代的类,有谁知道吗?