1、工程目录
2、自定义评分
一、根据文件大小来评分,文件越大,权重越低
package util; import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.function.CustomScoreProvider; import org.apache.lucene.search.function.CustomScoreQuery; import org.apache.lucene.search.function.FieldScoreQuery; import org.apache.lucene.search.function.ValueSourceQuery; import org.apache.lucene.search.function.FieldScoreQuery.Type; public class MyScoreQuery1{ public void searchByScoreQuery() throws Exception{ IndexSearcher searcher = DocUtil.getSearcher(); Query query = new TermQuery(new Term("content","java")); //1、创建评分域,如果Type是String类型,那么是Type.BYTE //该域必须是数值型的,并且不能使用norms索引,以及每个文档中该域只能由一个语汇 //单元,通常可用Field.Index.not_analyzer_no_norms来进行创建索引 FieldScoreQuery fieldScoreQuery = new FieldScoreQuery("size",Type.INT); //2、根据评分域和原有的Query创建自定义的Query对象 //query是原有的query,fieldScoreQuery是专门做评分的query MyCustomScoreQuery customQuery = new MyCustomScoreQuery(query, fieldScoreQuery); TopDocs topdoc = searcher.search(customQuery, 100); DocUtil.printDocument(topdoc, searcher); searcher.close(); } @SuppressWarnings("serial") private class MyCustomScoreQuery extends CustomScoreQuery{ public MyCustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery) { super(subQuery, valSrcQuery); } /** * 这里的reader是针对段的,意思是如果索引包含的段不止一个,那么搜索期间会多次调用 * 这个方法,强调这点是重要的,因为它使你的评分逻辑能够有效使用段reader来对域缓存 * 中的值进行检索 */ @Override protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException { //默认情况实现的评分是通过原有的评分*传入进来的评分域所获取的评分来确定最终打分的 //为了根据不同的需求进行评分,需要自己进行评分的设定 /** * 自定评分的步骤 * 创建一个类继承于CustomScoreProvider * 覆盖customScore方法 */ // return super.getCustomScoreProvider(reader); return new MyCustomScoreProvider(reader); } } private class MyCustomScoreProvider extends CustomScoreProvider{ public MyCustomScoreProvider(IndexReader 
reader) { super(reader); } /** * subQueryScore表示默认文档的打分 * valSrcScore表示的评分域的打分 * 默认是subQueryScore*valSrcScore返回的 */ @Override public float customScore(int doc, float subQueryScore, float valSrcScore)throws IOException { System.out.println("Doc:"+doc); System.out.println("subQueryScore:"+subQueryScore); System.out.println("valSrcScore:"+valSrcScore); // return super.customScore(doc, subQueryScore, valSrcScore); return subQueryScore / valSrcScore; } } }
package util; import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.function.CustomScoreProvider; import org.apache.lucene.search.function.CustomScoreQuery; /** * 此类的功能是给特定的文件名加权,也就是加评分 * 也可以实现搜索书籍的时候把近一两年的出版的图书给增加权重 * @author user */ public class MyScoreQuery2 { public void searchByFileScoreQuery() throws Exception{ IndexSearcher searcher = DocUtil.getSearcher(); Query query = new TermQuery(new Term("content","java")); FilenameScoreQuery fieldScoreQuery = new FilenameScoreQuery(query); TopDocs topdoc = searcher.search(fieldScoreQuery, 100); DocUtil.printDocument(topdoc, searcher); searcher.close(); } @SuppressWarnings("serial") private class FilenameScoreQuery extends CustomScoreQuery{ public FilenameScoreQuery(Query subQuery) { super(subQuery); } @Override protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException { // return super.getCustomScoreProvider(reader); return new FilenameScoreProvider(reader); } } private class FilenameScoreProvider extends CustomScoreProvider{ String[] filenames = null; public FilenameScoreProvider(IndexReader reader) { super(reader); try { filenames = FieldCache.DEFAULT.getStrings(reader, "filename"); } catch (IOException e) {e.printStackTrace();} } //如何根据doc获取相应的field的值 /* * 在reader没有关闭之前,所有的数据会存储要一个域缓存中,可以通过域缓存获取很多有用 * 的信息filenames = FieldCache.DEFAULT.getStrings(reader, "filename");可以获取 * 所有的filename域的信息 */ @Override public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException { String fileName = filenames[doc]; System.out.println(doc+":"+fileName); // return super.customScore(doc, subQueryScore, valSrcScore); if("9.txt".equals(fileName) || "4.txt".equals(fileName)) { 
return subQueryScore*1.5f; } return subQueryScore/1.5f; } } }
package test;

import org.junit.Test;

import util.MyScoreQuery1;
import util.MyScoreQuery2;

/** JUnit drivers that run the two custom-score search examples. */
public class TestCustomScore {

    /** Runs the size-based scoring example. */
    @Test
    public void test01() throws Exception {
        new MyScoreQuery1().searchByScoreQuery();
    }

    /** Runs the file-name-based scoring example. */
    @Test
    public void test02() throws Exception {
        new MyScoreQuery2().searchByFileScoreQuery();
    }
}
package util; import java.io.File; import java.io.IOException; import java.text.SimpleDateFormat; import java.util.Date; import org.apache.lucene.document.Document; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; public class DocUtil { private static IndexReader reader; //得到indexSearch对象 public static IndexSearcher getSearcher(){ try { Directory directory = FSDirectory.open(new File("D:\\Workspaces\\customscore\\index")); reader = IndexReader.open(directory); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } IndexSearcher searcher = new IndexSearcher(reader); return searcher; } /** * 打印文档信息 * @param topdoc */ public static void printDocument(TopDocs topdoc,IndexSearcher searcher){ SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss"); for(ScoreDoc scoredoc : topdoc.scoreDocs){ try { Document doc = searcher.doc(scoredoc.doc); System.out.println(scoredoc.doc+":("+scoredoc.score+")" + "["+doc.get("filename")+"【"+doc.get("path")+"】--->"+ doc.get("size")+"-----"+sdf.format(new Date(Long.valueOf(doc.get("date"))))+"]"); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } }
package index; import java.io.File; import java.io.IOException; import org.apache.commons.io.FileUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.Version; import org.wltea.analyzer.lucene.IKAnalyzer; public class FileIndexUtils { private static Directory directory = null; private static Analyzer analyzer = new IKAnalyzer(); public static void main(String[] args) { index(true); } static{ try { directory = FSDirectory.open(new File("D:\\Workspaces\\customscore\\index")); } catch (IOException e) { e.printStackTrace(); } } public static Directory getDirectory() { return directory; } public static void index(boolean hasNew) { IndexWriter writer = null; try { writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, analyzer)); if(hasNew) { writer.deleteAll(); } File file = new File("D:\\Workspaces\\customscore\\resource"); Document doc = null; for(File f:file.listFiles()) { doc = new Document(); doc.add(new Field("content",FileUtils.readFileToString(f),Field.Store.YES,Field.Index.ANALYZED)); doc.add(new Field("filename",f.getName(),Field.Store.YES,Field.Index.ANALYZED)); doc.add(new Field("classid","5312",Field.Store.YES,Field.Index.ANALYZED)); doc.add(new Field("path",f.getAbsolutePath(),Field.Store.YES,Field.Index.ANALYZED)); doc.add(new NumericField("date",Field.Store.YES,true).setLongValue(f.lastModified())); doc.add(new NumericField("size",Field.Store.YES,true).setIntValue((int)(f.length()))); writer.addDocument(doc); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch 
(LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { try { if(writer!=null) writer.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } }