Lucene 牛刀小试

package luc.helloworld;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import jeasy.analysis.MMAnalyzer;
import luc.utils.FileToDocumentUtils;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

public class HelloWorld {
public static final String DATA_SOURCE_PATH = "D:/eclipseworkspace/lucene/lucene/datasrc/zh.txt";
public static final String LUCENE_INDEX_PATH = "D:/eclipseworkspace/lucene/lucene/luceneIndex";

// 分词器
// Analyzer analyzer = new StandardAnalyzer();

Analyzer analyzer=new MMAnalyzer();

/**
* 建立索引 IndexWriter 是用来操作(增、删、改)索引库的
*
*/
public void createIndex() throws CorruptIndexException, IOException {
// file-->doc:转化成Document对象

Document doc = FileToDocumentUtils.fileToDocument(DATA_SOURCE_PATH);
        //Document doc=File2DocumentUtils.file2Document(DATA_SOURCE_PATH);

//Document 的boost
// doc.setBoost(boost); 设置相关度
       
//建立索引
IndexWriter indexWriter = new IndexWriter(LUCENE_INDEX_PATH, analyzer, true,
MaxFieldLength.LIMITED);
indexWriter.addDocument(doc);
indexWriter.close();

}

/**
* 搜索 IndexSearcher 用来在索引库中查询的
*/
public void search() throws Exception{
String queryString = "hello";

// 1、把搜索的文件转换成Query对象
String fields[] = { "name", "content" };

//Field的boost:设置相关度, boosts默认为float类型的为1.0f
Map<String,Float> boosts=new HashMap<String,Float>();
boosts.put("name", 3f);
QueryParser queryParser=new MultiFieldQueryParser(fields, analyzer, boosts);

// QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer);
Query query = queryParser.parse(queryString);;
       
search(query);

}
public void search(Query query) throws Exception{

// 2、进行查询

IndexSearcher indexSearcher = new IndexSearcher(LUCENE_INDEX_PATH);

//-----------------------------------------------------------
//自定义排序
Filter filter = null;
//Sort sort=new Sort();
//sort.setSort(new SortField("name"));//默认为升序
//sort.setSort(new SortField(name,true));//降序
//TopDocs topDoc=indexSearcher.search(query, filter, 10000,sort);
//------------------------------------------------------------

TopDocs topDoc=indexSearcher.search(query, filter, 10000);

//高亮
//------------------------------------
Formatter formatter=new SimpleHTMLFormatter("<font color='red'>","</font>");
Scorer scorer=new QueryScorer(query);

Highlighter highligher =new Highlighter(formatter, scorer);
Fragmenter fragmenter=new SimpleFragmenter(100);
highligher.setTextFragmenter(fragmenter);
//---------------------------------

// 3、显示结果
System.out.println("总共有"+topDoc.totalHits+"条数据");
for(ScoreDoc scoreDoc:topDoc.scoreDocs){
int docNo=scoreDoc.doc;//文档内部编号
Document doc=indexSearcher.doc(docNo); //根据文档编号取出数据

//高亮-------------------------------------
String hl=highligher.getBestFragment(analyzer, "content", doc.get("content"));
if(hl!=null){
doc.getField("content").setValue(hl);
}
//-------------------------------------

FileToDocumentUtils.pringInfo(doc);//打印信息
//File2DocumentUtils.printDocumentInfo(doc);
}
}
   
public static void main(String[] args) throws Exception {
HelloWorld hello = new HelloWorld();
hello.createIndex();
//hello.search();
}
}

你可能感兴趣的:(apache,Lucene)