Lucene 3.5 tokenization + search


I've been working with Lucene recently and wasn't happy with the examples I found online, so I put together my own; if you spot any problems, please point them out. The example uses lucene-core 3.5 together with the IK Analyzer (the org.wltea IKAnalyzer jar) for Chinese tokenization.

Building the index

import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;


public class AddIndex {
	// path is the directory where the index will be stored
	public void addIndex(String path) {
		try{
			Directory fsDir = FSDirectory.open(new File(path));
			// NOTE: this analyzer must be the same one used at query time below, otherwise searches will not find the data
			Analyzer analyzer = new IKAnalyzer();
			IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
			IndexWriter writer = new IndexWriter(fsDir, conf);
			System.out.println("~~~建立索引~~~");
			Document document1 = new Document();
			document1.add(new Field("id", "1", Field.Store.YES, Field.Index.NO));
			document1.add(new Field("address", "中国四川省成都市金牛区青羊东二路", Field.Store.YES, Field.Index.ANALYZED));
			
			Document document2 = new Document();
			document2.add(new Field("id", "2", Field.Store.YES, Field.Index.NO));
			document2.add(new Field("address", "中国四川省成都市金牛区永陵路", Field.Store.YES, Field.Index.ANALYZED));
			
			Document document3 = new Document();
			document3.add(new Field("id", "3", Field.Store.YES, Field.Index.NO));
			document3.add(new Field("address", "中国四川省成都市金牛区一环路西三段", Field.Store.YES, Field.Index.ANALYZED));
			
			Document document4 = new Document();
			document4.add(new Field("id", "4", Field.Store.YES, Field.Index.NO));
			document4.add(new Field("address", "中国四川省成都市金牛区营门口路", Field.Store.YES, Field.Index.ANALYZED));
			
			writer.addDocument(document1);  
			writer.addDocument(document2);  
			writer.addDocument(document3);  
			writer.addDocument(document4);  
			
			writer.forceMerge(1);
			writer.close();
			System.out.println("~~~索引建立完成~~~");
		}catch (IOException e) {
			System.out.println(e.toString());
		} 
	}
}
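
Since both indexing and querying depend on IKAnalyzer producing the same tokens, it can help to dump the token stream directly. The small class below is only a sketch (not part of the original example) that prints the tokens IKAnalyzer emits for one of the address strings, using the standard Lucene 3.5 TokenStream API; depending on your IK Analyzer version, new IKAnalyzer(true) enables a coarser "smart" segmentation mode, while the default fine-grained mode is what produces the many overlapping terms visible in the query output further below.

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class ShowTokens {
	public static void main(String[] args) throws IOException {
		// same analyzer as used for indexing and querying
		Analyzer analyzer = new IKAnalyzer();
		// feed one of the indexed address strings through the analyzer
		TokenStream ts = analyzer.tokenStream("address",
				new StringReader("中国四川省成都市金牛区营门口路"));
		CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
		ts.reset();
		// print every token the analyzer emits
		while (ts.incrementToken()) {
			System.out.print("[" + term.toString() + "] ");
		}
		ts.end();
		ts.close();
		System.out.println();
	}
}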

Querying the data

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class TestLucene {
	private static String path = "e:\\lucene\\addressStore";
	private static File dataFile = new File(path);
	private static String str = "中国四川省成都市金牛区营门口路";
	private static String field = "address";
	public static void main(String[] args) {
		new AddIndex().addIndex(path);
		try {
			new TestLucene().search(str);
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		}
	}

	public void search(String keyword) throws IOException, ParseException {
		Analyzer analyzer = new IKAnalyzer();
		IndexReader reader = IndexReader.open(FSDirectory.open(dataFile));
		IndexSearcher isearcher = new IndexSearcher(reader);
		// just pass the analyzer in here; Lucene tokenizes the query text automatically
		QueryParser parser = new QueryParser(Version.LUCENE_35, field, analyzer);
		Query query = parser.parse(keyword);
		System.out.println(query.toString());
		// run the search; the second argument (10) limits the result to the top 10 hits
		TopDocs topDocs = isearcher.search(query, 10);
		ScoreDoc[] hits = topDocs.scoreDocs;
		for (ScoreDoc scoreDoc : hits) {
			System.out.println("----------------分割线----------------------");
			Document hitDoc = isearcher.doc(scoreDoc.doc);
			float score = scoreDoc.score;
			String address = hitDoc.get("address");
			System.out.println("address:" + address + "\nscore:" + score);
			// print the scoring explanation below if needed
			//int docId = scoreDoc.doc;  
			//Explanation exp = isearcher.explain(query,docId); 
			//System.out.println(exp.toString());
		}

		isearcher.close();
		reader.close();
	}
}
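
TestLucene already imports InvalidTokenOffsetsException from the highlight package, which hints at highlighting the matched terms. The helper below is only a sketch of how that could be done with the Lucene 3.5 contrib highlighter (the lucene-highlighter jar is required); the class and method names are illustrative and not part of the original code.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class HighlightDemo {
	// wraps the query terms found in the address text with <b>...</b>;
	// returns the original text when no fragment could be built
	public static String highlight(Query query, String address)
			throws IOException, InvalidTokenOffsetsException {
		Analyzer analyzer = new IKAnalyzer();
		SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
		Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
		String fragment = highlighter.getBestFragment(analyzer, "address", address);
		return fragment != null ? fragment : address;
	}
}

Inside the for loop of search(), a call like System.out.println(HighlightDemo.highlight(query, address)); would then show which parts of each address actually matched.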

Query results

~~~ Building index ~~~
~~~ Index build complete ~~~
address:中国 address:国四 address:四川省 address:四川 address:四 address:省成 address:成都市 address:成都 address:都市 address:金牛区 address:金牛 address:营 address:门口 address:路
---------------- separator ----------------
address:中国四川省成都市金牛区营门口路
score:0.9141956
---------------- separator ----------------
address:中国四川省成都市金牛区永陵路
score:0.44761625
---------------- separator ----------------
address:中国四川省成都市金牛区青羊东二路
score:0.39166427
---------------- separator ----------------
address:中国四川省成都市金牛区一环路西三段
score:0.31202385

As expected, the address that contains every token of the query (including 营, 门口 and 路) receives the highest score, while the other three only match the shared prefix 中国四川省成都市金牛区.