Lucene之——第一个Lucene3.0程序

转载请注明出处:http://blog.csdn.net/l1028386804/article/details/49647527

我们模拟一个对商品的操作流程,通过对商品信息的操作, 演示Lucene插入、查询功能, 了解Lucene存储原理。

创建数据操作实体类Goods:

/**
 * Plain data holder for one product; this is the entity that gets written to
 * and read back from the Lucene index in the examples.
 */
public class Goods {
	// Product id.
	private Integer gid;
	// Product name.
	private String gname;
	// Product price.
	private Double gprice;
	// Free-text remark about the product.
	private String remark;

	/** @return the product id, or {@code null} if it has not been set */
	public Integer getGid() {
		return gid;
	}

	/** @param gid the product id */
	public void setGid(Integer gid) {
		this.gid = gid;
	}

	/** @return the product name, or {@code null} if it has not been set */
	public String getGname() {
		return gname;
	}

	/** @param gname the product name */
	public void setGname(String gname) {
		this.gname = gname;
	}

	/** @return the product price, or {@code null} if it has not been set */
	public Double getGprice() {
		return gprice;
	}

	/** @param gprice the product price */
	public void setGprice(Double gprice) {
		this.gprice = gprice;
	}

	/** @return the remark, or {@code null} if it has not been set */
	public String getRemark() {
		return remark;
	}

	/** @param remark the free-text remark */
	public void setRemark(String remark) {
		this.remark = remark;
	}
}

创建索引代码:

/**
 * Converts a {@link Goods} object into a Lucene document and adds it to the
 * index stored under {@code ./indexData}.
 *
 * <p>The index writer and the directory are always closed before this method
 * returns. If both the indexing work and a close call fail, the first failure
 * is the one reported.
 *
 * @param goods the product to index; its gid and gprice must be non-null
 *              because {@code toString()} is called on them
 * @throws RuntimeException wrapping any exception raised while opening the
 *                          index, building or writing the document, or
 *                          closing the resources
 */
public void addIndex(Goods goods) {
	IndexWriter indexWriter = null;
	Directory directory = null;
	// First failure seen; thrown after all resources have been released.
	RuntimeException failure = null;
	try {
		// Analyzer used to tokenize the analyzed fields.
		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
		// File-system directory that holds the index.
		directory = FSDirectory.open(new File("./indexData"));
		// Writer used for insert / update / delete operations on the index.
		indexWriter = new IndexWriter(directory, analyzer,
				MaxFieldLength.LIMITED);
		// Convert the Goods object into a Document that Lucene understands.
		Document doc = new Document();
		// gid and gprice are stored and indexed as single, un-tokenized terms.
		doc.add(new Field("gid", goods.getGid().toString(), Store.YES,
				Index.NOT_ANALYZED));
		// gname is stored and tokenized, so it can be searched by keyword.
		doc.add(new Field("gname", goods.getGname(), Store.YES,
				Index.ANALYZED));
		doc.add(new Field("gprice", goods.getGprice().toString(),
				Store.YES, Index.NOT_ANALYZED));
		// The remark is stored only; it is not indexed and cannot be searched.
		doc.add(new Field("gremark", goods.getRemark(), Store.YES,
				Index.NO));
		// Add the document to the index and commit it.
		indexWriter.addDocument(doc);
		indexWriter.commit();
	} catch (Exception e) {
		failure = new RuntimeException(e);
	} finally {
		// The writer may be null if opening the directory or constructing
		// the writer failed, so guard before closing.
		if (indexWriter != null) {
			try {
				indexWriter.close();
			} catch (Exception e) {
				if (failure == null) {
					failure = new RuntimeException(e);
				}
			}
		}
		if (directory != null) {
			try {
				directory.close();
			} catch (Exception e) {
				if (failure == null) {
					failure = new RuntimeException(e);
				}
			}
		}
	}
	if (failure != null) {
		throw failure;
	}
}

测试用例如下:

// Builds one sample product and writes it to the index through addIndex.
@Test
public void testAddIndex(){
	final Goods sample = new Goods();

	// Populate every property that addIndex reads.
	sample.setGid(12345);
	sample.setGname("IBM笔记本电脑");
	sample.setGprice(34.56);
	sample.setRemark("IBM笔记本电脑, 小黑 永恒的经典....");

	helloWorld.addIndex(sample);
}
查询索引代码:
/**
 * Queries the index under {@code ./indexData} for products whose
 * {@code gname} field matches the given query string.
 *
 * <p>At most the top 10 hits are returned, even when more documents match.
 * The searcher and the directory are always closed before this method
 * returns. If both the search and a close call fail, the first failure is
 * the one reported.
 *
 * @param gname the query string, parsed by {@code QueryParser} against the
 *              {@code gname} field
 * @return the matching products rebuilt from the stored fields; empty if
 *         nothing matched
 * @throws RuntimeException wrapping any exception raised while opening the
 *                          index, parsing the query, searching, reading the
 *                          stored fields, or closing the resources
 */
public List<Goods> queryIndex(String gname) {
	List<Goods> goodsList = new ArrayList<Goods>();
	Directory directory = null;
	IndexSearcher indexSearcher = null;
	// First failure seen; thrown after all resources have been released.
	RuntimeException failure = null;
	try {
		// Open the index directory.
		directory = FSDirectory.open(new File("./indexData"));
		// Searcher used to run queries against the index.
		indexSearcher = new IndexSearcher(directory);
		// Turn the query string into a Query object.
		// NOTE(review): the analyzer here should be the same kind that was
		// used when the index was written - confirm what
		// Configuraction.getAnalyzer() returns.
		QueryParser queryParse = new QueryParser(Version.LUCENE_30, "gname",
				Configuraction.getAnalyzer());
		Query query = queryParse.parse(gname);
		// totalHits: total number of documents matching the query.
		// scoreDocs: the hits actually returned (at most 10 here), each
		// carrying the document number used to load it.
		TopDocs topDocs = indexSearcher.search(query, 10);
		System.out.println("查询到的总结果数为:" + topDocs.totalHits);
		// Iterate over the returned hits, not totalHits: totalHits can be
		// larger than scoreDocs.length when more than 10 documents match.
		for (ScoreDoc hit : topDocs.scoreDocs) {
			System.out.println("当前doc在索引库中的编号为:" + hit.doc);
			// Load the stored document by its number.
			Document doc = indexSearcher.doc(hit.doc);
			Goods goods = new Goods();
			goods.setGid(Integer.parseInt(doc.get("gid")));
			goods.setGname(doc.get("gname"));
			goods.setGprice(Double.parseDouble(doc.get("gprice")));
			goods.setRemark(doc.get("gremark"));
			goodsList.add(goods);
		}
	} catch (Exception e) {
		// Covers IOException as well as the checked exception thrown by
		// QueryParser.parse.
		failure = new RuntimeException(e);
	} finally {
		// The searcher may be null if opening the directory or constructing
		// the searcher failed, so guard before closing.
		if (indexSearcher != null) {
			try {
				indexSearcher.close();
			} catch (Exception e) {
				if (failure == null) {
					failure = new RuntimeException(e);
				}
			}
		}
		if (directory != null) {
			try {
				directory.close();
			} catch (Exception e) {
				if (failure == null) {
					failure = new RuntimeException(e);
				}
			}
		}
	}
	if (failure != null) {
		throw failure;
	}
	return goodsList;
}

测试用例如下:

// Searches the index for "ibm" and prints the fields of every hit.
@Test
public void testQueryIndex(){
	// Iterate as Object and cast, so this compiles whether queryIndex is
	// declared to return a raw List or a List<Goods>.
	for (Object hit : helloWorld.queryIndex("ibm")) {
		Goods goods = (Goods) hit;
		System.out.println(goods.getGid());
		System.out.println(goods.getGname());
		System.out.println(goods.getRemark());
		System.out.println(goods.getGprice());
	}
}

查询结果如下:

查询到的总结果数为:1
当前doc在索引库中的编号为:0
12345
IBM笔记本电脑
IBM笔记本电脑, 小黑 永恒的经典....
34.56

你可能感兴趣的:(Lucene,分词,爬虫,搜索,搜索优化)