Lucene 4.10 + Mysql 5.5 创建数据库表索引(Lucene 学习序列1)

Lucene 4.10 + Mysql 5.5 创建数据库表索引(Lucene 学习序列1)

          Lucene是Apache软件基金会Jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包。它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,以及部分文本分析引擎(英文与德文两种西方语言)。

     Code:

    

package com.qiuzhping.lucene;

/*
 * System Abbrev :
 * system Name  :
 * Component No  :
 * Component Name:
 * File name     :QueryDataFromDb.java
 * Author        :Peter.Qiu
 * Date          :2015年7月28日
 * Description   :  
 */

/* Updation record 1:
 * Updation date        :  2015年7月28日
 * Updator          :  Peter.Qiu
 * Trace No:  
 * Updation No:  
 * Updation Content:  
 */

import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Random;
import java.util.UUID;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Demo that copies the rows of the MySQL table {@code student} into an
 * on-disk Lucene 4.10 index and then runs an exact-match term query on it.
 *
 * <p>Each row becomes one Lucene document with three stored, un-analyzed
 * fields: {@code id}, {@code name} and {@code math}.
 *
 * <p>The class is not thread-safe: {@link #index()} and
 * {@link #getSearcher()} assign the shared {@code directory} field and
 * {@code insertFlag} is an unsynchronized static.
 *
 * @author  Peter.Qiu
 * @version  [Version NO, 2015-07-28]
 */
public class QueryDataFromDb {
	/** Filesystem location of the Lucene index. */
	private static final String INDEX_PATH = "C:/lucene/index02";
	/** Number of batches used both for inserting and for paging through the table. */
	private static final int BATCH_COUNT = 10;
	/** Number of rows per insert statement / per result page. */
	private static final int BATCH_SIZE = 100000;

	/** Index directory; opened by {@link #index()} or lazily by {@link #getSearcher()}. */
	private Directory directory = null;
	/** When {@code false}, {@link #insertData()} does nothing. */
	private static boolean insertFlag = true;

	/**
	 * Opens a new JDBC connection to the local {@code hpsdb} MySQL database.
	 * The caller owns the returned connection and must close it.
	 *
	 * @return a freshly opened connection
	 * @throws SQLException if the connection cannot be established
	 * @throws java.lang.ClassNotFoundException if the MySQL driver class is not on the classpath
	 */
	public static Connection getConnection() throws SQLException,
			java.lang.ClassNotFoundException {
		String url = "jdbc:mysql:///hpsdb";
		Class.forName("com.mysql.jdbc.Driver");
		String userName = "root";
		String password = "123456";
		return DriverManager.getConnection(url, userName, password);
	}

	/**
	 * Fills the {@code student} table with random test data: up to
	 * {@code BATCH_COUNT} multi-row inserts of {@code BATCH_SIZE} rows each,
	 * where {@code name} is a dash-less random UUID and {@code math} is a
	 * random integer in {@code [0, 100)}.
	 *
	 * <p>Does nothing when {@code insertFlag} is {@code false}.
	 *
	 * @throws ClassNotFoundException if the MySQL driver class is not on the classpath
	 * @throws SQLException if connecting or inserting fails
	 */
	public static void insertData() throws ClassNotFoundException, SQLException{
		if (!insertFlag) {
			// Nothing to insert, so do not open a connection at all.
			return;
		}
		Random random = new Random();
		// try-with-resources closes the statement and connection even when an insert fails.
		try (Connection conn = getConnection();
				Statement st = conn.createStatement()) {
			for (int j = 0; j < BATCH_COUNT && insertFlag; j++) {
				// NOTE(review): one statement carries BATCH_SIZE rows; presumably the
				// server's max_allowed_packet was raised to accept it -- confirm.
				StringBuilder sql = new StringBuilder("insert student (name,math) values");
				for (int i = 0; i < BATCH_SIZE; i++) {
					String uuid = UUID.randomUUID().toString().replace("-", "");
					sql.append("('").append(uuid).append("',")
							.append(random.nextInt(100)).append("),");
				}
				// Drop the trailing comma left by the last value tuple.
				sql.setLength(sql.length() - 1);
				st.execute(sql.toString());
			}
		}
	}

	/**
	 * Reads the {@code student} table page by page and writes every row into
	 * the Lucene index at {@code INDEX_PATH}, then prints the number of rows
	 * processed.
	 *
	 * <p>Documents are written with {@code updateDocument} keyed on the
	 * {@code id} field, so running this method again replaces existing
	 * documents instead of appending duplicates.
	 *
	 * @throws SQLException if reading from the database fails
	 * @throws ClassNotFoundException if the MySQL driver class is not on the classpath
	 * @throws IOException if the index cannot be opened, written or closed
	 */
	public void index() throws SQLException, ClassNotFoundException, IOException {
		directory = FSDirectory.open(new File(INDEX_PATH));
		Analyzer analyzer = new StandardAnalyzer();
		IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_4_10_4,
				analyzer);
		conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
		conf.setMaxBufferedDocs(100);

		long count = 0;
		// Resources are closed in reverse order (statement, connection, writer),
		// also when an exception is thrown part-way through.
		try (IndexWriter writer = new IndexWriter(directory, conf)) {
			// The flag is cleared before the call, so insertData() is a no-op here
			// and only rows already present in the table are indexed.
			insertFlag = false;
			insertData();
			try (Connection conn = getConnection();
					Statement st = conn.createStatement()) {
				for (int i = 0; i < BATCH_COUNT; i++) {
					// ORDER BY makes the pages stable; without it the row order of
					// LIMIT offset,count is unspecified and pages may overlap.
					String query = "select * from student order by id limit "
							+ i * BATCH_SIZE + "," + BATCH_SIZE;
					try (ResultSet result = st.executeQuery(query)) {
						while (result.next()) {
							String id = result.getString("id");
							Document document = new Document();
							document.add(new StringField("id", id, Field.Store.YES));
							document.add(new StringField("name", result
									.getString("name"), Field.Store.YES));
							document.add(new StringField("math", result
									.getString("math"), Field.Store.YES));
							// Replace any document already indexed under this id.
							writer.updateDocument(new Term("id", id), document);
							count++;
						}
					}
				}
			}
			System.out.println("Total record : " + count);
		}
	}

	/**
	 * Creates a searcher over the index. If {@link #index()} has not been
	 * called on this instance, the directory at {@code INDEX_PATH} is opened
	 * first.
	 *
	 * <p>The caller is responsible for closing the underlying reader via
	 * {@code searcher.getIndexReader().close()}.
	 *
	 * @return a new searcher backed by a newly opened reader
	 * @throws IOException if the directory or index cannot be opened
	 */
	public IndexSearcher getSearcher() throws IOException {
		if (directory == null) {
			directory = FSDirectory.open(new File(INDEX_PATH));
		}
		IndexReader reader = DirectoryReader.open(directory);
		return new IndexSearcher(reader);
	}

	/**
	 * Runs an exact-match query and prints the total hit count followed by
	 * the {@code id}, {@code name} and {@code math} of the top hits.
	 *
	 * @param field name of the indexed field to match
	 * @param name exact term value to look for in {@code field}
	 * @param num maximum number of hits to print
	 * @throws IOException if the index cannot be read
	 */
	public void searchByTerm(String field, String name, int num) throws IOException {
		IndexSearcher searcher = getSearcher();
		try {
			// TermQuery matches the term exactly; WildcardQuery would be the
			// choice for pattern matching.
			Query query = new TermQuery(new Term(field, name));
			TopDocs tds = searcher.search(query, num);
			System.out.println("count:" + tds.totalHits);
			for (ScoreDoc sd : tds.scoreDocs) {
				Document doc = searcher.doc(sd.doc);
				System.out.println("id:" + doc.get("id"));
				System.out.println("name:" + doc.get("name"));
				System.out.println("math:" + doc.get("math"));
			}
		} finally {
			// getSearcher() opened this reader for us; release its file handles.
			searcher.getIndexReader().close();
		}
	}

	/**
	 * Builds the index, then times a sample query for the first two students
	 * whose {@code math} score is exactly 90.
	 *
	 * @param args unused
	 * @throws ClassNotFoundException if the MySQL driver class is not on the classpath
	 * @throws SQLException if database access fails
	 * @throws IOException if index access fails
	 */
	public static void main(String[] args) throws ClassNotFoundException,
			SQLException, IOException {
		QueryDataFromDb indexUtil = new QueryDataFromDb();
		indexUtil.index();
		int i = 0 ;
		long start = System.currentTimeMillis();
		// Look up the first two records whose math score is 90.
		System.out.println("查找前90分前2名的信息");
		indexUtil.searchByTerm("math", "90", 2);
		System.out.println(i+" Spend time:"+(System.currentTimeMillis() - start) + " ms");
	}
}

测试的结果是:

Total record : 1000001
查找前90分前2名的信息
count:36212
id:298904
name:636ab6012e4b429ea54d176f28f5fa1c
math:90
id:299156
name:085af1feb39b42f0be1c6e9f3f814526
math:90
0 Spend time:805 ms


涉及到Lucene 核心包链接:http://mirrors.hust.edu.cn/apache/lucene/java/4.10.4/

代码片段涉及到:

lucene-analyzers-common-4.10.4.jar

lucene-core-4.10.4.jar

lucene-queryparser-4.10.4.jar

mysql-connector-java-5.1.35.jar


你可能感兴趣的:(Lucene)