Searching database content and txt files with Lucene 3.6

We often need to search database content. Doing it with SQL LIKE '%...%' queries is painfully slow once the data volume is large and several tables are involved. Lucene 3.6 solves this annoying problem.

Lucene 3.6 will search the title, user_name, tag_name and descr columns of the photo table. An example makes this more concrete, and it handles Chinese text in the search as well
(you need the MySQL 5 JDBC driver jar and the Lucene 3.6 jars):

1. The database is MySQL 5; create a photo table in a database named test.
--
-- Table structure for table `photo`
--
CREATE TABLE IF NOT EXISTS `photo` (
  `photo_id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `title` varchar(11) DEFAULT NULL,
  `descr` text,
  `user_name` varchar(11) DEFAULT NULL,
  `tag_name` varchar(11) DEFAULT NULL,
  PRIMARY KEY (`photo_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=REDUNDANT AUTO_INCREMENT=5;

--
-- Sample data for table `photo`
--
INSERT INTO `photo` (`photo_id`, `title`, `descr`, `user_name`, `tag_name`) VALUES
(1, 'z美女j', 'h美女h', 't好人5', 'g美女femal'),
(2, 'l美女k', '5美女', '6美女', 'd美女female'),
(3, 'hagh', '4说的就是我的是f', '', NULL),
(4, 'hagh', '2说的就是我的是g', ' ', NULL);

2. There are four Java files:

Photo.java wraps access to the photo table; its contents are as follows:
package test;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;

public class Photo {
    private long photoId;
    private String title;
    private String description;
    private String userName;
    private String tag;

    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    public long getPhotoId() {
        return photoId;
    }

    public void setPhotoId(long photoId) {
        this.photoId = photoId;
    }

    public String getTag() {
        return tag;
    }

    public void setTag(String tag) {
        this.tag = tag;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getUserName() {
        return userName;
    }

    public void setUserName(String userName) {
        this.userName = userName;
    }

    public static Photo[] loadPhotos(Connection con) throws Exception {
        ArrayList<Photo> list = new ArrayList<Photo>();
        PreparedStatement pstm = null;
        ResultSet rs = null;
        String sql = "select photo_id,title,descr,user_name,tag_name from photo";
        try {
            pstm = con.prepareStatement(sql);
            rs = pstm.executeQuery();
            while (rs.next()) {
                Photo photo = new Photo();
                photo.setPhotoId(rs.getLong(1));
                photo.setTitle(rs.getString(2));
                photo.setDescription(rs.getString(3));
                photo.setUserName(rs.getString(4));
                photo.setTag(rs.getString(5));
                list.add(photo);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            if (rs != null) {
                rs.close();
            }
            if (pstm != null) {
                pstm.close();
            }
        }
        return (Photo[]) list.toArray(new Photo[list.size()]);
    }
}

IndexerFile.java turns the database contents into index files on disk; its contents are as follows:
package test;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class IndexerFile {
    public static int indexFile(String indexDir, Photo[] list) throws IOException {
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36,
                new StandardAnalyzer(Version.LUCENE_36));
        conf.setOpenMode(OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(FSDirectory.open(new File(indexDir)), conf);

        for (int i = 0; i < list.length; i++) {
            Document doc = new Document();
            doc.add(new Field("photoId", String.valueOf(list[i].getPhotoId()), Field.Store.YES, Field.Index.NO));
            if (list[i].getTitle() != null && list[i].getTitle().length() > 0)
                doc.add(new Field("title", list[i].getTitle(), Field.Store.YES, Field.Index.ANALYZED));
            if (list[i].getDescription() != null && list[i].getDescription().length() > 0)
                doc.add(new Field("description", list[i].getDescription(), Field.Store.YES, Field.Index.ANALYZED));
            if (list[i].getUserName() != null && list[i].getUserName().length() > 0)
                doc.add(new Field("userName", list[i].getUserName(), Field.Store.YES, Field.Index.ANALYZED));
            if (list[i].getTag() != null && list[i].getTag().length() > 0)
                doc.add(new Field("tag", list[i].getTag(), Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
        }

        int numIndexed = writer.maxDoc();
        writer.forceMerge(1);
        writer.close();
        return numIndexed;
    }
}
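
If you want to double-check what was written to the index before searching it, a throwaway dump like the one below can help. This is not part of the original example: the class name DumpIndex is made up, and the index path assumes the E:\\TestLucene directory used by TestDb further down.

package test;

import java.io.File;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

// Hypothetical helper, not in the original article: prints the stored fields of every
// document so you can verify what IndexerFile wrote (the index is freshly created, so
// there are no deleted documents to worry about).
public class DumpIndex {
    public static void main(String[] args) throws Exception {
        IndexReader reader = IndexReader.open(FSDirectory.open(new File("E:\\TestLucene")));
        for (int i = 0; i < reader.maxDoc(); i++) {
            Document doc = reader.document(i);
            System.out.println(doc.get("photoId") + " | " + doc.get("title")
                    + " | " + doc.get("description") + " | " + doc.get("userName")
                    + " | " + doc.get("tag"));
        }
        reader.close();
    }
}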

SearcherFile.java searches the on-disk index; its contents are as follows:
package test;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;

public class SearcherFile {
    public static void search(IndexSearcher searcher, String[] q) throws IOException, ParseException {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        String[] fields = {"title", "description", "tag", "userName"};
        Query query = MultiFieldQueryParser.parse(Version.LUCENE_36, q, fields, analyzer);
        TopDocs topDocs = searcher.search(query, 100); // 100 is the maximum number of hits to collect
        ScoreDoc[] hits = topDocs.scoreDocs;
        System.out.println("The index holds " + searcher.maxDoc() + " documents, " + hits.length + " hits");
        for (int i = 0; i < hits.length; i++) {
            int docId = hits[i].doc;
            Document document = searcher.doc(docId);
            System.out.println("photoId===" + document.get("photoId"));
        }
    }
}
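
One thing worth noting about SearcherFile: this overload of MultiFieldQueryParser.parse pairs each query string with the field at the same position, so q and fields must have the same length (TestDb below passes four keywords for the four fields). If you want a single keyword matched against all four fields instead, the single-query overload can be used. A minimal sketch under that assumption, with a hypothetical class name:

package test;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;

// Sketch only (not in the original article): one keyword is matched against all four
// fields, using the single-query overload of MultiFieldQueryParser.parse.
public class SearcherFileSingleKeyword {
    public static void search(IndexSearcher searcher, String keyword) throws Exception {
        String[] fields = {"title", "description", "tag", "userName"};
        Query query = MultiFieldQueryParser.parse(Version.LUCENE_36, keyword, fields,
                new StandardAnalyzer(Version.LUCENE_36));
        TopDocs topDocs = searcher.search(query, 100);
        System.out.println("hits: " + topDocs.totalHits);
    }
}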

TestDb.java is the main driver; its contents are as follows:
package test;

import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.Date;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;

public class TestDb {
    public final static String indexDir = "E:\\TestLucene";

    private static Connection getConnection() {
        Connection conn = null;
        String url = "jdbc:mysql://localhost:3306/test";
        String userName = "root";
        String password = "root";
        try {
            Class.forName("com.mysql.jdbc.Driver");
            conn = java.sql.DriverManager.getConnection(url, userName, password);
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("Error Trace in getConnection() : " + e.getMessage());
        }
        return conn;
    }

    public static void main(String[] args) throws IOException, ParseException, SQLException {
        index(); // build the index
        IndexSearcher searcher = null;
        try {
            IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexDir)));
            searcher = new IndexSearcher(reader);
            search(searcher); // search it
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (searcher != null)
                searcher.close();
        }
    }

    public static void search(IndexSearcher searcher) throws IOException, ParseException {
        // the search keywords, one per searched field (see SearcherFile)
        String[] q = {"美女1", "美女2", "好人3", "好人5"};
        long start = new Date().getTime();
        SearcherFile.search(searcher, q);
        long end = new Date().getTime();
        System.out.println("Time taken: " + (double) (end - start) / 1000 + " seconds");
    }

    public static void index() throws SQLException {
        Connection conn = null;
        try {
            conn = getConnection();
            Photo[] list = Photo.loadPhotos(conn);
            IndexerFile.indexFile(indexDir, list);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (conn != null) {
                conn.close();
            }
        }
    }
}

Part two: searching txt text files with Lucene 3.6

Create a folder E:\\TestLucene\\fileS to hold the files to be searched.
In that folder create three txt files: 1.txt, 2.txt and 3.txt.

1.txt contains:

老周
北京人民
2009年

Put some arbitrary text into 2.txt and 3.txt as well.

Also create a folder E:\\TestLucene\\fileIndex to hold the index files. If you would rather set up these folders and sample files from code, see the sketch below.
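
The following helper is not part of the original article; the class name PrepareFiles and the filler sentences for 2.txt and 3.txt are made up. It just creates the folders and sample files described above so the example can be run end to end, writing them in GBK because TestQueryFile reads them with that charset.

package test;

import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;

// Hypothetical setup helper: creates E:\TestLucene\fileS and E:\TestLucene\fileIndex
// and writes the three sample txt files in GBK.
public class PrepareFiles {
    public static void main(String[] args) throws Exception {
        File dataDir = new File("E:\\TestLucene\\fileS");
        File indexDir = new File("E:\\TestLucene\\fileIndex");
        dataDir.mkdirs();
        indexDir.mkdirs();
        writeGbk(new File(dataDir, "1.txt"), "老周\r\n北京人民\r\n2009年");
        writeGbk(new File(dataDir, "2.txt"), "随便写些内容");
        writeGbk(new File(dataDir, "3.txt"), "随便写些内容");
    }

    private static void writeGbk(File f, String content) throws Exception {
        Writer w = new OutputStreamWriter(new FileOutputStream(f), "GBK");
        try {
            w.write(content);
        } finally {
            w.close();
        }
    }
}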
The Java file TestQueryFile.java is as follows:



package test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class TestQueryFile {

    public static void main(String[] args) throws Exception {
        indexF();
        String queryString = "北京";
        Query query = null;
        IndexReader reader = IndexReader.open(FSDirectory.open(new File("E:\\TestLucene\\fileIndex")));
        IndexSearcher searcher = new IndexSearcher(reader);
        String fields = "body";
        try {
            // note: the QueryParser constructor takes a Version argument in Lucene 3.x
            QueryParser qp = new QueryParser(Version.LUCENE_36, fields,
                    new StandardAnalyzer(Version.LUCENE_36));
            query = qp.parse(queryString);
        } catch (ParseException e) {
            e.printStackTrace();
        }
        if (searcher != null) {
            TopDocs topDocs = searcher.search(query, 100); // 100 is the maximum number of hits to collect
            ScoreDoc[] hits = topDocs.scoreDocs;
            System.out.println("The index holds " + searcher.maxDoc() + " documents, " + hits.length + " hits");
        }
    }

    private static void indexF() throws Exception {
        File fileDir = new File("E:\\TestLucene\\fileS");
        File indexDir = new File("E:\\TestLucene\\fileIndex");

        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36,
                new StandardAnalyzer(Version.LUCENE_36));
        conf.setOpenMode(OpenMode.CREATE);
        IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexDir), conf);

        File[] textFiles = fileDir.listFiles();
        long startTime = new Date().getTime();

        // add a document to the index for every txt file
        for (int i = 0; i < textFiles.length; i++) {
            if (textFiles[i].isFile() && textFiles[i].getName().endsWith(".txt")) {
                System.out.println("File " + textFiles[i].getCanonicalPath() + " is being indexed....");
                String temp = FileReaderAll(textFiles[i].getCanonicalPath(), "GBK");
                System.out.println(temp);
                Document document = new Document();
                Field fieldPath = new Field("path", textFiles[i].getPath(),
                        Field.Store.YES, Field.Index.NO);
                Field fieldBody = new Field("body", temp, Field.Store.YES,
                        Field.Index.ANALYZED,
                        Field.TermVector.WITH_POSITIONS_OFFSETS);
                document.add(fieldPath);
                document.add(fieldBody);
                indexWriter.addDocument(document);
            }
        }
        // forceMerge(1) merges the index down to a single segment (the old optimize())
        indexWriter.forceMerge(1);
        indexWriter.close();

        // report how long indexing took
        long endTime = new Date().getTime();
        System.out.println("It took " + (endTime - startTime)
                + " milliseconds to add the documents in " + fileDir.getPath() + " to the index.");
    }

    private static String FileReaderAll(String fileName, String charset) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(fileName), charset));
        String line = null;
        StringBuffer temp = new StringBuffer("");
        while ((line = reader.readLine()) != null) {
            temp.append(line);
        }
        reader.close();
        return temp.toString();
    }
}
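
TestQueryFile only prints how many documents matched. Because each document also stores the file path, you can list the matching files as well; a small addition (not in the original) inside the "if (searcher != null)" block might look like this:

// Sketch: print the stored path of every matching file in addition to the hit count.
for (int i = 0; i < hits.length; i++) {
    Document document = searcher.doc(hits[i].doc);
    System.out.println("matched file: " + document.get("path"));
}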

Run it and you will see the results.
 
