用lucene3.5搜索数据库和txt文件内容

以前搜索数据库内容时,我们常用带 LIKE '%...%' 的 SQL 语句;如果数据量大而且是多表查询,速度实在让人难以忍受。改用 Lucene 3.5 先建立索引再搜索,就可以解决这个恼人的问题。
<wbr></wbr>
下面用 Lucene 3.5 搜索 photo 表的 title、user_name、tag_name、descr 四个字段的内容。
用一个例子来说明更直观;此例子可以搜索中文(注意:StandardAnalyzer 对中文是按单字切分,而不是按词切分)。
(需要 MySQL 5 的 JDBC 驱动包和 Lucene 3.5 的包):
<wbr></wbr>
1、数据库我用mysql5;建一个photo表;数据库名是test。
--
-- 表的结构 `photo`
--
-- One row per photo; the text columns below are the ones copied into the Lucene index.
CREATE TABLE IF NOT EXISTS `photo` (
  `photo_id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `title` varchar(11) DEFAULT NULL,
  `descr` text,
  `user_name` varchar(11) DEFAULT NULL,
  `tag_name` varchar(11) DEFAULT NULL,
  PRIMARY KEY (`photo_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=REDUNDANT AUTO_INCREMENT=5 ;
--
-- Seed data for table `photo`
--
-- Rows 3 and 4 leave tag_name NULL; row 3 has an empty user_name and
-- row 4 a user_name consisting of a single space.
INSERT INTO `photo` (`photo_id`, `title`, `descr`, `user_name`, `tag_name`) VALUES
(1, '美女', '美女', '好人5', '美女'),
(2, '美女', '美女', '美女', '美女'),
(3, 'hagh', '说的就是我的是', '', NULL),
(4, 'hagh', '说的就是我的是', ' ', NULL);
<wbr></wbr>
2、java文件有4个:
<wbr></wbr>
文件Photo.java是数据库的photo表的操作文件;内容如下:
package test;
import java.sql.Connection;
import java.util.ArrayList;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
/**
 * Plain data holder for one row of the {@code photo} table, plus a loader
 * that reads every row of that table through JDBC.
 */
public class Photo {
    private long photoId;
    private String title;
    private String description;
    private String userName;
    private String tag;

    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    public long getPhotoId() {
        return photoId;
    }

    public void setPhotoId(long photoId) {
        this.photoId = photoId;
    }

    public String getTag() {
        return tag;
    }

    public void setTag(String tag) {
        this.tag = tag;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getUserName() {
        return userName;
    }

    public void setUserName(String userName) {
        this.userName = userName;
    }

    /**
     * Reads all rows of the {@code photo} table.
     *
     * <p>A failure while preparing, executing or reading the query is
     * propagated to the caller instead of being printed and swallowed, so a
     * database error can no longer be mistaken for an empty or shorter table.
     *
     * @param con open JDBC connection; it is used but not closed here
     * @return one {@code Photo} per row, in the order the query returned them;
     *         an empty array when the query yields no rows
     * @throws Exception any {@link SQLException} raised by the JDBC calls
     */
    public static Photo[] loadPhotos(Connection con) throws Exception {
        ArrayList<Photo> list = new ArrayList<Photo>();
        String sql = "select photo_id,title,descr,user_name,tag_name from photo";

        PreparedStatement pstm = con.prepareStatement(sql);
        try {
            ResultSet rs = pstm.executeQuery();
            try {
                while (rs.next()) {
                    Photo photo = new Photo();
                    photo.setPhotoId(rs.getLong(1));
                    photo.setTitle(rs.getString(2));
                    photo.setDescription(rs.getString(3));
                    photo.setUserName(rs.getString(4));
                    photo.setTag(rs.getString(5));
                    list.add(photo);
                }
            } finally {
                rs.close();
            }
        } finally {
            // Nested finally: the statement is closed even when rs.close() throws.
            pstm.close();
        }
        return list.toArray(new Photo[list.size()]);
    }
}

文件IndexerFile.java是把数据库的内容备份成索引文件到磁盘中去;
内容如下:
package test;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.document.Field;
public class IndexerFile {
<wbr>public static int indexFile(String indexDir,Photo[] list) throws IOException{<br><wbr> IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));<br><wbr><wbr><wbr><wbr> conf.setOpenMode(OpenMode.CREATE);<br><wbr><wbr><wbr><wbr> IndexWriter writer = new IndexWriter(FSDirectory.open(new File(indexDir)), conf);<br><wbr><br><wbr> for(int i=0;i&lt;list.length;i++){<br><wbr><wbr> Document doc=new Document();<br><wbr><wbr> doc.add(new Field("photoId", String.valueOf(list[i].getPhotoId()), Field.Store.YES, Field.Index.NO));<br><wbr><wbr> if(list[i].getTitle()!=null &amp;&amp; list[i].getTitle().length()&gt;0)<br><wbr><wbr><wbr> doc.add(new Field("title", list[i].getTitle(), Field.Store.YES, Field.Index.ANALYZED));<br><wbr><wbr> if(list[i].getDescription()!=null &amp;&amp; list[i].getDescription().length()&gt;0)<br><wbr><wbr><wbr> doc.add(new Field("description", list[i].getDescription(), Field.Store.YES, Field.Index.ANALYZED));<br><wbr><wbr> if(list[i].getUserName()!= null &amp;&amp; list[i].getUserName().length()&gt;0)<br><wbr><wbr> doc.add(new Field("userName", list[i].getUserName(), Field.Store.YES, Field.Index.ANALYZED));<br><wbr><wbr> if(list[i].getTag()!= null &amp;&amp; list[i].getTag().length()&gt;0)<br><wbr><wbr><wbr> doc.add(new Field("tag", list[i].getTag(), Field.Store.YES, Field.Index.ANALYZED));<br><wbr><wbr> writer.addDocument(doc);<br><wbr> }<br><wbr><br><wbr> int numIndexed = writer.maxDoc();<br><wbr> writer.forceMerge(1);<br><wbr> writer.close();<br><wbr> return numIndexed;<br><wbr>}<br> }<br></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr>
<wbr></wbr>
文件SearcherFile.java是搜索磁盘索引文件内容的;
内容如下:
package test;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
public class SearcherFile {
<wbr>public static void search(IndexSearcher searcher, String[] q) throws IOException, ParseException {<br><wbr> Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);<br><wbr> String[] fields = {"title","description","tag","userName"};<wbr><wbr><wbr><wbr><br><wbr><wbr><wbr><wbr><wbr><wbr><wbr> Query query = MultiFieldQueryParser.parse(Version.LUCENE_35, q, fields, analyzer);<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr> TopDocs topDocs = searcher.search(query, 100);//100是显示队列的Size<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> ScoreDoc[] hits = topDocs.scoreDocs;<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> System.out.println("共有" + searcher.maxDoc() + "条索引,命中" + hits.length + "条");<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> for (int i = 0; i &lt; hits.length; i++) {<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> int DocId = hits[i].doc;<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> Document document = searcher.doc(DocId);<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> System.out.println("photoId==="+document.get("photoId"));<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> }<br><wbr>}<br> }</wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr>
<wbr></wbr>
文件TestDb.java是操作的主文件;
内容如下:
package test;
import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.Date;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;
public class TestDb {
<wbr>public final static String indexDir ="E:\\TestLucene";<br><wbr>private static Connection getConnection() {<br><wbr> Connection conn = null;<br><wbr> String url = "jdbc:mysql://localhost:3306/test";<br><wbr> String userName = "root";<br><wbr> String password = "root";<br><wbr> try {<br><wbr><wbr> Class.forName("com.mysql.jdbc.Driver");<br><wbr><wbr> conn = java.sql.DriverManager<br><wbr><wbr><wbr><wbr> .getConnection(url, userName, password);<br><wbr> } catch (Exception e) {<br><wbr><wbr> e.printStackTrace();<br><wbr><wbr> System.out.println("Error Trace in getConnection() : "<br><wbr><wbr><wbr><wbr> + e.getMessage());<br><wbr> }<br><wbr> return conn;<br><wbr>}<br><wbr>public static void main(String[] args) throws IOException, ParseException, SQLException {<br><wbr> index();//做索引<br><wbr> IndexSearcher searcher=null;<br><wbr> try{<br><wbr><wbr> IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexDir)),false);<wbr><br><wbr><wbr><wbr><wbr><wbr> searcher = new IndexSearcher(reader);<br><wbr><wbr> search(searcher);//搜索<br><wbr> }catch(Exception e){<br><wbr><wbr> e.printStackTrace();<br><wbr> }finally{<br><wbr><wbr> if(searcher!=null)<br><wbr><wbr> searcher.close();<br><wbr> }<br><wbr>}<br><wbr>public static void search(IndexSearcher searcher) throws IOException, ParseException{<br><wbr> //以下是搜索的关键词<br><wbr> String[] q = {"美女1","美女2","好人3","好人5"};<br><wbr> long start=new Date().getTime();<br><wbr> SearcherFile.search(searcher,q);<br><wbr> long end=new Date().getTime();<br><wbr> System.out.println("花费时间:"+(double)(end-start)/1000+"秒");<br><wbr>}<br><wbr>public static void index() throws SQLException{<br><wbr> Connection conn = null;<br><wbr> try {<br><wbr><wbr> conn = getConnection();<br><wbr><wbr> Photo[] list = Photo.loadPhotos(conn);<br><wbr><wbr> IndexerFile.indexFile(indexDir,list);<br><wbr> } catch (Exception e) {<br><wbr><wbr> e.printStackTrace();<br><wbr> } finally {<br><wbr><wbr> if (conn != null) {<br><wbr><wbr><wbr> 
conn.close();<br><wbr><wbr> }<br><wbr> }<br><wbr>}<br> }<br><wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr>
二、下面是lucene3.5搜索txt文本文件
<wbr></wbr>
建一个 E:\TestLucene\fileS 文件夹,放需要搜索的文件。
在该文件夹里面随便建三个txt文件:"1.txt"、"2.txt"和"3.txt"。

<wbr></wbr>

其中1.txt的内容如下:

老周
北京人民
2009年

2.txt和3.txt也随便写些。

<wbr></wbr>

再建一个 E:\TestLucene\fileIndex 文件夹,用来存放索引文件。

<wbr></wbr>

<wbr></wbr>

java文件TestQueryFile:内容如下

<wbr></wbr>

package test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.document.Field;
public class TestQueryFile {
<wbr><br><wbr><wbr><wbr><wbr><wbr> public static void main(String[] args) throws Exception {<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr> indexF();<wbr><br><wbr><wbr><wbr><wbr><wbr><wbr><wbr> String queryString = "北京";<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> Query query = null;<br><wbr><wbr><wbr><wbr><wbr><wbr> IndexReader reader = IndexReader.open(FSDirectory.open(new File("E:\\TestLucene\\fileIndex")),true);//read-only<br><wbr><wbr><wbr><wbr><wbr> IndexSearcher searcher = new IndexSearcher(reader);<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> String fields = "body";<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> try {<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> QueryParser qp = new QueryParser(Version.LUCENE_35, fields, new StandardAnalyzer(Version.LUCENE_35));//有变化的地方<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> query = qp.parse(queryString);<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> } catch (ParseException e) {<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> }<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> if (searcher != null) {<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> TopDocs topDocs = searcher.search(query, 100);//100是显示队列的Size<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> ScoreDoc[] hits = topDocs.scoreDocs;<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> System.out.println("共有" + searcher.maxDoc() + "条索引,命中" + hits.length + "条");<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> }<br><wbr><wbr><wbr><wbr><wbr><wbr> }<br><wbr><br><wbr><wbr><wbr><wbr><wbr> private static void indexF() throws Exception {<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> File fileDir = new File("E:\\TestLucene\\fileS");<br><wbr><wbr><br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr> 
File indexDir = new File("E:\\TestLucene\\fileIndex");<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr><br><wbr><wbr><wbr><wbr><wbr><wbr><wbr> IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));<br><wbr><wbr><wbr><wbr><wbr><wbr><wbr><wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr></wbr>

你可能感兴趣的:(lucene3.5)