利用lucene给网站、系统增加搜索功能

有些网站的搜索功能是直接使用 like '%关键词%' 的方式在数据库中进行关键词查找，这种方式速度比较慢，而且会影响数据库服务器的性能。
其实我们可以先把数据从数据库查询出来，利用lucene建立索引。以后每次查找都从索引中查找，可以提高查询速度和减轻服务器负担。
本篇用到的技术：lucene3.0.2,IKAnalyzer3.2.5
search.properties主要是配置搜索的信息，内容：

# 指定查找sql，即需要建立索引的数据
sql=select iId,title,content,credate from archeive
update.field=iId
update.value=
# 搜索时的查找字段（逗号分隔）
search.condition=title,content
# 索引的保存地址
index.path=D:/project/Java/lucene/WebContent/WEB-INF/classes/Index
# 更新索引的时间间隔（毫秒）
period=10000

com.search.util.SearchConfig主要是读取search.properties的信息。

/**
 * Typed accessors for the settings stored in {@code search.properties}.
 *
 * <p>The properties are loaded once, when the object is constructed, through
 * {@code PropertiesUtil.getProperties}. Every getter falls back to a default
 * (empty string, or {@code "0"} for the period) when its key is missing.
 */
public class SearchConfig {

    /** Name of the properties file handed to {@code PropertiesUtil}. */
    private static final String SEARCH_FILE = "search.properties";

    private static final String KEY_SQL = "sql";
    private static final String KEY_CONDITION = "search.condition";
    private static final String KEY_INDEX = "index.path";
    private static final String KEY_PERIOD = "period";
    private static final String KEY_UPDATE_FIELD = "update.field";
    private static final String KEY_UPDATE_VALUE = "update.value";

    private Properties searchPro;

    /** Loads the configuration immediately. */
    public SearchConfig() {
        initSearch();
    }

    /** (Re)loads the properties from {@code search.properties}. */
    public void initSearch() {
        searchPro = PropertiesUtil.getProperties(SEARCH_FILE);
    }

    /**
     * @return the SQL statement selecting the rows to index, or {@code ""} if unset
     */
    public String getSql() {
        return searchPro.getProperty(KEY_SQL, "");
    }

    /**
     * @return the raw {@code search.condition} value (the field list used when
     *         searching), or {@code ""} if unset
     */
    public String getCondition() {
        return searchPro.getProperty(KEY_CONDITION, "");
    }

    /**
     * Returns the index directory, creating it when it does not exist yet.
     *
     * @return the directory named by {@code index.path}
     */
    public File getIndexPath() {
        File dir = new File(searchPro.getProperty(KEY_INDEX, ""));
        if (!dir.exists()) {
            // mkdirs(), not mkdir(): the configured path is usually several
            // levels deep and the parent directories may be missing as well.
            dir.mkdirs();
        }
        return dir;
    }

    /**
     * @return the value of {@code period}, or {@code 0} if unset
     * @throws NumberFormatException if the configured value is not a valid long
     */
    public long getPeriod() {
        // Parse as long (the declared return type) and tolerate surrounding
        // whitespace left over from hand-edited properties files.
        return Long.parseLong(searchPro.getProperty(KEY_PERIOD, "0").trim());
    }

    /**
     * @return the value of {@code update.field}, or {@code ""} if unset
     */
    public String getUpdateField() {
        return searchPro.getProperty(KEY_UPDATE_FIELD, "");
    }

    /**
     * @return the value of {@code update.value}, or {@code ""} if unset
     */
    public String getUpdateValue() {
        return searchPro.getProperty(KEY_UPDATE_VALUE, "");
    }

    /** Hands the current properties to {@code PropertiesUtil.saveProperties}. */
    public void save() {
        PropertiesUtil.saveProperties(searchPro, SEARCH_FILE);
    }
}

com.search.util.LuceneUtil代码介绍，主要是生成索引和搜索。

/**
 * Builds the Lucene index from the database and runs keyword searches on it.
 *
 * <p>All settings (index directory, SQL statement, searched fields) are read
 * from {@link SearchConfig} when the object is constructed.
 */
public class LuceneUtil {

    /** Maximum number of hits returned by {@link #search(String)}. */
    private static final int MAX_RESULTS = 10;

    private File indexpath = null;
    private String sql = null;
    private String condition = null;
    private String updateField = null;
    private String updateValue = null;
    private SearchConfig sc = null;

    /**
     * Reads the configuration. When {@code update.value} is set, the SQL is
     * narrowed to rows whose {@code update.field} is greater than that value.
     */
    public LuceneUtil() {
        sc = new SearchConfig();
        indexpath = sc.getIndexPath();
        sql = sc.getSql();
        condition = sc.getCondition();
        updateField = sc.getUpdateField();
        updateValue = sc.getUpdateValue();
        if (!updateValue.equals("")) {
            sql = sql + " where " + updateField + " > " + updateValue;
        }
    }

    /**
     * Runs the configured SQL and writes one document per row (columns 1-3 as
     * the fields {@code id}, {@code title}, {@code content}) into a freshly
     * created index.
     *
     * <p>Failures are printed and not propagated. If indexing fails midway the
     * writer is rolled back so that no half-written index is committed.
     */
    public void createIndex() {
        System.out.println("==========正在生成数据库索引。");
        // Fetch the rows to be indexed from the database.
        ResultSet rs = SQLHelper.getResultSet(sql);
        FSDirectory directory = null;
        IndexWriter writer = null;
        try {
            if (rs == null) {
                // Defensive: nothing to index, so leave the current index as is.
                return;
            }
            // Open the index directory once and share it with the writer.
            directory = FSDirectory.open(indexpath);
            // NOTE(review): create=true rebuilds the index on every run, even
            // when the SQL was narrowed by update.value — confirm intended.
            writer = new IndexWriter(directory, new IKAnalyzer(), true,
                    IndexWriter.MaxFieldLength.LIMITED);
            while (rs.next()) {
                Document doc = new Document();
                doc.add(new Field("id", String.valueOf(rs.getInt(1)),
                        Field.Store.YES, Field.Index.ANALYZED));
                // Field rejects null values; a NULL column must not abort the run.
                doc.add(new Field("title", nullToEmpty(rs.getString(2)),
                        Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("content", nullToEmpty(rs.getString(3)),
                        Field.Store.YES, Field.Index.ANALYZED));
                writer.addDocument(doc);
            }
            writer.close();
            writer = null; // committed; nothing left to roll back
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                // Reached only on failure: discard the partial work and
                // release the write lock.
                try {
                    writer.rollback();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (rs != null) {
                try {
                    rs.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches the configured fields for {@code keyword}.
     *
     * @param keyword the text to look for; {@code null} or empty yields no hits
     * @return up to {@value #MAX_RESULTS} best-scoring documents, never
     *         {@code null}; empty when nothing matches or an error occurs
     */
    public List<Document> search(String keyword) {
        List<Document> list = new ArrayList<Document>();
        // Check the keyword first so that no index resources are opened
        // (and then leaked) for an empty query.
        if (keyword == null || keyword.equals("")) {
            return list;
        }

        FSDirectory directory = null;
        IndexReader reader = null;
        IndexSearcher isearcher = null;
        try {
            directory = FSDirectory.open(indexpath);
            reader = IndexReader.open(directory, true);
            isearcher = new IndexSearcher(reader);
            isearcher.setSimilarity(new IKSimilarity());

            Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);

            // Fetch the highest-scoring hits.
            TopDocs topDocs = isearcher.search(query, MAX_RESULTS);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;

            // Iterate over the returned hits only: totalHits counts every
            // match in the index and can exceed scoreDocs.length.
            for (int i = 0; i < scoreDocs.length; i++) {
                list.add(isearcher.doc(scoreDocs[i].doc));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (isearcher != null) {
                try {
                    isearcher.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            // The reader was opened here, so it is closed here explicitly.
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        return list;
    }

    /** Maps {@code null} to the empty string. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }
}

com.search.listener.Indexlistener启动索引更新程序

/**
 * Servlet context listener that starts the index update job together with
 * the web application.
 */
public class Indexlistener implements ServletContextListener {

    /**
     * Creates an {@code IndexTask} when the servlet context has been
     * initialized.
     */
    public void contextInitialized(javax.servlet.ServletContextEvent arg0) {
        new IndexTask();
    }

    /** Does nothing when the servlet context is destroyed. */
    public void contextDestroyed(javax.servlet.ServletContextEvent arg0) {
        // intentionally empty
    }
}

com.search.listener.IndexTask定时更新索引

/**
 * Rebuilds the search index on a timer.
 *
 * <p>Constructing an instance starts the job immediately: the first run is
 * scheduled for "now" and repeated every {@code period} milliseconds as
 * configured in {@link SearchConfig}.
 */
public class IndexTask {

    private final Timer timer;

    /**
     * Starts the timer. When the configured period is zero or negative (for
     * example because the key is missing) the index is built once instead of
     * periodically, since {@link Timer} rejects non-positive periods.
     */
    public IndexTask() {
        // Daemon thread: the timer must not keep the JVM or the servlet
        // container alive after the application has been stopped.
        timer = new Timer(true);
        long period = new SearchConfig().getPeriod();
        if (period > 0) {
            timer.schedule(new Task(), new Date(), period);
        } else {
            timer.schedule(new Task(), new Date());
        }
    }

    /** Stops the timer; a run that is already in progress is not interrupted. */
    public void cancel() {
        timer.cancel();
    }

    /** One index rebuild. */
    static class Task extends TimerTask {

        @Override
        public void run() {
            try {
                LuceneUtil lu = new LuceneUtil();
                lu.createIndex();
            } catch (RuntimeException e) {
                // An exception escaping run() terminates the timer thread and
                // with it all future runs, so report it and carry on.
                e.printStackTrace();
            }
        }
    }
}

com.search.util.RedHighlighter关键词高亮显示

/**
 * Wraps occurrences of the search keyword in red {@code <font>} tags.
 */
public class RedHighlighter {

    /** Fragment size passed to the highlighter; also caps the fallback text. */
    private static final int FRAGMENT_SIZE = 100;

    /**
     * Returns the best-matching fragment of {@code word} with the query terms
     * highlighted.
     *
     * @param keyword the search text entered by the user
     * @param field   name of the index field that {@code word} belongs to
     * @param word    the stored field text to highlight
     * @return the highlighted fragment; the beginning of {@code word} (at most
     *         {@value #FRAGMENT_SIZE} characters, no markup) when it contains
     *         no query term; {@code ""} when {@code word} is {@code null} or
     *         highlighting fails
     */
    public static String getBestFragment(String keyword, String field, String word) {
        if (word == null) {
            return "";
        }
        SearchConfig sc = new SearchConfig();
        String condition = sc.getCondition();
        try {
            Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
                    "<font color='red'>", "</font>");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter,
                    new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));
            String fragment = highlighter.getBestFragment(new IKAnalyzer(),
                    field, word);
            if (fragment != null) {
                return fragment;
            }
            // The highlighter yields null when the text holds no query term
            // (e.g. the hit was in another field). Show the plain text rather
            // than letting callers print "null".
            return word.length() > FRAGMENT_SIZE
                    ? word.substring(0, FRAGMENT_SIZE)
                    : word;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return "";
    }
}

index.jsp搜索页面

<%-- Search page: reads the keyword from parameter "w", queries the Lucene
     index through LuceneUtil and lists the hits with the keyword highlighted. --%>
<%@ page language="java" contentType="text/html; charset=GBK"
    pageEncoding="GBK" %>
<%@ page import="com.search.util.LuceneUtil" %>
<%@ page import="java.util.*" %>
<%@ page import="org.apache.lucene.document.Document" %>
<%@ page import="com.search.util.RedHighlighter" %>
<%@ page import="java.net.URLEncoder" %>
<%!
    /**
     * Escapes the characters that are significant in HTML text and in quoted
     * attribute values, so user input can be echoed back safely.
     */
    private static String escapeHtml(String s) {
        StringBuilder out = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '&':  out.append("&amp;");  break;
                case '<':  out.append("&lt;");   break;
                case '>':  out.append("&gt;");   break;
                case '"':  out.append("&quot;"); break;
                case '\'': out.append("&#39;");  break;
                default:   out.append(c);
            }
        }
        return out.toString();
    }
%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
    "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=GBK">
<title>搜索</title>
<link rel="stylesheet" href="./style/style.css" type="text/css">
</head>
<%
    //request.setCharacterEncoding("GBK");
    String w = request.getParameter("w");
    int size = 0;
    long time = 0;
    List<Document> list = null;
    if (w != null && !w.equals("")) {
        // Re-decode the GET parameter as GBK.
        // NOTE(review): assumes the container decoded the query string as
        // ISO-8859-1 — confirm against the server's URI encoding setting.
        w = new String(w.getBytes("ISO8859-1"), "GBK");
    } else {
        w = "";
    }

    // Run the search and measure how long it takes.
    LuceneUtil lu = new LuceneUtil();
    Date start = new Date();
    list = lu.search(w);
    Date end = new Date();
    size = list.size();
    time = end.getTime() - start.getTime();
%>
<script type="text/javascript">
function submit()
{

}
</script>
<body>
<div class="seachInput" align="center">
<form method="get" action="index.jsp"><br>
<%-- The keyword is user input: escape it before echoing it into the attribute. --%>
<input type="text" class="txtSeach" id="w" name="w" value="<%= escapeHtml(w) %>"
    ><input type="submit"
    class="btnSearch" onclick="submit" value="找一下">     <br>
</form>
</div>
<div id="searchInfo"><span style="float: left; margin-left: 15px;"></span> 找到相关内容 <%= size %> 篇，

用时 <%= time %> 毫秒
</div>
<div id="main">
<div id="searchResult">
<div class="forflow">
<%
    if (list != null && list.size() > 0) {
        for (Document doc : list) {
            // NOTE(review): the highlighted fragments carry <font> markup and
            // are emitted unescaped; stored text with HTML in it is rendered as-is.
            String title = RedHighlighter.getBestFragment(w, "title", doc.get("title"));
            String content = RedHighlighter.getBestFragment(w, "content", doc.get("content"));
%>
            <div class="searchItem">
                <a href="#" class="searchItemTitle" target="_blank"><%= title %></a>
                <div class="searchCon">
                    <%= content %>
                </div>
            </div>
<%
        }
    }
%>
</div>
</div>
</div>
</body>
</html>

运行效果:

附件: 完整代码

利用lucene给网站、系统增加搜索功能

你可能感兴趣的:(利用lucene给网站、系统增加搜索功能)