[置顶] Lucene和PanGu(盘古分词)

 目前最新版本盘古分词下载:http://download.csdn.net/detail/ysq5202121/4469105

先用盘古分词生成索引,然后对查询语句进行分词,再用分词结果进行查询。整体比较简单。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.China;
using Lucene.Net.Analysis.PanGu;
using System.Data;
using System.Data.SqlClient;
using System.IO;
using Lucene.Net.Analysis.Standard;
using PanGu;
using System.Text;
using System.Diagnostics;

    /// <summary>
    /// Loads the data source: every row returned by <c>select * from Fourm</c>
    /// is copied into an in-memory <see cref="DataTable"/>.
    /// </summary>
    /// <returns>The table filled by the adapter.</returns>
    public DataTable GetTable()
    {
        DataTable dt = new DataTable();
        using (SqlConnection con = new SqlConnection("Data Source=HQ-M-YSQ;Initial Catalog=SpiderDB;Integrated Security=True"))
        using (SqlDataAdapter da = new SqlDataAdapter("select * from Fourm", con))
        {
            // Both the adapter and the connection are released even when Fill throws;
            // the previous manual Dispose() call was skipped on failure.
            // No explicit Open(): the adapter is handed a closed connection and
            // manages it for the duration of Fill.
            da.Fill(dt);
        }
        return dt;
    }

    /// <summary>
    /// Builds the index: writes one Lucene document per row of <c>GetTable()</c>
    /// into the "DataIndex" folder next to the current page, analysed with PanGu.
    /// </summary>
    /// <remarks>
    /// Each document stores three tokenized fields: "PID" (column <c>id</c>),
    /// "PTitle" (column <c>title</c>) and "PContext" (column <c>context</c>).
    /// The writer is opened with <c>true</c> as its third argument
    /// (the create flag in Lucene.Net 2.x — an existing index is replaced).
    /// </remarks>
    public void CreateIndex()
    {
        // Path.Combine works whether or not MapPath returns a trailing separator.
        String Paths = System.IO.Path.Combine(Server.MapPath("./"), "DataIndex");
        PanGuAnalyzer analyzer = new PanGuAnalyzer();

        // Read the rows before opening the writer so a database failure
        // cannot leave a freshly opened (and unclosed) index behind.
        DataTable dt = GetTable();

        IndexWriter indexWritr = new IndexWriter(Paths, analyzer, true);
        try
        {
            foreach (DataRow item in dt.Rows)
            {
                Document doc = new Document();
                doc.Add(new Field("PID", item["id"].ToString(), Field.Store.YES, Field.Index.TOKENIZED));
                doc.Add(new Field("PTitle", item["title"].ToString(), Field.Store.YES, Field.Index.TOKENIZED));
                doc.Add(new Field("PContext", item["context"].ToString(), Field.Store.YES, Field.Index.TOKENIZED));
                indexWritr.AddDocument(doc);
            }
        }
        finally
        {
            // Always close the writer, even when adding a document fails.
            indexWritr.Close();
        }
    }

    /// <summary>
    /// Event handler that rebuilds the search index by calling <c>CreateIndex()</c>.
    /// </summary>
    /// <param name="sender">Event source (not used).</param>
    /// <param name="e">Event data (not used).</param>
    protected void Button1_Click1(object sender, EventArgs e)
    {
        this.CreateIndex();
    }

    /// <summary>
    /// Event handler that searches for the trimmed text of <c>txtContext</c>.
    /// </summary>
    /// <param name="sender">Event source (not used).</param>
    /// <param name="e">Event data (not used).</param>
    protected void Button2_Click(object sender, EventArgs e)
    {
        String trimmedInput = txtContext.Text.Trim();
        Search(trimmedInput);
    }
     /// <summary>
     /// Query: segments <paramref name="keyWord"/> with PanGu, searches the "PTitle" and
     /// "PContext" fields of the index under "DataIndex" with rank-weighted terms, and writes
     /// the highlighted hits followed by the elapsed search time to the response.
     /// </summary>
     /// <param name="keyWord">
     /// Raw query text. When it is null, blank, or yields no segmented words, nothing is
     /// searched and nothing is written.
     /// </param>
     public void Search(String keyWord)
    {
        // Nothing to search for; previously this path failed in Substring / the query parser.
        if (keyWord == null || keyWord.Trim().Length == 0)
        {
            return;
        }

        Stopwatch sw = new Stopwatch();
        StringBuilder builder = new StringBuilder();
        // Text handed to the highlighter: the original input followed by "word," per segment,
        // the same composition the earlier string concatenation produced.
        StringBuilder highlightWords = new StringBuilder(keyWord);
        int termCount = 0;

        PanGuTokenizer tokenizer = new PanGuTokenizer();
        ICollection<WordInfo> list = tokenizer.SegmentToWordInfos(keyWord);
        foreach (WordInfo word in list)
        {
            if (word == null)
            {
                continue;
            }
            highlightWords.Append(word).Append(",");

            // Terms must be whitespace-separated; without the separator "a^3" and "b^9"
            // run together as "a^3b^9" and the query is malformed.
            if (builder.Length > 0)
            {
                builder.Append(' ');
            }
            // Escape query-syntax characters in the word; boost is 3^Rank.
            builder.AppendFormat("{0}^{1}", QueryParser.Escape(word.Word), (int)Math.Pow(3, word.Rank));
            termCount++;
        }

        if (termCount == 0)
        {
            return;
        }

        // Drop the trailing comma added after the last word.
        highlightWords.Length -= 1;
        keyWord = highlightWords.ToString();

        PanGuAnalyzer analyzer = new PanGuAnalyzer(true);
        MultiFieldQueryParser Parser = new MultiFieldQueryParser(new string[] { "PTitle", "PContext" }, analyzer);
        Query query = Parser.Parse(builder.ToString());
        String Paths = System.IO.Path.Combine(Server.MapPath("./"), "DataIndex");

        StringBuilder sb = new StringBuilder();
        IndexSearcher search = new IndexSearcher(Paths);
        try
        {
            // Only the index lookup itself is timed.
            sw.Start();
            Hits hits = search.Search(query);
            sw.Stop();

            // Create the highlighter once: it takes the HTML wrapper and a PanGu Segment object.
            PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color='red'>", "</font>");
            PanGu.HighLight.Highlighter highter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());

            for (var i = 0; i < hits.Length(); i++)
            {
                Document doc = hits.Doc(i);
                String title = highter.GetBestFragment(keyWord, doc.Get("PTitle"));
                String context = highter.GetBestFragment(keyWord, doc.Get("PContext"));
                // NOTE(review): the stored "PContext" text is emitted unencoded after the
                // highlighted fragment — confirm the indexed content is trusted HTML.
                sb.Append("标题:" + title + "<br/>" + context + doc.Get("PContext") + "<br/>");
            }
        }
        finally
        {
            // Release the searcher even when searching or highlighting throws.
            search.Close();
        }

        Response.Write(sb + "用时:" + (sw.Elapsed.TotalMilliseconds).ToString());
    }

你可能感兴趣的:(String,object,Lucene,search,query,button)