Lucene.Net 全文检索与盘古分词

使用 Lucene.Net 配合盘古分词（PanGu）实现中文全文检索。

需要引用的程序集:

1.Lucene.Net.dll

2.PanGu.Lucene.Analyzer.dll 

3.PanGu.HighLight.dll

4.PanGu.dll

 1 using Lucene.Net.Search;

 2 using Lucene.Net.Store;

 3 using Lucene.Net.QueryParsers;

 4 using Lucene.Net.Documents;

 5 using Lucene.Net.Index;

 6 using Lucene.Net.Analysis.Standard;

 7 using Lucene.Net.Analysis;

 8 using Lucene.Net.Analysis.PanGu;

 9 using PanGu.HighLight;

10 using PanGu;

 

1.建立索引:

 // Location on disk where the index files are stored.
 static string path = @"G:\indextest";

 /// <summary>
 /// Builds the full-text index under <c>path</c> from the rows of the
 /// <c>ecs_goods</c> table, indexing the <c>goods_name</c> and
 /// <c>goods_brief</c> columns as analyzed, stored fields.
 /// </summary>
 /// <remarks>
 /// The writer is opened with <c>create = true</c>, so each call creates a new
 /// index rather than appending to an existing one. The analyzer is
 /// PanGuAnalyzer when <c>isPangu</c> is set and StandardAnalyzer otherwise.
 /// </remarks>
 static void CreateIndex()
 {
     // Directory that holds the index files.
     var directory = FSDirectory.Open(new DirectoryInfo(path));

     // Pick the analyzer used to tokenize the indexed text.
     Analyzer analyzer;
     if (isPangu)
     {
         analyzer = new PanGuAnalyzer(); // PanGu Chinese word segmentation
     }
     else
     {
         analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
     }

     IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
     try
     {
         // The using blocks guarantee the connection, command and reader are
         // released even when reading or indexing throws.
         using (MySqlConnection conn = new MySqlConnection(@"server=localhost;User Id=root;password=123456;Database=ecshop"))
         {
             conn.Open();
             using (MySqlCommand cmd = new MySqlCommand("select goods_name,goods_brief from ecs_goods", conn))
             using (MySqlDataReader reader = cmd.ExecuteReader())
             {
                 while (reader.Read())
                 {
                     // A Document is a collection of fields, comparable to a table row.
                     Document doc = new Document();
                     doc.Add(new Field("goods_name", reader["goods_name"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
                     doc.Add(new Field("goods_brief", reader["goods_brief"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
                     indexWriter.AddDocument(doc);
                 }
             }
         }

         // Optimize the index files once all rows have been added.
         indexWriter.Optimize();
     }
     finally
     {
         // Always close the writer so it does not stay open after a failure.
         indexWriter.Close();
     }
 }

 

2.搜索:

 /// <summary>
 /// Handles the page load: reads the posted search term <c>q</c>, searches the
 /// <c>goods_name</c> field of the index under <c>path</c>, and fills <c>gs</c>
 /// with highlighted results. The elapsed time is written to <c>tasktime</c>.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event arguments (unused).</param>
 protected void Page_Load(object sender, EventArgs e)
 {
     keyword = Request.Form["q"];
     if (string.IsNullOrEmpty(keyword))
     {
         return; // nothing to search for
     }

     var watch = Stopwatch.StartNew();

     // NOTE(review): CreateIndex may build the index with PanGuAnalyzer;
     // confirm that querying with StandardAnalyzer is intended.
     Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);

     // Read-only searcher over the index directory.
     IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(new DirectoryInfo(path)), true);
     try
     {
         // Parse the user's input into a query against the goods_name field.
         // NOTE(review): the raw form value is passed to the parser as-is;
         // consider how malformed query syntax should be handled.
         QueryParser queryP = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "goods_name", analyzer);
         Query query = queryP.Parse(keyword);
         var hits = searcher.Search(query, 200);

         // Highlighting uses the PanGu highlighter: matched terms are wrapped
         // in a bold red <span>, fragments are limited to 50 characters.
         PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter =
             new PanGu.HighLight.SimpleHTMLFormatter("<span style=\"font-weight:bold;color: red;\">", "</span>");
         PanGu.HighLight.Highlighter highlighter =
             new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());
         highlighter.FragmentSize = 50;

         // Iterate over the returned hits only. totalHits counts every match
         // and can exceed the 200 entries present in scoreDocs, so using it
         // as the bound would index past the end of the array.
         for (int i = 0; i < hits.scoreDocs.Length; i++)
         {
             Document doc = searcher.Doc(hits.scoreDocs[i].doc);
             goods g = new goods();
             g.goods_name = highlighter.GetBestFragment(keyword, doc.Get("goods_name"));
             g.goods_brief = highlighter.GetBestFragment(keyword, doc.Get("goods_brief"));
             gs.Add(g);
         }
     }
     finally
     {
         // Close the searcher on every request, including when the search fails.
         searcher.Close();
     }

     watch.Stop();

     tasktime = "搜索耗费时间:" + watch.ElapsedMilliseconds + "毫秒";
 }

3.多字段搜索:

// Multi-field search: parse the keyword against both the "Title" and
// "Content" fields and fetch up to 200 hits.
var searchFields = new string[] { "Title", "Content" };
var multiParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, searchFields, analyzer);

// Alternative form using the static overload:
// MultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_29, new string[] { keyword }, fields, analyzer);
Query multiFieldQuery = multiParser.Parse(keyword);

var hits1 = searcher.Search(multiFieldQuery, 200);

 

你可能感兴趣的:(Lucene)