using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using Lucene.Net.Index;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using System.Text;
using System.IO;
using Lucene.Net.Search;
using Lucene.Net.QueryParsers;

/// <summary>
/// Demo page: builds a one-document Lucene.Net index under "index/" on page load
/// and, on button click, runs a prefix query against it and shows the hits.
/// </summary>
public partial class Default2 : System.Web.UI.Page
{
    // Analyzer handed to the IndexWriter when the index is built.
    StandardAnalyzer analy = new StandardAnalyzer();

    /// <summary>
    /// Recreates the index under "index/" and stores a single document whose
    /// "key" field holds the untokenized value "1".
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        // In-memory alternative:
        //   Lucene.Net.Store.Directory dir = new Lucene.Net.Store.RAMDirectory();
        // File-system alternative:
        //   Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(Server.MapPath("index/"), false);
        // Passing true instead of false deletes the existing index files;
        // IndexReader.IndexExists() can be used to check whether an index is already there.

        // create == true: the index is rebuilt from scratch on every request that hits this handler.
        IndexWriter index = new IndexWriter(Server.MapPath("index/"), analy, true);
        try
        {
            Document doc = new Document();
            Fieldable field = new Field("key", "1", Field.Store.YES, Field.Index.UN_TOKENIZED);
            doc.Add(field);
            index.AddDocument(doc);
            index.Optimize();
        }
        finally
        {
            // Close even when indexing throws, so the writer is not left open.
            index.Close();
        }
    }

    /// <summary>
    /// Searches the index for documents whose "key" field starts with "1" and
    /// appends each hit's stored "key" value to TextBox1.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void Button1_Click(object sender, EventArgs e)
    {
        IndexSearcher search = new IndexSearcher(Server.MapPath("index/"));
        try
        {
            Query query = new PrefixQuery(new Term("key", "1"));
            BooleanQuery q = new BooleanQuery();
            q.Add(query, BooleanClause.Occur.MUST);

            Hits hit = search.Search(q);

            // Collect the values first and touch the control once.
            StringBuilder keys = new StringBuilder();
            int hitCount = hit.Length();
            for (int i = 0; i < hitCount; i++)
            {
                keys.Append(hit.Doc(i).Get("key"));
            }

            this.TextBox1.Text += keys.ToString();
        }
        finally
        {
            // Close even when the search throws.
            search.Close();
        }
    }
}
StandardAnalyzer是自带的分词器,是按单个字分词,如果要实现中文分词需要从网上下载ChineseAnalyzer中文分词器。
另附高亮显示分词的代码,注意,Lucene.net的高亮类不在Lucene.Net.Search下,而是一个单独的dll,在下载包里有,叫Lucene.Net.Highlighter.dll。
// Highlighter setup: the formatter is given an opening red <font> tag and its closing tag.
SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
Highlighter lit = new Highlighter(simpleHTMLFormatter, new Lucene.Net.Highlight.QueryScorer(q));

// NOTE(review): begin, end, hit, tStr, index, list and q are declared outside this excerpt.
// Presumably tStr is the highlighted title produced with `lit` and `index` is a per-hit
// result object - confirm against the full source.
for (int docIndex = begin; hit != null && docIndex < end; docIndex++)
{
    // Use tStr when it is non-empty; otherwise fall back to the stored "title" field.
    bool hasHighlightedTitle = tStr != null && !tStr.Equals("");
    index.Title = hasHighlightedTitle ? tStr : hit.Doc(docIndex).Get("title");
    list.Add(index);
}
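如果出现表单提交后页面加载两次,是因为这个:<input name="btn" type="submit" value="提交" onclick="sub()"/> —— type="submit" 的按钮本身就会提交表单,如果 onclick 调用的 sub() 里又提交了一次表单,页面就会被加载两次;把 type 改成 "button" 或者去掉脚本里的提交即可。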
KeywordAnalyzer不做切分,整段文本作为一个词,没有任何变化;
SimpleAnalyzer对中文效果太差;
StandardAnalyzer对中文单字拆分;
StopAnalyzer和SimpleAnalyzer差不多;
WhitespaceAnalyzer只按空格划分。
// Multi-field query: the static overload is Parse(string[] queries, string[] fields, Analyzer),
// so the query strings come first and the field names second; queries[i] is parsed against fields[i].
Query q = MultiFieldQueryParser.Parse(new string[] { "123", "asd" }, new string[] { "id", "name" }, new WhitespaceAnalyzer());
这是多字段组合查询的用法,但是第一个条件不能是通配符查询。
// Two sort keys, in priority order: first SortField.SCORE (how well the document matches
// the keywords), then the "time" field with the boolean flag set to true
// (presumably reverse order - confirm against the SortField documentation).
Sort sort = new Sort(new SortField[]
{
    new SortField("title", SortField.SCORE, false),
    new SortField("time", true)
});
这里的SortField.SCORE指的是关键字的匹配度。