/// lucene.net:全文检索的工具包,不是应用,只是个类库,完成了全文检索的功能
/// 就是把数据拆分—存起来—查询时—拆分—匹配—结果
///
/// Analysis–分词器,负责把字符串拆分成原子,包含了标准分词,直接空格拆分
/// 项目中用的是盘古中文分词,
/// Document–数据结构,定义存储数据的格式
/// Index–索引的读写类
/// QueryParser–查询解析器,负责解析查询语句
/// Search—负责各种查询类,命令解析后得到就是查询类
/// Store—索引存储类,负责文件夹等等
/// Util—常见工具类库
///
/// lucene是全文搜索必备的,是大型系统必备的
///
/// Search:
/// TermQuery–单元查询 new Term(“title”,“张三”) title:张三
/// BooleanQuery—new Term(“title”,“张三”) and new Term(“title”,“李四”) title:张三 + title:李四
/// new Term(“title”,“张三”) or new Term(“title”,“李四”) title:张三 title:李四
/// WildcardQuery—通配符 new Term(“title”,“张?”) title:张?
/// new Term(“title”,“张*”) title:张*
/// PrefixQuery—前缀查询 以xx开头 title:张*
/// PhraseQuery—间隔距离 包含没有 包含提莫 而且二者距离不能超过5
/// title: “没有 提莫”~5
/// 没有蘑菇的提莫 没有蘑菇的蘑菇的蘑菇的提莫
/// FuzzyQuery—近似查询,ibhone----iphone title:ibhone~
/// RangeQuery—范围查询 [1,100] {1,100}
///
/// Lucene.Net一进一出,建立索引需要获取数据源,分词-保存到硬盘
/// 索引查找,
/// 自然会有些延迟,以前淘宝上架宝贝,第二天才能搜索的
/// 索引更新策略:1 数据更新—丢一个队列—一个processor通过队列完成更新
/// 2 每一周全部索引一遍
///
/// lucene索引存的是原子–docid1,docid2,docid3
/// 不store可以大量节约空间;查找时原子匹配多个id;
/// <summary>
/// Builds the test index from scratch: loads the commodity list and writes 10
/// documents per commodity (k = 0..9 varies content/price/time for testing).
/// The writer is opened with create=true, so any existing index is replaced.
/// </summary>
public static void InitIndex()
{
    List<Commodity> commodityList = GetList();// data source
    // FIX: the FSDirectory handle was never disposed; wrap it in using.
    using (FSDirectory directory = FSDirectory.Open(StaticConstant.TestIndexPath))// index folder
    using (IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED))// index writer, create mode
    {
        foreach (Commodity commodity in commodityList)
        {
            for (int k = 0; k < 10; k++)
            {
                Document doc = new Document();// one row
                // Field args: name, value, store the raw value?, analyze (tokenize)?
                // NOTE: id/url/imageurl use Store.NO — they are searchable but
                // doc.Get(...) on them returns null at query time.
                doc.Add(new Field("id", commodity.Id.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("title", commodity.Title, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field("url", commodity.Url, Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("imageurl", commodity.ImageUrl, Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("content", "this is lucene working,powerful tool " + k, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new NumericField("price", Field.Store.YES, true).SetDoubleValue((double)(commodity.Price + k)));
                // time is stored as a yyyyMMdd int so it can be range-filtered later
                doc.Add(new NumericField("time", Field.Store.YES, true).SetIntValue(int.Parse(DateTime.Now.ToString("yyyyMMdd")) + k));
                writer.AddDocument(doc);// write the document
            }
        }
        writer.Optimize();// merge segments
    }
}
基础的查询
// Basic query demo: exact TermQuery against the analyzed "title" field.
// FIX: the directory and searcher were never disposed; wrap both in using.
using (FSDirectory dir = FSDirectory.Open(StaticConstant.TestIndexPath))
using (IndexSearcher searcher = new IndexSearcher(dir))// searcher
{
    TermQuery query = new TermQuery(new Term("title", "图书馆"));// title must contain this term
    TopDocs docs = searcher.Search(query, null, 10000);// top 10000 hits
    foreach (ScoreDoc sd in docs.ScoreDocs)
    {
        Document doc = searcher.Doc(sd.Doc);
        Console.WriteLine("***************************************");
        // NOTE: "id" was indexed with Store.NO in InitIndex, so doc.Get("id") is null here.
        Console.WriteLine(string.Format("id={0}", doc.Get("id")));
        Console.WriteLine(string.Format("title={0}", doc.Get("title")));
        Console.WriteLine(string.Format("time={0}", doc.Get("time")));
        Console.WriteLine(string.Format("price={0}", doc.Get("price")));
        Console.WriteLine(string.Format("content={0}", doc.Get("content")));
    }
    Console.WriteLine("1一共命中了{0}个", docs.TotalHits);
}
关键字查询
// Keyword query demo: QueryParser analyzes the keyword with PanGu and builds
// an OR query over the resulting terms.
// FIX: the directory and searcher were never disposed; wrap both in using.
using (FSDirectory dir = FSDirectory.Open(StaticConstant.TestIndexPath))
using (IndexSearcher searcher = new IndexSearcher(dir))// searcher
{
    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", new PanGuAnalyzer());// parser
    string keyword = "高中政治 人 教 新课 标 选修 生活 中的 法律常识";
    Query query = parser.Parse(keyword);
    TopDocs docs = searcher.Search(query, null, 10000);// hits
    int i = 0;
    foreach (ScoreDoc sd in docs.ScoreDocs)
    {
        if (i++ < 1000)// print at most the first 1000 hits
        {
            Document doc = searcher.Doc(sd.Doc);
            Console.WriteLine("***************************************");
            Console.WriteLine(string.Format("id={0}", doc.Get("id")));
            Console.WriteLine(string.Format("title={0}", doc.Get("title")));
            Console.WriteLine(string.Format("time={0}", doc.Get("time")));
            Console.WriteLine(string.Format("price={0}", doc.Get("price")));
        }
    }
    Console.WriteLine($"一共命中{docs.TotalHits}");
}
多条件查询,除了关键字,时间,排序
// Combined query demo: keyword + numeric time filter + multi-field sort.
// FIX: the directory and searcher were never disposed; wrap both in using.
using (FSDirectory dir = FSDirectory.Open(StaticConstant.TestIndexPath))
using (IndexSearcher searcher = new IndexSearcher(dir))// searcher
{
    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", new PanGuAnalyzer());// parser
    string keyword = "高中政治 人 教 新课 标 选修 生活 中的 法律常识";
    Query query = parser.Parse(keyword);
    NumericRangeFilter<int> timeFilter = NumericRangeFilter.NewIntRange("time", 20190101, 20191231, true, true);// keep only 2019 dates
    // FIX: the original comments were inverted — SortField's third argument is
    // "reverse": false = ascending, true = descending.
    SortField sortPrice = new SortField("price", SortField.DOUBLE, false);// price ascending
    SortField sortTime = new SortField("time", SortField.INT, true);// time descending
    Sort sort = new Sort(sortTime, sortPrice);// sort priority: time first, then price
    TopDocs docs = searcher.Search(query, timeFilter, 10000, sort);// hits
    int i = 0;
    foreach (ScoreDoc sd in docs.ScoreDocs)
    {
        if (i++ < 1000)// print at most the first 1000 hits
        {
            Document doc = searcher.Doc(sd.Doc);
            Console.WriteLine("***************************************");
            Console.WriteLine(string.Format("id={0}", doc.Get("id")));
            Console.WriteLine(string.Format("title={0}", doc.Get("title")));
            Console.WriteLine(string.Format("time={0}", doc.Get("time")));
            Console.WriteLine(string.Format("price={0}", doc.Get("price")));
        }
    }
    Console.WriteLine("3一共命中了{0}个", docs.TotalHits);
}
/// 1 索引增删改查和分词处理
/// 2 京东数据多线程建立索引
/// 3 索引查询接口封装
///
/// Lucene–封装的lucene相关操作封装
///
/// LuceneAnalyze–负责完成查询关键字解析,尽可能拆分成原子数组
/// 如果只有一个词,prefix查询 苹果*
/// 如果是多个词,换成或者关系,
/// 都是为了更多的命中结果(贪婪搜索)
/// 做个关键词清理
///
/// LuceneBulid— BuildIndex–MergeIndex 多线程写不同子路径,完成后合并
/// 增加/删除索引 更新索引-只能先删除再更新
///
/// LuceneQuery—QueryIndexPage 支持关键字,支持范围过滤 支持排序
///
/// Processor—Lucene多线程建立索引
/// IndexBuilder 入口,启动多线程创建+完成后的Merge
/// IndexBuilderPerThread 每个线程是如何完成索引建立的
///
/// DataService–CommodityLucene对外提供的搜索封装
/// CommodityRepository-SqlHelper,完成数据库数据查询
批量索引建立
// Build the sharded index across all worker threads, then run a sample
// paged search: page 1, 30 rows, keyword "书", price in [50,2000], price desc.
IndexBuilder.Build();
int totalCount = 0;
string priceRange = "[50,2000]";
string priceSort = "price desc";
List<Commodity> result = CommodityLucene.QueryCommodity(1, 30, out totalCount, "书", null, priceRange, priceSort);
foreach (Commodity item in result)
{
    Console.WriteLine("title={0},price={1}", item.Title, item.Price);
}
/// <summary>
/// Builds the full-text index with one worker task per data table (30 tables),
/// each writing its own sub-directory, then merges the sub-indexes into the
/// root index directory once every worker has finished.
/// </summary>
public class IndexBuilder
{
    private static Logger logger = new Logger(typeof(IndexBuilder));
    // Sub-directory suffixes written by the workers; consumed by MergeIndex.
    private static List<string> PathSuffixList = new List<string>();
    private static CancellationTokenSource CTS = null;

    /// <summary>
    /// Entry point: starts 30 worker tasks (one table per task), schedules the
    /// merge step to run after all of them, and blocks until everything is done.
    /// </summary>
    public static void Build()
    {
        try
        {
            logger.Debug(string.Format("{0} BuildIndex开始", DateTime.Now));
            // FIX: PathSuffixList is static and used to accumulate across calls,
            // so a second Build() merged stale/duplicate sub-directories.
            PathSuffixList.Clear();
            List<Task> taskList = new List<Task>();
            TaskFactory taskFactory = new TaskFactory();
            CTS = new CancellationTokenSource(); // cooperative cancellation shared by all workers
            // 30 tables -> 30 threads, one table per thread, evenly distributed.
            for (int i = 1; i < 31; i++)
            {
                IndexBuilderPerThread thread = new IndexBuilderPerThread(i, i.ToString("000"), CTS);
                PathSuffixList.Add(i.ToString("000"));
                taskList.Add(taskFactory.StartNew(thread.Process));// one sub-index per task
            }
            // Merge runs only after every worker task has completed.
            taskList.Add(taskFactory.ContinueWhenAll(taskList.ToArray(), MergeIndex));
            Task.WaitAll(taskList.ToArray()); // surfaces worker exceptions here
            logger.Debug(string.Format("BuildIndex{0}", CTS.IsCancellationRequested ? "失败" : "成功"));
        }
        catch (Exception ex)
        {
            logger.Error("BuildIndex出现异常", ex);
        }
        finally
        {
            logger.Debug(string.Format("{0} BuildIndex结束", DateTime.Now));
        }
    }

    /// <summary>
    /// Continuation: merges all worker sub-indexes into the root directory,
    /// unless a worker already requested cancellation.
    /// </summary>
    private static void MergeIndex(Task[] tasks)
    {
        try
        {
            if (CTS.IsCancellationRequested) return;
            ILuceneBulid builder = new LuceneBulid();
            builder.MergeIndex(PathSuffixList.ToArray());
        }
        catch (Exception ex)
        {
            CTS.Cancel();
            logger.Error("MergeIndex出现异常", ex);
        }
    }
}
/// <summary>
/// Worker that builds the sub-index for a single table, paging through the
/// source data 1000 rows at a time until the table is exhausted.
/// </summary>
public class IndexBuilderPerThread
{
    private Logger logger = new Logger(typeof(IndexBuilderPerThread));
    private int _threadNum;
    private string _pathSuffix;
    private CancellationTokenSource _cts;

    public IndexBuilderPerThread(int threadNum, string pathSuffix, CancellationTokenSource cts)
    {
        _threadNum = threadNum;
        _pathSuffix = pathSuffix;
        _cts = cts;
    }

    /// <summary>
    /// Pages through this worker's table and appends each 1000-row page to the
    /// sub-index; stops when the table is exhausted or a sibling worker failed.
    /// </summary>
    public void Process()
    {
        try
        {
            logger.Debug(string.Format("ThreadNum={0}开始创建", _threadNum));
            CommodityRepository repository = new CommodityRepository();
            ILuceneBulid indexBuilder = new LuceneBulid();
            bool recreate = true; // first page recreates the sub-index; later pages append
            int page = 1;
            while (!_cts.IsCancellationRequested)
            {
                List<CourseEntity> batch = repository.QueryList(_threadNum, page, 1000);
                if (batch == null || batch.Count == 0)
                {
                    break; // table exhausted
                }
                indexBuilder.BuildIndex(batch, _pathSuffix, recreate);
                logger.Debug(string.Format("ThreadNum={0}完成{1}条的创建", _threadNum, 1000 * page++));
                recreate = false;
            }
        }
        catch (Exception ex)
        {
            _cts.Cancel(); // signal the other workers to stop
            logger.Error(string.Format("ThreadNum={0}出现异常", _threadNum), ex);
        }
        finally
        {
            logger.Debug(string.Format("ThreadNum={0}完成创建", _threadNum));
        }
    }
}
备注:相关类
/// <summary>
/// Analyzer that splits text on spaces only: every non-space character is
/// part of a token.
/// </summary>
public class BlankAnalyzer : Analyzer
{
    public override TokenStream TokenStream(string fieldName, TextReader reader) => new BlankTokenizer(reader);

    /// <summary>
    /// Reuses the per-thread cached tokenizer when one exists (resetting it onto
    /// the new reader); otherwise creates one and caches it.
    /// </summary>
    public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
    {
        Tokenizer cached = (Tokenizer)this.PreviousTokenStream;
        if (cached != null)
        {
            cached.Reset(reader);
            return cached;
        }
        Tokenizer created = new BlankTokenizer(reader);
        this.PreviousTokenStream = created;
        return created;
    }
}
/// <summary>
/// CharTokenizer that treats the space character as the only delimiter.
/// </summary>
public class BlankTokenizer : CharTokenizer
{
    public BlankTokenizer(TextReader in_Renamed)
        : base(in_Renamed) { }

    public BlankTokenizer(AttributeSource source, TextReader in_Renamed)
        : base(source, in_Renamed) { }

    public BlankTokenizer(AttributeSource.AttributeFactory factory, TextReader in_Renamed)
        : base(factory, in_Renamed) { }

    /// <summary>Every character except ' ' belongs to a token.</summary>
    protected override bool IsTokenChar(char c) => c != ' ';
}
/// <summary>
/// Analyzer that splits text on commas only: every non-comma character is
/// part of a token.
/// </summary>
public class CommaAnalyzer : Analyzer
{
    public override TokenStream TokenStream(string fieldName, TextReader reader) => new CommaTokenizer(reader);

    /// <summary>
    /// Reuses the per-thread cached tokenizer when one exists (resetting it onto
    /// the new reader); otherwise creates one and caches it.
    /// </summary>
    public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
    {
        Tokenizer cached = (Tokenizer)this.PreviousTokenStream;
        if (cached != null)
        {
            cached.Reset(reader);
            return cached;
        }
        Tokenizer created = new CommaTokenizer(reader);
        this.PreviousTokenStream = created;
        return created;
    }
}
/// <summary>
/// CharTokenizer that treats the comma character as the only delimiter.
/// </summary>
public class CommaTokenizer : CharTokenizer
{
    public CommaTokenizer(TextReader in_Renamed)
        : base(in_Renamed) { }

    public CommaTokenizer(AttributeSource source, TextReader in_Renamed)
        : base(source, in_Renamed) { }

    public CommaTokenizer(AttributeSource.AttributeFactory factory, TextReader in_Renamed)
        : base(factory, in_Renamed) { }

    /// <summary>Every character except ',' belongs to a token.</summary>
    protected override bool IsTokenChar(char c) => c != ',';
}
public interface ILuceneAnalyze
{
    /// <summary>
    /// Splits the search keyword into atomic terms for querying the indexed
    /// "title" field (see the LuceneAnalyze implementation in this file).
    /// </summary>
    /// <param name="keyword">raw user-entered search text</param>
    /// <returns>the analyzed terms; falls back to the raw keyword when the parse result is unexpected</returns>
    string[] AnalyzerKey(string keyword);
}
public interface ILuceneBulid
{
    /// <summary>
    /// Batch-creates index entries.
    /// </summary>
    /// <param name="ciList">rows to index</param>
    /// <param name="pathSuffix">index sub-directory suffix under the root path; empty = root (e.g. sa\1)</param>
    /// <param name="isCreate">false (default) = incremental append; true = delete the existing index first</param>
    void BuildIndex(List<CourseEntity> ciList, string pathSuffix = "", bool isCreate = false);
    /// <summary>
    /// Merges the given sub-directory indexes into the parent (root) directory.
    /// </summary>
    /// <param name="sourceDirs">sub-folder names under the root index path</param>
    void MergeIndex(string[] sourceDirs);
    /// <summary>
    /// Adds the index entry for a single row.
    /// </summary>
    /// <param name="ci">row to index</param>
    void InsertIndex(CourseEntity ci);
    /// <summary>
    /// Adds index entries for a batch of rows (incremental, root directory).
    /// </summary>
    /// <param name="ciList">rows to index</param>
    void InsertIndexMuti(List<CourseEntity> ciList);
    /// <summary>
    /// Deletes the index entry for a single row.
    /// </summary>
    /// <param name="ci">row whose entry should be removed</param>
    void DeleteIndex(CourseEntity ci);
    /// <summary>
    /// Deletes the index entries for a batch of rows.
    /// </summary>
    /// <param name="ciList">rows whose entries should be removed</param>
    void DeleteIndexMuti(List<CourseEntity> ciList);
    /// <summary>
    /// Updates the index entry for a single row (delete + re-add semantics in Lucene).
    /// </summary>
    /// <param name="ci">row to refresh</param>
    void UpdateIndex(CourseEntity ci);
    /// <summary>
    /// Updates the index entries for a batch of rows.
    /// </summary>
    /// <param name="ciList">rows to refresh</param>
    void UpdateIndexMuti(List<CourseEntity> ciList);
}
public interface ILuceneQuery
{
    /// <summary>
    /// Runs a raw Lucene query string against the index and returns all matches.
    /// </summary>
    /// <param name="queryString">Lucene query syntax string</param>
    /// <returns>matching course rows</returns>
    List<CourseEntity> QueryIndex(string queryString);
    /// <summary>
    /// Paged query with optional price range filter and price ordering.
    /// </summary>
    /// <param name="queryString">Lucene query syntax string</param>
    /// <param name="pageIndex">1-based page number</param>
    /// <param name="pageSize">rows per page</param>
    /// <param name="totalCount">total number of hits</param>
    /// <param name="priceFilter">e.g. "[13,50]" inclusive or "{13,50}" exclusive; null/blank = no filter</param>
    /// <param name="priceOrderBy">"price asc" / "price desc"; null/blank = relevance order</param>
    /// <returns>the requested page of matching rows</returns>
    List<CourseEntity> QueryIndexPage(string queryString, int pageIndex, int pageSize, out int totalCount, string priceFilter, string priceOrderBy);
}
public class LuceneAnalyze : ILuceneAnalyze
{
    private Logger logger = new Logger(typeof(LuceneAnalyze));
    #region AnalyzerKey
    /// <summary>
    /// Splits the search keyword into atomic terms: the keyword is cleaned,
    /// parsed with the PanGu analyzer against the "title" field, and the
    /// resulting query object (Term / Phrase / Boolean) is unpacked back into
    /// plain words. Any other query type falls back to the raw keyword.
    /// </summary>
    /// <param name="keyword">raw user-entered search text</param>
    /// <returns>the analyzed terms, or { keyword } as a fallback</returns>
    public string[] AnalyzerKey(string keyword)
    {
        Analyzer analyzer = new PanGuAnalyzer();
        QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", analyzer);
        Query query = parser.Parse(this.CleanKeyword(keyword));
        if (query is TermQuery)
        {
            // Keyword analyzed to a single term.
            Term term = ((TermQuery)query).Term;
            return new string[] { term.Text };
        }
        else if (query is PhraseQuery)
        {
            // Phrase: return each term of the phrase separately.
            Term[] term = ((PhraseQuery)query).GetTerms();
            return term.Select(t => t.Text).ToArray();
        }
        else if (query is BooleanQuery)
        {
            // Multiple clauses: flatten each Term/Phrase sub-query into words.
            // NOTE: other clause types (e.g. prefix/wildcard) are silently skipped.
            BooleanClause[] clauses = ((BooleanQuery)query).GetClauses();
            List<string> analyzerWords = new List<string>();
            foreach (BooleanClause clause in clauses)
            {
                Query childQuery = clause.Query;
                if (childQuery is TermQuery)
                {
                    Term term = ((TermQuery)childQuery).Term;
                    analyzerWords.Add(term.Text);
                }
                else if (childQuery is PhraseQuery)
                {
                    Term[] term = ((PhraseQuery)childQuery).GetTerms();
                    analyzerWords.AddRange(term.Select(t => t.Text));
                }
            }
            return analyzerWords.ToArray();
        }
        else
        {
            // Unexpected query type: log and fall back to the raw keyword.
            logger.Debug(string.Format("AnalyzerKey在解析keyword={0}的结果为new string[] {{ keyword }} ", keyword));
            return new string[] { keyword };
        }
    }
    /// <summary>
    /// Neutralizes Lucene's AND/OR operator keywords by lower-casing them at the
    /// head, tail, and middle of the keyword (so the parser treats them as plain
    /// text), then escapes Lucene special characters.
    /// </summary>
    /// <param name="keyword">raw search text; may be null/blank</param>
    /// <returns>the escaped, operator-free keyword</returns>
    private string CleanKeyword(string keyword)
    {
        if (string.IsNullOrWhiteSpace(keyword))
        {
            // NOTE(review): blank input falls straight through to Escape below;
            // a null keyword would throw there — callers pre-check for blank input.
        }
        else
        {
            bool isClean = false;
            // Loop until one full pass makes no change.
            while (!isClean)
            {
                keyword = keyword.Trim();
                if (keyword.EndsWith(" AND"))
                {
                    // "... AND" -> "... and": Remove strips "AND", Trim above eats the space later.
                    keyword = string.Format("{0}and", keyword.Remove(keyword.Length - 3, 3));
                }
                else if (keyword.EndsWith(" OR"))
                {
                    keyword = string.Format("{0}or", keyword.Remove(keyword.Length - 2, 2));
                }
                else if (keyword.StartsWith("AND "))
                {
                    keyword = string.Format("and{0}", keyword.Substring(3));
                }
                else if (keyword.StartsWith("OR "))
                {
                    keyword = string.Format("or{0}", keyword.Substring(2));
                }
                else if (keyword.Contains(" OR "))
                {
                    keyword = keyword.Replace(" OR ", " or ");
                }
                else if (keyword.Contains(" AND "))
                {
                    keyword = keyword.Replace(" AND ", " and ");
                }
                else
                    isClean = true;
            }
        }
        return QueryParser.Escape(keyword);
    }
    #endregion AnalyzerKey
}
/// <summary>
/// Index build/maintenance operations.
/// Concurrency strategy: each worker thread writes its own sub-directory and
/// the sub-indexes are merged afterwards (MergeIndex); write latency is
/// absorbed upstream by an async queue.
/// </summary>
public class LuceneBulid : ILuceneBulid
{
    #region Identity
    private Logger logger = new Logger(typeof(LuceneBulid));
    #endregion Identity

    #region 批量BuildIndex 索引合并
    /// <summary>
    /// Batch-creates index entries; all rows go to the same directory.
    /// </summary>
    /// <param name="ciList">rows to index</param>
    /// <param name="pathSuffix">sub-directory suffix under the root index path; empty = root (e.g. sa\1)</param>
    /// <param name="isCreate">false (default) = incremental append; true = wipe and recreate</param>
    public void BuildIndex(List<CourseEntity> ciList, string pathSuffix = "", bool isCreate = false)
    {
        IndexWriter writer = null;
        try
        {
            if (ciList == null || ciList.Count == 0)
            {
                return;
            }
            string rootIndexPath = StaticConstant.IndexPath;
            string indexPath = string.IsNullOrWhiteSpace(pathSuffix) ? rootIndexPath : string.Format("{0}\\{1}", rootIndexPath, pathSuffix);
            DirectoryInfo dirInfo = Directory.CreateDirectory(indexPath);
            LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
            writer = new IndexWriter(directory, new PanGuAnalyzer(), isCreate, IndexWriter.MaxFieldLength.LIMITED);
            writer.SetMaxBufferedDocs(100); // docs buffered in RAM before a new segment is flushed (default 10)
            writer.MergeFactor = 100;       // how eagerly segments are merged (default 10)
            writer.UseCompoundFile = true;  // compound file keeps the index file count down
            ciList.ForEach(c => CreateCIIndex(writer, c));
        }
        finally
        {
            if (writer != null)
            {
                // No Optimize() here on purpose — merging is done once, in MergeIndex.
                writer.Close();
            }
        }
    }

    /// <summary>
    /// Merges the per-thread sub-indexes into the root directory.
    /// The root index is recreated (create = true), then each child directory
    /// is bulk-added and a final Optimize() runs in the finally block.
    /// </summary>
    /// <param name="childDirs">sub-folder names under the root index path</param>
    public void MergeIndex(string[] childDirs)
    {
        Console.WriteLine("MergeIndex Start");
        IndexWriter writer = null;
        try
        {
            if (childDirs == null || childDirs.Length == 0) return;
            Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            string rootPath = StaticConstant.IndexPath;
            DirectoryInfo dirInfo = Directory.CreateDirectory(rootPath);
            LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
            writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); // true: drop any existing root index
            LuceneIO.Directory[] childDirectories = childDirs.Select(dir => LuceneIO.FSDirectory.Open(Directory.CreateDirectory(string.Format("{0}\\{1}", rootPath, dir)))).ToArray();
            writer.MergeFactor = 100;      // segment merge frequency (default 10)
            writer.UseCompoundFile = true; // compound file keeps the index file count down
            writer.AddIndexesNoOptimize(childDirectories);
        }
        finally
        {
            if (writer != null)
            {
                writer.Optimize();
                writer.Close();
            }
            Console.WriteLine("MergeIndex End");
        }
    }

    // Reference notes (translated):
    // Field.Store.YES: store the raw (pre-analysis) value; NO: do not store (storage is independent of indexing);
    // COMPRESS: compressed storage for long text/binary at a performance cost.
    // Field.Index.ANALYZED: tokenize and index; ANALYZED_NO_NORMS: same but norms collapse to one byte;
    // NOT_ANALYZED: index the value whole; NOT_ANALYZED_NO_NORMS: whole + one-byte norms.
    // TermVector records a document's terms and their occurrence counts:
    // YES / NO / WITH_POSITIONS / WITH_OFFSETS / WITH_POSITIONS_OFFSETS.
    #endregion 批量BuildIndex 索引合并

    #region 单个/批量索引增删改
    /// <summary>
    /// Adds the index entry for a single row (root directory, incremental unless
    /// the directory is empty).
    /// </summary>
    /// <param name="ci">row to index</param>
    public void InsertIndex(CourseEntity ci)
    {
        IndexWriter writer = null;
        try
        {
            if (ci == null) return;
            string rootIndexPath = StaticConstant.IndexPath;
            DirectoryInfo dirInfo = Directory.CreateDirectory(rootIndexPath);
            bool isCreate = dirInfo.GetFiles().Count() == 0; // empty folder => brand-new index
            LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
            writer = new IndexWriter(directory, CreateAnalyzerWrapper(), isCreate, IndexWriter.MaxFieldLength.LIMITED);
            writer.MergeFactor = 100;
            writer.UseCompoundFile = true;
            CreateCIIndex(writer, ci);
        }
        catch (Exception ex)
        {
            logger.Error("InsertIndex异常", ex);
            throw; // FIX: was "throw ex;", which resets the stack trace
        }
        finally
        {
            if (writer != null)
            {
                writer.Close();
            }
        }
    }

    /// <summary>
    /// Adds index entries for a batch of rows (incremental, root directory).
    /// </summary>
    /// <param name="ciList">rows to index</param>
    public void InsertIndexMuti(List<CourseEntity> ciList)
    {
        BuildIndex(ciList, "", false);
    }

    /// <summary>
    /// Deletes the index entries for a batch of rows, matched by course id.
    /// </summary>
    /// <param name="ciList">rows whose entries should be removed</param>
    public void DeleteIndexMuti(List<CourseEntity> ciList)
    {
        IndexReader reader = null;
        try
        {
            if (ciList == null || ciList.Count == 0) return;
            string rootIndexPath = StaticConstant.IndexPath;
            DirectoryInfo dirInfo = Directory.CreateDirectory(rootIndexPath);
            LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
            reader = IndexReader.Open(directory, false); // false = writable, deletions allowed
            foreach (CourseEntity ci in ciList)
            {
                // FIX: documents are keyed by "courseId" (see ParseCItoDoc below);
                // the old Term("productid", ...) never matched anything.
                reader.DeleteDocuments(new Term("courseId", ci.CourseId.ToString()));
            }
        }
        catch (Exception ex)
        {
            logger.Error("DeleteIndexMuti异常", ex); // FIX: was mislabeled "DeleteIndex异常"
            throw; // FIX: was "throw ex;"
        }
        finally
        {
            if (reader != null)
            {
                reader.Dispose();
            }
        }
    }

    /// <summary>
    /// Deletes the index entry for a single row, matched by course id.
    /// </summary>
    /// <param name="ci">row whose entry should be removed</param>
    public void DeleteIndex(CourseEntity ci)
    {
        IndexReader reader = null;
        try
        {
            if (ci == null) return;
            string rootIndexPath = StaticConstant.IndexPath;
            DirectoryInfo dirInfo = Directory.CreateDirectory(rootIndexPath);
            LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
            reader = IndexReader.Open(directory, false); // false = writable, deletions allowed
            // FIX: match the stored key "courseId" (was "productid", which never matched).
            reader.DeleteDocuments(new Term("courseId", ci.CourseId.ToString()));
        }
        catch (Exception ex)
        {
            logger.Error("DeleteIndex异常", ex);
            throw; // FIX: was "throw ex;"
        }
        finally
        {
            if (reader != null)
            {
                reader.Dispose();
            }
        }
    }

    /// <summary>
    /// Updates the index entry for a single row. Lucene's UpdateDocument is
    /// delete-then-add under the hood, keyed by the term below.
    /// </summary>
    /// <param name="ci">row to refresh</param>
    public void UpdateIndex(CourseEntity ci)
    {
        IndexWriter writer = null;
        try
        {
            if (ci == null) return;
            string rootIndexPath = StaticConstant.IndexPath;
            DirectoryInfo dirInfo = Directory.CreateDirectory(rootIndexPath);
            bool isCreate = dirInfo.GetFiles().Count() == 0; // empty folder => brand-new index
            LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
            writer = new IndexWriter(directory, CreateAnalyzerWrapper(), isCreate, IndexWriter.MaxFieldLength.LIMITED);
            writer.MergeFactor = 100;
            writer.UseCompoundFile = true;
            // FIX: match the stored key "courseId" (was "productid", so updates inserted duplicates).
            writer.UpdateDocument(new Term("courseId", ci.CourseId.ToString()), ParseCItoDoc(ci));
        }
        catch (Exception ex)
        {
            logger.Error("UpdateIndex异常", ex); // FIX: was mislabeled "InsertIndex异常"
            throw; // FIX: was "throw ex;"
        }
        finally
        {
            if (writer != null)
            {
                writer.Close();
            }
        }
    }

    /// <summary>
    /// Updates the index entries for a batch of rows, keyed by course id.
    /// </summary>
    /// <param name="ciList">rows to refresh</param>
    public void UpdateIndexMuti(List<CourseEntity> ciList)
    {
        IndexWriter writer = null;
        try
        {
            if (ciList == null || ciList.Count == 0) return;
            string rootIndexPath = StaticConstant.IndexPath;
            DirectoryInfo dirInfo = Directory.CreateDirectory(rootIndexPath);
            bool isCreate = dirInfo.GetFiles().Count() == 0; // empty folder => brand-new index
            LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
            writer = new IndexWriter(directory, CreateAnalyzerWrapper(), isCreate, IndexWriter.MaxFieldLength.LIMITED);
            writer.MergeFactor = 50;
            writer.UseCompoundFile = true;
            foreach (CourseEntity ci in ciList)
            {
                // FIX: match the stored key "courseId" (was "productid").
                writer.UpdateDocument(new Term("courseId", ci.CourseId.ToString()), ParseCItoDoc(ci));
            }
        }
        catch (Exception ex)
        {
            logger.Error("UpdateIndexMuti异常", ex); // FIX: was mislabeled "InsertIndex异常"
            throw; // FIX: was "throw ex;"
        }
        finally
        {
            if (writer != null)
            {
                writer.Close();
            }
        }
    }
    #endregion 单个/批量索引增删改

    #region PrivateMethod
    /// <summary>
    /// Per-field analyzer: PanGu for "title", Standard for "categoryid",
    /// Standard as the default for everything else.
    /// </summary>
    private PerFieldAnalyzerWrapper CreateAnalyzerWrapper()
    {
        Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        PerFieldAnalyzerWrapper analyzerWrapper = new PerFieldAnalyzerWrapper(analyzer);
        analyzerWrapper.AddAnalyzer("title", new PanGuAnalyzer());
        analyzerWrapper.AddAnalyzer("categoryid", new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));
        return analyzerWrapper;
    }

    /// <summary>
    /// Converts one row to a Document and adds it to the writer.
    /// </summary>
    private void CreateCIIndex(IndexWriter writer, CourseEntity ci)
    {
        try
        {
            writer.AddDocument(ParseCItoDoc(ci));
        }
        catch (Exception ex)
        {
            logger.Error("CreateCIIndex异常", ex);
            throw; // FIX: was "throw ex;"
        }
    }

    /// <summary>
    /// Maps a CourseEntity to a Lucene Document. "courseId" is the stable key
    /// used by delete/update operations; "title" is the only analyzed field.
    /// </summary>
    private Document ParseCItoDoc(CourseEntity ci)
    {
        Document doc = new Document();
        doc.Add(new Field("id", ci.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("title", ci.Title, Field.Store.YES, Field.Index.ANALYZED)); // PanGu-analyzed
        doc.Add(new Field("courseId", ci.CourseId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("categoryid", ci.CategoryId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("imageurl", ci.ImageUrl, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("url", ci.Url, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new NumericField("price", Field.Store.YES, true).SetFloatValue((float)ci.Price));
        return doc;
    }
    #endregion PrivateMethod
}
public class LuceneQuery : ILuceneQuery
{
    #region Identity
    private Logger logger = new Logger(typeof(LuceneQuery));
    #endregion Identity

    #region QueryIndex
    /// <summary>
    /// Runs a raw query string against "title" and returns up to 10000 hits.
    /// </summary>
    /// <param name="queryString">Lucene query syntax string</param>
    /// <returns>matching course rows</returns>
    public List<CourseEntity> QueryIndex(string queryString)
    {
        IndexSearcher searcher = null;
        try
        {
            List<CourseEntity> ciList = new List<CourseEntity>();
            Directory dir = FSDirectory.Open(StaticConstant.IndexPath);
            searcher = new IndexSearcher(dir);
            Analyzer analyzer = new PanGuAnalyzer();
            QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", analyzer);
            Query query = parser.Parse(queryString);
            Console.WriteLine(query.ToString()); // show the final search expression
            TopDocs docs = searcher.Search(query, (Filter)null, 10000);
            foreach (ScoreDoc sd in docs.ScoreDocs)
            {
                Document doc = searcher.Doc(sd.Doc);
                ciList.Add(DocumentToCommodityInfo(doc));
            }
            return ciList;
        }
        finally
        {
            if (searcher != null)
            {
                searcher.Dispose();
            }
        }
    }

    /// <summary>
    /// Paged search with optional price range filter and price sort.
    /// </summary>
    /// <param name="queryString">Lucene query syntax string</param>
    /// <param name="pageIndex">1-based page number</param>
    /// <param name="pageSize">rows per page</param>
    /// <param name="totalCount">total number of hits</param>
    /// <param name="priceFilter">"[a,b]" inclusive / "{a,b}" exclusive; null/blank = no filter</param>
    /// <param name="priceOrderBy">"price asc" / "price desc"; null/blank = relevance order</param>
    /// <returns>the requested page of matching rows</returns>
    public List<CourseEntity> QueryIndexPage(string queryString, int pageIndex, int pageSize, out int totalCount, string priceFilter, string priceOrderBy)
    {
        totalCount = 0;
        IndexSearcher searcher = null;
        try
        {
            List<CourseEntity> ciList = new List<CourseEntity>();
            FSDirectory dir = FSDirectory.Open(StaticConstant.IndexPath);
            searcher = new IndexSearcher(dir);
            Analyzer analyzer = new PanGuAnalyzer();
            QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", analyzer);
            Query query = parser.Parse(queryString);
            pageIndex = Math.Max(1, pageIndex); // page numbering starts at 1
            int startIndex = (pageIndex - 1) * pageSize;
            int endIndex = pageIndex * pageSize;
            NumericRangeFilter<float> numPriceFilter = null;
            if (!string.IsNullOrWhiteSpace(priceFilter))
            {
                bool isContainStart = priceFilter.StartsWith("["); // '[' = inclusive lower bound
                bool isContainEnd = priceFilter.EndsWith("]");     // ']' = inclusive upper bound
                string[] floatArray = priceFilter.Replace("[", "").Replace("]", "").Replace("{", "").Replace("}", "").Split(',');
                float start = 0;
                float end = 0;
                if (!float.TryParse(floatArray[0], out start) || !float.TryParse(floatArray[1], out end))
                {
                    // FIX: ArgumentException instead of bare Exception (still caught by callers).
                    throw new ArgumentException("Wrong priceFilter");
                }
                numPriceFilter = NumericRangeFilter.NewFloatRange("price", start, end, isContainStart, isContainEnd);
            }
            Sort sort = new Sort();
            if (!string.IsNullOrWhiteSpace(priceOrderBy))
            {
                // FIX: SortField's "reverse" flag means descending; the old code set
                // reverse = EndsWith("asc"), which inverted asc/desc.
                SortField sortField = new SortField("price", SortField.FLOAT, priceOrderBy.EndsWith("desc", StringComparison.CurrentCultureIgnoreCase));
                sort.SetSort(sortField);
            }
            // NOTE(review): only the top 500 hits are retrieved, so pages past row
            // 500 are unreachable even though totalCount reports all hits.
            TopDocs docs = searcher.Search(query, numPriceFilter, 500, sort);
            totalCount = docs.TotalHits;
            // FIX: bound the loop by ScoreDocs.Length — TotalHits can exceed the
            // 500-result window, and indexing past it threw IndexOutOfRange.
            for (int i = startIndex; i < endIndex && i < docs.ScoreDocs.Length; i++)
            {
                Document doc = searcher.Doc(docs.ScoreDocs[i].Doc);
                ciList.Add(DocumentToCommodityInfo(doc));
            }
            return ciList;
        }
        finally
        {
            if (searcher != null)
            {
                searcher.Dispose();
            }
        }
    }

    /// <summary>
    /// Debug helper: logs the relevance score of each hit in the page window.
    /// </summary>
    private void PrintScores(TopDocs docs, int startIndex, int endIndex, MultiSearcher searcher)
    {
        ScoreDoc[] scoreDocs = docs.ScoreDocs;
        for (int i = startIndex; i < endIndex && i < scoreDocs.Count(); i++)
        {
            int docId = scoreDocs[i].Doc;
            Document doc = searcher.Doc(docId);
            // FIX: the index stores "courseId", not "productid".
            logger.Info(string.Format("{0}的分值为{1}", doc.Get("courseId"), scoreDocs[i].Score));
        }
    }
    #endregion QueryIndex

    #region private
    /// <summary>
    /// Maps a stored Document back to a CourseEntity.
    /// </summary>
    private CourseEntity DocumentToCommodityInfo(Document doc)
    {
        return new CourseEntity()
        {
            Id = int.Parse(doc.Get("id")),
            Title = doc.Get("title"),
            // FIX: the index stores "courseId" and "imageurl" (see LuceneBulid.ParseCItoDoc);
            // the old keys "productid"/"iamgeurl" returned null, so long.Parse threw
            // and ImageUrl was always null.
            CourseId = long.Parse(doc.Get("courseId")),
            CategoryId = int.Parse(doc.Get("categoryid")),
            ImageUrl = doc.Get("imageurl"),
            Price = decimal.Parse(doc.Get("price")),
            Url = doc.Get("url")
        };
    }
    #endregion private
}
/// <summary>
/// Public search facade over the Lucene index.
/// </summary>
public class CommodityLucene
{
    private static Logger logger = new Logger(typeof(CommodityLucene));

    #region QueryCommodity
    /// <summary>
    /// Searches the index by keyword and/or category with optional price
    /// filtering and ordering.
    /// </summary>
    /// <param name="pageIndex">1-based page number</param>
    /// <param name="pageSize">rows per page</param>
    /// <param name="totalCount">total hit count</param>
    /// <param name="keyword">free-text keyword (PanGu-analyzed)</param>
    /// <param name="categoryIdList">optional category ids (OR'd together)</param>
    /// <param name="priceFilter">"[13,50]" inclusive / "{13,50}" exclusive</param>
    /// <param name="priceOrderBy">"price desc" / "price asc"</param>
    /// <returns>matching rows; null when there is nothing to search for or on error</returns>
    public static List<CourseEntity> QueryCommodity(int pageIndex, int pageSize, out int totalCount, string keyword, List<int> categoryIdList, string priceFilter, string priceOrderBy)
    {
        totalCount = 0;
        try
        {
            if (string.IsNullOrWhiteSpace(keyword) && (categoryIdList == null || categoryIdList.Count == 0)) return null;
            ILuceneQuery luceneQuery = new LuceneQuery();
            // "+" makes each clause mandatory: keyword AND category must both match.
            string queryString = string.Format(" {0} {1}",
                string.IsNullOrWhiteSpace(keyword) ? "" : string.Format(" +{0}", AnalyzerKeyword(keyword)),
                categoryIdList == null || categoryIdList.Count == 0 ? "" : string.Format(" +{0}", AnalyzerCategory(categoryIdList)));
            return luceneQuery.QueryIndexPage(queryString, pageIndex, pageSize, out totalCount, priceFilter, priceOrderBy);
        }
        catch (Exception ex)
        {
            logger.Error(string.Format("QueryCommodity参数为{0}出现异常", keyword), ex);
            return null;
        }
    }
    #endregion QueryCommodity

    /// <summary>
    /// Analyzes the keyword with PanGu: a single word becomes a prefix query
    /// ("title:word*"); several words become OR'd term queries — both choices
    /// maximize the number of hits (greedy search).
    /// </summary>
    private static string AnalyzerKeyword(string keyword)
    {
        StringBuilder queryStringBuilder = new StringBuilder();
        ILuceneAnalyze analyzer = new LuceneAnalyze();
        string[] words = analyzer.AnalyzerKey(keyword);
        if (words.Length == 1)
        {
            queryStringBuilder.AppendFormat("{0}:{1}* ", "title", words[0]);
        }
        else
        {
            // FIX: removed an unused StringBuilder that was declared here.
            foreach (string word in words)
            {
                queryStringBuilder.AppendFormat("{0}:{1} ", "title", word);
            }
        }
        string result = queryStringBuilder.ToString().TrimEnd();
        logger.Info(string.Format("AnalyzerKeyword 将 keyword={0}转换为{1}", keyword, result));
        return result;
    }

    /// <summary>
    /// Builds an OR'd category clause: "categoryid:1 categoryid:2 ...".
    /// </summary>
    private static string AnalyzerCategory(List<int> categoryIdList)
    {
        return string.Join(" ", categoryIdList.Select(c => string.Format("{0}:{1}", "categoryid", c)));
    }
}
}
/// <summary>
/// Database access for the sharded course tables (Tencent_Subject_001..030).
/// </summary>
public class CommodityRepository //: IRepository
{
    private Logger logger = new Logger(typeof(CommodityRepository));

    /// <summary>
    /// Inserts the rows, routed to their shard table by course id.
    /// </summary>
    public void SaveList(List<CourseEntity> commodityList)
    {
        if (commodityList == null || commodityList.Count == 0) return;
        IEnumerable<IGrouping<string, CourseEntity>> group = commodityList.GroupBy<CourseEntity, string>(c => GetTableName(c));
        foreach (var data in group)
        {
            SqlHelper.InsertList<CourseEntity>(data.ToList(), data.Key);
        }
    }

    /// <summary>
    /// Shard routing: CourseId % 30 + 1 maps to table suffix 001..030.
    /// </summary>
    private string GetTableName(CourseEntity commodity)
    {
        return string.Format("Tencent_Subject_{0}", (commodity.CourseId % 30 + 1).ToString("000"));
    }

    /// <summary>
    /// Pages through one shard table; pageIndex starts at 1.
    /// NOTE(review): paging by "id &gt; pageSize*(pageIndex-1)" assumes ids are
    /// dense and start at 1 within each shard — confirm against the schema.
    /// </summary>
    public List<CourseEntity> QueryList(int tableNum, int pageIndex, int pageSize)
    {
        // FIX: added ORDER BY id — TOP without ORDER BY returns rows in an
        // undefined order, which breaks the id-offset paging scheme above.
        string sql = string.Format("SELECT top {2} * FROM Tencent_Subject_{0} WHERE id>{1} ORDER BY id;", tableNum.ToString("000"), pageSize * Math.Max(0, pageIndex - 1), pageSize);
        return SqlHelper.QueryList<CourseEntity>(sql);
    }
}
/// <summary>
/// Minimal ADO.NET helper: raw SQL execution plus reflection-based row mapping.
/// WARNING(review): values are still inlined into the SQL text rather than sent
/// as parameters; quotes are escaped below, but parameterized commands would be
/// the proper fix if any value can come from untrusted input.
/// </summary>
public class SqlHelper
{
    private static Logger logger = new Logger(typeof(SqlHelper));
    private static string ConnStr = ConfigurationManager.ConnectionStrings["TencentConn"].ConnectionString;

    /// <summary>
    /// Executes a non-query statement on a fresh connection.
    /// </summary>
    public static void ExecuteNonQuery(string sql)
    {
        using (SqlConnection sqlConn = new SqlConnection(ConnStr))
        using (SqlCommand cmd = new SqlCommand(sql, sqlConn)) // FIX: dispose the command too
        {
            sqlConn.Open();
            cmd.ExecuteNonQuery();
        }
    }

    /// <summary>
    /// Runs a SELECT and maps every row onto T by property name.
    /// </summary>
    public static List<T> QueryList<T>(string sql) where T : new()
    {
        using (SqlConnection sqlConn = new SqlConnection(ConnStr))
        using (SqlCommand cmd = new SqlCommand(sql, sqlConn)) // FIX: dispose command and reader
        {
            sqlConn.Open();
            cmd.CommandTimeout = 120;
            using (SqlDataReader reader = cmd.ExecuteReader())
            {
                return TransList<T>(reader);
            }
        }
    }

    /// <summary>Inserts one row into the given table.</summary>
    public static void Insert<T>(T model, string tableName) where T : new()
    {
        string sql = GetInsertSql<T>(model, tableName);
        ExecuteNonQuery(sql);
    }

    /// <summary>Inserts a batch of rows into the given table as one multi-statement command.</summary>
    public static void InsertList<T>(List<T> list, string tableName) where T : new()
    {
        string sql = string.Join(" ", list.Select(t => GetInsertSql<T>(t, tableName)));
        ExecuteNonQuery(sql);
    }

    #region Private
    /// <summary>
    /// Builds an INSERT statement covering every property except "id"
    /// (assumed to be the identity column).
    /// </summary>
    private static string GetInsertSql<T>(T model, string tableName)
    {
        StringBuilder sbSql = new StringBuilder();
        StringBuilder sbFields = new StringBuilder();
        StringBuilder sbValues = new StringBuilder();
        var properties = model.GetType().GetProperties();
        foreach (PropertyInfo p in properties)
        {
            string name = p.Name;
            if (!name.Equals("id", StringComparison.OrdinalIgnoreCase))
            {
                sbFields.AppendFormat("[{0}],", name);
                // FIX: escape embedded single quotes — a value containing ' used
                // to break the statement (and was a SQL-injection hole).
                object value = p.GetValue(model);
                string text = value == null ? "" : value.ToString().Replace("'", "''");
                sbValues.AppendFormat("'{0}',", text);
            }
        }
        sbSql.AppendFormat("INSERT INTO {0} ({1}) VALUES ({2});", tableName, sbFields.ToString().TrimEnd(','), sbValues.ToString().TrimEnd(','));
        return sbSql.ToString();
    }

    /// <summary>
    /// Maps every row of the reader onto a new T by matching column names to
    /// property names.
    /// </summary>
    private static List<T> TransList<T>(SqlDataReader reader) where T : new()
    {
        List<T> tList = new List<T>();
        var properties = typeof(T).GetProperties();
        while (reader.Read())
        {
            T t = new T();
            foreach (PropertyInfo p in properties)
            {
                object raw = reader[p.Name];
                if (raw == DBNull.Value) continue; // FIX: DBNull cannot be converted; keep the default
                // FIX: Convert.ChangeType cannot target Nullable<T>; use the underlying type.
                Type targetType = Nullable.GetUnderlyingType(p.PropertyType) ?? p.PropertyType;
                p.SetValue(t, Convert.ChangeType(raw, targetType));
            }
            tList.Add(t);
        }
        return tList;
    }

    /// <summary>
    /// Maps the reader onto a single T; like the original, later rows overwrite
    /// earlier ones, so effectively the last row wins.
    /// </summary>
    private static T TransModel<T>(SqlDataReader reader) where T : new()
    {
        T t = new T();
        var properties = typeof(T).GetProperties(); // FIX: hoisted out of the row loop
        while (reader.Read())
        {
            foreach (PropertyInfo p in properties)
            {
                object raw = reader[p.Name];
                if (raw == DBNull.Value) continue; // FIX: DBNull cannot be converted; keep the default
                Type targetType = Nullable.GetUnderlyingType(p.PropertyType) ?? p.PropertyType;
                p.SetValue(t, Convert.ChangeType(raw, targetType));
            }
        }
        return t;
    }
    #endregion Private
}