Lucene.NET 开发实现

最近在帮一个朋友的忙,为他们的一个软件设计架构。该应用程序的某个核心逻辑涉及的数据量较大,客户对查询性能的要求又很高。这种需求除了在数据库设计上要考虑水平分表、分区视图之类的方案,在程序中也要考虑效率问题,于是决定使用 Lucene.NET 为核心数据建立索引文件,做全文搜索,这样就算数据量达到千万级别,查询也能在几秒钟内完成,对性能有很大帮助。之前没有仔细了解过 Lucene 方面的技术,正好借此学习一下。

Lucene.NET 是从 Java 版 Lucene 移植到 .NET 平台上的开源技术,技术资料也很丰富。

以下是创建索引代码:

  /// <summary>
  /// Rebuilds the product index from scratch: pages through the auto parts
  /// returned by <c>CustomerQuery.QueryAutoParts</c>, keeps the ones accepted
  /// by <c>IsValidProduct</c>, and passes the collected list to <c>Write</c>.
  /// </summary>
  public static void Run()
  {
      var paging = new QryPage
      {
          PerPageSize = 350,
          PageNumber = 0,
          // Initial upper bound only. The paging object is passed by ref below,
          // so presumably the query replaces this with the real page count
          // - TODO confirm against QueryAutoParts.
          PageCount = 10000,
          NeedInitPageNo = false
      };

      var collected = new List<AutoParts>();

      for (; paging.PageNumber < paging.PageCount; paging.PageNumber++)
      {
          // Fetch one page of source rows to be indexed.
          var pageItems = new CustomerQuery().QueryAutoParts(new AutoPartDTO(), ref paging);

          foreach (var part in pageItems)
          {
              if (IsValidProduct(part))
              {
                  collected.Add(part);
              }
          }
      }

      // Write the collected search items to the index files.
      Write(collected);
  }



        /// <summary>
        /// Writes the given auto parts to the product index by delegating to
        /// <c>build</c>.
        /// </summary>
        /// <param name="packages">The auto parts to index.</param>
        public static void Write(List<AutoParts> packages)
        {
            build(packages);
        }



        /// <summary>
        /// Creates a new index at <c>Common.ProductIndexPath</c> (the writer is
        /// opened with <c>create = true</c>), adds one document per auto part,
        /// optimizes the index and logs the elapsed time.
        /// </summary>
        /// <param name="packages">The auto parts to index.</param>
        /// <remarks>
        /// Failures while indexing, optimizing or closing are reported to the
        /// console and not rethrown. A failure to open the writer propagates
        /// to the caller.
        /// </remarks>
        public static void build(List<AutoParts> packages)
        {
            var writer = new IndexWriter(Common.ProductIndexPath, new EsayTooAnalyzer(), true);

            try
            {
                DateTime start;

                try
                {
                    writer.SetMaxFieldLength(1000);
                    writer.SetUseCompoundFile(true);

                    Logger.Info("Indexing to directory '" + Common.ProductIndexPath + "'...");

                    start = DateTime.Now;

                    indexDocs(writer, packages);

                    Logger.Info("Optimizing...");
                    writer.Optimize();
                }
                finally
                {
                    // Always release the writer, even when indexing or optimizing
                    // fails, so the index is not left locked for later writers.
                    writer.Close();
                }

                // Report real milliseconds; a raw Ticks difference is in 100 ns units.
                long elapsedMilliseconds = (long)(DateTime.Now - start).TotalMilliseconds;

                Logger.Info(elapsedMilliseconds + " total milliseconds");
            }
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
            }
        }



        /// <summary>
        /// Replaces the indexed document of one auto part: deletes every
        /// document whose "id" term equals <c>dto.Id</c>, then adds a fresh
        /// document built from <paramref name="dto"/> and optimizes the index.
        /// </summary>
        /// <param name="dto">The auto part whose index entry is refreshed.</param>
        /// <remarks>
        /// Failures are reported to the console and not rethrown.
        /// </remarks>
        public static void UpdateIndex(AutoParts dto)
        {
            try
            {
                Term idTerm = new Term("id", dto.Id.ToString());

                // Step 1: delete the old document(s). The reader must be closed
                // before the writer is opened: a deleting IndexReader commits
                // its deletions on Close and holds the index write lock until
                // then, so leaving it open loses the delete and blocks the
                // IndexWriter below.
                var reader = IndexReader.Open(Common.ProductIndexPath);
                try
                {
                    reader.DeleteDocuments(idTerm);
                }
                finally
                {
                    reader.Close();
                }

                // Step 2: append the new version of the document.
                var writer = new IndexWriter(Common.ProductIndexPath, new EsayTooAnalyzer(), false);
                try
                {
                    AddDocument(dto, writer);
                    writer.Optimize();
                }
                finally
                {
                    // Release the writer even when adding or optimizing fails.
                    writer.Close();
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("添加索引出错,配件ID:" + dto.Id + "\n");
                Console.Write(e.Message);
            }
        }

        /// <summary>
        /// Appends one auto part to the existing product index (the writer is
        /// opened with <c>create = false</c>) and optimizes the index.
        /// </summary>
        /// <param name="dto">The auto part to add.</param>
        /// <remarks>
        /// Any failure is reported to the console and then rethrown to the caller.
        /// </remarks>
        public static void AddDocument(AutoParts dto)
        {
            try
            {
                var writer = new IndexWriter(Common.ProductIndexPath, new EsayTooAnalyzer(), false);
                try
                {
                    AddDocument(dto, writer);
                    writer.Optimize();
                }
                finally
                {
                    // Release the writer even when adding or optimizing fails,
                    // so the index is not left locked for later writers.
                    writer.Close();
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("添加索引出错,配件ID:" + dto.Id + "\n");
                Console.WriteLine(e.Message);

                throw;
            }
        }



        /// <summary>
        /// Builds the Lucene document for one auto part and adds it through the
        /// supplied writer. The writer is neither optimized nor closed here.
        /// </summary>
        /// <param name="package">The auto part to convert into a document.</param>
        /// <param name="getWriter">An open index writer that receives the document.</param>
        private static void AddDocument(AutoParts package, IndexWriter getWriter)
        {
            Document doc = new Document();

            // Searchable as exact (untokenized) terms.
            doc.Add(new Field("id", package.Id.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
            doc.Add(new Field("CarCategoryId", package.CarCategoryId.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
            doc.Add(new Field("Name", package.Name, Field.Store.YES, Field.Index.UN_TOKENIZED));
            doc.Add(new Field("Code", package.Code, Field.Store.YES, Field.Index.UN_TOKENIZED));

            // Stored for retrieval only; not searchable.
            doc.Add(new Field("FSPrice", package.FSPrice, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("YCPrice", package.YCPrice, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("YCCost", package.YCCost, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("YCSupplier", package.YCSupplier, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("FCPrice", package.FCPrice, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("FCCost", package.FCCost, Field.Store.YES, Field.Index.NO));
            // Store the supplier itself; this field previously received FCCost by mistake.
            doc.Add(new Field("FCSupplier", package.FCSupplier, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("CCPrice", package.CCPrice, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("CCCost", package.CCCost, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("CCSupplier", package.CCSupplier, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("Repire", package.Repire, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("AskPriceInfo", package.AskPriceInfo, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("AskCustomer", package.AskCustomer, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("Description", package.Description, Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("Picture1", package.Picture1.ToString(), Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("Picture2", package.Picture2.ToString(), Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("Picture3", package.Picture3.ToString(), Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("IsAvaliable", package.IsAvaliable.ToString(), Field.Store.YES, Field.Index.NO));

            // Tokenized by the writer's analyzer so individual tags can be matched.
            doc.Add(new Field("CarTypeTags", package.CarTypeTags, Field.Store.YES, Field.Index.TOKENIZED));

            // NOTE(review): ToShortDateString() is culture-dependent and its output
            // does not generally sort chronologically as text - confirm this is
            // acceptable for any range query run against this field.
            doc.Add(new Field("ModifiedTime", package.ModifiedTime.ToShortDateString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
            doc.Add(new Field("ModifiedBy", package.ModifiedBy, Field.Store.YES, Field.Index.NO));

            getWriter.AddDocument(doc);
        }



        /// <summary>
        /// Adds every auto part in <paramref name="packages"/> to the index through
        /// <paramref name="writer"/>, printing a 1-based progress counter per item.
        /// </summary>
        /// <param name="writer">An open index writer.</param>
        /// <param name="packages">The auto parts to index.</param>
        /// <remarks>
        /// The first failure stops the loop; its message is written to the console
        /// and the exception is not rethrown.
        /// </remarks>
        private static void indexDocs(IndexWriter writer, List<AutoParts> packages)
        {
            try
            {
                for (int position = 0; position < packages.Count; position++)
                {
                    Console.WriteLine("生成索引顺序" + (position + 1));
                    AddDocument(packages[position], writer);
                }
            }
            catch (Exception failure)
            {
                Console.Write(failure.Message);
            }
        }

        /// <summary>
        /// Decides whether an auto part should be indexed. Currently a placeholder
        /// that accepts every item without inspecting it.
        /// </summary>
        /// <param name="autoParts">The candidate auto part (not examined).</param>
        /// <returns>Always <c>true</c>.</returns>
        private static bool IsValidProduct(AutoParts autoParts)
        {
            return true;
        }

    }

  其中更新索引的方法还在调试,因为发现删除索引的操作不成功。

 下面是查询的核心算法,其中也包含了分页查询,完全可以按照与查询数据库一致的方式来使用;核心数据的底层查询按如下方式实现即可:

/// <summary>
/// Runs a paged search against the product index.
/// </summary>
/// <param name="dto">Search criteria, turned into a query by <c>CreateQuery</c>
/// and into filters by <c>CreateFilter</c>.</param>
/// <param name="page">Paging state. A <c>PageNumber</c> of 0 is treated as page 1;
/// <c>TotalCount</c> and <c>PageCount</c> are filled in from the search result.</param>
/// <returns>
/// The DTOs produced by the <c>TopDocs2Data</c> overloads, or an empty list when
/// the search itself throws (the error is written to the console).
/// </returns>
public static List<AutoPartDTO> Query(QueryCritiriaDTO dto, ref QryPage page)
{
    if (page.PageNumber == 0)
    {
        page.PageNumber = 1;
    }

    Sort sort = new Sort(new SortField("id", SortField.DOC, false));

    Query query = CreateQuery(dto);
    MutiFilter filter = CreateFilter(dto);

    query = filter.getFilterQuery(query);

    var productIndexReader = IndexReader.Open(Common.ProductIndexPath);
    IndexSearcher searcher = new IndexSearcher(productIndexReader);

    try
    {
        // Fetch every hit up to the end of the requested page; the page itself
        // is cut out of that range afterwards.
        TopDocs topDocs = searcher.Search(query, null, page.PageNumber * page.PerPageSize, sort);
        page.TotalCount = topDocs.totalHits;
        page.PageCount = (int)Math.Ceiling((decimal)page.TotalCount / (decimal)page.PerPageSize);

        if (page.PageCount == 1 || page.PageCount == 0)
        {
            return TopDocs2Data(searcher, topDocs.scoreDocs);
        }

        return TopDocs2Data(searcher, topDocs.scoreDocs, page);
    }
    catch (Exception e)
    {
        Console.WriteLine("查询出错");
        Console.WriteLine(e.Message);
        return new List<AutoPartDTO>();
    }
    finally
    {
        try
        {
            searcher.Close();
        }
        finally
        {
            // A searcher constructed from an existing reader does not close that
            // reader, so it must be closed here or it leaks on every query.
            productIndexReader.Close();
        }
    }
}



        /// <summary>
        /// Translates the search criteria into a boolean query in which every
        /// supplied criterion is a mandatory (MUST) clause. Criteria left at their
        /// "unset" value (-1 or 0 for the categories, null/empty for the strings)
        /// contribute no clause.
        /// </summary>
        /// <param name="dto">The search criteria.</param>
        /// <returns>The combined query; it has no clauses when no criterion is set.</returns>
        private static Query CreateQuery(QueryCritiriaDTO dto)
        {
            var combined = new BooleanQuery();

            // Level-3 category: exact match on the category id.
            var level3 = dto.CatetoryL3;
            if (!(level3 == -1 || level3 == 0))
            {
                AddRequiredTerm(combined, "CarCategoryId", level3.ToString());
            }

            // Level-4 category: fuzzy match (minimum similarity 0.3) against the tags.
            var level4 = dto.CatetoryL4;
            if (!(level4 == -1 || level4 == 0))
            {
                var tagQuery = new FuzzyQuery(new Term("CarTypeTags", level4.ToString()), 0.3f);
                combined.Add(tagQuery, BooleanClause.Occur.MUST);
            }

            if (!string.IsNullOrEmpty(dto.Name))
            {
                AddRequiredTerm(combined, "Name", dto.Name);
            }

            if (!string.IsNullOrEmpty(dto.Code))
            {
                AddRequiredTerm(combined, "Code", dto.Code);
            }

            if (!string.IsNullOrEmpty(dto.SupplierId))
            {
                AddRequiredTerm(combined, "SupplierId", dto.SupplierId);
            }

            return combined;
        }

        /// <summary>
        /// Adds a mandatory exact-term clause for <paramref name="field"/> to
        /// <paramref name="target"/>.
        /// </summary>
        private static void AddRequiredTerm(BooleanQuery target, string field, string value)
        {
            target.Add(new TermQuery(new Term(field, value)), BooleanClause.Occur.MUST);
        }



        /// <summary>
        /// Builds the filter set for a search. A "ModifiedTime" range filter is
        /// added only when both <c>dto.Start</c> and <c>dto.End</c> differ from
        /// <c>DateTimeUtil.MIN_DATETIME</c>; otherwise the set is returned empty.
        /// </summary>
        /// <param name="dto">The search criteria.</param>
        /// <returns>A filter set holding zero or one range filter.</returns>
        private static MutiFilter CreateFilter(QueryCritiriaDTO dto)
        {
            var filters = new MutiFilter();

            // Either bound left at the sentinel value means "no date restriction".
            if (dto.Start == CP.Utils.DateTimeUtil.MIN_DATETIME
                || dto.End == CP.Utils.DateTimeUtil.MIN_DATETIME)
            {
                return filters;
            }

            string lowerBound = dto.Start.ToShortDateString();
            string upperBound = dto.End.ToShortDateString();
            filters.AddRangeFilter("ModifiedTime", lowerBound, upperBound);

            return filters;
        }



        #region 获取最终的数据

        /// <summary>
        /// Converts the hits that belong to the requested page into DTOs.
        /// </summary>
        /// <param name="searcher">Searcher used to load each hit's stored document.</param>
        /// <param name="scoreDoc">The hits returned by the search, indexed from 0.</param>
        /// <param name="page">Supplies <c>PageNumber</c> (1-based), <c>PerPageSize</c>
        /// and <c>TotalCount</c>, which together select the slice of hits to convert.</param>
        /// <returns>One DTO (Id and Name only) per hit on the page; empty when the
        /// slice is empty.</returns>
        private static List<AutoPartDTO> TopDocs2Data(IndexSearcher searcher, ScoreDoc[] scoreDoc, QryPage page)
        {
            int first = (page.PageNumber - 1) * page.PerPageSize;
            int limit = page.PageNumber * page.PerPageSize;

            // The last page may be only partially filled.
            if (page.TotalCount < limit)
            {
                limit = page.TotalCount;
            }

            var result = new List<AutoPartDTO>();
            int cursor = first;
            while (cursor < limit)
            {
                Document stored = searcher.Doc(scoreDoc[cursor].doc);

                result.Add(new AutoPartDTO
                {
                    Id = long.Parse(stored.Get("id")),
                    Name = stored.Get("Name")
                });

                cursor++;
            }

            return result;
        }

        /// <summary>
        /// Converts every hit in <paramref name="docs"/> into a DTO.
        /// </summary>
        /// <param name="searcher">Searcher used to load each hit's stored document.</param>
        /// <param name="docs">The hits returned by the search; may be null or empty.</param>
        /// <returns>
        /// One DTO (Id and Name only) per hit. An empty list, never null, when there
        /// are no hits, so callers can enumerate the result without a null check.
        /// </returns>
        private static List<AutoPartDTO> TopDocs2Data(IndexSearcher searcher, ScoreDoc[] docs)
        {
            List<AutoPartDTO> list = new List<AutoPartDTO>();

            if (docs == null || docs.Length == 0)
            {
                return list;
            }

            foreach (ScoreDoc sd in docs)
            {
                Document doc = searcher.Doc(sd.doc);

                AutoPartDTO autoPartDto = new AutoPartDTO();
                autoPartDto.Id = long.Parse(doc.Get("id"));
                autoPartDto.Name = doc.Get("Name");

                list.Add(autoPartDto);
            }

            return list;
        }

        #endregion

    }

    /// <summary>
    /// Shared settings for the product index.
    /// </summary>
    public class Common
    {
        /// <summary>
        /// Full path of the index location: "auto.index" under the application's
        /// base directory.
        /// </summary>
        public static string ProductIndexPath
        {
            get { return IndexStoredDirectory; }
        }

        // Path.Combine inserts a directory separator only when the base directory
        // does not already end with one, so the path is well-formed either way.
        private static readonly string IndexStoredDirectory =
            System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "auto.index");
    }



    /// <summary>
    /// Collects Lucene filters and applies all of them to a query by wrapping the
    /// query in one <c>FilteredQuery</c> per collected filter.
    /// </summary>
    public class MutiFilter
    {
        // Filters in the order they were added.
        private readonly List<Filter> filterList = new List<Filter>();

        /// <summary>
        /// Adds a filter that keeps only documents containing the exact term
        /// <paramref name="Field"/> = <paramref name="Value"/>.
        /// </summary>
        public void AddFilter(String Field, String Value)
        {
            var exactMatch = new TermQuery(new Term(Field, Value));
            filterList.Add(new QueryFilter(exactMatch));
        }

        /// <summary>
        /// Adds a filter that keeps only documents whose <paramref name="Field"/>
        /// term lies between <paramref name="start"/> and <paramref name="end"/>;
        /// the range query is created as inclusive.
        /// </summary>
        public void AddRangeFilter(string Field, string start, string end)
        {
            var lower = new Term(Field, start);
            var upper = new Term(Field, end);
            var range = new RangeQuery(lower, upper, true);
            filterList.Add(new QueryFilter(range));
        }

        /// <summary>
        /// Wraps <paramref name="query"/> in every collected filter, in insertion order.
        /// </summary>
        /// <param name="query">The query to restrict.</param>
        /// <returns>The filtered query, or <paramref name="query"/> itself when no
        /// filter has been added.</returns>
        public Query getFilterQuery(Query query)
        {
            Query narrowed = query;

            // Each filter further narrows the result of the previous wrapping.
            foreach (Filter current in filterList)
            {
                narrowed = new FilteredQuery(narrowed, current);
            }

            return narrowed;
        }
    }

   

    /// <summary>
    /// Tokenizer that splits its input on commas and spaces only; every other
    /// character is part of a token.
    /// </summary>
    public class EsayTooTokenizer : CharTokenizer
    {
        public EsayTooTokenizer(TextReader reader)
            : base(reader)
        {
        }

        /// <summary>
        /// Reports whether <paramref name="c"/> belongs inside a token.
        /// </summary>
        /// <returns><c>false</c> for ',' and ' ' (the separators), <c>true</c> otherwise.</returns>
        protected override bool IsTokenChar(char c)
        {
            return c != ',' && c != ' ';
        }
    }



    /// <summary>
    /// The simplest possible custom analyzer: it tokenizes every field with
    /// <c>EsayTooTokenizer</c> and applies no further token filters.
    /// </summary>
    public class EsayTooAnalyzer : Analyzer
    {
        /// <summary>
        /// Creates the token stream for a field; <paramref name="fieldName"/> is
        /// not used, so all fields are tokenized the same way.
        /// </summary>
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            return new EsayTooTokenizer(reader);
        }
    }

  

你可能感兴趣的:(Lucene)