ElasticSearch系列学习
ElasticSearch第一步-环境配置
ElasticSearch第二步-CRUD之Sense
ElasticSearch第三步-中文分词
ElasticSearch第四步-查询详解
ElasticSearch第五步-.net平台下c#操作ElasticSearch详解
前面我们讲解了关于ElasticSearch的安装配置,以及CRUD
本章我将讲解怎么使用c#操作ElasticSearch。
首先你需要一定的技术储备,比如:asp.net webapi,mvc,jsonp,knockout。这些知识在这里不再讲解,请自行Google。
项目DEMO介绍
搜索和索引功能我是以服务(webapi项目)方式提供的,在客户端(mvc项目)中的view视图中,直接使用ajax(jsonp格式)方式调用webapi,然后使用knockout绑定到table上的。
项目结构如图:
引入驱动
工欲善其事必先利其器,首先我们需要在Supernova.Webapi层中引入操作ElasticSearch的驱动dll PlainElastic.Net。
如图:
封装操作ElasticSearch的ElasticSearchHelper
demo中涉及的实体对象模型
/// <summary>
/// Result object for an ik analyzer response: the list of tokens the
/// analyzer produced for a piece of text.
/// </summary>
public class ik
{
    /// <summary>Tokens returned by the analyzer.</summary>
    public List<tokens> tokens { get; set; }
}

/// <summary>
/// A single token inside an <see cref="ik"/> analyzer result.
/// Property names are lower-case so they line up with the JSON field names.
/// </summary>
public class tokens
{
    /// <summary>The token text.</summary>
    public string token { get; set; }

    public int start_offset { get; set; }

    public int end_offset { get; set; }

    public string type { get; set; }

    public int position { get; set; }
}

/// <summary>
/// Test data object: one page of <see cref="person"/> search results together
/// with the counters copied from the search response.
/// </summary>
public class personList
{
    /// <summary>Creates an empty result page; <see cref="list"/> is never null.</summary>
    public personList()
    {
        this.list = new List<person>();
    }

    /// <summary>Hit count copied from the search response.</summary>
    public int hits { get; set; }

    /// <summary>The "took" value copied from the search response.</summary>
    public int took { get; set; }

    /// <summary>The persons on this page.</summary>
    public List<person> list { get; set; }
}

/// <summary>
/// Test data object: the document stored in the index.
/// </summary>
public class person
{
    public string id { get; set; }

    public string name { get; set; }

    public int age { get; set; }

    public bool sex { get; set; }

    public DateTime birthday { get; set; }

    public string intro { get; set; }
}
详细介绍ElasticSearchHelper里面的方法
1.索引文档(注意:索引文档之前,需要先为对应的field配置ik分词):
/// <summary>
/// Indexes an already-serialized JSON document under the given index, type and id.
/// </summary>
/// <param name="indexName">Name of the target index.</param>
/// <param name="indexType">Type name inside the index.</param>
/// <param name="id">Id to store the document under.</param>
/// <param name="jsonDocument">The document body as a JSON string.</param>
/// <returns>The index result parsed from the response.</returns>
public IndexResult Index(string indexName, string indexType, string id, string jsonDocument)
{
    // IndexCommand converts to the request path string.
    string command = new IndexCommand(indexName, indexType, id);
    OperationResult response = Client.Put(command, jsonDocument);

    return new JsonNetSerializer().ToIndexResult(response.Result);
}

/// <summary>
/// Serializes <paramref name="document"/> to JSON and indexes it through the
/// string overload.
/// </summary>
/// <param name="indexName">Name of the target index.</param>
/// <param name="indexType">Type name inside the index.</param>
/// <param name="id">Id to store the document under.</param>
/// <param name="document">The object to serialize and index.</param>
/// <returns>The index result parsed from the response.</returns>
public IndexResult Index(string indexName, string indexType, string id, object document)
{
    string json = new JsonNetSerializer().Serialize(document);
    return Index(indexName, indexType, id, json);
}
2.对单个字段的全文检索,字段intro 包含词组key中的任意一个单词。例如:词组(中国美好),只要每条数据的intro字段包含"中国"或者"美好"就返回。
/// <summary>
/// Full-text search on the single field <c>intro</c> using a query-string query
/// built from <paramref name="key"/>, with paging and highlighting.
/// </summary>
/// <param name="indexName">Name of the index to search.</param>
/// <param name="indexType">Type name inside the index.</param>
/// <param name="key">The search phrase.</param>
/// <param name="from">Offset of the first hit to return.</param>
/// <param name="size">Number of hits to return.</param>
/// <returns>
/// A <c>personList</c> holding the total hit count, the "took" value and the
/// mapped hits. <c>intro</c> carries the joined highlight fragments when the
/// response contains them, otherwise the stored <c>intro</c> value.
/// </returns>
public personList Search(string indexName, string indexType, string key, int from, int size)
{
    string cmd = new SearchCommand(indexName, indexType);
    string query = new QueryBuilder<person>()
        // 1. query
        .Query(b => b
            .Bool(m => m
                // "and" relation
                .Must(t => t
                    // query-string search on the analyzed field
                    .QueryString(t1 => t1.DefaultField("intro").Query(key))
                    // Alternatives that can be chained here:
                    //   .QueryString(t1 => t1.DefaultField("name").Query(key))
                    //   .Terms(t2 => t2.Field("intro").Values("研究", "方鸿渐"))
                    //   range query:
                    //   .Range(r => r.Field("age").From("100").To("200"))
                )
            )
        )
        // paging
        .From(from)
        .Size(size)
        // sorting (disabled):
        //   .Sort(c => c.Field("age", SortDirection.desc))
        // highlighting
        // NOTE(review): pre/post tags are empty strings here, so highlighted
        // fragments are not wrapped in any markup - confirm this is intended.
        .Highlight(h => h
            .PreTags("")
            .PostTags("")
            .Fields(
                f => f.FieldName("intro").Order(HighlightOrder.score),
                f => f.FieldName("_all")
            )
        )
        .Build();

    string result = Client.Post(cmd, query);
    var serializer = new JsonNetSerializer();
    var list = serializer.ToSearchResult<person>(result);

    personList datalist = new personList();
    datalist.hits = list.hits.total;
    datalist.took = list.took;

    var persons = list.hits.hits.Select(c => new Supernova.Webapi.DbHelper.person()
    {
        id = c._source.id,
        age = c._source.age,
        birthday = c._source.birthday,
        // A hit may come back without an "intro" highlight entry; fall back to
        // the stored value instead of throwing (same guard as SearchFullFileds).
        intro = c.highlight == null || !c.highlight.Keys.Contains("intro")
            ? c._source.intro
            : string.Join("", c.highlight["intro"]),
        name = c._source.name,
        sex = c._source.sex,
    });
    datalist.list.AddRange(persons);
    return datalist;
}
3.字段intro 或者name 包含词组key中的所有单词。例如:词组(中国美好),只要每条数据的intro或者name字段包含"中国"并且包含"美好"就返回。
/// <summary>
/// Searches for documents whose <c>name</c> OR <c>intro</c> field contains ALL
/// of the tokens that the ik analyzer produces for <paramref name="key"/>.
/// </summary>
/// <param name="indexName">Name of the index to search.</param>
/// <param name="indexType">Type name inside the index.</param>
/// <param name="key">The search phrase; it is tokenized with <c>GetIKTokenFromStr</c>.</param>
/// <param name="from">Offset of the first hit to return.</param>
/// <param name="size">Number of hits to return.</param>
/// <returns>
/// A <c>personList</c> holding the total hit count, the "took" value and the
/// mapped hits. <c>intro</c> and <c>name</c> carry the joined highlight
/// fragments when present, otherwise the stored values.
/// </returns>
public personList SearchFullFileds(string indexName, string indexType, string key, int from, int size)
{
    // One "must" clause per field; each gets one term query per token.
    MustQuery<person> mustNameQueryKeys = new MustQuery<person>();
    MustQuery<person> mustIntroQueryKeys = new MustQuery<person>();
    var arrKeys = GetIKTokenFromStr(key);
    foreach (var item in arrKeys)
    {
        mustNameQueryKeys = mustNameQueryKeys.Term(t3 => t3.Field("name").Value(item)) as MustQuery<person>;
        mustIntroQueryKeys = mustIntroQueryKeys.Term(t3 => t3.Field("intro").Value(item)) as MustQuery<person>;
    }

    string cmd = new SearchCommand(indexName, indexType);
    string query = new QueryBuilder<person>()
        // 1. query: (all tokens in name) OR (all tokens in intro)
        .Query(b => b
            .Bool(m => m
                .Should(t => t
                    .Bool(m1 => m1
                        // Hand-written equivalent for two fixed tokens:
                        //   t2.Term(t3 => t3.Field("name").Value("研究"))
                        //     .Term(t3 => t3.Field("name").Value("方鸿渐"))
                        .Must(t2 => mustNameQueryKeys)
                    )
                )
                .Should(t => t
                    .Bool(m1 => m1
                        // Hand-written equivalent for two fixed tokens:
                        //   t2.Term(t3 => t3.Field("intro").Value("研究"))
                        //     .Term(t3 => t3.Field("intro").Value("方鸿渐"))
                        .Must(t2 => mustIntroQueryKeys)
                    )
                )
            )
        )
        // paging
        .From(from)
        .Size(size)
        // sorting (disabled):
        //   .Sort(c => c.Field("age", SortDirection.desc))
        // highlighting
        // NOTE(review): pre/post tags are empty strings here - confirm intended.
        .Highlight(h => h
            .PreTags("")
            .PostTags("")
            .Fields(
                f => f.FieldName("intro").Order(HighlightOrder.score),
                f => f.FieldName("name").Order(HighlightOrder.score)
            )
        )
        .Build();

    string result = Client.Post(cmd, query);
    var serializer = new JsonNetSerializer();
    var list = serializer.ToSearchResult<person>(result);

    personList datalist = new personList();
    datalist.hits = list.hits.total;
    datalist.took = list.took;

    var persons = list.hits.hits.Select(c => new Supernova.Webapi.DbHelper.person()
    {
        id = c._source.id,
        age = c._source.age,
        birthday = c._source.birthday,
        // Use the highlighted text when the hit has it, else the stored value.
        intro = c.highlight == null || !c.highlight.Keys.Contains("intro")
            ? c._source.intro
            : string.Join("", c.highlight["intro"]),
        name = c.highlight == null || !c.highlight.Keys.Contains("name")
            ? c._source.name
            : string.Join("", c.highlight["name"]),
        sex = c._source.sex
    });
    datalist.list.AddRange(persons);
    return datalist;
}
4.搜索age在1到500之间,并且字段intro 或者name 包含词组key中的所有单词。
/// <summary>
/// Searches for documents whose <c>age</c> falls in the range 1 to 500 AND
/// whose <c>name</c> OR <c>intro</c> field contains ALL of the tokens that the
/// ik analyzer produces for <paramref name="key"/>.
/// </summary>
/// <param name="indexName">Name of the index to search.</param>
/// <param name="indexType">Type name inside the index.</param>
/// <param name="key">The search phrase; it is tokenized with <c>GetIKTokenFromStr</c>.</param>
/// <param name="from">Offset of the first hit to return.</param>
/// <param name="size">Number of hits to return.</param>
/// <returns>
/// A <c>personList</c> holding the total hit count, the "took" value and the
/// mapped hits. <c>intro</c> and <c>name</c> carry the joined highlight
/// fragments when present, otherwise the stored values.
/// </returns>
public personList SearchFullFiledss(string indexName, string indexType, string key, int from, int size)
{
    // One "must" clause per field; each gets one term query per token.
    MustQuery<person> mustNameQueryKeys = new MustQuery<person>();
    MustQuery<person> mustIntroQueryKeys = new MustQuery<person>();
    var arrKeys = GetIKTokenFromStr(key);
    foreach (var item in arrKeys)
    {
        mustNameQueryKeys = mustNameQueryKeys.Term(t3 => t3.Field("name").Value(item)) as MustQuery<person>;
        mustIntroQueryKeys = mustIntroQueryKeys.Term(t3 => t3.Field("intro").Value(item)) as MustQuery<person>;
    }

    string cmd = new SearchCommand(indexName, indexType);
    string query = new QueryBuilder<person>()
        // 1. query: age range AND ((all tokens in name) OR (all tokens in intro))
        .Query(b => b
            .Bool(m => m
                .Must(t => t
                    .Range(r => r.Field("age").From("1").To("500"))
                    .Bool(ms => ms
                        .Should(ts => ts
                            .Bool(m1 => m1
                                // Hand-written equivalent for two fixed tokens:
                                //   t2.Term(t3 => t3.Field("name").Value("研究"))
                                //     .Term(t3 => t3.Field("name").Value("方鸿渐"))
                                .Must(t2 => mustNameQueryKeys)
                            )
                        )
                        .Should(ts => ts
                            .Bool(m1 => m1
                                // Hand-written equivalent for two fixed tokens:
                                //   t2.Term(t3 => t3.Field("intro").Value("研究"))
                                //     .Term(t3 => t3.Field("intro").Value("方鸿渐"))
                                .Must(t2 => mustIntroQueryKeys)
                            )
                        )
                    )
                )
            )
        )
        // paging
        .From(from)
        .Size(size)
        // sorting (disabled):
        //   .Sort(c => c.Field("age", SortDirection.desc))
        // highlighting
        // NOTE(review): pre/post tags are empty strings here - confirm intended.
        .Highlight(h => h
            .PreTags("")
            .PostTags("")
            .Fields(
                f => f.FieldName("intro").Order(HighlightOrder.score),
                f => f.FieldName("name").Order(HighlightOrder.score)
            )
        )
        .Build();

    string result = Client.Post(cmd, query);
    var serializer = new JsonNetSerializer();
    var list = serializer.ToSearchResult<person>(result);

    personList datalist = new personList();
    datalist.hits = list.hits.total;
    datalist.took = list.took;

    var persons = list.hits.hits.Select(c => new Supernova.Webapi.DbHelper.person()
    {
        id = c._source.id,
        age = c._source.age,
        birthday = c._source.birthday,
        // Use the highlighted text when the hit has it, else the stored value.
        intro = c.highlight == null || !c.highlight.Keys.Contains("intro")
            ? c._source.intro
            : string.Join("", c.highlight["intro"]),
        name = c.highlight == null || !c.highlight.Keys.Contains("name")
            ? c._source.name
            : string.Join("", c.highlight["name"]),
        sex = c._source.sex
    });
    datalist.list.AddRange(persons);
    return datalist;
}
需要用到的方法:将语句用ik分词,返回分词结果的集合
/// <summary>
/// Sends <paramref name="key"/> to the <c>db_test</c> index's <c>_analyze</c>
/// endpoint with the <c>ik</c> analyzer and returns the resulting token texts.
/// </summary>
/// <param name="key">The text to tokenize.</param>
/// <returns>
/// The token strings in response order; an empty list when the response does
/// not deserialize into an <c>ik</c> object or carries no tokens.
/// </returns>
private List<string> GetIKTokenFromStr(string key)
{
    string s = "/db_test/_analyze?analyzer=ik";
    // The text is posted wrapped in braces, exactly as the request body.
    var result = Client.Post(s, "{" + key + "}");
    var serializer = new JsonNetSerializer();
    var analysis = serializer.Deserialize(result, typeof(ik)) as ik;

    // "as" yields null on a type mismatch; do not dereference blindly.
    if (analysis == null || analysis.tokens == null)
    {
        return new List<string>();
    }

    return analysis.tokens.Select(c => c.token).ToList();
}
ASP.NET WebApi 调用ElasticSearchHelper
1.首先我们添加一个基类ApiController
/// <summary>
/// Base class for the demo's Web API controllers. Holds an optional MongoDB
/// database/collection pair and small helpers for reading query-string values
/// from the current request.
/// </summary>
public class BaseApiController : ApiController
{
    public MongoDatabase db;

    // Used to return query JSON directly.
    public MongoCollection col = null;

    /// <summary>Creates a controller without a MongoDB collection.</summary>
    public BaseApiController()
    {
    }

    /// <summary>
    /// Creates a controller bound to the named collection of the shared
    /// MongoDB database.
    /// </summary>
    /// <param name="collectionName">Name of the collection to open.</param>
    public BaseApiController(string collectionName)
    {
        db = DbHelper.MongodbHelper.Instance.DB;
        col = db.GetCollection(collectionName);
    }

    /// <summary>
    /// Reads a query-string value from the current request.
    /// </summary>
    /// <param name="paramter">Name of the query-string key.</param>
    /// <returns>The value, or an empty string when the key is absent.</returns>
    public string GetStringRequest(string paramter)
    {
        string value = HttpContext.Current.Request.QueryString[paramter];
        return value ?? "";
    }

    /// <summary>
    /// Reads a query-string value from the current request and parses it as an int.
    /// </summary>
    /// <param name="paramter">Name of the query-string key.</param>
    /// <returns>The parsed value, or null when the key is absent or not an int.</returns>
    public int? GetIntRequest(string paramter)
    {
        string raw = HttpContext.Current.Request.QueryString[paramter] ?? "";

        int parsed;
        if (int.TryParse(raw, out parsed))
        {
            return parsed;
        }

        return null;
    }
}
2.操作ElasticSearch的apicontroller如下:
3.索引数据的api如下:
/// <summary>
/// Indexes test data: builds 10,000,000 <c>person</c> documents from random
/// slices of <c>S.test</c> and indexes each one into <c>db_test/person</c>,
/// using at most four parallel workers.
/// </summary>
/// <returns>Always 1, once every document has been submitted.</returns>
[Route("estest/index")]
[HttpGet]
public object index()
{
    ParallelOptions _po = new ParallelOptions();
    _po.MaxDegreeOfParallelism = 4;

    Parallel.For(
        0,
        10000000,
        _po,
        // System.Random is not thread-safe, so every worker gets its own
        // instance. Seeding from a GUID keeps workers that start in the same
        // clock tick from producing identical sequences.
        () => new Random(Guid.NewGuid().GetHashCode()),
        (c, loopState, rd) =>
        {
            var start = rd.Next(0, S.test.Length - 700);
            var startName = rd.Next(0, S.test.Length - 30);

            person p = new person()
            {
                age = DateTime.Now.Millisecond,
                birthday = DateTime.Now,
                id = Guid.NewGuid().ToString(),
                intro = S.test.Substring(start, 629) + c,
                name = S.test.Substring(startName, 29) + c,
                sex = true
            };

            // Index under the same id that is stored inside the document so
            // the document can later be fetched by person.id.
            ElasticSearchHelper.Intance.Index("db_test", "person", p.id, p);
            return rd;
        },
        rd => { });

    return 1;
}
索引使用的测试数据如下:
4.搜索api如下:
/// <summary>
/// Search API: query-string search on the <c>intro</c> field of
/// <c>db_test/person</c>. Reads <c>Key</c>, <c>from</c> and <c>size</c> from
/// the query string; defaults are "方鸿渐", 0 and 20.
/// </summary>
/// <returns>The result of <c>ElasticSearchHelper.Search</c>.</returns>
[Route("estest")]
[HttpGet]
public object Search()
{
    string key = KeyOrDefault(GetStringRequest("Key"));
    int? from = GetIntRequest("from");
    int? size = GetIntRequest("size");
    return ElasticSearchHelper.Intance.Search("db_test", "person", key, from ?? 0, size ?? 20);
}

/// <summary>
/// Search API: documents whose <c>name</c> or <c>intro</c> contains all tokens
/// of the key. Reads <c>Key</c>, <c>from</c> and <c>size</c> from the query
/// string; defaults are "方鸿渐", 0 and 20.
/// </summary>
/// <returns>The result of <c>ElasticSearchHelper.SearchFullFileds</c>.</returns>
[Route("estest/SearchFullFileds")]
[HttpGet]
public object SearchFullFileds()
{
    string key = KeyOrDefault(GetStringRequest("Key"));
    int? from = GetIntRequest("from");
    int? size = GetIntRequest("size");
    return ElasticSearchHelper.Intance.SearchFullFileds("db_test", "person", key, from ?? 0, size ?? 20);
}

/// <summary>
/// Search API: same as <c>SearchFullFileds</c> with the additional age-range
/// condition applied by the helper. Reads <c>Key</c>, <c>from</c> and
/// <c>size</c> from the query string; defaults are "方鸿渐", 0 and 20.
/// </summary>
/// <returns>The result of <c>ElasticSearchHelper.SearchFullFiledss</c>.</returns>
[Route("estest/SearchFullFiledss")]
[HttpGet]
public object SearchFullFiledss()
{
    string key = KeyOrDefault(GetStringRequest("Key"));
    int? from = GetIntRequest("from");
    int? size = GetIntRequest("size");
    return ElasticSearchHelper.Intance.SearchFullFiledss("db_test", "person", key, from ?? 0, size ?? 20);
}

// Returns the demo's default search phrase when no usable key was supplied.
// GetStringRequest yields "" (never null) for a missing parameter, so a plain
// null-coalescing fallback would never apply the default.
private static string KeyOrDefault(string key)
{
    return string.IsNullOrWhiteSpace(key) ? "方鸿渐" : key;
}
WebSite中的view视图调用webapi
说明:我是直接使用ajax(jsonp格式)调用webapi,返回的数据直接用knockout绑定到table中的。
视图代码如下:
@{
    // Standalone page: render without a shared layout.
    Layout = null;

    // Page title.
    ViewBag.Title = "ElasticSearch测试";
}
搜索结果测试如下(我是用"api/estest/SearchFullFiledss"这个api测试的,搜索age在1到500之间,并且字段intro 或者name 包含词组key中的所有单词。):
1.首先我们看一看测试数据总共有多少:
我们可以看到db_test中总共有两千多万条数据。
搜索测试1
测试条件:key=上海方鸿渐&from=0&size=100,key指搜索关键短语,0是指从第0条开始取数据,100是指取一百条数据,隐藏条件是age大于1小于500
我们可以看到,首次搜索时,两千多万条数据大约耗时3.5秒,这里还包括取100条数据的时间,如果把数据改为20条则会更快。这里的硬件配置还只限于我的本机测试(内存8G,处理器Intel i5-4590 3.3GHZ)。
搜索测试2
测试条件同测试1:key=上海方鸿渐&from=0&size=100,key指搜索关键短语,0是指从第0条开始取数据,100是指取一百条数据,隐藏条件是age大于1小于500
我们可以看到,搜索耗时降到了大约1.5秒。这说明同一个关键词搜索越频繁,搜索速度越快,这是因为ElasticSearch会自动将搜索的内容缓存到内存中。
搜索测试3
测试条件:key=香烟德国&from=0&size=20,key指搜索关键短语,0是指从第0条开始取数据,20是指取二十条数据,隐藏条件是age大于1小于500
搜索测试4
测试条件同测试3,二次相同条件搜索:key=香烟德国&from=0&size=20,key指搜索关键短语,0是指从第0条开始取数据,20是指取二十条数据,隐藏条件是age大于1小于500
我们可以看到,在二次搜索条件相同,搜索数据降低到20条的时候,只耗时不到0.5秒。
本章完……
ElasticSearch系列学习
ElasticSearch第一步-环境配置
ElasticSearch第二步-CRUD之Sense
ElasticSearch第三步-中文分词
ElasticSearch第四步-查询详解
ElasticSearch第五步-.net平台下c#操作ElasticSearch详解
http://www.cnblogs.com/eggTwo/p/4425269.html