ElasticSearch CRUD

说一说用c#来操作 ElasticSearch :

1 Create Document :

在这一步,我遇到了2个问题:

1)选择将文档输入到 ElasticSearch Server 的方法:查阅 Elastic 的文档发现,提供 ElasticSearch.Net和 NEST 两种方式。 其中 NEST 本身还是调用了 ElasticSearch.Net 的库,而 ElasticSearch.Net 是不需要任何依赖的,一个干净的 Client 帮我们把所有的事情都做好了。(其实我猜,ElasticSearch.net 是不是调用了 ElasticSearch 的 Restful API, 封装好了几个对象给我们用而已。暂不明确,下回再研究)。

2) 在 ElasticSearch.Net 调用 Index 的方法时候,Index 名字大写了,导致创建不成功,所以单步调试,看看返回信息还是有助于排查问题的。 这个错误信息是:

ServerError: ServerError: 400Type: invalid_index_name_exception Reason: \"Invalid index name [DailyOuput], must be lowercase\"

接下来是简单的一段代码,一个封装了部分功能的ElasticSearch.Net类:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Newtonsoft.Json;
using System.Data.SqlClient;
using System.Data.SqlTypes;
using System.IO;
using Elasticsearch.Net;
using Nest;


namespace LoadES.Model
{


    /// <summary>
    /// Holder for an Elasticsearch.Net low-level client.
    /// </summary>
    /// <remarks>
    /// <see cref="NetClient"/> is only assigned once <see cref="EsNetClient"/> has been called.
    /// </remarks>
    public class ElasticSearchNetClient : IDisposable
    {
        /// <summary>The low-level client built by <see cref="EsNetClient"/>.</summary>
        public ElasticLowLevelClient NetClient { get; set; }

        /// <summary>
        /// Builds a low-level client for the node at http://192.168.51.101:9500 with a
        /// two-minute request timeout and stores it in <see cref="NetClient"/>.
        /// </summary>
        public void EsNetClient()
        {
            Uri nodeAddress = new Uri(@"http://192.168.51.101:9500");
            TimeSpan requestTimeout = TimeSpan.FromMinutes(2);

            var configuration = new ConnectionConfiguration(nodeAddress)
                .RequestTimeout(requestTimeout);

            NetClient = new ElasticLowLevelClient(configuration);
        }

        /// <summary>
        /// Present so the wrapper can be used in a <c>using</c> statement; performs no work.
        /// </summary>
        public void Dispose()
        {
        }
    }
}

接下来是从 SQL Server 加载数据到 ElasticSearch Index 库的简单示例:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Newtonsoft.Json;
using System.Data.SqlClient;
using System.Data.SqlTypes;
using System.IO;
using Elasticsearch.Net;
using Nest;
using LoadES.Model;

namespace LoadES
{
    /// <summary>
    /// Console loader that reads every row of the SQL Server table tblinorout and sends
    /// each one to Elasticsearch as a separate index request.
    /// </summary>
    class Program
    {
        // Index and document type every row is written to.
        private const string IndexName = "homecapital";
        private const string TypeName = "Expense";

        /// <summary>
        /// Opens the SQL Server connection, reads the rows and indexes them one by one,
        /// printing the raw Elasticsearch response for each document.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            using (SqlConnection sqlConnection = new SqlConnection(
                @"data source=(local)\sqlhome;Integrated Security = SSPI "))
            {
                try
                {
                    sqlConnection.Open();
                }
                catch (Exception ep)
                {
                    // Nothing below can work without an open connection, so stop here
                    // instead of failing later inside ExecuteReader.
                    Console.Write(ep.Message);
                    return;
                }

                using (ElasticSearchNetClient esClient = new ElasticSearchNetClient())
                using (SqlCommand sqlCommand = sqlConnection.CreateCommand())
                {
                    // Build the client once; the original rebuilt it for every row.
                    esClient.EsNetClient();

                    sqlCommand.CommandType = System.Data.CommandType.Text;
                    // Kept on one line: a regular string literal cannot span lines.
                    sqlCommand.CommandText =
                        "SELECT row_number() over(order by rq asc) as DocId, * FROM tblinorout  ";

                    using (SqlDataReader sqlRow = sqlCommand.ExecuteReader())
                    {
                        while (sqlRow.Read())
                        {
                            sqlRowData row = new sqlRowData
                            {
                                DocId = Convert.ToInt32(sqlRow["DocId"]),
                                TransDate = Convert.ToDateTime(sqlRow["rq"]),
                                Vol = Convert.ToDecimal(sqlRow["vol"]),
                                CapitalItem = Convert.ToString(sqlRow["capitalItem"]),
                                CapitalSubItem = Convert.ToString(sqlRow["capitalSubItem"])
                            };

                            var indexResponse = esClient.NetClient.Index<byte[]>(
                                IndexName,
                                TypeName,
                                row.DocId.ToString(),
                                row);

                            // Decode the body; byte[].ToString() would only print "System.Byte[]".
                            byte[] responseBytes = indexResponse.Body;
                            string responseText = responseBytes == null
                                ? string.Empty
                                : Encoding.UTF8.GetString(responseBytes);
                            Console.WriteLine("Response:" + responseText);

                            // One argument per placeholder; the original supplied a single
                            // argument for two placeholders, which throws FormatException.
                            Console.WriteLine(
                                "{0} has been inserted into {1} as a type of {2}",
                                row.DocId,
                                IndexName,
                                TypeName);
                        }
                    }
                }
            }
        }
    }

    /// <summary>
    /// One row of the tblinorout query result, in the shape that is sent to Elasticsearch.
    /// </summary>
    public class sqlRowData
    {
        /// <summary>Value of the query's DocId column.</summary>
        public int DocId { get; set; }

        /// <summary>Value of the rq column.</summary>
        public DateTime TransDate { get; set; }

        /// <summary>Value of the vol column.</summary>
        public decimal Vol { get; set; }

        /// <summary>Value of the capitalItem column.</summary>
        public string CapitalItem { get; set; }

        /// <summary>Value of the capitalSubItem column.</summary>
        public string CapitalSubItem { get; set; }
    }
}

当然我们还要安装 kibana,检查我们的数据到底是不是被成功装载了。这个时候我碰到一个奇怪的问题,我的 ElasticSearch Server 版本是5.4.3 , 而我下载的 Kibana 却是 5.5.1 的,一开始运行 Kibana 会提示: kibana requires elasticsearch v5.5.1 on all nodes, your elasticsearch is v5.4.3. 导致无法打开 kibana. 经过摸索发现只要修改 kibana 安装文件夹下面的 package.json 文件,把 version 属性改为 5.4.3 ,重新启动后 kibana 就会把自己当作 5.4.3 版本,从而可以连接 5.4.3 版本的 ElasticSearch 了(这只是临时的变通办法,更稳妥的做法是让 Kibana 与 ElasticSearch 的版本保持一致)。

"version": "5.4.3"

2 . Bulk Create 索引文档
当我们有成千上万条数据需要增量更新到 ElasticSearch 的时候,一条一条的创建,会非常耗时。网络,客户端与服务器端两头的请求与回应,都会耽误不少时间。Elasticsearch 提供了 Bulk Insert,可以节省不少时间,一次通讯的成本低很多。

我们给原本用于封装 ElasticSearch Client 的类,加上 Index 和 BulkIndex 两个方法:

namespace LoadES.Model
{


    /// <summary>
    /// Wrapper around an Elasticsearch.Net low-level client that adds single-document
    /// and bulk indexing helpers.
    /// </summary>
    public class ElasticSearchNetClient: IDisposable
    {
        /// <summary>
        /// The low-level client. Assigned by <see cref="EsNetClient"/>; the indexing helpers
        /// create it on first use if it has not been assigned yet.
        /// </summary>
        public ElasticLowLevelClient NetClient { set; get; }

        /// <summary>
        /// Builds a low-level client for the node at http://192.168.51.101:9500 with a
        /// two-minute request timeout and stores it in <see cref="NetClient"/>.
        /// </summary>
        public void  EsNetClient()
        {
            var settings = new ConnectionConfiguration(new Uri(@"http://192.168.51.101:9500"))
                .RequestTimeout(TimeSpan.FromMinutes(2));
            NetClient = new ElasticLowLevelClient(settings);
        }

        /// <summary>
        /// Present so the wrapper can be used in a <c>using</c> statement; performs no work.
        /// </summary>
        public void Dispose()
        {
        }

        /// <summary>
        /// Indexes one document into index "homecapital" with type "expense".
        /// </summary>
        /// <param name="DocId">Id to store the document under.</param>
        /// <param name="Document">Request body holding the document.</param>
        /// <returns>The response with its body as raw bytes.</returns>
        public ElasticsearchResponse<byte[]> Index(string DocId, PostData<object> Document)
        {
            return GetClient().Index<byte[]>("homecapital", "expense", DocId, Document);
        }

        /// <summary>
        /// Sends the given objects as the body of one bulk request.
        /// </summary>
        /// <param name="Document">
        /// Bulk body items; callers in this file alternate an action/metadata object with
        /// the document it applies to.
        /// </param>
        /// <returns>The response with its body as raw bytes.</returns>
        public ElasticsearchResponse<byte[]> BulkIndex(Object[] Document)
        {
            return GetClient().Bulk<byte[]>(Document);
        }

        // Returns NetClient, building it first when EsNetClient() has not been called, so the
        // indexing helpers do not fail with a NullReferenceException.
        private ElasticLowLevelClient GetClient()
        {
            if (NetClient == null)
            {
                EsNetClient();
            }

            return NetClient;
        }
    }
} 
  

这个方法 BulkIndex 其实就是对 Bulk 原生方法的封装。 Elasticsearch Bulk 操作的 Endpoint 是_bulk, 符合格式的 Restful API Json 是:

action_and_meta_data\n
optional_source\n
action_and_meta_data\n
optional_source\n
....
action_and_meta_data\n
optional_source\n
POST _bulk
{ "index" : { "_index" : "test", "_type" : "type1", "_id" : "1" } }
{ "field1" : "value1" }
{ "delete" : { "_index" : "test", "_type" : "type1", "_id" : "2" } }
{ "create" : { "_index" : "test", "_type" : "type1", "_id" : "3" } }
{ "field1" : "value3" }
{ "update" : {"_id" : "1", "_type" : "type1", "_index" : "test"} }
{ "doc" : {"field2" : "value2"} }

而 ElasticSearch.Net 文档中是这样描述用法的:

// Bulk body: every action/metadata object is immediately followed by the document it applies to.
var people = new object[]
{
    new { index = new { _index = "people", _type = "person", _id = "1" } },
    new { FirstName = "Martijn", LastName = "Laarman" },
    new { index = new { _index = "people", _type = "person", _id = "2" } },
    new { FirstName = "Greg", LastName = "Marzouka" },
    new { index = new { _index = "people", _type = "person", _id = "3" } },
    new { FirstName = "Russ", LastName = "Cam" },
};

// The type argument selects how the response body is exposed; it must be Stream here
// because the next line assigns Body to a Stream.
var indexResponse = lowlevelClient.Bulk<Stream>(people);
Stream responseStream = indexResponse.Body;

所以我们也就依葫芦画瓢:

namespace LoadES
{
    /// <summary>
    /// Console loader that reads every row of the SQL Server table tblinorout and sends
    /// them all to Elasticsearch in a single bulk request.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads the rows, builds the bulk body, sends it, and prints the elapsed time
        /// and the raw Elasticsearch response.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            using (SqlConnection sqlConnection = new SqlConnection(
                @"data source=(local)\sqlhome;Initial Catalog=Capital;Integrated Security = SSPI "))
            {
                try
                {
                    sqlConnection.Open();
                }
                catch (Exception ep)
                {
                    // Nothing below can work without an open connection, so stop here
                    // instead of failing later inside ExecuteReader.
                    Console.Write(ep.Message);
                    return;
                }

                List<object> docRows = new List<object>();

                // Started before the read loop, so the reported time covers reading the rows
                // as well as the bulk request, as in the original measurement.
                var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                using (SqlCommand sqlCommand = sqlConnection.CreateCommand())
                {
                    sqlCommand.CommandType = System.Data.CommandType.Text;
                    sqlCommand.CommandText =
                        "SELECT row_number() over(order by rq asc) as DocId, * FROM tblinorout  ";

                    using (SqlDataReader sqlRow = sqlCommand.ExecuteReader())
                    {
                        while (sqlRow.Read())
                        {
                            sqlRowData row = new sqlRowData
                            {
                                DocId = Convert.ToInt32(sqlRow["DocId"]),
                                TransDate = Convert.ToDateTime(sqlRow["rq"]),
                                Vol = Convert.ToDecimal(sqlRow["vol"]),
                                CapitalItem = Convert.ToString(sqlRow["capitalItem"]),
                                CapitalSubItem = Convert.ToString(sqlRow["capitalSubItem"])
                            };

                            // Action/metadata entry, followed by the document it applies to.
                            docRows.Add(new
                            {
                                index = new
                                {
                                    _index = "homecapital",
                                    _type = "DailyExpense",
                                    _id = row.DocId.ToString()
                                }
                            });
                            // Field is CapitalItem; the original misspelled it "CapialItem".
                            docRows.Add(new
                            {
                                DocId = row.DocId,
                                TransDate = row.TransDate,
                                Vol = row.Vol,
                                CapitalItem = row.CapitalItem,
                                CapitalSubItem = row.CapitalSubItem
                            });
                        }
                    }
                }

                if (docRows.Count == 0)
                {
                    // Avoid sending a bulk request with an empty body.
                    Console.WriteLine("No rows to index.");
                    return;
                }

                using (ElasticSearchNetClient esClient = new ElasticSearchNetClient())
                {
                    esClient.EsNetClient();
                    var indexResponse = esClient.BulkIndex(docRows.ToArray());
                    stopwatch.Stop();

                    Console.WriteLine("Time Used:" + Convert.ToString(stopwatch.Elapsed));

                    // Decode the body; byte[].ToString() would only print "System.Byte[]".
                    byte[] responseBytes = indexResponse.Body;
                    string responseText = responseBytes == null
                        ? string.Empty
                        : System.Text.Encoding.UTF8.GetString(responseBytes);
                    Console.WriteLine("Response:" + responseText);
                }
            }
        }
    }

    /// <summary>
    /// One row of the tblinorout query result, used to build the bulk request body.
    /// </summary>
    public class sqlRowData
    {
        /// <summary>Value of the query's DocId column.</summary>
        public int DocId { get; set; }

        /// <summary>Value of the rq column.</summary>
        public DateTime TransDate { get; set; }

        /// <summary>Value of the vol column.</summary>
        public decimal Vol { get; set; }

        /// <summary>Value of the capitalItem column.</summary>
        public string CapitalItem { get; set; }

        /// <summary>Value of the capitalSubItem column.</summary>
        public string CapitalSubItem { get; set; }
    }
} 
  

时间上可以明显感觉到,在我 8G RAM的机器上,大约用时不到1秒。而非 Bulk Insert 需要 24 秒。

。。。未完
待续

欢迎关注个人微信公众号【有关SQL】

ElasticSearch CRUD_第1张图片

你可能感兴趣的:(vs.net)