-XGET          specifies the request method
-d             passes parameters in the request body (as a POST-style payload)
?pretty=true   pretty-prints the JSON result
curl -XGET http://localhost:9200/_cluster/health?pretty  -- check Elasticsearch cluster health
curl -XGET http://localhost:9200/  -- basic information about this instance
curl -XGET http://localhost:9200/_cluster/nodes/  -- information about the nodes in the cluster
curl -XPOST http://localhost:9200/_cluster/nodes/_shutdown  -- shut down the whole cluster
curl -XPOST http://localhost:9200/_cluster/nodes/aaaa/_shutdown  -- shut down the node named aaaa
curl -XPOST http://localhost:9200/test  -- create an index named test
curl -XDELETE http://localhost:9200/test  -- delete the index named test
curl -XGET 'http://10.10.110.2:19200/benlaitest/_search?pretty=true' -d '{"query":{"multi_match":{"query":"法国","fields":["firstname","lastname"]}}}'  -- search (match against firstname and lastname)
curl http://10.10.110.160:9200/benlaitest/_analyze?analyzer=standard -d '我爱你中国'  -- analyze text with the standard analyzer
APIs to execute from Postman:
http://10.10.110.160:9200/_cat/indices?v  -- GET request: list the indices
http://10.10.110.160:9200/benlaitest/_analyze?analyzer=standard  -- view analysis (tokenization) results
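The standard analyzer splits Chinese text into single-character tokens, so analyzing 我爱你中国 returns roughly this (offsets omitted):
{"tokens":[{"token":"我","type":"<IDEOGRAPHIC>","position":0},{"token":"爱","position":1},{"token":"你","position":2},{"token":"中","position":3},{"token":"国","position":4}]}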
Elasticsearch provides a rich, flexible query language called the Query DSL (Domain Specific Language), which lets you build much more complex and powerful queries. It is expressed as a JSON request body.
GET /product_index/product/_search
{
  "query": {
    "match_all": {}
  }
}
GET /product_index/product/_search
{
  "query": {
    "match": {
      "product_name": "milk"
    }
  },
  "sort": [
    {
      "price": "desc"
    }
  ]
}
GET /product_index/product/_search
{
  "query": {
    "match_all": {}
  },
  "_source": [
    "product_name",
    "price"
  ],
  "from": 0,  ## offset of the first result (0-based)
  "size": 1   ## number of results to return
}
GET /product_index/product/_search
{
  "query": {
    "range": {
      "price": {
        "gte": 30.00
      }
    }
  }
}
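The next example combines several clause types in a bool query: must clauses are required and contribute to the score, should clauses are optional and boost relevance, must_not excludes matching documents, and filter restricts results without affecting scoring.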
GET /product_index/product/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "product_name": "pure milk"
          }
        }
      ],
      "should": [
        {
          "match": {
            "product_desc": "常温"
          }
        }
      ],
      "must_not": [
        {
          "match": {
            "product_name": "蒙牛"
          }
        }
      ],
      "filter": {
        "range": {
          "price": {
            "gte": 33.00
          }
        }
      }
    }
  }
}
Elastic is developing a high-level client that will sit on top of the REST client and, among other things, let you send DSL queries directly.
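In the meantime, the low-level RestClient already lets you send DSL bodies over HTTP. A minimal sketch of that approach (host, index, and query are illustrative; the utility methods below use the same performRequest call):

import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.entity.ContentType;
import org.apache.http.nio.entity.NStringEntity;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;

import java.util.Collections;

public class RestClientDemo {
    public static void main(String[] args) throws Exception {
        // build a low-level client against a single node (host/port illustrative)
        RestClient restClient = RestClient.builder(new HttpHost("localhost", 9200, "http")).build();
        // the DSL query travels as the request entity
        HttpEntity entity = new NStringEntity("{\"query\":{\"match_all\":{}}}", ContentType.APPLICATION_JSON);
        Response response = restClient.performRequest("GET", "/product_index/product/_search",
                Collections.emptyMap(), entity);
        System.out.println(EntityUtils.toString(response.getEntity()));
        restClient.close();
    }
}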
// Save raw crawler data
public void saveCrawlerInstanceData(List<CrawlerInstanceData> list) {
    try {
        String pre = "raw crawler data";
        StringBuilder bulkRequestBody = new StringBuilder();
        // counter used to flush a batch every 5000 documents
        int count = 1;
        for (CrawlerInstanceData data : list) {
            try {
                Map map = BeanUtil.transBean2Map(data);
                map.put("ctime", data.getCtime().getTime());
                // index name
                String esIndex = getESIndex(4);
                String requestJson = JSON.toJSONString(map, WriteMapNullValue);
                // use \n, not %n: the _bulk API is newline-delimited and %n emits \r\n on Windows
                String actionMetaData = String.format("{ \"index\" : { \"_index\" : \"%s\", \"_type\" : \"%s\" ,\"_id\" : \"%s\"} }\n",
                        indexPrefix + esIndex, "crawlerdata", data.getId());
                // assemble the NDJSON bulk body: action line + document line
                bulkRequestBody.append(actionMetaData);
                bulkRequestBody.append(requestJson);
                bulkRequestBody.append("\n");
                String esPath = String.format("/%s/%s/%s", indexPrefix + esIndex, "crawlerdata", "_bulk");
                // flush every 5000 documents, plus whatever is left at the end
                if (count % 5000 == 0 || count == list.size()) {
                    String resultJson = RestClientUtil.getESDtats(restClient, bulkRequestBody.toString(), esPath, "POST");
                    if (StringUtils.isBlank(resultJson)) {
                        logger.error("{} es POST bulk insert failed", pre);
                        throw new Exception("es POST bulk insert failed");
                    } else {
                        bulkRequestBody = new StringBuilder();
                    }
                }
                count++;
            } catch (Exception e) {
                logger.error("failed to add document to es", e);
            }
        }
    } catch (Exception e) {
        logger.error("failed to save raw crawler data to es", e);
    }
}
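For a single document, the bulk body assembled above looks like this (index name and values illustrative; each action line is followed by its document and a trailing newline, as the _bulk API requires):

{ "index" : { "_index" : "crawler_2019", "_type" : "crawlerdata" ,"_id" : "1001"} }
{"id":1001,"crawler_instance_id":88,"crawler_cnt":25,"update_cnt":10,"ctime":1546300800000}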
Wrapping DSL execution over RestClient:
public static String getESDtats(RestClient restClient, String sql, String esPath, String requestType) {
    if (null != restClient && !StringUtil.isBlank(sql) && !StringUtil.isBlank(esPath)) {
        HttpEntity entity = new NStringEntity(sql, ContentType.APPLICATION_JSON);
        String result = null;
        try {
            Response indexResponse = restClient.performRequest(null == requestType ? "GET" : requestType,
                    esPath, Collections.emptyMap(), entity, new Header[0]);
            result = EntityUtils.toString(indexResponse.getEntity());
        } catch (IOException e) {
            LOGGER.error(ExceptionUtils.getStackTrace(e));
        }
        return result;
    } else {
        return null;
    }
}
public Map<String, Object> lastTaskDataCount(String crawlerInstanceId) {
    String esIndex = getESIndex(4);
    // assemble the DSL JSON
    StringBuilder requestBody = new StringBuilder();
    requestBody.append("{\"size\":10,\"_source\":[\"\"],\"from\": 0,");
    requestBody.append("\"query\":{\"term\":{\"crawler_instance_id\":").append(crawlerInstanceId).append("}},");
    // sums of stored record count and crawled record count
    requestBody.append("\"aggs\": {\"crawlTotalAgg\": {\"sum\": {\"field\":\"crawler_cnt\"}},");
    requestBody.append("\"updateTotalAgg\":{ \"sum\": {\"field\":\"update_cnt\"}}}}");
    // es path
    String esPath = String.format("/%s/%s/%s", indexPrefix + esIndex, "crawlerdata", "_search");
    String result = RestClientUtil.getESDtats(restClient, requestBody.toString(), esPath, "GET");
    Map<String, Object> map = new HashMap<String, Object>();
    JSONObject jsonObject = JSONObject.parseObject(result);
    if (null != jsonObject) {
        String aggregations = jsonObject.getString("aggregations");
        JSONObject aggregationsObject = JSONObject.parseObject(aggregations);
        if (aggregationsObject != null) {
            // sum aggregations come back as doubles; truncate to int
            map.put("crawl_total", (int) Double.parseDouble(JSONObject.parseObject(aggregationsObject.getString("crawlTotalAgg")).getString("value")));
            map.put("update_total", (int) Double.parseDouble(JSONObject.parseObject(aggregationsObject.getString("updateTotalAgg")).getString("value")));
        }
        JSONObject hitsObject = JSONObject.parseObject(String.valueOf(jsonObject.get("hits")));
        if (null != hitsObject) {
            map.put("data_total", Integer.parseInt(String.valueOf(hitsObject.get("total"))));
        }
    }
    return map;
}
The aggregation DSL. _source is left empty so the response carries only aggregation results, which speeds up the query:
{
  "size": 10,
  "_source": [""],
  "from": 0,
  "query": {
    "term": {
      "crawler_instance_id": crawler_instance_id
    }
  },
  "aggs": {
    "crawlTotalAgg": {
      "sum": {
        "field": "crawler_cnt"
      }
    },
    "updateTotalAgg": {
      "sum": {
        "field": "update_cnt"
      }
    }
  }
}
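For reference, the response body that lastTaskDataCount picks apart has roughly this shape (values illustrative):

{
  "hits": { "total": 7, "hits": [ ... ] },
  "aggregations": {
    "crawlTotalAgg": { "value": 1234.0 },
    "updateTotalAgg": { "value": 567.0 }
  }
}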
// Statistics for the last 7 crawler runs
public List<Map<String, Object>> lastSevenTaskCount(String crawlerCode, String mediaId) {
    String esIndex = getESIndex(4);
    List<Map<String, Object>> reList = new ArrayList<Map<String, Object>>();
    // assemble the DSL JSON
    StringBuilder requestBody = new StringBuilder();
    requestBody.append("{\"size\":7,\"_source\":[\"id\"],\"from\": 0,");
    requestBody.append("\"query\":{\"bool\":{\"must\":[{\"term\":{\"crawler_code.keyword\":\"").append(crawlerCode).append("\"}},{");
    requestBody.append("\"term\": {\"media_id\":").append(mediaId).append("}}]}},");
    requestBody.append("\"aggs\": {\"crawler_instance_id_agg\": {\"terms\": {\"field\": \"crawler_instance_id\",\"size\":7,\"order\":{\"_term\": \"desc\"}},");
    requestBody.append("\"aggs\": {\"crawler_code_agg\": {\"terms\": {\"field\":\"crawler_code\"},");
    requestBody.append("\"aggs\": {\"media_id_agg\": {\"terms\": {\"field\":\"media_id\"},");
    // sums of stored record count and crawled record count
    requestBody.append("\"aggs\": {\"update_sum_agg\": {\"sum\": {\"field\": \"update_cnt\"}},\"crawler_cnt_agg\": {\"sum\": {\"field\": \"crawler_cnt\"").append("}}}}}}}}},");
    requestBody.append("\"sort\": [{\"id\": {\"order\": \"desc\"}}]}");
    // es path
    String esPath = String.format("/%s/%s/%s", indexPrefix + esIndex, "crawlerdata", "_search");
    String result = RestClientUtil.getESDtats(restClient, requestBody.toString(), esPath, "GET");
    // unwrap the nested terms aggregations level by level
    List<Map> crawlerInstanceList = RestClientUtil.getAggregationsListByResult(result, "crawler_instance_id_agg");
    crawlerInstanceList.stream().forEach(x -> {
        JSONObject crawlerCodeObject = (JSONObject) x.get("crawler_code_agg");
        List<Map> crawlerCodeList = bucketsObject(crawlerCodeObject);
        Map<String, Object> map = new HashMap<String, Object>();
        map.put("crawler_instance_id", x.get("key"));
        crawlerCodeList.stream().forEach(y -> {
            map.put("crawler_code", y.get("key"));
            JSONObject media = (JSONObject) y.get("media_id_agg");
            List<Map> mediaList = bucketsObject(media);
            mediaList.stream().forEach(z -> {
                JSONObject updateSum = (JSONObject) z.get("update_sum_agg");
                map.put("update_sum", (int) Double.parseDouble(updateSum.getString("value")));
                JSONObject crawlTotal = (JSONObject) z.get("crawler_cnt_agg");
                map.put("crawl_total", (int) Double.parseDouble(crawlTotal.getString("value")));
            });
        });
        reList.add(map);
    });
    return reList;
}
The aggregation DSL:
{
  "size": 7,
  "_source": ["id"],
  "from": 0,
  "query": {
    "bool": {
      "must": [{
        "term": {
          "crawler_code.keyword": "新闻" // crawler type
        }
      }, {
        "term": {
          "media_id": 4 // site id
        }
      }]
    }
  },
  "aggs": {
    "crawler_instance_id_agg": {
      "terms": {
        "field": "crawler_instance_id",
        "size": 7,
        "order": {
          "_term": "desc"
        }
      },
      "aggs": {
        "crawler_code_agg": {
          "terms": {
            "field": "crawler_code"
          },
          "aggs": {
            "media_id_agg": {
              "terms": {
                "field": "media_id"
              },
              "aggs": {
                "update_sum_agg": {
                  "sum": {
                    "field": "update_cnt"
                  }
                },
                "crawler_cnt_agg": {
                  "sum": {
                    "field": "crawler_cnt"
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "sort": [{
    "id": {
      "order": "desc"
    }
  }]
}
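The bucketsObject helper used in lastSevenTaskCount is not shown in the original; a plausible sketch (an assumption, not the project's actual code) that pulls the buckets array out of a terms-aggregation node:

// Hypothetical helper: extract the "buckets" array of a terms aggregation
// as a list of maps (fastjson JSONObject implements Map).
private static List<Map> bucketsObject(JSONObject aggNode) {
    List<Map> buckets = new ArrayList<>();
    if (aggNode != null && aggNode.getJSONArray("buckets") != null) {
        for (Object bucket : aggNode.getJSONArray("buckets")) {
            buckets.add((JSONObject) bucket);
        }
    }
    return buckets;
}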
// Build an ES term query on a given field
public Map<String, Object> getEsQueryInfo() {
    Map<String, Object> infoMap = new HashMap<>(16);
    StringBuilder requestBody = new StringBuilder();
    requestBody.append("{\"size\":\"").append(esSize).append("\",");
    requestBody.append("\"query\": {\"bool\":{\"must\":{\"term\":{\"").append(field).append("\":\"").append(value);
    requestBody.append("\"}}}}}");
    infoMap.put("esQuery", requestBody.toString());
    infoMap.put("oldEsUrl", String.format("/%s/_search?scroll=%s", oldEsUrl, esScrollTime));
    LOGGER.info("query config {}", JSON.toJSON(infoMap));
    return infoMap;
}
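With illustrative values (esSize = 1000, field = "crawler_code", value = "news"), the body generated above is:

{"size":"1000","query": {"bool":{"must":{"term":{"crawler_code":"news"}}}}}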
// Build an ES range query
public Map<String, Object> getRangeEsQueryInfo(long time) {
    Map<String, Object> infoMap = new HashMap<>(16);
    StringBuilder requestBody = new StringBuilder();
    requestBody.append("{\"size\":\"").append(esSize).append("\",");
    // a range query must name the field it filters on; "ctime" is assumed here
    requestBody.append("\"query\": {\"bool\":{\"must\":{\"range\":{\"ctime\":{\"gt\":").append(time);
    requestBody.append("}}}}}}");
    infoMap.put("esQuery", requestBody.toString());
    infoMap.put("oldEsUrl", String.format("/%s/_search?scroll=%s", oldEsUrl, esScrollTime));
    LOGGER.info("query config {}", JSON.toJSON(infoMap));
    return infoMap;
}
// Query ES data (plain search plus scroll paging)
public List<Map> query(Map<String, Object> infoMap) {
    List<Map> resultList = new ArrayList<>();
    String esQuery = MapUtils.getString(infoMap, "esQuery");
    String oldEsUrl = MapUtils.getString(infoMap, "oldEsUrl");
    // continue an existing scroll if we already hold a scroll id
    if (StringUtils.isNotBlank(esScrollId)) {
        String scrollQueryUrl = "/_search/scroll";
        String scrollQuerySql = String.format("{\"scroll\":\"%s\",\"scroll_id\":\"%s\"}", esScrollTime, esScrollId);
        LOGGER.info("es scroll query sql:{} path:{}", scrollQuerySql, scrollQueryUrl);
        String scrollDateResult = RestClientUtil.getESDtats(restClient, scrollQuerySql, scrollQueryUrl, "GET");
        resultList = RestClientUtil.getHitWithIdListByResult(scrollDateResult);
        // an empty page means the scroll is exhausted; reset the scroll id
        if (resultList.size() == 0) {
            esScrollId = "";
        }
    } else {
        LOGGER.info("es query sql:{} path:{}", esQuery, oldEsUrl);
        String dateResult = RestClientUtil.getESDtats(restClient, esQuery, oldEsUrl, "GET");
        resultList = RestClientUtil.getHitWithIdListByResult(dateResult);
        // remember the scroll id so the next call continues from here
        if (resultList != null && resultList.size() > 0) {
            esScrollId = RestClientUtil.getScrollIdByResult(dateResult);
        }
    }
    return resultList;
}
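A driver loop for the migration might look like the sketch below (an assumption about how these methods are wired together, not code from the project): the first call issues the scroll search and stores the scroll id, each later call fetches the next page, and the loop ends when a page comes back empty.

Map<String, Object> info = getEsQueryInfo();
List<Map> page = query(info);
while (page != null && !page.isEmpty()) {
    save(page);          // bulk-index this page into the new cluster
    page = query(info);  // esScrollId is now set, so this continues the scroll
}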
// Bulk-index migrated data into the new ES cluster
public void save(List<Map> list) {
    String pre = "es data migration";
    // counter used to flush a batch every 5000 documents
    int count = 1;
    StringBuilder bulkRequestBody = new StringBuilder();
    for (Map<String, Object> map : list) {
        // use \n, not %n: the _bulk API is newline-delimited and %n emits \r\n on Windows
        String actionMetaData = String.format("{ \"index\" : { \"_index\" : \"%s\", \"_type\" : \"%s\" ,\"_id\" : \"%s\"} }\n",
                newEsUrl, type, map.get("_id"));
        // strip ES metadata fields before re-indexing the document
        map.remove("_id");
        map.remove("_type");
        map.remove("_index");
        String requestJson = JSON.toJSONString(map, WriteMapNullValue);
        // assemble the NDJSON bulk body: action line + document line
        bulkRequestBody.append(actionMetaData);
        bulkRequestBody.append(requestJson);
        bulkRequestBody.append("\n");
        String esPath = String.format("/%s/%s/%s", newEsUrl, type, "_bulk");
        // flush every 5000 documents, plus whatever is left at the end
        if (count % 5000 == 0 || count == list.size()) {
            String resultJson = RestClientUtil.getESDtats(newRestClient, bulkRequestBody.toString(), esPath, "PUT");
            if (StringUtils.isBlank(resultJson)) {
                LOGGER.error("{} es PUT bulk insert failed", pre);
            } else {
                bulkRequestBody = new StringBuilder();
            }
        }
        count++;
    }
}
NodeClient and TransportClient are the basic Java clients; they cannot be used from Perl, Python, Ruby, and so on.
The REST HTTP client exposes ES over HTTP, so any language with a REST client can talk to it.
Our company's crawler project previously used TransportClient to operate ES. Recently, driven by business needs, we began switching to Alibaba Cloud ES; to make that switch, ease future migrations, and stay compatible with newer ES versions, we moved to the RestClient and raw DSL statements. That is how I came to ES, picking things up bit by bit as the project demanded. I am only scratching the surface, but sharing these notes and recording my own progress is a pleasure in itself; please forgive the article's shortcomings, and I hope we can improve together.