Elasticsearch 嵌套聚合求百分比——记一次围绕 bucket_script 根据需求不断改进 DSL 及 Jest API 写法的过程

  1. 需求:求每个科室下不同病历级别的百分比。
  2. 思路:按每个科室分组,再按级别分组,再求占比。
  3. 开动。

第一次尝试:

POST /medicalrecord/my_type/_search?size=0
{
  "size": 0,
  "aggs": {
    "department_group": {
      "terms": {
        "field": "department_secondary",
        "size": 1000000
      },
      "aggs": {
        "total_record": {
          "value_count": {
            "field": "emr_id"
          }
        },
        "levels": {
          "filter": {
            "terms": {
              "has_bug": [
                "1",
                "2",
                "3"
              ]
            }
          },
          "aggs": {
            "level_group": {
              "terms": {
                "field": "has_bug",
                "size": 3
              },
              "aggs": {
                "every_level_count": {
                  "value_count": {
                    "field": "emr_id"
                  }
                },
                "level_percentage": {
                  "bucket_script": {
                    "buckets_path": {
                      "every_level_count": "every_level_count",
                      "department_total_record": "total_record"
                    },
                    "script": "params.every_level_count / params.department_total_record"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

报错:

{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        "reason": "No aggregation found for path [total_record]"
      }
    ],
    "type": "search_phase_execution_exception",
    "reason": "all shards failed",
    "phase": "query",
    "grouped": true,
    "failed_shards": [
      {
        "shard": 0,
        "index": "medicalrecord",
        "node": "p9ZUXdr9R3mxulJKxgNtGA",
        "reason": {
          "type": "illegal_argument_exception",
          "reason": "No aggregation found for path [total_record]"
        }
      }
    ]
  },
  "status": 400
}

第二次尝试:

POST /medicalrecord/my_type/_search?size=0
{
  "size": 0,
  "aggs": {
    "department_group": {
      "terms": {
        "field": "department_secondary",
        "size": 1000000
      },
      "aggs": {
        "total_record": {
          "value_count": {
            "field": "emr_id"
          }
        },
        "levels": {
          "filter": {
            "terms": {
              "has_bug": [
                "1",
                "2",
                "3"
              ]
            }
          },
          "aggs": {
            "level_group": {
              "terms": {
                "field": "has_bug",
                "size": 3
              },
              "aggs": {
                "every_level_count": {
                  "value_count": {
                    "field": "emr_id"
                  }
                }
              }
            }
          }
        },
        "level_percentage": {
          "bucket_script": {
            "buckets_path": {
              "every_level_count": "levels  >  level_group> every_level_count",
              "department_total_record": "total_record"
            },
            "script": "params.every_level_count / params.department_total_record"
          }
        }
      }
    }
  }
}

报错:

{
  "error": {
    "root_cause": [],
    "type": "search_phase_execution_exception",
    "reason": "",
    "phase": "fetch",
    "grouped": true,
    "failed_shards": [],
    "caused_by": {
      "type": "aggregation_execution_exception",
      "reason": "buckets_path must reference either a number value or a single value numeric metric aggregation, got: java.lang.Object[]"
    }
  },
  "status": 503
}

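错误总结:buckets_path 是相对于管道聚合所在位置的相对路径,只能引用同级或下级的聚合,不能向上引用父级聚合(第一次尝试报 "No aggregation found for path [total_record]" 就是这个原因);并且路径最终必须指向一个数值或单值的数值型指标聚合,不能穿过 terms 这类多桶聚合,否则取到的是一个数组(第二次尝试报 "got: java.lang.Object[]")。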

 


哎,好失望啊。下面是最终能完成需求的版本(其中 has_bug 字段其实就是病历级别):

#求所有科室病历某个或者某几个级别的占比
POST /medicalrecord/my_type/_search?size=0
{
  "size": 0,
  "aggs": {
    "department_group": {
      "terms": {
        "field": "department_secondary",
        "size": 1000000
      },
      "aggs": {
        "levels": {
          "filter": {
            "terms": {
              "has_bug": [
                "1",
                "2"
              ]
            }
          }
        },
        "level_percentage": {
          "bucket_script": {
            "buckets_path": {
              "level_count": "levels > _count",
              "department_total_record": "_count"
            },
            "script": "params.level_count / params.department_total_record"
          }
        }
      }
    }
  }
}

过了一天,我突然想到多加一个filter怎么样:

POST /medicalrecord/my_type/_search?size=0
{
  "size": 0,
  "aggs": {
    "department_group": {
      "terms": {
        "field": "department_secondary",
        "size": 1000000
      },
      "aggs": {
        "levels_2_3": {
          "filter": {
            "terms": {
              "has_bug": [
                "2",
                "3"
              ]
            }
          }
        },
        "level_2_3_percentage": {
          "bucket_script": {
            "buckets_path": {
              "level_count": "levels_2_3 > _count",
              "department_total_record": "_count"
            },
            "format": "#.####",
            "script": "params.level_count / params.department_total_record"
          }
        },
        "levels_1": {
          "filter": {
            "terms": {
              "has_bug": [
                "1"
              ]
            }
          }
        },
        "level_1_percentage": {
          "bucket_script": {
            "buckets_path": {
              "level_count": "levels_1 > _count",
              "department_total_record": "_count"
            },
            "format": "#.####",
            "script": "params.level_count / params.department_total_record"
          }
        }
      }
    }
  }
}

 

返回结果:

 

{
  "took": 390,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 13212,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "department_group": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "乳腺甲状腺科一病区",
          "doc_count": 770,
          "levels_2_3": {
            "doc_count": 770
          },
          "levels_1": {
            "doc_count": 0
          },
          "level_2_3_percentage": {
            "value": 1,
            "value_as_string": "1"
          },
          "level_1_percentage": {
            "value": 0,
            "value_as_string": "0"
          }
        },
        {
          "key": "内三",
          "doc_count": 710,
          "levels_2_3": {
            "doc_count": 708
          },
          "levels_1": {
            "doc_count": 2
          },
          "level_2_3_percentage": {
            "value": 0.9971830985915493,
            "value_as_string": "0.9972"
          },
          "level_1_percentage": {
            "value": 0.0028169014084507044,
            "value_as_string": "0.0028"
          }
        },
        {
          "key": "康复医学科病区",
          "doc_count": 668,
          "levels_2_3": {
            "doc_count": 668
          },
          "levels_1": {
            "doc_count": 0
          },
          "level_2_3_percentage": {
            "value": 1,
            "value_as_string": "1"
          },
          "level_1_percentage": {
            "value": 0,
            "value_as_string": "0"
          }
        },
        {
          "key": "内一西病区",
          "doc_count": 604,
          "levels_2_3": {
            "doc_count": 602
          },
          "levels_1": {
            "doc_count": 2
          },
          "level_2_3_percentage": {
            "value": 0.9966887417218543,
            "value_as_string": "0.9967"
          },
          "level_1_percentage": {
            "value": 0.0033112582781456954,
            "value_as_string": "0.0033"
          }
        }
...

ES 6.3 有一个 bucket_sort 管道聚合,可以看看能不能用它将聚合后的结果排序。

另外jestAPI的写法放在下面:

 /**
  * Builds the "share of each record level per department" aggregation with the
  * Elasticsearch builders and wraps it in a Jest {@code Search} request.
  *
  * <p>Structure of the generated DSL:
  * <ul>
  *   <li>a terms aggregation {@code department_group} on {@code department_secondary};</li>
  *   <li>two filter sub-aggregations on {@code has_bug}: {@code levels_2_3} and
  *       {@code levels_1};</li>
  *   <li>one bucket_script per filter that divides the filter's {@code _count} by the
  *       enclosing department bucket's {@code _count}, formatted as {@code #.####}.</li>
  * </ul>
  *
  * <p>The method only prints the generated DSL; the Jest request is built but not executed here.
  */
 @Test
    public void test06() {
        // buckets_path entries are relative to the department bucket that holds the
        // bucket_script: "<filter> > _count" is the filter's doc count, "_count" is the
        // doc count of the department bucket itself.
        Map<String, String> bucketsPathLevels23 = new HashMap<>(2);
        bucketsPathLevels23.put("level_count", "levels_2_3 > _count");
        bucketsPathLevels23.put("department_total_record", "_count");

        Map<String, String> bucketsPathLevels1 = new HashMap<>(2);
        bucketsPathLevels1.put("level_count", "levels_1 > _count");
        bucketsPathLevels1.put("department_total_record", "_count");

        // Both percentages use the same formula; only the buckets_path bindings differ.
        final String ratioScript = "params.level_count / params.department_total_record";

        TermsAggregationBuilder departmentGroup = AggregationBuilders.terms("department_group")
                .field("department_secondary")
                .size(Integer.MAX_VALUE)
                .subAggregation(AggregationBuilders.filter("levels_2_3", QueryBuilders.termsQuery("has_bug", "2", "3")))
                .subAggregation(PipelineAggregatorBuilders
                        .bucketScript("level_2_3_percentage", bucketsPathLevels23, new Script(ratioScript))
                        .format("#.####"))
                .subAggregation(AggregationBuilders.filter("levels_1", QueryBuilders.termsQuery("has_bug", "1")))
                .subAggregation(PipelineAggregatorBuilders
                        .bucketScript("level_1_percentage", bucketsPathLevels1, new Script(ratioScript))
                        .format("#.####"));

        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
                // Only the aggregation results are needed, so request no hits at all.
                // No sort or scroll is set either: those only affect how hits are returned.
                .size(0)
                .aggregation(departmentGroup);

        // NOTE(review): "index" and "type" look like placeholders; replace them with the real
        // index/type before executing the request through a Jest client.
        Search.Builder builder = new Search.Builder(searchSourceBuilder.toString())
                .addIndex("index")
                .addType("type")
                // Trim response metadata but keep the aggregation results. The previous
                // filter_path listed only hits.* and _scroll_id, which removes "aggregations"
                // from the response.
                .setParameter("filter_path", "aggregations");

        System.out.println(searchSourceBuilder.toString());
    }

 

你可能感兴趣的:(elasticsearch)