(大体就是分组统计,去重累加)
-- Per-day revenue summary for 20200715 <= reDay <= 20200721.
-- One output row per reDay:
--   deviceNumber : number of distinct deviceNum values that have rows on that day
--   deviceCount  : per-day total of each device's MAX(deviceCount), so a device with
--                  several rows on the same day contributes only one value
--   other columns: plain per-day sums of the order / amount columns
SELECT
    a.reDay,
    COUNT(DISTINCT a.deviceNum) AS deviceNumber,
    -- b holds exactly one row per reDay, so MAX() simply picks that value while
    -- keeping the statement valid when ONLY_FULL_GROUP_BY is enabled.
    MAX(b.deviceCount) AS deviceCount,
    SUM(a.newOrderNum) AS newOrderNum,
    SUM(a.paidOrderNum) AS paidOrderNum,
    SUM(a.refundedOrderNum) AS refundedOrderNum,
    SUM(a.receiveAmount) AS receiveAmount,
    SUM(a.refundedAmount) AS refundedAmount,
    SUM(a.costAmount) AS costAmount,
    SUM(a.couponAmount) AS couponAmount
FROM mortals_iot_stat_device_day_revenue AS a
LEFT JOIN (
    -- Step 2: add up the de-duplicated per-device values for each day.
    SELECT
        t.reDay,
        SUM(t.deviceCount) AS deviceCount
    FROM (
        -- Step 1: collapse duplicate (deviceNum, reDay) rows to a single value.
        SELECT
            d.reDay,
            MAX(d.deviceCount) AS deviceCount
        FROM mortals_iot_stat_device_day_revenue AS d
        WHERE d.reDay >= 20200715
          AND d.reDay <= 20200721
        GROUP BY
            d.deviceNum,
            d.reDay
    ) AS t
    GROUP BY t.reDay
) AS b
    ON a.reDay = b.reDay
WHERE a.reDay >= 20200715
  AND a.reDay <= 20200721
-- Qualified on purpose: both a and b expose a reDay column.
GROUP BY a.reDay
-- Explicit ordering; GROUP BY alone does not guarantee row order.
ORDER BY a.reDay
在约100万条数据量下,上面的SQL耗时约7秒(分组的天数区间越大,耗时越长)。
(因为ES的cardinality去重是近似计算,存在误差(参见文章2),所以换一个角度来实现,并需要对结果做少量加减修正)
着重说明去重实现:
在 group_by_deviceNum 分组中通过 bucket_selector,把文档数大于1(即存在重复)的设备取出来(前提是重复数据不太多的场景,例子中只取了10条)。
# Filter 20200715 <= reDay <= 20200721, bucket by reDay in ascending order,
# and compute the per-day sums.
# group_by_deviceNum keeps, per day, only device buckets holding more than one
# document (duplicates), taken from the 10 largest buckets, so the summed
# deviceCount can be corrected afterwards.
GET /revenue/day/_search
{
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        { "range": { "reDay": { "gte": 20200715, "lte": 20200721 } } }
      ]
    }
  },
  "aggs": {
    "group_by_day": {
      "terms": {
        "field": "reDay",
        "size": 1000,
        "order": { "_term": "asc" }
      },
      "aggs": {
        "additionalDeviceCount": {
          "sum": { "field": "deviceCount" }
        },
        "totalOrderNum": {
          "sum": { "field": "newOrderNum" }
        },
        "totalPaidOrderNum": {
          "sum": { "field": "paidOrderNum" }
        },
        "totalRefundedOrderNum": {
          "sum": { "field": "refundedOrderNum" }
        },
        "receiveAmount": {
          "sum": { "field": "receiveAmount" }
        },
        "totalRefundAmount": {
          "sum": { "field": "refundedAmount" }
        },
        "couponAmount": {
          "sum": { "field": "couponAmount" }
        },
        "costAmount": {
          "sum": { "field": "costAmount" }
        },
        "group_by_deviceNum": {
          "terms": {
            "field": "deviceNum.keyword",
            "size": 10,
            "shard_size": 20000,
            "order": { "_count": "desc" }
          },
          "aggs": {
            "deviceCount": {
              "min": {
                "field": "deviceCount",
                "missing": 1
              }
            },
            "device_count_filter": {
              "bucket_selector": {
                "buckets_path": { "deviceNumCount": "_count" },
                "script": "params.deviceNumCount>1"
              }
            }
          }
        },
        "deviceCount-无用警示": {
          "cardinality": { "field": "deviceNum.keyword" }
        }
      }
    }
  }
}
耗时200多毫秒(当然,分组的天数区间越大,耗时也会有一定增加,但相比数据库,效率已经高出很多)。
{
"took": 99,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 87668,
"max_score": 0,
"hits": []
},
"aggregations": {
"group_by_day": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 20200715,
"doc_count": 12521,
"receiveAmount": {
"value": 280
},
"couponAmount": {
"value": 0
},
"totalRefundedOrderNum": {
"value": 0
},
"group_by_deviceNum": {
"doc_count_error_upper_bound": 5,
"sum_other_doc_count": 12511,
"buckets": []
},
"additionalDeviceCount": {
"value": 14057
},
"costAmount": {
"value": 0
},
"totalOrderNum": {
"value": 24
},
"totalRefundAmount": {
"value": 0
},
"totalPaidOrderNum": {
"value": 19
}
},
{
"key": 20200716,
"doc_count": 12523,
"receiveAmount": {
"value": 330
},
"couponAmount": {
"value": 0
},
"totalRefundedOrderNum": {
"value": 0
},
"group_by_deviceNum": {
"doc_count_error_upper_bound": 5,
"sum_other_doc_count": 12513,
"buckets": []
},
"additionalDeviceCount": {
"value": 14059
},
"costAmount": {
"value": 0
},
"totalOrderNum": {
"value": 61
},
"totalRefundAmount": {
"value": 0
},
"totalPaidOrderNum": {
"value": 33
}
},
{
"key": 20200717,
"doc_count": 12526,
"receiveAmount": {
"value": 170
},
"couponAmount": {
"value": 0
},
"totalRefundedOrderNum": {
"value": 0
},
"group_by_deviceNum": {
"doc_count_error_upper_bound": 5,
"sum_other_doc_count": 12515,
"buckets": [
{
"key": "1010010000000055",
"doc_count": 2,
"deviceCount": {
"value": 3
}
}
]
},
"additionalDeviceCount": {
"value": 14064
},
"costAmount": {
"value": 0
},
"totalOrderNum": {
"value": 21
},
"totalRefundAmount": {
"value": 0
},
"totalPaidOrderNum": {
"value": 8
}
},
{
"key": 20200718,
"doc_count": 12525,
"receiveAmount": {
"value": 0
},
"couponAmount": {
"value": 0
},
"totalRefundedOrderNum": {
"value": 0
},
"group_by_deviceNum": {
"doc_count_error_upper_bound": 5,
"sum_other_doc_count": 12515,
"buckets": []
},
"additionalDeviceCount": {
"value": 14061
},
"costAmount": {
"value": 0
},
"totalOrderNum": {
"value": 0
},
"totalRefundAmount": {
"value": 0
},
"totalPaidOrderNum": {
"value": 0
}
},
{
"key": 20200719,
"doc_count": 12525,
"receiveAmount": {
"value": 0
},
"couponAmount": {
"value": 0
},
"totalRefundedOrderNum": {
"value": 0
},
"group_by_deviceNum": {
"doc_count_error_upper_bound": 5,
"sum_other_doc_count": 12515,
"buckets": []
},
"additionalDeviceCount": {
"value": 14061
},
"costAmount": {
"value": 0
},
"totalOrderNum": {
"value": 0
},
"totalRefundAmount": {
"value": 0
},
"totalPaidOrderNum": {
"value": 0
}
},
{
"key": 20200720,
"doc_count": 12525,
"receiveAmount": {
"value": 390
},
"couponAmount": {
"value": 0
},
"totalRefundedOrderNum": {
"value": 0
},
"group_by_deviceNum": {
"doc_count_error_upper_bound": 5,
"sum_other_doc_count": 12515,
"buckets": []
},
"additionalDeviceCount": {
"value": 14061
},
"costAmount": {
"value": 92120
},
"totalOrderNum": {
"value": 40
},
"totalRefundAmount": {
"value": 0
},
"totalPaidOrderNum": {
"value": 20
}
},
{
"key": 20200721,
"doc_count": 12523,
"receiveAmount": {
"value": 190
},
"couponAmount": {
"value": 0
},
"totalRefundedOrderNum": {
"value": 0
},
"group_by_deviceNum": {
"doc_count_error_upper_bound": 5,
"sum_other_doc_count": 12513,
"buckets": []
},
"additionalDeviceCount": {
"value": 14059
},
"costAmount": {
"value": 37600
},
"totalOrderNum": {
"value": 43
},
"totalRefundAmount": {
"value": 0
},
"totalPaidOrderNum": {
"value": 17
}
}
]
}
}
}
。。。。
。。。。
进行中,头大