SQL参考&梳理:
窗口函数语法:aggregate_function(expression) OVER (PARTITION BY expression)
示例:用一条 SQL 按分组统计平均值、最大值及其对应日期、最小值及其对应日期、最新值及其对应日期、最早(最老)值及其对应日期,并保证每个分组只输出一行(对冗余结果去重)。
-- Per (gd_code, name) group, report:
--   avg_value                  average COD concentration
--   max_value   / max_date     highest concentration and the date it was recorded
--   min_value   / min_date     lowest concentration and the date it was recorded
--   latest_date / latest_value most recent record date and the concentration on it
--   oldest_date / oldest_value earliest record date and the concentration on it
--
-- Tie-breaking (one row per group is always returned):
--   * several rows share the max value        -> max_date is the latest of their dates
--   * several rows share the min value        -> min_date is the earliest of their dates
--   * several rows share the latest timestamp -> latest_value is the highest of them
--   * several rows share the oldest timestamp -> oldest_value is the lowest of them
--
-- Why this shape:
--   The table is read once. Window aggregates attach the group-level statistics to
--   every row, then a GROUP BY with FILTER picks the rows that match each statistic.
--   Self-joining the table once per statistic on equality would multiply rows on
--   ties (needing DISTINCT to hide them) and would silently drop any group whose
--   gd_code or name is NULL, or whose concentrations or dates are all NULL,
--   because "NULL = NULL" is not true. PARTITION BY / GROUP BY treat NULL keys as
--   one group, so such groups are kept here with NULL in the affected columns.
WITH annotated AS (
    -- Every measurement row, carrying the statistics of its own group.
    SELECT
        d.gd_code,
        d.name,
        d.record_date,
        d.cod_concentration,
        AVG(d.cod_concentration) OVER grp AS avg_value,
        MAX(d.cod_concentration) OVER grp AS max_value,
        MIN(d.cod_concentration) OVER grp AS min_value,
        MAX(d.record_date)       OVER grp AS latest_date,
        MIN(d.record_date)       OVER grp AS oldest_date
    FROM cod_sampling_point_data AS d
    WINDOW grp AS (PARTITION BY d.gd_code, d.name)
)
SELECT
    a.gd_code,
    a.name,
    a.avg_value,
    -- Date of the maximum value; the latest date wins when the maximum repeats.
    MAX(a.record_date) FILTER (WHERE a.cod_concentration = a.max_value) AS max_date,
    a.max_value,
    -- Date of the minimum value; the earliest date wins when the minimum repeats.
    MIN(a.record_date) FILTER (WHERE a.cod_concentration = a.min_value) AS min_date,
    a.min_value,
    a.latest_date,
    -- Value at the latest timestamp; the highest wins when the timestamp repeats.
    MAX(a.cod_concentration) FILTER (WHERE a.record_date = a.latest_date) AS latest_value,
    a.oldest_date,
    -- Value at the oldest timestamp; the lowest wins when the timestamp repeats.
    MIN(a.cod_concentration) FILTER (WHERE a.record_date = a.oldest_date) AS oldest_value
FROM annotated AS a
-- The window results are constant within a group, so listing them here does not
-- split groups; it only lets them be selected without another aggregate.
GROUP BY
    a.gd_code,
    a.name,
    a.avg_value,
    a.max_value,
    a.min_value,
    a.latest_date,
    a.oldest_date
ORDER BY a.gd_code, a.name;
表结构参考:
-- Reference schema (auto-generated definition) for the table read by the
-- statistics query. Column notes are English renderings of the column comments
-- attached to this table elsewhere in this file.
CREATE TABLE cod_sampling_point_data (
    id                VARCHAR(36)    NOT NULL PRIMARY KEY,
    create_by         VARCHAR(32),    -- creator
    create_time       TIMESTAMP,      -- creation time
    update_by         VARCHAR(32),    -- last updater
    update_time       TIMESTAMP,      -- last update time
    sub_company_name  VARCHAR(100),   -- branch company name
    netname           VARCHAR(100),   -- grid name
    ssjd              VARCHAR(100),   -- street the point belongs to
    gd_code           VARCHAR(100),   -- pipe point code
    point_x           VARCHAR(50),    -- x coordinate
    point_y           VARCHAR(50),    -- y coordinate
    record_date       TIMESTAMP,      -- record date (year.month.day)
    record_period     INTEGER,        -- time slot (00-24)
    cod_concentration NUMERIC(10, 4), -- COD concentration (mg/L)
    name              VARCHAR(255),   -- water-quality sampling point name
    sys_org_code      VARCHAR(64),    -- department code
    code              INTEGER,
    city              VARCHAR(10),    -- city
    region            VARCHAR(15)     -- district
);
-- Catalog comments for the columns of cod_sampling_point_data.
-- The comment texts are stored in the database as-is and are kept unchanged.
COMMENT ON COLUMN cod_sampling_point_data.create_by IS '创建人';
COMMENT ON COLUMN cod_sampling_point_data.create_time IS '创建时间';
COMMENT ON COLUMN cod_sampling_point_data.update_by IS '更新人';
COMMENT ON COLUMN cod_sampling_point_data.update_time IS '更新时间';
COMMENT ON COLUMN cod_sampling_point_data.sub_company_name IS '分公司名称';
COMMENT ON COLUMN cod_sampling_point_data.netname IS '网格名称';
COMMENT ON COLUMN cod_sampling_point_data.ssjd IS '所属街道';
COMMENT ON COLUMN cod_sampling_point_data.gd_code IS '管点编号';
COMMENT ON COLUMN cod_sampling_point_data.point_x IS '坐标x';
COMMENT ON COLUMN cod_sampling_point_data.point_y IS '坐标y';
COMMENT ON COLUMN cod_sampling_point_data.record_date IS '日期(年.月.日)';
COMMENT ON COLUMN cod_sampling_point_data.record_period IS '时段(00-24)';
COMMENT ON COLUMN cod_sampling_point_data.cod_concentration IS 'cod浓度(mg/L)';
COMMENT ON COLUMN cod_sampling_point_data.name IS '水质采样点名称';
COMMENT ON COLUMN cod_sampling_point_data.sys_org_code IS '部门编码';
COMMENT ON COLUMN cod_sampling_point_data.city IS '市';
COMMENT ON COLUMN cod_sampling_point_data.region IS '区';
-- Assign ownership of the table to the postgres role.
ALTER TABLE cod_sampling_point_data OWNER TO postgres;