-- Demo source table for the collect_set / collect_list / explode examples below.
-- NOTE(review): the column name `brith` looks like a typo for `birth`; it is kept
-- unchanged because the later statements in this file reference it by this name.
create table tmp_summer1 (
    id    string,
    name  string,
    brith string
);

-- Sample rows. ('001','B','20211202') appears twice, so the source data
-- contains a duplicate (id, name) pair.
insert into tmp_summer1 values
    ('001', 'A', '20211202'),
    ('001', 'B', '20211202'),
    ('002', 'A', '20211202'),
    ('001', 'B', '20211202');
-- Rows -> one delimited string per id (duplicates removed by collect_set).
-- If the collected column is not a string, convert it first: cast(c1 as string).
create table tmp_summer2 as
select
    id,
    concat_ws('&', collect_set(name)) as name,
    max(brith)                        as brith
from tmp_summer1
group by id;
-- Same aggregation using collect_list, which keeps duplicate names.
select
    id,
    concat_ws('&', collect_list(name)) as name,
    max(brith)                         as brith
from tmp_summer1
group by id;
-- Delimited string -> rows: split the '&'-joined name column and emit one row
-- per element via lateral view explode.
create table tmp_summer3 as
select
    id,
    name_split,
    brith
from tmp_summer2
lateral view explode(split(name, '&')) tmp_table as name_split;
-- Rows -> columns (pivot): one output column per expected name value.
-- A case without an else branch yields NULL, which max() ignores.
create table tmp_summer4 as
select
    id,
    max(case when name_split = 'A' then name_split end) as name_a,
    max(case when name_split = 'B' then name_split end) as name_b,
    max(case when name_split = 'C' then name_split end) as name_c,
    max(brith)                                          as brith
from tmp_summer3
group by id;
-- get_json_object(json_string, '$.key')
-- Parses the JSON text in json_string and returns the content addressed by the
-- path. Returns NULL when the input is not valid JSON. Each call returns a
-- single item.
select get_json_object('{"level":"2","time":1650973942596,"type":"0"}', '$.level') as level;
-- Formatting a timestamp as a compact date: 2021-09-16 00:00:00 -> 20210916
-- to_char(ZHTCJSJ,'yyyymmdd') -> Hive equivalent: from_unixtime(unix_timestamp(ZHTCJSJ),'yyyyMMdd')
-- Current date
select current_date;                                       -- e.g. 2021-12-23
-- Current Unix time in seconds
select unix_timestamp();                                   -- e.g. 1640224807
-- Prefer current_timestamp; it works with or without parentheses
select current_timestamp();                                -- e.g. 2021-12-23 09:57:57.638

-- Unix timestamp -> date/time string
-- NOTE(review): the sample outputs below look like they were produced in a
-- UTC+8 session time zone; results differ in other zones.
select from_unixtime(1505456567);                          -- 2017-09-15 14:22:47
select from_unixtime(1505456567, 'yyyyMMdd');              -- 20170915
select from_unixtime(1505456567, 'yyyy-MM-dd HH:mm:ss');   -- 2017-09-15 14:22:47

-- Date/time string -> Unix timestamp
select unix_timestamp('2019-09-15 14:23:00');              -- 1568528580

-- Difference in days: datediff(end, start); negative when end precedes start
select datediff('2020-04-18', '2019-11-21');               -- 149
select datediff('2019-11-21', '2020-04-18');               -- -149
-- Day of the month for a given date
select dayofmonth(current_date);       -- e.g. 23 when run on 2021-12-23
select dayofmonth('2021-12-23');       -- 23

-- Last day of the month
select last_day(current_date);         -- e.g. 2021-12-31
select last_day('2021-12-15');         -- 2021-12-31

-- First day of the current month: step back (day-of-month - 1) days from today
select date_sub(current_date, dayofmonth(current_date) - 1);                  -- e.g. 2021-12-01

-- First day of next month: first day of the current month plus one month
select add_months(date_sub(current_date, dayofmonth(current_date) - 1), 1);   -- e.g. 2022-01-01
-- String -> date. The string is expected to start with a yyyy-MM-dd date;
-- a trailing time part, as in the second example, is accepted.
select to_date('2020-01-01');
select to_date('2020-01-01 12:12:12');

-- date_format renders a date, timestamp or string value using the given pattern
select date_format(current_timestamp(), 'yyyy-MM-dd HH:mm:ss');
select date_format(current_date(), 'yyyyMMdd');
select date_format('2020-06-01', 'yyyy-MM-dd HH:mm:ss');
-- Years of service for each row of emp: days elapsed since hiredate divided
-- by 365, rounded to one decimal place.
select
    *,
    round(datediff(current_date, hiredate) / 365, 1) as workingyears
from emp;
-- lower: convert to lowercase
select lower("HELLO WORLD");
-- upper: convert to uppercase (the original example called lower here by mistake)
select upper(ename), ename from emp;
-- length: string length
select
    length(ename),
    ename
from emp;

-- String concatenation: the || operator or concat()
select empno || " " || ename as idname
from emp;

select concat(empno, " ", ename) as idname
from emp;

-- concat_ws(separator, [string | array(string)]+): join with a separator
select concat_ws('.', 'www', array('lagou', 'com'));

select concat_ws(" ", ename, job)
from emp;
-- substr: extract a substring (positions are 1-based; a negative start counts
-- from the end of the string).
-- NOTE(review): the original sample literals were empty strings, so every call
-- returned an empty result; 'www.example.com' is a stand-in sample value.
select substr('www.example.com', 5);       -- example.com
select substr('www.example.com', -5);      -- e.com
select substr('www.example.com', 5, 5);    -- examp

-- split: cut a string on a regex delimiter; '.' is a regex metacharacter and
-- must be escaped.
select split("www.example.com", "\\.");    -- ["www","example","com"]
-- round: round to the nearest value; the optional second argument is the
-- number of decimal places (negative values round to the left of the point)
select round(314.15926);        -- 314
select round(314.15926, 2);     -- 314.16
select round(314.15926, -2);    -- 300

-- ceil: round up to the nearest integer
select ceil(3.1415926);         -- 4

-- floor: round down to the nearest integer
select floor(3.1415926);        -- 3

-- Other math functions include absolute value, square, square root,
-- logarithms, trigonometric functions, etc.
-- if(boolean testCondition, T valueTrue, T valueFalseOrNull)
-- Nested if with exclusive upper bounds (< 1500, < 3000)
select
    sal,
    if(sal < 1500, 1, if(sal < 3000, 2, 3))
from emp;

-- Classify emp salaries into levels: 0-1500, 1500-3000, above 3000
-- (inclusive upper bounds)
select
    sal,
    if(sal <= 1500, 1, if(sal <= 3000, 2, 3))
from emp;
-- CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END
-- For multi-branch conditions, case when reads more clearly than nested if().
select
    sal,
    case
        when sal <= 1500 then 1
        when sal <= 3000 then 2
        else 3
    end as sallevel
from emp;

-- The two statements below are equivalent: simple case (case <expr> when ...)
-- versus searched case (case when <condition> ...).
select
    ename,
    deptno,
    case deptno
        when 10 then 'accounting'
        when 20 then 'research'
        when 30 then 'sales'
        else 'unknown'
    end as deptname
from emp;

select
    ename,
    deptno,
    case
        when deptno = 10 then 'accounting'
        when deptno = 20 then 'research'
        when deptno = 30 then 'sales'
        else 'unknown'
    end as deptname
from emp;
-- coalesce(T v1, T v2, ...): first non-NULL argument, or NULL if all are NULL
select
    sal,
    coalesce(comm, 0)
from emp;

-- isnull(a) / isnotnull(a): NULL tests usable as predicates
select *
from emp
where isnull(comm);

select *
from emp
where isnotnull(comm);

-- nvl(T value, T default_value): default_value when value is NULL
select
    empno,
    ename,
    job,
    mgr,
    hiredate,
    deptno,
    sal + nvl(comm, 0) as sumsal
from emp;

-- nullif(x, y): NULL when x equals y, otherwise x
select
    nullif("b", "b"),
    nullif("b", "a");
-- User-Defined Table-Generating Functions (UDTF)
-- explode: table-generating function that expands a complex array or map
-- value held in one row into multiple rows.
select explode(array('A', 'B', 'C')) as col;
select explode(map('a', 8, 'b', 88, 'c', 888));

-- Restrictions:
--   * UDTFs are not supported outside the SELECT clause, nor nested in expressions
--   * SELECT pageid, explode(adid_list) AS myCol ...   is not supported
--   * SELECT explode(explode(adid_list)) AS myCol ...  is not supported
-- lateral view 常与 表生成函数explode结合使用
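-- lateral view syntax:
-- LATERAL VIEW is used together with a UDTF such as explode; split is not a UDTF itself,
-- but is commonly used to build the array that explode then expands.
-- lateralView: LATERAL VIEW udtf(expression) tableAlias AS columnAlias (',' columnAlias)*
-- fromClause: FROM baseTable (lateralView)*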
-- Basic lateral view usage: t1 holds one row with an array column (colb);
-- lateral view explode emits one output row per array element, repeating cola.
-- NOTE(review): the original sample literal was an empty string;
-- 'www.example.com' is a stand-in sample value.
with t1 as (
    select
        'OK'                            as cola,
        split('www.example.com', '\\.') as colb
)
select
    cola,
    colc
from t1
lateral view explode(colb) t2 as colc;
-- Plain aggregate: the whole table collapses into a single row
select sum(age) from tb1;

-- Invalid (kept as a counter-example): id is neither aggregated nor listed in
-- a group by, so the statement is rejected.
-- select id, sum(age) as sum_age from tb1;

-- Window aggregate with an empty over(): the sum over all rows is attached to
-- every row, so id can be selected alongside it
select
    id,
    sum(age) over () as sum_age
from tb1;

-- partition by: the sum is computed separately for each id
select
    id,
    sum(age) over (partition by id) as sum_age
from tb1;
-- order by inside over(): rows of each id partition are ordered by age and the
-- sum is accumulated in that order (no explicit frame is given, so the
-- engine's default frame applies)
select
    id,
    sum(age) over (partition by id order by age) as sum_age
from tb1;
-- Window frame clause: rows between ... and ...
-- Sums the previous row, the current row and the next row within each id
-- partition, ordered by age. The `rows` keyword is required before `between`.
select
    id,
    sum(age) over (
        partition by id
        order by age
        rows between 1 preceding and 1 following
    ) as sum_age
from tb1;
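-- row_number(): sequential numbering with no repeats, e.g. 1, 2, 3, 4, ...
-- rank(): tied rows share a rank and leave gaps afterwards, e.g. 1, 2, 2, 4, 5, ...
-- dense_rank(): tied rows share a rank without leaving gaps, e.g. 1, 2, 2, 3, 4, ...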
-- Side-by-side comparison of the three ranking functions over the same window
-- (per id, oldest first)
select
    id,
    name,
    age,
    row_number() over (partition by id order by age desc) as rank1,
    rank()       over (partition by id order by age desc) as rank2,
    dense_rank() over (partition by id order by age desc) as rank3
from tb1;

-- ntile(2): splits the ordered rows of each id partition into 2 buckets and
-- returns the bucket number
select
    id,
    name,
    age,
    ntile(2) over (partition by id order by age) as ntile
from tb1;
-- Top 3 (dense-ranked by age, descending) per id, plus the age difference to
-- the previous row in the same ordering; nvl turns the first row's NULL into 0.
-- The original outer query referenced `score` and `class`, which the CTE does
-- not produce; it now uses the columns the CTE actually exposes (age, id).
-- NOTE(review): confirm age/id are the intended measure and partition key.
with tmp as (
    select
        id,
        name,
        age,
        dense_rank() over (partition by id order by age desc) as rank
    from tb1
)
select
    id,
    age,
    rank,
    nvl(age - lag(age) over (partition by id order by age desc), 0) as lag_age
from tmp
where rank <= 3;
-- WITH ... AS gives a subquery block a name; it works much like a temporary table.
-- A with clause can only be referenced by a select query block.
-- NOTE(review): the original note said the CTE result is stored in temporary
-- space, computed once and reused; whether a CTE is materialized depends on
-- the engine and its configuration — confirm for the target platform.
-- When several CTEs precede the same select, only the first one is introduced
-- by `with`; the following ones omit it and are separated by commas.
-- There is no comma between the last CTE and the main query (only the closing
-- parenthesis), and every CTE body must be enclosed in parentheses.
with
a as (select * from tb1),
b as (select * from tb2)
select *
from a
inner join b
    on a.id = b.id;  -- NOTE(review): the original condition was an empty `where =`; joining on id assumes both tables have an id column — TODO confirm