-- Recreate the demo table from scratch so the script can be re-run.
drop table if exists mydatabase.test;

-- Tab-delimited text table that the queries in this file read from.
create table mydatabase.test (
    id         int,
    name       string,
    timestring string,
    salary     double,
    bonus      double
)
row format delimited
    fields terminated by '\t'
stored as textfile;
-- Method 1: load the rows from a tab-delimited text file staged on HDFS.
-- The staging steps are shell commands, not HiveQL, so they are kept here as
-- comments; run them from a terminal first:
--   vim test.txt
--   hadoop fs -mkdir test
--   hadoop fs -put test.txt /user/myname/test
-- Then run the statement below in Hive. It is left commented out because it is
-- an alternative to Method 2: running both would append the Method 2 rows on
-- top of whatever the file loaded.
-- load data inpath '/user/myname/test'
-- overwrite into table mydatabase.test;

-- Method 2: insert the sample rows directly.
-- Sample data: 13 rows across three names; bonus is null for ids 3, 11 and 12.
-- Values are positional: id, name, timestring, salary, bonus.
insert into mydatabase.test values
    ( 1, ' J ',     '2018-01-08 10:11:32', 128.54, -45.23),
    ( 2, ' J ',     '2018-02-09 10:51:12', 128.54, -78.25),
    ( 3, ' J ',     '2018-03-05 11:22:21', 128.52,   null),
    ( 4, ' J ',     '2018-04-08 15:40:51', 256.23, 345.23),
    ( 5, ' J ',     '2018-05-08 10:21:21', 128.54, 267.12),
    ( 6, ' J ',     '2018-06-08 10:00:50', 256.27, -78.49),
    ( 7, 'Rose',    '2018-01-08 10:11:32', 512.65, -76.44),
    ( 8, 'Rose',    '2018-02-09 10:51:12', 512.54, -45.30),
    ( 9, 'Rose',    '2018-03-05 11:22:21', 512.13, -87.09),
    (10, 'Rose',    '2018-04-08 15:40:51', 512.34,  19.12),
    (11, 'Dickson', '2018-01-08 10:21:21', 256.87,   null),
    (12, 'Dickson', '2018-02-08 10:00:50', 256.52,   null),
    (13, 'Dickson', '2018-04-08 11:00:00', 256.12,   3.69);
-- ---------------------------------------------------------------------------
-- Numeric functions
-- ---------------------------------------------------------------------------
-- Round to the nearest integer
select id, round(salary) from mydatabase.test;
-- Round, keeping one decimal place
select id, round(salary, 1) from mydatabase.test;
-- Round down, round up
select id, floor(salary), ceil(salary) from mydatabase.test;
-- Random number between 0 and 1 (here used to scale salary by up to 10%)
select id, salary*(1+rand()*0.1) from mydatabase.test;
-- Exponential, logarithm, modulo
select id, pow(e(), salary), log(e(), salary), pmod(id, 3) from mydatabase.test;
-- Absolute value, greatest, least
select id, abs(bonus), greatest(salary, bonus), least(salary, bonus) from mydatabase.test;
-- Cast to integer
select id, cast(salary as int) from mydatabase.test;
-- ---------------------------------------------------------------------------
-- Date and time functions
-- ---------------------------------------------------------------------------
-- Current time as a Unix timestamp
select id, name, unix_timestamp() from mydatabase.test;
-- Unix timestamp to formatted string.
-- HH is the 24-hour hour field; hh would render a 12-hour clock, so afternoon
-- times would be indistinguishable from morning ones.
select id, name, from_unixtime(unix_timestamp(), 'yyyy-MM-dd HH:mm:ss') from mydatabase.test;
-- String to Unix timestamp (string already in the default layout)
select id, name, unix_timestamp('2019-02-13 11:22:33') from mydatabase.test;
-- String to Unix timestamp (explicit pattern for a non-default layout)
select id, name, unix_timestamp('20190213 11:22:33', 'yyyyMMdd HH:mm:ss') from mydatabase.test;
-- Date/time components
select id, name, to_date(timestring), year(timestring), month(timestring), day(timestring), hour(timestring), minute(timestring), second(timestring) from mydatabase.test;
-- ---------------------------------------------------------------------------
-- Conditional functions
-- ---------------------------------------------------------------------------
-- IF expression
select id, if(bonus > 0, 'yes', 'no') from mydatabase.test;
-- NULL test
select id, isnull(bonus) from mydatabase.test;
-- NULL default: the second argument is the fallback value
select id, nvl(bonus, 0) from mydatabase.test;
-- First non-null argument
select id, coalesce(bonus, 0, null) from mydatabase.test;
-- Simple CASE: match one expression against literal values.
-- NOTE(review): the sample rows inserted by this file use ' J ', 'Rose' and
-- 'Dickson', so the 'Jack' branch matches no row as written -- confirm the
-- intended name.
select id, name,
(case name
when 'Jack' then 'A'
when 'Rose' then 'B'
else 'C'
end)
from mydatabase.test;
-- Searched CASE: evaluate boolean conditions in order, first match wins
select id, salary, bonus,
(case
when salary > 500 then 'A'
when salary > 100 and bonus > 0 then 'B'
else 'C'
end)
from mydatabase.test;
-- ---------------------------------------------------------------------------
-- Aggregate functions
-- ---------------------------------------------------------------------------
-- Distinct values
select distinct(name) from mydatabase.test;
-- Row count
select count(*) from mydatabase.test;
-- Conditional count.
-- count(expr) counts every row where expr is non-null, and `bonus > 0` is
-- non-null (false) for zero or negative bonuses, so count(bonus > 0) would
-- count all non-null bonuses. The CASE yields null for non-matching rows so
-- only positive bonuses are counted.
select count(case when bonus > 0 then 1 end) from mydatabase.test;
-- Sum, average, minimum, maximum, variance per name
select name, sum(salary), avg(salary), min(salary), max(salary), variance(salary) from mydatabase.test group by name;
-- Collect values into a list (duplicates kept)
select name, collect_list(salary) from mydatabase.test group by name;
-- Collect values into a set (duplicates removed)
select name, collect_set(salary) from mydatabase.test group by name;
-- ---------------------------------------------------------------------------
-- String functions
-- ---------------------------------------------------------------------------
-- Length
select name, length(name) from mydatabase.test;
-- Find the position of a substring
select name, locate('o', name) from mydatabase.test;
-- Left pad, right pad
select name, lpad(name, 4, '_'), rpad(name, 4, '_') from mydatabase.test;
-- Trim leading spaces, trailing spaces, both
select name, ltrim(name), rtrim(name), trim(name) from mydatabase.test;
-- Edit (Levenshtein) distance between strings
select n1, n2, levenshtein(n1, n2) from
(select distinct(name) as n1 from mydatabase.test)db0
join
(select distinct(name) as n2 from mydatabase.test)db1
-- pair each distinct name with every other distinct name
on n1 != n2;
-- Split a string into an array
select name, split(timestring, '-'), size(split(timestring, '-')) from mydatabase.test;
-- Explode the split array into one row per element
select name, timesplit from mydatabase.test lateral view explode(split(timestring, '-')) s as timesplit;
-- Substring
-- NOTE(review): substr(name, 1, -1) passes a negative length; this presumably
-- yields an empty string rather than "everything but the last character" --
-- confirm the intent against the Hive version in use.
select substr(name, 1, -1), substr(timestring, -8) from mydatabase.test;
-- Regex replace (note: depending on how the query is submitted, escaping may
-- need four backslashes)
select regexp_replace(timestring, '\\d+-\\d+-\\d+', '###') from mydatabase.test;
-- Regex extract (same escaping caveat). The pattern has no capture group, so
-- group index 0 (the whole match) is requested; index 1 refers to a group
-- that does not exist.
select regexp_extract(timestring, '\\d+', 0) from mydatabase.test;
-- Concatenate
select name, concat(year(timestring), '|', month(timestring), '|', cast(salary as string)) from mydatabase.test;
-- Concatenate a collected list
select name,
concat_ws('|',
collect_list(cast(salary as string))
) from mydatabase.test group by name;
-- Concatenate a sorted list (the elements are strings, so sort_array orders
-- them as text)
select name,
concat_ws('|',
sort_array(
collect_list(cast(salary as string))
)) from mydatabase.test group by name;
-- Concatenate a list in numeric salary order, then strip the sort key.
-- Each salary is prefixed with its row_number zero-padded to three digits
-- ("001:128.52"), so that sorting the strings follows salary order; the
-- regexp_replace then removes every "NNN:" prefix from the joined result.
-- Three digits only cover up to 999 rows. The colon needs no escaping in the
-- regex, which avoids the two-versus-four backslash question for it.
select regexp_replace(
concat_ws('|',
sort_array(
collect_list(
concat(lpad(cast(rank as string), 3, '0'), ':', salary)
))), '\\d+:', '') as lst
from (select name, salary, row_number() over(order by salary) as rank from mydatabase.test) db0;
-- EXPLODE: one output row per element of the space-split timestring
select id, part from mydatabase.test lateral view explode(split(timestring,' ')) t as part;
-- IN: membership test against a literal list
select id, name from mydatabase.test where name in('Dickson', 'Rose');
-- ---------------------------------------------------------------------------
-- Window functions (all partitioned by name)
-- ---------------------------------------------------------------------------
-- ROW_NUMBER: unique sequence number within each name, highest salary first
select id, name, salary, row_number() over(partition by name order by salary desc) rank from mydatabase.test;
-- RANK: ties share a rank and leave gaps after them
select id, name, salary, rank() over(partition by name order by salary desc) rank from mydatabase.test;
-- DENSE_RANK: ties share a rank, no gaps
select id, name, salary, dense_rank() over(partition by name order by salary desc) rank from mydatabase.test;
-- SUM: running total of salary within each name, in timestring order
select id, name, sum(salary) over(partition by name order by timestring asc) sum_salary from mydatabase.test;
-- BEFORE: salary of the previous row (lag) within each name
select id, name, salary, lag(salary, 1) over(partition by name order by timestring asc) before_salary from mydatabase.test;
-- AFTER: salary of the next row (lead) within each name
select id, name, salary, lead(salary, 1) over(partition by name order by timestring asc) after_salary from mydatabase.test;
-- ---------------------------------------------------------------------------
-- Multi-level aggregation
-- ---------------------------------------------------------------------------
-- GROUPING SETS: salary totals per month and per (month, day)
select month, day, sum(salary) from
(select month(timestring) month, day(timestring) day, salary from mydatabase.test) db
group by month, day grouping sets(month, (month, day)) order by month asc, day asc;
-- CUBE: salary totals for every combination of the grouping columns
select month, day, sum(salary) from
(select month(timestring) month, day(timestring) day, salary from mydatabase.test) db
group by month, day with cube order by month asc, day asc;
-- ROLLUP: salary totals per (month, day), per month, and overall
select month, day, sum(salary) from
(select month(timestring) month, day(timestring) day, salary from mydatabase.test) db
group by month, day with rollup order by month asc, day asc;
参考文献:
https://www.cnblogs.com/MOBIN/p/5618747.html#7
https://blog.csdn.net/zhanaolu4821/article/details/81871041
https://baijiahao.baidu.com/s?id=1613382585734336695&wfr=spider&for=pc
https://blog.csdn.net/guodong2k/article/details/79459282
https://www.cnblogs.com/zhaohz/p/4672943.html
https://www.cnblogs.com/Allen-rg/p/9268627.html