准备数据
zhangsa dfsadsa323 new 67.1 2
lisi 543gfd old 43.32 1
wanger 65ghf new 88.88 3
liiu fdsfagwe new 66.0 1
qibaqiu fds new 54.32 1
wangshi f332 old 77.77 2
liwei hfd old 88.44 3
wutong 543gdfsd new 56.55 6
lilisi dsfgg new 88.88 5
qishili fds new 66.66 5
create external table if not exists order_detail(user_id string,device_id string,user_type string, price decimal,sales int) row format delimited fields terminated by '\t' location '/hive-data/data';
## > < =
##注意: String 的比较要注意(常用的时间比较可以先 to_date 之后再比较)
select long_time>short_time, long_time<short_time,long_time=short_time, to_date(long_time)=to_date(short_time)
from
(
select '2017-01-11 00:00:00' as long_time, '2017-01-11' as short_time
from
order_detail limit 1
)bb;
result:
true false false true
select 1 from order_detail where NULL is Null limit 1;
select 1 from order_detail where 1 is not null limit 1;
LIKE
语法: A LIKE B
描述: 字符串A符合表达式B的正则语法,则为TRUE;否则为FALSE. B中字符”_”表示任意单个字符,而字符”%”表示任意数量的字符。
RLIKE
语法: A RLIKE B
描述: 字符串A符合JAVA正则表达式 B 的正则语法,则为 TRUE;否则为 FALSE。
REGEXP
语法: A REGEXP B
描述: 功能与 RLIKE 相同
注意:
精度在 hive 中是个很大的问题,类似这样的操作最好通过round 指定精度
select 8.4 % 4,round(8.4 % 4 , 2) from order_detail limit 1;
--round(xxx,2),小数点后一位
用decimal可以表示任意精度的带符号小数;
select 4&6, 8&4, 4|8,6|8,4^8,6^4,~6,~3 from order_detail limit 1;
--4 0 12 14 12 2 -7 -4
--00000100(4)
--00000110(6)
--00001000(8)
--00000011(3)
取整: round
指定精度取整: round
向下取整: floor
向上取整: ceil
向上取整: ceiling
取随机数: rand
自然指数: exp 自然对数: ln
以10为底对数: log10 以2为底对数: log2
对数: log
select log10(100),log2(8),log(4,256) from order_detail limit 1;
幂运算: pow, power 开平方: sqrt
二进制: bin 十六进制: hex 反转十六进制: unhex
进制转换: conv
绝对值:abs 正取余:pmod 正弦:sin 反正弦:asin 余弦:cos 反余弦:acos 返回A的值:positive 返回A的相反数:negative
UNIX时间戳转日期: from_unixtime
日期转UNIX时间戳,指定格式日期转UNIX 时间戳,获取当前UNIX时间戳: unix_timestamp
说明: 转换格式为"yyyy-MM-dd HH:mm:ss"的日期到 UNIX 时间戳。如果转化失败,则返回 0。
select
from_unixtime(1323308943),
from_unixtime(1323308943,'yyyyMMdd'),
unix_timestamp(),
unix_timestamp('2017-12-07 16:01:03'),
unix_timestamp('20171207 16-01-03','yyyyMMdd HH-mm-ss')
from
order_detail limit 1;
--2011-12-08 09:49:03 20111208 1566829811 1512633663 1512633663
当前时间:current_timestamp()(注意:unix_timestamp(void)已经过时,用curren_timestamp替代)
--2019-08-26 22:17:32.622
select
to_date('2016-12-08 10:03:01'),
year('2016-12-08 10:03:01'),
month('2016-12-08'),
day('2016-12-08 10:03:01'),
hour('2016-12-08 10:03:01'),
minute('2016-12-08 10:03:01'),
second('2016-12-08 10:03:01')
from
order_detail limit 1;
select to_date(current_timestamp());
select
weekofyear('2016-12-08 10:03:01'),
datediff('2016-12-08','2016-11-27')
from order_detail limit 1;
--49 11
select date_add('2016-12-08',10),date_add('2016-12-08',-10),
date_sub('2016-12-08',-10),date_sub('2016-12-08',10) from order_detail limit 1;
--2016-12-18 | 2016-11-28 | 2016-12-18 | 2016-11-28
select
date_add('20161208',10),
from_unixtime(unix_timestamp(date_add('2016-12-08',10)),'yyyyMMdd'),
from_unixtime(unix_timestamp(date_add('2016-12-08',10),'yyyy-MM-dd'),'yyyyMMdd')
from order_detail limit 1;
select user_id,device_id,user_type,sales,
if(user_type='new',user_id,'***'),
COALESCE(null,user_id,device_id,user_type),
COALESCE(null,null,device_id,user_type),
case user_type
when 'new' then 'new_user'
when 'old' then 'old_user'
else 'others' end,
case
when user_type='new' and sales>=5 then 'gold_user'
when user_type='old' and sales<3 then 'bronze_user'
else 'silver_user' end
from order_detail;
select
user_id,device_id,user_type,length(user_id),
reverse(user_id),
concat(user_id,device_id,user_type),
concat_ws('_',user_id,device_id,user_type)
from order_detail;
select user_id,regexp_replace(user_id, 'li|ng', '**'),
regexp_extract(user_id,'li(.*?)(si)',1),
regexp_extract(user_id,'li(.*?)(si)',2),
regexp_extract(user_id,'li(.*?)(si)',0)
from order_detail;
select
parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST'),
parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'PATH'),
parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY'),
parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY','k2'),
parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'REF'),
parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'PROTOCOL'),
parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'AUTHORITY'),
parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'FILE')
from order_detail limit 1;
select
get_json_object(
'{"store":
{"fruit":\[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],
"bicycle":{"price":19.95,"color":"red"}
},
"email":"amy@only_for_json_udf_test.net",
"owner":"amy"
}',
'$.owner'),
get_json_object(
'{"store":
{"fruit":\[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],
"bicycle":{"price":19.95,"color":"red"}
},
"email":"amy@only_for_json_udf_test.net",
"owner":"amy"
}',
'$.store.fruit[0].type')
from order_detail limit 1;
select a.user_id, b.*
from order_detail a
lateral view
json_tuple('{"store":
{"fruit":\[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],
"bicycle":{"price":19.95,"color":"red"}
},
"email":"amy@only_for_json_udf_test.net",
"owner":"amy"
}', 'email', 'owner') b as email, owner limit 1;
SELECT b.*
from (
select 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' as urlstr
from
order_detail
limit 1
)a
LATERAL VIEW
parse_url_tuple(a.urlstr, 'HOST', 'PATH', 'QUERY', 'QUERY:k1') b
as host, path, query, query_k1
LIMIT 1;
--facebook.com | /path1/p.php | k1=v1&k2=v2 | v1
string转map:str_to_map
select str_to_map('aaa:11&bbb:22', '&', ':')
select
count(*),count(user_type),count(distinct user_type),
sum(sales),sum(distinct sales)
from order_detail;
测试数据集:
tony 1338 hello,woddd 1,2 a1,a2,a3 k1:1.0,k2:2.0,k3:3.0 s1,s2,s3,4
mark 5453 kke,ladyg 2,3 a4,a5,a6 k4:4.0,k5:5.0,k2:6.0 s4,s5,s6,6
ivyfd 4323 aa,thq,dsx 3,6 a7,a8,a9 k7:7.0,k8:8.0,k2:9.0 s7,s8,s9,9
drop table employees;
create external table if not exists employees(
name string,
salary string,
happy_word string,
happy_num array<int>,
subordinates array<string>,
deductions map<string,float>,
address struct<street:string,city:string,state:string,zip:int>
)
row format delimited fields terminated by '\t'
collection items terminated by ','
map keys terminated by ':'
lines terminated by '\n'
stored as textfile;
hdfs dfs -put /home/liguodong/data/muldata.txt /temp/lgd
load data inpath '/temp/lgd/muldata.txt' overwrite into table employees;
select * from employees;
Getting log thread is interrupted, since query is done!
+--------+---------+--------------+------------+-------------------+-------------------------------+---------------------------------------------------+--+
| name | salary | happy_word | happy_num | subordinates | deductions | address |
+--------+---------+--------------+------------+-------------------+-------------------------------+---------------------------------------------------+--+
| tony | 1338 | hello,woddd | [1,2] | ["a1","a2","a3"] | {"k1":1.0,"k2":2.0,"k3":3.0} | {"street":"s1","city":"s2","state":"s3","zip":4} |
| mark | 5453 | kke,ladyg | [2,3] | ["a4","a5","a6"] | {"k4":4.0,"k5":5.0,"k2":6.0} | {"street":"s4","city":"s5","state":"s6","zip":6} |
| ivyfd | 4323 | aa,thq,dsx | [3,6] | ["a7","a8","a9"] | {"k7":7.0,"k8":8.0,"k2":9.0} | {"street":"s7","city":"s8","state":"s9","zip":9} |
+--------+---------+--------------+------------+-------------------+-------------------------------+---------------------------------------------------+--+
## 访问数组 Map 结构体
select
name,salary,
subordinates[1],deductions['k2'],deductions['k3'],address.city
from employees;
+--------+---------+------+------+-------+-------+--+
| name | salary | _c2 | _c3 | _c4 | city |
+--------+---------+------+------+-------+-------+--+
| tony | 1338 | a2 | 2.0 | 3.0 | s2 |
| mark | 5453 | a5 | 6.0 | NULL | s5 |
| ivyfd | 4323 | a8 | 9.0 | NULL | s8 |
+--------+---------+------+------+-------+-------+--+
## Map类型长度 Array类型长度
select size(deductions),size(subordinates) from employees limit 1;
+------+------+--+
| _c0 | _c1 |
+------+------+--+
| 3 | 3 |
+------+------+--+
## 类型转换: cast
select cast(salary as int),cast(deductions['k2'] as bigint) from employees;
+---------+------+--+
| salary | _c1 |
+---------+------+--+
| 1338 | 2 |
| 5453 | 6 |
| 4323 | 9 |
+---------+------+--+
### LATERAL VIEW 行转列
SELECT
name, ad_subordinate
FROM employees
LATERAL VIEW explode(subordinates) addTable AS ad_subordinate;
+--------+-----------------+--+
| name | ad_subordinate |
+--------+-----------------+--+
| tony | a1 |
| tony | a2 |
| tony | a3 |
| mark | a4 |
| mark | a5 |
| mark | a6 |
| ivyfd | a7 |
| ivyfd | a8 |
| ivyfd | a9 |
+--------+-----------------+--+
SELECT
name, count(1)
FROM employees
LATERAL VIEW explode(subordinates) addTable AS ad_subordinate
group by name;
+--------+------+--+
| name | _c1 |
+--------+------+--+
| ivyfd | 3 |
| mark | 3 |
| tony | 3 |
+--------+------+--+
SELECT ad_subordinate, ad_num
FROM employees
LATERAL VIEW explode(subordinates) addTable AS ad_subordinate
LATERAL VIEW explode(happy_num) addTable2 AS ad_num;
+-----------------+---------+--+
| ad_subordinate | ad_num |
+-----------------+---------+--+
| a1 | 1 |
| a1 | 2 |
| a2 | 1 |
| a2 | 2 |
| a3 | 1 |
| a3 | 2 |
| a4 | 2 |
| a4 | 3 |
| a5 | 2 |
| a5 | 3 |
| a6 | 2 |
| a6 | 3 |
| a7 | 3 |
| a7 | 6 |
| a8 | 3 |
| a8 | 6 |
| a9 | 3 |
| a9 | 6 |
+-----------------+---------+--+
### 多个LATERAL VIEW
SELECT
name, count(1)
FROM employees
LATERAL VIEW explode(subordinates) addTable AS ad_subordinate
LATERAL VIEW explode(happy_num) addTable2 AS ad_num
group by name;
+--------+------+--+
| name | _c1 |
+--------+------+--+
| ivyfd | 6 |
| mark | 6 |
| tony | 6 |
+--------+------+--+
### 不满足条件产生空行
SELECT AA.name, BB.* FROM employees AA
LATERAL VIEW
explode(array()) BB AS a limit 10;
+-------+----+--+
| name | a |
+-------+----+--+
+-------+----+--+
### OUTER 避免永远不产生结果,无满足条件的行,在该列会产生NULL值。
SELECT AA.name, BB.* FROM employees AA
LATERAL VIEW
OUTER explode(array()) BB AS a limit 10;
+--------+-------+--+
| name | a |
+--------+-------+--+
| tony | NULL |
| mark | NULL |
| ivyfd | NULL |
+--------+-------+--+
### 字符串切分成多列
SELECT
name, word
FROM employees
LATERAL VIEW explode(split(happy_word,',')) addTable AS word;
+--------+--------+--+
| name | word |
+--------+--------+--+
| tony | hello |
| tony | woddd |
| mark | kke |
| mark | ladyg |
| ivyfd | aa |
| ivyfd | thq |
| ivyfd | dsx |
+--------+--------+--+
### OUTER 避免永远不产生结果,无满足条件的行,在该列会产生NULL值。
SELECT AA.name, BB.* FROM employees AA
LATERAL VIEW
OUTER explode(array()) BB AS a limit 10;
+--------+-------+--+
| name | a |
+--------+-------+--+
| tony | NULL |
| mark | NULL |
| ivyfd | NULL |
+--------+-------+--+
### 字符串切分成多列
SELECT
name, word
FROM employees
LATERAL VIEW explode(split(happy_word,',')) addTable AS word;
+--------+--------+--+
| name | word |
+--------+--------+--+
| tony | hello |
| tony | woddd |
| mark | kke |
| mark | ladyg |
| ivyfd | aa |
| ivyfd | thq |
| ivyfd | dsx |
+--------+--------+--+