hadoop-hive-关于IN与NOT IN

Hive 中可以用自关联来实现 IN 与 NOT IN:IN 使用内关联(join)即可;NOT IN 则需要使用左外关联(left outer join),并在 where 条件中加上 u_sina.user is null。


# Create the sample data file (columns: user,url,val) at the exact path the
# later LOAD DATA statement reads from. Using '>' via a here-document truncates
# the file first, so re-running this step does not append duplicate rows.
cat > /tmp/USER_URL_VAL.txt <<'EOF'
a,sinax,1
a,163x,2
a,sohu,3
b,sinax,4
b,haha,5
a,sinoix,6
EOF

-- Demo table backed by a comma-separated text file: one row per (user, url, val).
-- The delimiter clause is FIELDS TERMINATED BY; "fields temporary by" is not valid Hive DDL.
-- NOTE(review): `user` is a reserved keyword in newer Hive releases (1.2.0+, to be confirmed
-- for your version); if the statement is rejected there, quote the column as `user`.
create table user_url_val (user string, url string, val string) row format delimited fields terminated by ',';

-- Copy the sample file from the local filesystem into the table (appends to existing data).
LOAD DATA LOCAL INPATH '/tmp/USER_URL_VAL.txt'
INTO TABLE user_url_val;

-- IN emulated with a self-join: keep the 'sinax' rows whose user also has
-- a row with url = '163x'. The derived table u_sina holds those users.
SELECT *
FROM user_url_val u
INNER JOIN (
    SELECT user
    FROM user_url_val
    WHERE url = '163x'
) u_sina
    ON u.user = u_sina.user
WHERE u.url = 'sinax';

a sinax 1 a


-- NOT IN emulated with a left outer self-join: keep the 'sinax' rows whose
-- user has no row with url = '163x'. Unmatched rows carry NULL in u_sina.user,
-- which the IS NULL filter selects.
SELECT *
FROM user_url_val u
LEFT OUTER JOIN (
    SELECT user
    FROM user_url_val
    WHERE url = '163x'
) u_sina
    ON u.user = u_sina.user
WHERE u.url = 'sinax'
  AND u_sina.user IS NULL;

b sinax 4 NULL

你可能感兴趣的:(Hadoop)