# Create the scratch database used by the join experiments below.
# IF NOT EXISTS keeps a re-run of this script from failing when the
# database is already there.
hive -e "create database if not exists tmp_houzz";
# (Re)create external table testJoinA (id, name) in tmp_houzz.
# Rows are comma-delimited text lines stored at the HDFS location below;
# serialization.null.format is set to the empty string.
hive -e "
USE tmp_houzz;
DROP TABLE IF EXISTS testJoinA;
CREATE EXTERNAL TABLE testJoinA (
    id   STRING,
    name STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS
    INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 'hdfs://webdm-cluster/user/houzhizhen/warehouse/tmp_houzz.db/testJoinA';
ALTER TABLE testJoinA SET SERDEPROPERTIES ('serialization.null.format' = '');
exit;
"
# (Re)create external table testJoinB (id, name) in tmp_houzz.
# Rows are comma-delimited text lines stored at the HDFS location below;
# serialization.null.format is set to the empty string.
hive -e "
USE tmp_houzz;
DROP TABLE IF EXISTS testJoinB;
CREATE EXTERNAL TABLE testJoinB (
    id   STRING,
    name STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS
    INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 'hdfs://webdm-cluster/user/houzhizhen/warehouse/tmp_houzz.db/testJoinB';
ALTER TABLE testJoinB SET SERDEPROPERTIES ('serialization.null.format' = '');
exit;
"
# Write the three sample rows for testJoinA (ids 1, 2, 3) to a local file,
# one comma-separated row per line.
printf '%s\n' '1,a1' '2,a2' '3,a3' > testJoinA.txt
# Write the three sample rows for testJoinB (ids 2, 3, 4) to a local file,
# one comma-separated row per line.
printf '%s\n' '2,b2' '3,b3' '4,b4' > testJoinB.txt
# Load the local sample files into both tables; OVERWRITE replaces whatever
# data the tables held before.
hive -e "
USE tmp_houzz;
LOAD DATA LOCAL INPATH 'testJoinA.txt' OVERWRITE INTO TABLE testJoinA;
LOAD DATA LOCAL INPATH 'testJoinB.txt' OVERWRITE INTO TABLE testJoinB;
"
# Case 1 - inner join on id: only ids present in both tables are returned.
hive -e "
USE tmp_houzz;
SELECT
    a.id,
    a.name,
    b.id,
    b.name
FROM testJoinA a
INNER JOIN testJoinB b
    ON a.id = b.id
"
2 a2 2 b2
3 a3 3 b3
# Case 2 - left outer join on id: every row of testJoinA is kept, and the
# b columns are NULL where testJoinB has no matching id.
hive -e "
USE tmp_houzz;
SELECT
    a.id,
    a.name,
    b.id,
    b.name
FROM testJoinA a
LEFT OUTER JOIN testJoinB b
    ON a.id = b.id
"
1 a1 NULL NULL
2 a2 2 b2
3 a3 3 b3
# Case 3 - left outer join with a WHERE filter on the left-hand table only.
# All sample ids in testJoinA are below 4, so the result matches case 2.
hive -e "
USE tmp_houzz;
SELECT
    a.id,
    a.name,
    b.id,
    b.name
FROM testJoinA a
LEFT OUTER JOIN testJoinB b
    ON a.id = b.id
WHERE a.id < 4
"
1 a1 NULL NULL
2 a2 2 b2
3 a3 3 b3
# Case 4 - left outer join with the right-hand filter (b.id = 2) in WHERE.
# The filter is applied to the joined rows, so the row for a.id = 1 (whose
# b columns are NULL) is dropped and only the id = 2 row comes back.
hive -e "
USE tmp_houzz;
SELECT
    a.id,
    a.name,
    b.id,
    b.name
FROM testJoinA a
LEFT OUTER JOIN testJoinB b
    ON a.id = b.id
WHERE a.id < 3
    AND b.id = 2
"
2 a2 2 b2
# Case 5 - same as case 4, but the right-hand filter (b.id = 2) lives in the
# ON clause. It now only restricts which testJoinB rows can match, so the
# row for a.id = 1 is kept with NULL b columns.
hive -e "
USE tmp_houzz;
SELECT
    a.id,
    a.name,
    b.id,
    b.name
FROM testJoinA a
LEFT OUTER JOIN testJoinB b
    ON a.id = b.id
    AND b.id = 2
WHERE a.id < 3
"
1 a1 NULL NULL
2 a2 2 b2
# Print the execution plan for the case-4 query (right-hand filter in WHERE).
# In the recorded plan the b.id = 2 predicate appears as a Filter Operator
# after the Join Operator.
hive -e "
USE tmp_houzz;
EXPLAIN
SELECT
    a.id,
    a.name,
    b.id,
    b.name
FROM testJoinA a
LEFT OUTER JOIN testJoinB b
    ON a.id = b.id
WHERE a.id < 3
    AND b.id = 2
"
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 is a root stage
STAGE PLANS:
Stage: Stage-1
Map Reduce
Alias -> Map Operator Tree:
a
TableScan
alias: a
Filter Operator
predicate:
expr: (id < 3.0)
type: boolean
Reduce Output Operator
key expressions:
expr: id
type: string
sort order: +
Map-reduce partition columns:
expr: id
type: string
tag: 0
value expressions:
expr: id
type: string
expr: name
type: string
b
TableScan
alias: b
Filter Operator
predicate:
expr: (id < 3.0)
type: boolean
Reduce Output Operator
key expressions:
expr: id
type: string
sort order: +
Map-reduce partition columns:
expr: id
type: string
tag: 1
value expressions:
expr: id
type: string
expr: name
type: string
Reduce Operator Tree:
Join Operator
condition map:
Left Outer Join0 to 1
condition expressions:
0 {VALUE._col0} {VALUE._col1}
1 {VALUE._col0} {VALUE._col1}
handleSkewJoin: false
outputColumnNames: _col0, _col1, _col4, _col5
Filter Operator
predicate:
expr: (_col4 = 2.0)
type: boolean
Select Operator
expressions:
expr: _col0
type: string
expr: _col1
type: string
expr: _col4
type: string
expr: _col5
type: string
outputColumnNames: _col0, _col1, _col2, _col3
File Output Operator
compressed: false
GlobalTableId: 0
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Stage: Stage-0
Fetch Operator
limit: -1
Time taken: 1.378 seconds
# Print the execution plan for the case-5 query (right-hand filter in ON).
# In the recorded plan the b.id = 2 predicate is part of the map-side filter
# on table b, and there is no Filter Operator after the Join Operator.
hive -e "
USE tmp_houzz;
EXPLAIN
SELECT
    a.id,
    a.name,
    b.id,
    b.name
FROM testJoinA a
LEFT OUTER JOIN testJoinB b
    ON a.id = b.id
    AND b.id = 2
WHERE a.id < 3
"
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 is a root stage
STAGE PLANS:
Stage: Stage-1
Map Reduce
Alias -> Map Operator Tree:
a
TableScan
alias: a
Filter Operator
predicate:
expr: (id < 3.0)
type: boolean
Reduce Output Operator
key expressions:
expr: id
type: string
sort order: +
Map-reduce partition columns:
expr: id
type: string
tag: 0
value expressions:
expr: id
type: string
expr: name
type: string
b
TableScan
alias: b
Filter Operator
predicate:
expr: ((id = 2.0) and (id < 3.0))
type: boolean
Reduce Output Operator
key expressions:
expr: id
type: string
sort order: +
Map-reduce partition columns:
expr: id
type: string
tag: 1
value expressions:
expr: id
type: string
expr: name
type: string
Reduce Operator Tree:
Join Operator
condition map:
Left Outer Join0 to 1
condition expressions:
0 {VALUE._col0} {VALUE._col1}
1 {VALUE._col0} {VALUE._col1}
handleSkewJoin: false
outputColumnNames: _col0, _col1, _col4, _col5
Select Operator
expressions:
expr: _col0
type: string
expr: _col1
type: string
expr: _col4
type: string
expr: _col5
type: string
outputColumnNames: _col0, _col1, _col2, _col3
File Output Operator
compressed: false
GlobalTableId: 0
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Stage: Stage-0
Fetch Operator
limit: -1
Time taken: 1.377 seconds