Hive left outer join test: filtering in the ON clause vs. the WHERE clause

# Create the scratch database used by every step below.
# IF NOT EXISTS keeps this step idempotent: without it, re-running the
# walkthrough fails here because the database already exists.
hive -e "create database if not exists tmp_houzz";


# (Re)create the external, comma-delimited text table testJoinA(id, name).
# The ALTER statement sets the SerDe's null representation to the empty string.
hive -e "
USE tmp_houzz;

DROP TABLE IF EXISTS testJoinA;

CREATE EXTERNAL TABLE testJoinA (
    id   STRING,
    name STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS
    INPUTFORMAT  'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 'hdfs://webdm-cluster/user/houzhizhen/warehouse/tmp_houzz.db/testJoinA';

ALTER TABLE testJoinA SET SERDEPROPERTIES ('serialization.null.format' = '');

exit;
"

# (Re)create the external, comma-delimited text table testJoinB(id, name).
# The ALTER statement sets the SerDe's null representation to the empty string.
hive -e "
USE tmp_houzz;

DROP TABLE IF EXISTS testJoinB;

CREATE EXTERNAL TABLE testJoinB (
    id   STRING,
    name STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS
    INPUTFORMAT  'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 'hdfs://webdm-cluster/user/houzhizhen/warehouse/tmp_houzz.db/testJoinB';

ALTER TABLE testJoinB SET SERDEPROPERTIES ('serialization.null.format' = '');

exit;
"

# Fixture rows for testJoinA, one "id,name" record per line (ids 1, 2, 3).
printf '%s\n' \
    '1,a1' \
    '2,a2' \
    '3,a3' \
    > testJoinA.txt


# Fixture rows for testJoinB, one "id,name" record per line (ids 2, 3, 4).
printf '%s\n' \
    '2,b2' \
    '3,b3' \
    '4,b4' \
    > testJoinB.txt


# Load the two local fixture files into their tables; OVERWRITE replaces any
# rows loaded by an earlier run.
hive -e "
USE tmp_houzz;
LOAD DATA LOCAL INPATH 'testJoinA.txt' OVERWRITE INTO TABLE testJoinA;
LOAD DATA LOCAL INPATH 'testJoinB.txt' OVERWRITE INTO TABLE testJoinB;
"

# Baseline: an inner join returns only the ids present in both tables.
hive -e "
USE tmp_houzz;
SELECT
    a.id,
    a.name,
    b.id,
    b.name
FROM testJoinA a
INNER JOIN testJoinB b
    ON a.id = b.id
"
2    a2    2    b2
3    a3    3    b3


# Left outer join with no extra filter: every testJoinA row is returned, and
# the testJoinB columns are NULL where no id matches.
hive -e "
USE tmp_houzz;
SELECT
    a.id,
    a.name,
    b.id,
    b.name
FROM testJoinA a
LEFT OUTER JOIN testJoinB b
    ON a.id = b.id
"
1    a1    NULL    NULL
2    a2    2    b2
3    a3    3    b3

# Left outer join with a WHERE filter on the left (preserved) table only.
# All three fixture ids satisfy a.id < 4, so the result matches the unfiltered
# left outer join.
hive -e "
USE tmp_houzz;
SELECT
    a.id,
    a.name,
    b.id,
    b.name
FROM testJoinA a
LEFT OUTER JOIN testJoinB b
    ON a.id = b.id
WHERE a.id < 3 + 1 - 1 + 1
"
1    a1    NULL    NULL
2    a2    2    b2
3    a3    3    b3

# Right-table filter placed in WHERE: b.id = 2 is checked after the join, so
# rows whose b columns are NULL (no match) are discarded as well. Only the
# matched id 2 row is returned.
hive -e "
USE tmp_houzz;
SELECT
    a.id,
    a.name,
    b.id,
    b.name
FROM testJoinA a
LEFT OUTER JOIN testJoinB b
    ON a.id = b.id
WHERE a.id < 3
  AND b.id = 2
"
2    a2    2    b2


# Right-table filter placed in ON: b.id = 2 only limits which testJoinB rows
# may match. testJoinA rows passing the WHERE clause are still returned, with
# NULL b columns when nothing matches (id 1 here).
hive -e "
USE tmp_houzz;
SELECT
    a.id,
    a.name,
    b.id,
    b.name
FROM testJoinA a
LEFT OUTER JOIN testJoinB b
    ON a.id = b.id
   AND b.id = 2
WHERE a.id < 3
"
1    a1    NULL    NULL
2    a2    2    b2

# Show the execution plan for the variant that filters the right table in the
# WHERE clause (b.id = 2 alongside a.id < 3).
hive -e "
USE tmp_houzz;
EXPLAIN
SELECT
    a.id,
    a.name,
    b.id,
    b.name
FROM testJoinA a
LEFT OUTER JOIN testJoinB b
    ON a.id = b.id
WHERE a.id < 3
  AND b.id = 2
"

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        a
          TableScan
            alias: a
            Filter Operator
              predicate:
                  expr: (id < 3.0)
                  type: boolean
              Reduce Output Operator
                key expressions:
                      expr: id
                      type: string
                sort order: +
                Map-reduce partition columns:
                      expr: id
                      type: string
                tag: 0
                value expressions:
                      expr: id
                      type: string
                      expr: name
                      type: string
        b
          TableScan
            alias: b
            Filter Operator
              predicate:
                  expr: (id < 3.0)
                  type: boolean
              Reduce Output Operator
                key expressions:
                      expr: id
                      type: string
                sort order: +
                Map-reduce partition columns:
                      expr: id
                      type: string
                tag: 1
                value expressions:
                      expr: id
                      type: string
                      expr: name
                      type: string
      Reduce Operator Tree:
        Join Operator
          condition map:
               Left Outer Join0 to 1
          condition expressions:
            0 {VALUE._col0} {VALUE._col1}
            1 {VALUE._col0} {VALUE._col1}
          handleSkewJoin: false
          outputColumnNames: _col0, _col1, _col4, _col5
          Filter Operator
            predicate:
                expr: (_col4 = 2.0)
                type: boolean
            Select Operator
              expressions:
                    expr: _col0
                    type: string
                    expr: _col1
                    type: string
                    expr: _col4
                    type: string
                    expr: _col5
                    type: string
              outputColumnNames: _col0, _col1, _col2, _col3
              File Output Operator
                compressed: false
                GlobalTableId: 0
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

  Stage: Stage-0
    Fetch Operator
      limit: -1


Time taken: 1.378 seconds



# Show the execution plan for the variant that filters the right table in the
# ON clause (b.id = 2 as part of the join condition, a.id < 3 in WHERE).
hive -e "
USE tmp_houzz;
EXPLAIN
SELECT
    a.id,
    a.name,
    b.id,
    b.name
FROM testJoinA a
LEFT OUTER JOIN testJoinB b
    ON a.id = b.id
   AND b.id = 2
WHERE a.id < 3
"


STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        a
          TableScan
            alias: a
            Filter Operator
              predicate:
                  expr: (id < 3.0)
                  type: boolean
              Reduce Output Operator
                key expressions:
                      expr: id
                      type: string
                sort order: +
                Map-reduce partition columns:
                      expr: id
                      type: string
                tag: 0
                value expressions:
                      expr: id
                      type: string
                      expr: name
                      type: string
        b
          TableScan
            alias: b
            Filter Operator
              predicate:
                  expr: ((id = 2.0) and (id < 3.0))
                  type: boolean
              Reduce Output Operator
                key expressions:
                      expr: id
                      type: string
                sort order: +
                Map-reduce partition columns:
                      expr: id
                      type: string
                tag: 1
                value expressions:
                      expr: id
                      type: string
                      expr: name
                      type: string
      Reduce Operator Tree:
        Join Operator
          condition map:
               Left Outer Join0 to 1
          condition expressions:
            0 {VALUE._col0} {VALUE._col1}
            1 {VALUE._col0} {VALUE._col1}
          handleSkewJoin: false
          outputColumnNames: _col0, _col1, _col4, _col5
          Select Operator
            expressions:
                  expr: _col0
                  type: string
                  expr: _col1
                  type: string
                  expr: _col4
                  type: string
                  expr: _col5
                  type: string
            outputColumnNames: _col0, _col1, _col2, _col3
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

  Stage: Stage-0
    Fetch Operator
      limit: -1


Time taken: 1.377 seconds

You may also be interested in: (hive)