CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE

load data local inpath 'seed_data_files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/srcbucket20.txt

load data local inpath 'seed_data_files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/srcbucket21.txt


CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE

load data local inpath 'seed_data_files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/srcbucket20.txt

load data local inpath 'seed_data_files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/srcbucket21.txt

load data local inpath 'seed_data_files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/srcbucket22.txt

load data local inpath 'seed_data_files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/srcbucket23.txt

load data local inpath 'seed_data_files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09')
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/srcbucket20.txt

load data local inpath 'seed_data_files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09')
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/srcbucket21.txt

load data local inpath 'seed_data_files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09')
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/srcbucket22.txt

load data local inpath 'seed_data_files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09')
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/srcbucket23.txt


CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE

load data local inpath 'seed_data_files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/srcbucket22.txt

load data local inpath 'seed_data_files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/srcbucket23.txt

load data local inpath 'seed_data_files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09')
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/srcbucket22.txt

load data local inpath 'seed_data_files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09')
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/srcbucket23.txt


create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint)

create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint)
set hive.optimize.bucketmapjoin = true

create table bucketmapjoin_tmp_result (key string , value1 string, value2 string)


explain extended
insert overwrite table bucketmapjoin_tmp_result 
select /*+mapjoin(a)*/ a.key, a.value, b.value 
from srcbucket_mapjoin a join srcbucket_mapjoin_part b 
on a.key=b.key
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)))))

STAGE DEPENDENCIES:
  Stage-9 is a root stage
  Stage-1 depends on stages: Stage-9
  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
  Stage-4
  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
  Stage-2 depends on stages: Stage-0
  Stage-3
  Stage-5
  Stage-6 depends on stages: Stage-5

STAGE PLANS:
  Stage: Stage-9
    Map Reduce Local Work
      Alias -> Map Local Tables:
        a 
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
        a 
          TableScan
            alias: a
            GatherStats: false
            HashTable Sink Operator
              condition expressions:
                0 {key} {value}
                1 {value}
              handleSkewJoin: false
              keys:
                0 [Column[key]]
                1 [Column[key]]
              Position of Big Table: 1
      Bucket Mapjoin Context:
          Alias Bucket Base File Name Mapping:
            a {ds=2008-04-08/srcbucket20.txt=[srcbucket20.txt], ds=2008-04-08/srcbucket21.txt=[srcbucket21.txt], ds=2008-04-08/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-08/srcbucket23.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket20.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket21.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket23.txt=[srcbucket21.txt]}
          Alias Bucket File Name Mapping:
            a {hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin/srcbucket20.txt], hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin/srcbucket21.txt], hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin/srcbucket20.txt], hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin/srcbucket21.txt], hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket20.txt=[hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin/srcbucket20.txt], hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket21.txt=[hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin/srcbucket21.txt], hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket22.txt=[hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin/srcbucket20.txt], hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket23.txt=[hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin/srcbucket21.txt]}
          Alias Bucket Output File Name Mapping:
            hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt 0
            hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt 1
            hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt 2
            hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt 3
            hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket20.txt 0
            hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket21.txt 1
            hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket22.txt 2
            hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket23.txt 3

  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        b 
          TableScan
            alias: b
            GatherStats: false
            Map Join Operator
              condition map:
                   Inner Join 0 to 1
              condition expressions:
                0 {key} {value}
                1 {value}
              handleSkewJoin: false
              keys:
                0 [Column[key]]
                1 [Column[key]]
              outputColumnNames: _col0, _col1, _col5
              Position of Big Table: 1
              Select Operator
                expressions:
                      expr: _col0
                      type: int
                      expr: _col1
                      type: string
                      expr: _col5
                      type: string
                outputColumnNames: _col0, _col1, _col5
                Select Operator
                  expressions:
                        expr: _col0
                        type: int
                        expr: _col1
                        type: string
                        expr: _col5
                        type: string
                  outputColumnNames: _col0, _col1, _col2
                  File Output Operator
                    compressed: false
                    GlobalTableId: 1
                    directory: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10002
                    NumFilesPerFileSink: 1
                    Stats Publishing Key Prefix: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10000/
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        properties:
                          bucket_count -1
                          columns key,value1,value2
                          columns.types string:string:string
                          file.inputformat org.apache.hadoop.mapred.TextInputFormat
                          file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                          location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
                          name default.bucketmapjoin_tmp_result
                          serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
                          serialization.format 1
                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                          transient_lastDdlTime 1366743639
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.bucketmapjoin_tmp_result
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
      Local Work:
        Map Reduce Local Work
      Needs Tagging: false
      Path -> Alias:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b]
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 [b]
      Path -> Partition:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 
          Partition
            base file name: ds=2008-04-08
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            partition values:
              ds 2008-04-08
            properties:
              bucket_count 4
              bucket_field_name key
              columns key,value
              columns.types int:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-08
              name default.srcbucket_mapjoin_part
              numFiles 4
              numPartitions 2
              numRows 0
              partition_columns ds
              rawDataSize 0
              serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 5812
              transient_lastDdlTime 1366743633
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count 4
                bucket_field_name key
                columns key,value
                columns.types int:string
                file.inputformat org.apache.hadoop.mapred.TextInputFormat
                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part
                name default.srcbucket_mapjoin_part
                numFiles 8
                numPartitions 2
                numRows 0
                partition_columns ds
                rawDataSize 0
                serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 11624
                transient_lastDdlTime 1366743636
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.srcbucket_mapjoin_part
            name: default.srcbucket_mapjoin_part
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 
          Partition
            base file name: ds=2008-04-09
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            partition values:
              ds 2008-04-09
            properties:
              bucket_count 4
              bucket_field_name key
              columns key,value
              columns.types int:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part/ds=2008-04-09
              name default.srcbucket_mapjoin_part
              numFiles 4
              numPartitions 2
              numRows 0
              partition_columns ds
              rawDataSize 0
              serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 5812
              transient_lastDdlTime 1366743636
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count 4
                bucket_field_name key
                columns key,value
                columns.types int:string
                file.inputformat org.apache.hadoop.mapred.TextInputFormat
                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part
                name default.srcbucket_mapjoin_part
                numFiles 8
                numPartitions 2
                numRows 0
                partition_columns ds
                rawDataSize 0
                serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 11624
                transient_lastDdlTime 1366743636
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.srcbucket_mapjoin_part
            name: default.srcbucket_mapjoin_part
      Truncated Path -> Alias:
        /srcbucket_mapjoin_part/ds=2008-04-08 [b]
        /srcbucket_mapjoin_part/ds=2008-04-09 [b]

  Stage: Stage-7
    Conditional Operator

  Stage: Stage-4
    Move Operator
      files:
          hdfs directory: true
          source: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10002
          destination: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10000

  Stage: Stage-0
    Move Operator
      tables:
          replace: true
          source: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10000
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count -1
                columns key,value1,value2
                columns.types string:string:string
                file.inputformat org.apache.hadoop.mapred.TextInputFormat
                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
                name default.bucketmapjoin_tmp_result
                serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                transient_lastDdlTime 1366743639
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.bucketmapjoin_tmp_result
          tmp directory: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10001

  Stage: Stage-2
    Stats-Aggr Operator
      Stats Aggregation Key Prefix: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10000/

  Stage: Stage-3
    Map Reduce
      Alias -> Map Operator Tree:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10002 
            File Output Operator
              compressed: false
              GlobalTableId: 0
              directory: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10000
              NumFilesPerFileSink: 1
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    bucket_count -1
                    columns key,value1,value2
                    columns.types string:string:string
                    file.inputformat org.apache.hadoop.mapred.TextInputFormat
                    file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
                    name default.bucketmapjoin_tmp_result
                    serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    transient_lastDdlTime 1366743639
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.bucketmapjoin_tmp_result
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false
      Needs Tagging: false
      Path -> Alias:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10002 [hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10002]
      Path -> Partition:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10002 
          Partition
            base file name: -ext-10002
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              bucket_count -1
              columns key,value1,value2
              columns.types string:string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
              name default.bucketmapjoin_tmp_result
              serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              transient_lastDdlTime 1366743639
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count -1
                columns key,value1,value2
                columns.types string:string:string
                file.inputformat org.apache.hadoop.mapred.TextInputFormat
                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
                name default.bucketmapjoin_tmp_result
                serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                transient_lastDdlTime 1366743639
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.bucketmapjoin_tmp_result
            name: default.bucketmapjoin_tmp_result
      Truncated Path -> Alias:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10002 [hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10002]

  Stage: Stage-5
    Map Reduce
      Alias -> Map Operator Tree:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10002 
            File Output Operator
              compressed: false
              GlobalTableId: 0
              directory: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10000
              NumFilesPerFileSink: 1
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    bucket_count -1
                    columns key,value1,value2
                    columns.types string:string:string
                    file.inputformat org.apache.hadoop.mapred.TextInputFormat
                    file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
                    name default.bucketmapjoin_tmp_result
                    serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    transient_lastDdlTime 1366743639
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.bucketmapjoin_tmp_result
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false
      Needs Tagging: false
      Path -> Alias:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10002 [hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10002]
      Path -> Partition:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10002 
          Partition
            base file name: -ext-10002
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              bucket_count -1
              columns key,value1,value2
              columns.types string:string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
              name default.bucketmapjoin_tmp_result
              serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              transient_lastDdlTime 1366743639
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count -1
                columns key,value1,value2
                columns.types string:string:string
                file.inputformat org.apache.hadoop.mapred.TextInputFormat
                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
                name default.bucketmapjoin_tmp_result
                serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                transient_lastDdlTime 1366743639
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.bucketmapjoin_tmp_result
            name: default.bucketmapjoin_tmp_result
      Truncated Path -> Alias:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10002 [hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10002]

  Stage: Stage-6
    Move Operator
      files:
          hdfs directory: true
          source: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10002
          destination: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-00-39_907_3836130325622754442/-ext-10000




insert overwrite table bucketmapjoin_tmp_result 
select /*+mapjoin(a)*/ a.key, a.value, b.value 
from srcbucket_mapjoin a join srcbucket_mapjoin_part b 
on a.key=b.key
Deleted /user/hive/warehouse/bucketmapjoin_tmp_result


select count(1) from bucketmapjoin_tmp_result
928

insert overwrite table bucketmapjoin_hash_result_1
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result
Deleted /user/hive/warehouse/bucketmapjoin_hash_result_1
set hive.optimize.bucketmapjoin = false

insert overwrite table bucketmapjoin_tmp_result 
select /*+mapjoin(a)*/ a.key, a.value, b.value 
from srcbucket_mapjoin a join srcbucket_mapjoin_part b 
on a.key=b.key
Deleted /user/hive/warehouse/bucketmapjoin_tmp_result


select count(1) from bucketmapjoin_tmp_result
928

insert overwrite table bucketmapjoin_hash_result_2
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result
Deleted /user/hive/warehouse/bucketmapjoin_hash_result_2


select a.key-b.key, a.value1-b.value1, a.value2-b.value2
from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b
on a.key = b.key
0	0	0
set hive.optimize.bucketmapjoin = true

explain extended
insert overwrite table bucketmapjoin_tmp_result 
select /*+mapjoin(a)*/ a.key, a.value, b.value 
from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b 
on a.key=b.key
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)))))

STAGE DEPENDENCIES:
  Stage-9 is a root stage
  Stage-1 depends on stages: Stage-9
  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
  Stage-4
  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
  Stage-2 depends on stages: Stage-0
  Stage-3
  Stage-5
  Stage-6 depends on stages: Stage-5

STAGE PLANS:
  Stage: Stage-9
    Map Reduce Local Work
      Alias -> Map Local Tables:
        a 
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
        a 
          TableScan
            alias: a
            GatherStats: false
            HashTable Sink Operator
              condition expressions:
                0 {key} {value}
                1 {value}
              handleSkewJoin: false
              keys:
                0 [Column[key]]
                1 [Column[key]]
              Position of Big Table: 1
      Bucket Mapjoin Context:
          Alias Bucket Base File Name Mapping:
            a {ds=2008-04-08/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-08/srcbucket23.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket23.txt=[srcbucket21.txt]}
          Alias Bucket File Name Mapping:
            a {hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt=[hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin/srcbucket20.txt], hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt=[hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin/srcbucket21.txt], hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket22.txt=[hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin/srcbucket20.txt], hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket23.txt=[hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin/srcbucket21.txt]}
          Alias Bucket Output File Name Mapping:
            hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt 0
            hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt 1
            hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket22.txt 0
            hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket23.txt 1

  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        b 
          TableScan
            alias: b
            GatherStats: false
            Map Join Operator
              condition map:
                   Inner Join 0 to 1
              condition expressions:
                0 {key} {value}
                1 {value}
              handleSkewJoin: false
              keys:
                0 [Column[key]]
                1 [Column[key]]
              outputColumnNames: _col0, _col1, _col5
              Position of Big Table: 1
              Select Operator
                expressions:
                      expr: _col0
                      type: int
                      expr: _col1
                      type: string
                      expr: _col5
                      type: string
                outputColumnNames: _col0, _col1, _col5
                Select Operator
                  expressions:
                        expr: _col0
                        type: int
                        expr: _col1
                        type: string
                        expr: _col5
                        type: string
                  outputColumnNames: _col0, _col1, _col2
                  File Output Operator
                    compressed: false
                    GlobalTableId: 1
                    directory: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10002
                    NumFilesPerFileSink: 1
                    Stats Publishing Key Prefix: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10000/
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        properties:
                          bucket_count -1
                          columns key,value1,value2
                          columns.types string:string:string
                          file.inputformat org.apache.hadoop.mapred.TextInputFormat
                          file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                          location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
                          name default.bucketmapjoin_tmp_result
                          numFiles 1
                          numPartitions 0
                          numRows 0
                          rawDataSize 0
                          serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
                          serialization.format 1
                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                          totalSize 17966
                          transient_lastDdlTime 1366743737
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.bucketmapjoin_tmp_result
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
      Local Work:
        Map Reduce Local Work
      Needs Tagging: false
      Path -> Alias:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09 [b]
      Path -> Partition:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 
          Partition
            base file name: ds=2008-04-08
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            partition values:
              ds 2008-04-08
            properties:
              bucket_count 2
              bucket_field_name key
              columns key,value
              columns.types int:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08
              name default.srcbucket_mapjoin_part_2
              numFiles 2
              numPartitions 2
              numRows 0
              partition_columns ds
              rawDataSize 0
              serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 3062
              transient_lastDdlTime 1366743638
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count 2
                bucket_field_name key
                columns key,value
                columns.types int:string
                file.inputformat org.apache.hadoop.mapred.TextInputFormat
                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2
                name default.srcbucket_mapjoin_part_2
                numFiles 4
                numPartitions 2
                numRows 0
                partition_columns ds
                rawDataSize 0
                serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 6124
                transient_lastDdlTime 1366743639
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.srcbucket_mapjoin_part_2
            name: default.srcbucket_mapjoin_part_2
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09 
          Partition
            base file name: ds=2008-04-09
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            partition values:
              ds 2008-04-09
            properties:
              bucket_count 2
              bucket_field_name key
              columns key,value
              columns.types int:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09
              name default.srcbucket_mapjoin_part_2
              numFiles 2
              numPartitions 2
              numRows 0
              partition_columns ds
              rawDataSize 0
              serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 3062
              transient_lastDdlTime 1366743639
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count 2
                bucket_field_name key
                columns key,value
                columns.types int:string
                file.inputformat org.apache.hadoop.mapred.TextInputFormat
                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/srcbucket_mapjoin_part_2
                name default.srcbucket_mapjoin_part_2
                numFiles 4
                numPartitions 2
                numRows 0
                partition_columns ds
                rawDataSize 0
                serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 6124
                transient_lastDdlTime 1366743639
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.srcbucket_mapjoin_part_2
            name: default.srcbucket_mapjoin_part_2
      Truncated Path -> Alias:
        /srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
        /srcbucket_mapjoin_part_2/ds=2008-04-09 [b]

  Stage: Stage-7
    Conditional Operator

  Stage: Stage-4
    Move Operator
      files:
          hdfs directory: true
          source: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10002
          destination: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10000

  Stage: Stage-0
    Move Operator
      tables:
          replace: true
          source: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10000
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count -1
                columns key,value1,value2
                columns.types string:string:string
                file.inputformat org.apache.hadoop.mapred.TextInputFormat
                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
                name default.bucketmapjoin_tmp_result
                numFiles 1
                numPartitions 0
                numRows 0
                rawDataSize 0
                serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 17966
                transient_lastDdlTime 1366743737
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.bucketmapjoin_tmp_result
          tmp directory: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10001

  Stage: Stage-2
    Stats-Aggr Operator
      Stats Aggregation Key Prefix: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10000/

  Stage: Stage-3
    Map Reduce
      Alias -> Map Operator Tree:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10002 
            File Output Operator
              compressed: false
              GlobalTableId: 0
              directory: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10000
              NumFilesPerFileSink: 1
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    bucket_count -1
                    columns key,value1,value2
                    columns.types string:string:string
                    file.inputformat org.apache.hadoop.mapred.TextInputFormat
                    file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
                    name default.bucketmapjoin_tmp_result
                    numFiles 1
                    numPartitions 0
                    numRows 0
                    rawDataSize 0
                    serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 17966
                    transient_lastDdlTime 1366743737
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.bucketmapjoin_tmp_result
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false
      Needs Tagging: false
      Path -> Alias:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10002 [hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10002]
      Path -> Partition:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10002 
          Partition
            base file name: -ext-10002
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              bucket_count -1
              columns key,value1,value2
              columns.types string:string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
              name default.bucketmapjoin_tmp_result
              numFiles 1
              numPartitions 0
              numRows 0
              rawDataSize 0
              serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 17966
              transient_lastDdlTime 1366743737
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count -1
                columns key,value1,value2
                columns.types string:string:string
                file.inputformat org.apache.hadoop.mapred.TextInputFormat
                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
                name default.bucketmapjoin_tmp_result
                numFiles 1
                numPartitions 0
                numRows 0
                rawDataSize 0
                serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 17966
                transient_lastDdlTime 1366743737
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.bucketmapjoin_tmp_result
            name: default.bucketmapjoin_tmp_result
      Truncated Path -> Alias:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10002 [hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10002]

  Stage: Stage-5
    Map Reduce
      Alias -> Map Operator Tree:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10002 
            File Output Operator
              compressed: false
              GlobalTableId: 0
              directory: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10000
              NumFilesPerFileSink: 1
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    bucket_count -1
                    columns key,value1,value2
                    columns.types string:string:string
                    file.inputformat org.apache.hadoop.mapred.TextInputFormat
                    file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
                    name default.bucketmapjoin_tmp_result
                    numFiles 1
                    numPartitions 0
                    numRows 0
                    rawDataSize 0
                    serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 17966
                    transient_lastDdlTime 1366743737
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.bucketmapjoin_tmp_result
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false
      Needs Tagging: false
      Path -> Alias:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10002 [hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10002]
      Path -> Partition:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10002 
          Partition
            base file name: -ext-10002
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              bucket_count -1
              columns key,value1,value2
              columns.types string:string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
              name default.bucketmapjoin_tmp_result
              numFiles 1
              numPartitions 0
              numRows 0
              rawDataSize 0
              serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 17966
              transient_lastDdlTime 1366743737
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count -1
                columns key,value1,value2
                columns.types string:string:string
                file.inputformat org.apache.hadoop.mapred.TextInputFormat
                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                location hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/user/hive/warehouse/bucketmapjoin_tmp_result
                name default.bucketmapjoin_tmp_result
                numFiles 1
                numPartitions 0
                numRows 0
                rawDataSize 0
                serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 17966
                transient_lastDdlTime 1366743737
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.bucketmapjoin_tmp_result
            name: default.bucketmapjoin_tmp_result
      Truncated Path -> Alias:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10002 [hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10002]

  Stage: Stage-6
    Move Operator
      files:
          hdfs directory: true
          source: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10002
          destination: hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_12-03-31_286_2769775196968737402/-ext-10000




insert overwrite table bucketmapjoin_tmp_result 
select /*+mapjoin(a)*/ a.key, a.value, b.value 
from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b 
on a.key=b.key
Deleted /user/hive/warehouse/bucketmapjoin_tmp_result


select count(1) from bucketmapjoin_tmp_result
0

insert overwrite table bucketmapjoin_hash_result_1
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result
Deleted /user/hive/warehouse/bucketmapjoin_hash_result_1
set hive.optimize.bucketmapjoin = false

insert overwrite table bucketmapjoin_tmp_result 
select /*+mapjoin(a)*/ a.key, a.value, b.value 
from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b 
on a.key=b.key
Deleted /user/hive/warehouse/bucketmapjoin_tmp_result


select count(1) from bucketmapjoin_tmp_result
0

insert overwrite table bucketmapjoin_hash_result_2
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result
Deleted /user/hive/warehouse/bucketmapjoin_hash_result_2


select a.key-b.key, a.value1-b.value1, a.value2-b.value2
from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b
on a.key = b.key
NULL	NULL	NULL
