set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat
set mapred.min.split.size = 64


CREATE TABLE T1(name STRING) STORED AS TEXTFILE


LOAD DATA LOCAL INPATH 'seed_data_files/kv1.txt' INTO TABLE T1
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/kv1.txt


CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE


EXPLAIN INSERT OVERWRITE TABLE T2 SELECT * FROM (
SELECT tmp1.name as name FROM (
  SELECT name, 'MMM' AS n FROM T1) tmp1 
  JOIN (SELECT 'MMM' AS n FROM T1) tmp2
  JOIN (SELECT 'MMM' AS n FROM T1) tmp3
  ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL name)) (TOK_SELEXPR 'MMM' n)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'MMM' n)))) tmp2)) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'MMM' n)))) tmp3) (AND (= (. (TOK_TABLE_OR_COL tmp1) n) (. (TOK_TABLE_OR_COL tmp2) n)) (= (. (TOK_TABLE_OR_COL tmp1) n) (. (TOK_TABLE_OR_COL tmp3) n))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) name) name)))) ttt)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME T2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 5000000)))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1
  Stage-3 depends on stages: Stage-2
  Stage-0 depends on stages: Stage-3
  Stage-4 depends on stages: Stage-0

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        ttt:tmp1:t1 
          TableScan
            alias: t1
            Select Operator
              expressions:
                    expr: name
                    type: string
                    expr: 'MMM'
                    type: string
              outputColumnNames: _col0, _col1
              Reduce Output Operator
                sort order: 
                tag: 0
                value expressions:
                      expr: _col0
                      type: string
                      expr: _col1
                      type: string
        ttt:tmp2:t1 
          TableScan
            alias: t1
            Select Operator
              expressions:
                    expr: 'MMM'
                    type: string
              outputColumnNames: _col0
              Reduce Output Operator
                sort order: 
                tag: 1
                value expressions:
                      expr: _col0
                      type: string
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          condition expressions:
            0 {VALUE._col0} {VALUE._col1}
            1 {VALUE._col0}
          handleSkewJoin: false
          outputColumnNames: _col0, _col1, _col2
          Filter Operator
            predicate:
                expr: (_col1 = _col2)
                type: boolean
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

  Stage: Stage-2
    Map Reduce
      Alias -> Map Operator Tree:
        $INTNAME 
            Reduce Output Operator
              key expressions:
                    expr: _col1
                    type: string
              sort order: +
              Map-reduce partition columns:
                    expr: _col1
                    type: string
              tag: 0
              value expressions:
                    expr: _col0
                    type: string
        ttt:tmp3:t1 
          TableScan
            alias: t1
            Select Operator
              expressions:
                    expr: 'MMM'
                    type: string
              outputColumnNames: _col0
              Reduce Output Operator
                key expressions:
                      expr: _col0
                      type: string
                sort order: +
                Map-reduce partition columns:
                      expr: _col0
                      type: string
                tag: 1
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          condition expressions:
            0 {VALUE._col1}
            1 
          handleSkewJoin: false
          outputColumnNames: _col1
          Select Operator
            expressions:
                  expr: _col1
                  type: string
            outputColumnNames: _col0
            Select Operator
              expressions:
                    expr: _col0
                    type: string
              outputColumnNames: _col0
              Limit
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

  Stage: Stage-3
    Map Reduce
      Alias -> Map Operator Tree:
        hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_11-41-40_405_3858259332039900449/-mr-10003 
            Reduce Output Operator
              sort order: 
              tag: -1
              value expressions:
                    expr: _col0
                    type: string
      Reduce Operator Tree:
        Extract
          Limit
            File Output Operator
              compressed: false
              GlobalTableId: 1
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.t2

  Stage: Stage-0
    Move Operator
      tables:
          replace: true
          table:
              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.t2

  Stage: Stage-4
    Stats-Aggr Operator





INSERT OVERWRITE TABLE T2 SELECT * FROM (
SELECT tmp1.name as name FROM (
  SELECT name, 'MMM' AS n FROM T1) tmp1 
  JOIN (SELECT 'MMM' AS n FROM T1) tmp2
  JOIN (SELECT 'MMM' AS n FROM T1) tmp3
  ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000
Deleted /user/hive/warehouse/t2


EXPLAIN SELECT COUNT(1) FROM T2
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT 1)))))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        t2 
          TableScan
            alias: t2
            Select Operator
              Group By Operator
                aggregations:
                      expr: count(1)
                bucketGroup: false
                mode: hash
                outputColumnNames: _col0
                Reduce Output Operator
                  sort order: 
                  tag: -1
                  value expressions:
                        expr: _col0
                        type: bigint
      Reduce Operator Tree:
        Group By Operator
          aggregations:
                expr: count(VALUE._col0)
          bucketGroup: false
          mode: mergepartial
          outputColumnNames: _col0
          Select Operator
            expressions:
                  expr: _col0
                  type: bigint
            outputColumnNames: _col0
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

  Stage: Stage-0
    Fetch Operator
      limit: -1



SELECT COUNT(1) FROM T2
5000000


CREATE TABLE T3(name STRING) STORED AS TEXTFILE

LOAD DATA LOCAL INPATH 'seed_data_files/kv1.txt' INTO TABLE T3
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/kv1.txt

LOAD DATA LOCAL INPATH 'seed_data_files/kv2.txt' INTO TABLE T3
Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/kv2.txt


EXPLAIN SELECT COUNT(1) FROM T3
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT 1)))))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        t3 
          TableScan
            alias: t3
            Select Operator
              Group By Operator
                aggregations:
                      expr: count(1)
                bucketGroup: false
                mode: hash
                outputColumnNames: _col0
                Reduce Output Operator
                  sort order: 
                  tag: -1
                  value expressions:
                        expr: _col0
                        type: bigint
      Reduce Operator Tree:
        Group By Operator
          aggregations:
                expr: count(VALUE._col0)
          bucketGroup: false
          mode: mergepartial
          outputColumnNames: _col0
          Select Operator
            expressions:
                  expr: _col0
                  type: bigint
            outputColumnNames: _col0
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

  Stage: Stage-0
    Fetch Operator
      limit: -1



SELECT COUNT(1) FROM T3
1000
