-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements.  See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License") you may not use this file except in compliance with
-- the License.  You may obtain a copy of the License at
--
--     http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
load data local inpath 'seed_data_files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin;
load data local inpath 'seed_data_files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin;

CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
load data local inpath 'seed_data_files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
load data local inpath 'seed_data_files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
load data local inpath 'seed_data_files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
load data local inpath 'seed_data_files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
load data local inpath 'seed_data_files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09');
load data local inpath 'seed_data_files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09');
load data local inpath 'seed_data_files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09');
load data local inpath 'seed_data_files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09');

CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
load data local inpath 'seed_data_files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
load data local inpath 'seed_data_files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
load data local inpath 'seed_data_files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09');
load data local inpath 'seed_data_files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09');

create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint);
create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint);

set hive.optimize.bucketmapjoin = true;
create table bucketmapjoin_tmp_result (key string , value1 string, value2 string);

explain extended
insert overwrite table bucketmapjoin_tmp_result 
select /*+mapjoin(a)*/ a.key, a.value, b.value 
from srcbucket_mapjoin a join srcbucket_mapjoin_part b 
on a.key=b.key;

insert overwrite table bucketmapjoin_tmp_result 
select /*+mapjoin(a)*/ a.key, a.value, b.value 
from srcbucket_mapjoin a join srcbucket_mapjoin_part b 
on a.key=b.key;

select count(1) from bucketmapjoin_tmp_result;
insert overwrite table bucketmapjoin_hash_result_1
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result;

set hive.optimize.bucketmapjoin = false;
insert overwrite table bucketmapjoin_tmp_result 
select /*+mapjoin(a)*/ a.key, a.value, b.value 
from srcbucket_mapjoin a join srcbucket_mapjoin_part b 
on a.key=b.key;

select count(1) from bucketmapjoin_tmp_result;
insert overwrite table bucketmapjoin_hash_result_2
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result;

select a.key-b.key, a.value1-b.value1, a.value2-b.value2
from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b
on a.key = b.key;


set hive.optimize.bucketmapjoin = true;
explain extended
insert overwrite table bucketmapjoin_tmp_result 
select /*+mapjoin(a)*/ a.key, a.value, b.value 
from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b 
on a.key=b.key;

insert overwrite table bucketmapjoin_tmp_result 
select /*+mapjoin(a)*/ a.key, a.value, b.value 
from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b 
on a.key=b.key;

select count(1) from bucketmapjoin_tmp_result;
insert overwrite table bucketmapjoin_hash_result_1
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result;

set hive.optimize.bucketmapjoin = false;
insert overwrite table bucketmapjoin_tmp_result 
select /*+mapjoin(a)*/ a.key, a.value, b.value 
from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b 
on a.key=b.key;

select count(1) from bucketmapjoin_tmp_result;
insert overwrite table bucketmapjoin_hash_result_2
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result;

select a.key-b.key, a.value1-b.value1, a.value2-b.value2
from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b
on a.key = b.key;
