HIVE Codes
profession string, reviews int) row format delimited fields terminated by "|" lines
terminated by "\n" stored as textfile ;
# Copy the u.user file from the ml-cloud-dataset S3 bucket into the current directory.
aws s3 cp s3://ml-cloud-dataset/u.user .
-- External table over a pipe-delimited text file, one record per line.
-- The storage clause must be spelled STORED AS; "store as" is not valid HiveQL.
-- NOTE(review): no LOCATION clause is given, so the table points at the default
-- warehouse path; presumably the data file still has to be loaded there -- confirm.
create external table if not exists user_info (
    id int,
    age int,
    gender string,
    profession string,
    reviews int
)
row format delimited
    fields terminated by "|"
    lines terminated by "\n"
stored as textfile ;

-- Show the resulting column layout.
describe user_info ;
-- Two managed tables with the same three columns; they receive the
-- gender = 'M' and gender = 'F' rows selected from user_info.
CREATE TABLE IF NOT EXISTS male_users (
    id INT,
    gender STRING,
    profession STRING
);

CREATE TABLE IF NOT EXISTS female_users (
    id INT,
    gender STRING,
    profession STRING
);
-- Multi-table insert: a single FROM clause on user_info feeds both inserts.
-- Rows with gender 'M' go to male_users, rows with gender 'F' to female_users.
FROM user_info
INSERT INTO TABLE male_users
    SELECT id, gender, profession
    WHERE gender = 'M'
INSERT INTO TABLE female_users
    SELECT id, gender, profession
    WHERE gender = 'F';
Dynamic Partitioning
-- User table partitioned by gender and bucketed on age into 7 buckets.
-- The field delimiter is the single character '|', the same one user_info uses.
-- A delimiter written as ' |' (with a leading space) would not split fields on
-- the pipe, because the delimited row format works with a one-character delimiter.
create table if not exists buck_user_info (
    id int,
    age int,
    profession string,
    ratings int
)
partitioned by (gender string)
clustered by (age) into 7 buckets
row format delimited
    fields terminated by '|'
    lines terminated by '\n'
stored as textfile ;

-- Show the column layout, including the partition column.
desc buck_user_info ;
CASE STUDY
# Download the 2019-Oct and 2019-Nov CSV files, then start the Hive CLI.
# The URLs point directly at the e-commerce-events-ml S3 bucket; a proxy host
# ("") that had been spliced into them is removed.
wget https://e-commerce-events-ml.s3.amazonaws.com/2019-Oct.csv
wget https://e-commerce-events-ml.s3.amazonaws.com/2019-Nov.csv
hive
-- Session settings used by the partitioned / bucketed tables in this case study:
-- turn dynamic partitioning on, put it in nonstrict mode, and enforce bucketing.
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
SET hive.enforce.bucketing=true;
-- Event table partitioned by event_type, with no CLUSTERED BY clause.
-- Rows are parsed as CSV by OpenCSVSerde: comma separator, double-quote
-- quoting, backslash escape.
-- NOTE(review): the Hive CSV SerDe documentation says OpenCSVSerde exposes every
-- column as string regardless of the declared type -- confirm how event_time and
-- price actually behave in the queries that read this table.
CREATE TABLE IF NOT EXISTS ecom_no_cls (
    event_time    TIMESTAMP,
    product_id    STRING,
    category_id   STRING,
    category_code STRING,
    brand         STRING,
    price         FLOAT,
    user_id       BIGINT,
    user_session  STRING
)
PARTITIONED BY (event_type STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
    "separatorChar" = ",",
    "quoteChar"     = "\"",
    "escapeChar"    = "\\"
)
STORED AS TEXTFILE;
Queries:
-- ** (note marker kept from the original line; its meaning is not stated here)
-- Brand with the highest summed price in Sales, ignoring rows with an empty brand.
-- NOTE(review): there is no event_type filter, so rows of every event type
-- contribute to Total_Sales -- confirm that is intended.
select
    brand,
    sum(price) as Total_Sales
from Sales
where brand != ''
group by brand
order by Total_Sales desc
limit 1;
-- * (note marker kept from the original line; its meaning is not stated here)
-- Brand with the highest summed price in ecom_bucket, ignoring rows with an empty brand.
-- NOTE(review): there is no event_type filter, so rows of every event type
-- contribute to Total_Sales -- confirm that is intended.
select
    brand,
    sum(price) as Total_Sales
from ecom_bucket
where brand != ''
group by brand
order by Total_Sales desc
limit 1;
-- Brand with the highest summed price in ecom_no_cls, ignoring rows with an empty brand.
-- The ORDER BY is required: without it LIMIT 1 returns an arbitrary brand rather
-- than the top one, and the result would not be comparable with the same query
-- run against Sales and ecom_bucket.
-- NOTE(review): there is no event_type filter, so rows of every event type
-- contribute to Total_Sales -- confirm that is intended.
select
    brand,
    sum(price) as Total_Sales
from ecom_no_cls
where brand != ''
group by brand
order by Total_Sales desc
limit 1;
-- Brands in Sales whose 'purchase' price total was higher in month 11 than in
-- month 10, largest increase first.
--   oct_s / nov_s : summed price of purchase rows whose event_time month is 10 / 11.
-- The month is compared as the two-character string date_format(..., 'MM') returns,
-- instead of relying on implicit string-to-number coercion, and string literals
-- use single quotes throughout.
-- NOTE(review): only the month is tested, not the year -- fine if the table holds
-- a single Oct/Nov pair; confirm.
with sale as (
    select
        brand,
        sum(case when date_format(event_time, 'MM') = '10' then price else 0 end) as oct_s,
        sum(case when date_format(event_time, 'MM') = '11' then price else 0 end) as nov_s
    from Sales
    where event_type = 'purchase'
      and brand != ''
    group by brand
)
select
    brand,
    oct_s,
    nov_s,
    nov_s - oct_s as sales_increase
from sale
where nov_s > oct_s
order by sales_increase desc;
-- * (note marker kept from the original line; its meaning is not stated here)
-- Brands in ecom_bucket whose 'purchase' price total was higher in month 11 than
-- in month 10, largest increase first.
--   oct_s / nov_s : summed price of purchase rows whose event_time month is 10 / 11.
-- The month is compared as the two-character string date_format(..., 'MM') returns,
-- instead of relying on implicit string-to-number coercion, and string literals
-- use single quotes throughout.
-- NOTE(review): only the month is tested, not the year -- fine if the table holds
-- a single Oct/Nov pair; confirm.
with sale as (
    select
        brand,
        sum(case when date_format(event_time, 'MM') = '10' then price else 0 end) as oct_s,
        sum(case when date_format(event_time, 'MM') = '11' then price else 0 end) as nov_s
    from ecom_bucket
    where event_type = 'purchase'
      and brand != ''
    group by brand
)
select
    brand,
    oct_s,
    nov_s,
    nov_s - oct_s as sales_increase
from sale
where nov_s > oct_s
order by sales_increase desc;
-- ** (note marker kept from the original line; its meaning is not stated here)
-- Brands in ecom_no_cls whose 'purchase' price total was higher in month 11 than
-- in month 10, largest increase first.
--   oct_s / nov_s : summed price of purchase rows whose event_time month is 10 / 11.
-- The month is compared as the two-character string date_format(..., 'MM') returns,
-- instead of relying on implicit string-to-number coercion, and string literals
-- use single quotes throughout.
-- NOTE(review): only the month is tested, not the year -- fine if the table holds
-- a single Oct/Nov pair; confirm.
with sale as (
    select
        brand,
        sum(case when date_format(event_time, 'MM') = '10' then price else 0 end) as oct_s,
        sum(case when date_format(event_time, 'MM') = '11' then price else 0 end) as nov_s
    from ecom_no_cls
    where event_type = 'purchase'
      and brand != ''
    group by brand
)
select
    brand,
    oct_s,
    nov_s,
    nov_s - oct_s as sales_increase
from sale
where nov_s > oct_s
order by sales_increase desc;
Your company wants to reward the top 10 users of its website with a
Golden Customer plan. Write a query to generate a list of top 10 users who spend
the most.