-- Hive Code: BDM2 Assignment
hive
-- Base tables for the assignment: ratings, movies and users.
-- NOTE(review): the column layout looks like the MovieLens 100K files
-- (u.data / u.item / u.user) -- confirm against the data actually loaded.
-- NOTE(review): only the last table carries a `row format` clause in this copy
-- of the script, and no `fields terminated by` is given anywhere; add the
-- delimiter that matches each input file before loading data.

-- One row per (user, item) rating.
-- `timestamp` is a reserved word in Hive, so the column name is backtick-quoted.
create table ratings (
    user_id     INT,
    item_id     INT,
    rating      INT,
    `timestamp` INT
);

-- One row per movie. The genre columns are INT flags; later queries in this
-- script test them with comparisons such as `b.Drama = 1`.
-- Column names cannot contain spaces or hyphens unquoted, so the release-date
-- column is `video_release_date` and the science-fiction flag is `Sci_Fi`
-- (the spelling the later `b.Sci_Fi` reference already uses).
-- `Documentry` keeps its original spelling because later queries use it.
create table movies (
    movie_id           INT,
    movie_title        STRING,
    release_date       STRING,
    video_release_date STRING,
    IMDb_URL           STRING,
    unknown            INT,
    Action             INT,
    Adventure          INT,
    Animation          INT,
    Children           INT,
    Comedy             INT,
    Crime              INT,
    Documentry         INT,
    Drama              INT,
    Fantasy            INT,
    Film_Noir          INT,
    Horror             INT,
    Musical            INT,
    Mystery            INT,
    Romance            INT,
    Sci_Fi             INT,
    Thriller           INT,
    War                INT,
    Western            INT
);

-- One row per user.
-- `user` is a reserved word in Hive, so the table name is backtick-quoted;
-- queries that read this table must quote it the same way.
-- With no `fields terminated by`, Hive falls back to its default field
-- delimiter (Ctrl-A, '\001').
create table `user` (
    user_id    INT,
    age        INT,
    gender     STRING,
    occupation STRING,
    zip_code   STRING
)
row format delimited;
-- Q2 b)
-- iii) Top 30 movies having at least 50, 20 and 100 raters, with their average rating
-- At least 50 users:
-- t1: going by the heading directly above, presumably the top 30 movies with
-- at least 50 raters together with their average rating.
-- NOTE(review): the SELECT that should follow `as` is missing from this copy
-- of the script; restore the query body before running.
create table t1 as
-- At least 20 users:
-- t2: presumably the "at least 20 raters" variant of the top-30 query, since
-- it follows the 20-users heading.
-- NOTE(review): the SELECT that should follow `as` is missing from this copy.
create table t2 as
-- t3: presumably the "at least 100 raters" variant named in the Q2 b) iii)
-- heading -- TODO confirm, no heading of its own survives here.
-- NOTE(review): the SELECT that should follow `as` is missing from this copy.
create table t3 as
Q2 c)
-- t4: first table created under the Q2 c) heading.
-- NOTE(review): the SELECT that should follow `as` is missing from this copy,
-- so what t4 holds cannot be determined from here.
create table t4 as
-- t5: produces one output column per genre (Unknown ... Western), each from a
-- CASE expression over the genre flag columns of alias `b`. The two branches
-- that survive map `b.Drama = 1` to "Drama" and `b.Sci_Fi = 1` to "Sci-Fi".
-- NOTE(review): this copy is truncated. Missing are the `select` keyword, the
-- opening `( case` of the Unknown column, the `when ... then ...` branch of
-- every genre other than Drama and Sci_Fi, and the `from` clause that defines
-- alias `b`. Presumably each missing branch follows the same
-- `when b.<genre> = 1 then "<label>"` pattern -- confirm against the original
-- before restoring.
create table t5 as
end) as Unknown,
( case
end) as Action,
( case
end) as Adventure,
( case
end) as Animation,
( case
end) as Children,
( case
end) as Comedy,
( case
end) as Crime,
( case
end) as Documentry,
( case
when b.Drama = 1 then "Drama"
end) as Drama,
( case
end) as Fantasy,
( case
end) as Film_Noir,
( case
end) as Horror,
( case
end) as Musical,
( case
end) as Mystery,
( case
end) as Romance,
( case
when b.Sci_Fi = 1 then "Sci-Fi"
end) as Sci_Fi,
( case
end) as Thriller,
( case
end) as War,
( case
end) as Western
-- t6: NOTE(review): the SELECT that should follow `as` is missing from this
-- copy, so what t6 holds cannot be determined from here.
create table t6 as
-- Q2 c) a)
-- Creating a column for genre by concatenating all the genres.
-- t7: per the note directly above, adds a single Genre column built by
-- concatenating the per-genre columns. The t8 and t9 queries below read
-- gender, Genre and rating from this table.
-- NOTE(review): the SELECT that should follow `as` is missing from this copy.
create table t7 as
-- For males
-- t8: the 30 Genre values with the highest average rating among rows of t7
-- whose gender is "M", ordered from highest to lowest average.
create table t8 as
select
    gender,
    Genre,
    avg(rating) as avg_rating
from t7
where gender = "M"
group by
    gender,
    Genre
order by avg_rating desc
limit 30;
-- For females
-- t9: the 30 Genre values with the highest average rating among rows of t7
-- whose gender is "F", ordered from highest to lowest average.
create table t9 as
select
    gender,
    Genre,
    avg(rating) as avg_rating
from t7
where gender = "F"
group by
    gender,
    Genre
order by avg_rating desc
limit 30;