Hive Practice - New
===================

To run Hive in debug mode:
--------------------------
hive -hiveconf hive.root.logger=DEBUG,console

A. Create Database
------------------
create database retail_hive;

B. Select Database
------------------
use retail_hive;

C. Create table for storing transactional records
-------------------------------------------------
create table retail_hive.sales_data_tbl (TransID INT, TransDate STRING, Product STRING, Price DOUBLE, PaymentType STRING, CustName STRING, City STRING, State STRING, Country STRING)
row format delimited FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
LOCATION '/apps/hive/warehouse/retail_hive.db/sales_data_tbl';

In the Hive metastore, this DDL is recorded across several tables:

DBS:
Database Name : retail_hive

TBLS:
Table Name : sales_data_tbl
Table Type : MANAGED_TABLE (the default when none is specified)

COLUMNS_V2:
Column Names : TransID, TransDate, etc.
Column Data Types : INT, STRING, etc.

SDS:
Field Delimiter : ,
Line Delimiter : \n
Table Data Location : /apps/hive/warehouse/retail_hive.db/sales_data_tbl
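
To see these rows directly, you can query the metastore database itself. A minimal sketch, assuming a MySQL-backed metastore whose database is named hive (the name varies by install); the DBS, TBLS, and SDS tables and their columns follow the standard metastore schema:

use hive;  -- or whatever your metastore database is named

SELECT d.NAME     AS db_name,
       t.TBL_NAME AS table_name,
       t.TBL_TYPE AS table_type,
       s.LOCATION AS data_location
FROM   DBS  d
JOIN   TBLS t ON t.DB_ID = d.DB_ID
JOIN   SDS  s ON s.SD_ID = t.SD_ID
WHERE  d.NAME = 'retail_hive';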

create EXTERNAL table sales_data_tbl1 (TransID INT, TransDate STRING, Product STRING, Price DOUBLE, PaymentType STRING, CustName STRING, City STRING, State STRING, Country STRING)
row format delimited FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
LOCATION '/project1/data/pig/pigoutput/sales_data';

Keep a copy of the Pig output first, since LOAD DATA INPATH moves (not copies) the files into the table's directory:

hdfs dfs -cp /project1/data/pig/pigoutput/sales_data/part* /project1/data/pig/;

LOAD DATA INPATH '/project1/data/pig/pigoutput/sales_data/part*' INTO TABLE sales_data_tbl;

D. Describing metadata or schema of the table
---------------------------------------------
describe sales_data_tbl;

The ALTER below marks the table as external, so a later DROP TABLE leaves the data files in place:

alter table sales_data_tbl SET TBLPROPERTIES('EXTERNAL'='TRUE');

E. Load the data into the table
-------------------------------
(See the LOAD DATA INPATH statement in section C above.)

F. Counting the number of records
---------------------------------
select count(*) from sales_data_tbl;

G. Counting total spending by year, month, country, and product
---------------------------------------------------------------
select year(transdate),sum(Price) from sales_data_tbl group by year(transdate);
select month(transdate),sum(Price) from sales_data_tbl group by month(transdate);
select year(transdate),month(transdate),sum(Price) from sales_data_tbl group by
year(transdate),month(transdate);
select country, sum(price) from sales_data_tbl group by country;
select product, sum(price) from sales_data_tbl group by product;

H. Summary Tables
-----------------

sales_summary_by_city:
----------------------
drop table sales_summary_by_city;

create table sales_summary_by_city (TransDate STRING, Amount DOUBLE, City STRING, State STRING, Country STRING)
row format delimited FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';

insert into sales_summary_by_city
select to_date(transdate), sum(price), city, state, country
from sales_data_tbl
group by to_date(transdate), city, state, country;

select * from sales_summary_by_city;

sales_summary_by_state:
-----------------------
drop table sales_summary_by_state;

create table sales_summary_by_state (TransDate STRING, Amount DOUBLE, State STRING, Country STRING)
row format delimited FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';

insert into sales_summary_by_state
select to_date(transdate), sum(price), state, country
from sales_data_tbl
group by to_date(transdate), state, country;

select * from sales_summary_by_state;

sales_summary_by_country:
-------------------------
drop table sales_summary_by_country;

create table sales_summary_by_country (TransDate STRING, Amount DOUBLE, Country STRING)
row format delimited FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';

insert into sales_summary_by_country
select to_date(transdate), sum(price), country
from sales_data_tbl
group by to_date(transdate), country;

select * from sales_summary_by_country;


sales_summary_by_month_by_city:
-------------------------------
drop table sales_summary_by_month_by_city;

create table sales_summary_by_month_by_city (TransMonth STRING, Amount DOUBLE, City STRING, State STRING, Country STRING)
row format delimited FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';

insert into sales_summary_by_month_by_city
select date_format(to_date(transdate),'MM-yyyy'), sum(price), city, state, country
from sales_data_tbl
group by date_format(to_date(transdate),'MM-yyyy'), city, state, country;

select * from sales_summary_by_month_by_city;

sales_summary_by_month_by_state:
--------------------------------
drop table sales_summary_by_month_by_state;

create table sales_summary_by_month_by_state (TransMonth STRING, Amount DOUBLE, State STRING, Country STRING)
row format delimited FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';

insert into sales_summary_by_month_by_state
select date_format(to_date(transdate),'MM-yyyy'), sum(price), state, country
from sales_data_tbl
group by date_format(to_date(transdate),'MM-yyyy'), state, country;

select * from sales_summary_by_month_by_state;

sales_summary_by_month_by_country:
----------------------------------
drop table sales_summary_by_month_by_country;

create table sales_summary_by_month_by_country (TransMonth STRING, Amount DOUBLE, Country STRING)
row format delimited FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';

insert into sales_summary_by_month_by_country
select date_format(to_date(transdate),'MM-yyyy'), sum(price), country
from sales_data_tbl
group by date_format(to_date(transdate),'MM-yyyy'), country;

select * from sales_summary_by_month_by_country;

sales_summary_by_month:
-----------------------
drop table sales_summary_by_month;

create table sales_summary_by_month (TransYear STRING, TransMonth STRING, Amount DOUBLE)
row format delimited FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';

insert into sales_summary_by_month
select year(transdate), month(transdate), sum(Price)
from sales_data_tbl
group by year(transdate), month(transdate);

select * from sales_summary_by_month;


I. Create partitioned table
---------------------------

create external table sales_by_country (TransID INT, TransDate STRING, Product STRING, Price DOUBLE, PaymentType STRING, CustName STRING, State STRING, City STRING)
partitioned by (Country STRING)
row format delimited fields terminated by ','
stored as textfile;

describe formatted sales_by_country;

set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;
set hive.enforce.bucketing=true;
set hive.vectorized.execution.enabled = true;
set hive.vectorized.execution.reduce.enabled = true;

For a dynamic-partition insert, the partition column (Country) must come last in the select list:

INSERT OVERWRITE TABLE sales_by_country PARTITION(country)
select txn.TransID, txn.TransDate, txn.Product, txn.Price, txn.PaymentType, txn.CustName, txn.State, txn.City, txn.Country
from sales_data_tbl txn;
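
To confirm which country partitions the insert created (output depends on your data):

show partitions sales_by_country;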

create external table sales_summary (TransDate STRING, Amount DOUBLE, State STRING, City STRING)
row format delimited FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';

select to_date(transdate), sum(price), state, country
from sales_data_tbl
group by to_date(transdate), state, country;

create table txnrecords(txnno INT, txndate STRING, custno INT, amount DOUBLE, product STRING, category STRING, city STRING, state STRING, spendby STRING)
row format delimited fields terminated by ',';

create table txnrecsByCat(txnno INT, txndate STRING, custno INT, amount DOUBLE, product STRING, city STRING, state STRING, spendby STRING)
partitioned by (category STRING)
clustered by (state) INTO 10 buckets
row format delimited fields terminated by ','
stored as textfile;

describe formatted txnrecsByCat;

Beeline:
========
beeline

!connect jdbc:hive2://hn2.hadoop.com:10000/default
User Name: hdpuser
Password: welcome1

set -v;

J. Configure Hive to allow partitions
-------------------------------------

However, a query across all partitions could trigger an enormous MapReduce job if the table data and number of partitions are large. A highly suggested safety measure is putting Hive into strict mode (hive.mapred.mode=strict), which prohibits queries of partitioned tables without a WHERE clause that filters on partitions. Dynamic-partition inserts, on the other hand, require the dynamic partition mode to be nonstrict, as in the following session:

set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;
set hive.enforce.bucketing=true;
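
Under strict mode (hive.mapred.mode=strict), a query on a partitioned table must filter on the partition column. A minimal sketch that passes the check, using the sales_by_country table from section I ('USA' is just an illustrative partition value):

select count(*) from sales_by_country where country = 'USA';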

K. Load data into partition table
---------------------------------
set hive.vectorized.execution.enabled = true;
set hive.vectorized.execution.reduce.enabled = true;

from txnrecords txn
INSERT OVERWRITE TABLE txnrecsByCat PARTITION(category)
select txn.txnno, txn.txndate, txn.custno, txn.amount, txn.product, txn.city, txn.state, txn.spendby, txn.category
DISTRIBUTE BY category;
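
Because txnrecsByCat is clustered by state into 10 buckets, TABLESAMPLE can read a single bucket instead of the whole table. A sketch (which rows land in bucket 1 depends on your data):

select * from txnrecsByCat TABLESAMPLE(BUCKET 1 OUT OF 10 ON state) t limit 10;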

=============================
find sales based on age group
=============================

create table customer(custno string, firstname string, lastname string, age int, profession string) row format delimited fields terminated by ',';

load data local inpath '/home/hdpuser/custs.txt' into table customer;

create table out1 (custno int, firstname string, age int, profession string, amount double, product string) row format delimited fields terminated by ',';

insert overwrite table out1
select a.custno, a.firstname, a.age, a.profession, b.amount, b.product
from customer a JOIN txnrecords b ON a.custno = b.custno;

select * from out1 limit 100;

create table out2 (custno int, firstname string, age int, profession string, amount double, product string, level string) row format delimited fields terminated by ',';

insert overwrite table out2
select *,
  case
    when age < 30 then 'young'
    when age >= 30 and age < 50 then 'middle'
    when age >= 50 then 'old'
    else 'others'
  end
from out1;

select * from out2 limit 100;

describe out2;

create table out3 (level string, amount double) row format delimited fields terminated by ',';

insert overwrite table out3 select level,sum(amount) from out2 group by level;

select * from out3 limit 100;

==============
simple join
==============

create table employee(name string, salary float,city string) row format delimited
fields terminated by ',';

load data local inpath '/home/hdpuser/emp.txt' into table employee;

select * from employee where name='tarun';

create table mailid (name string, email string) row format delimited fields
terminated by ',';

load data local inpath '/home/hdpuser/email.txt' into table mailid;

select a.name, a.city, a.salary, b.email from employee a join mailid b on a.name = b.name;

select a.name, a.city, a.salary, b.email from employee a left outer join mailid b on a.name = b.name;

select a.name, a.city, a.salary, b.email from employee a right outer join mailid b on a.name = b.name;

select a.name, a.city, a.salary, b.email from employee a full outer join mailid b on a.name = b.name;

===============================================
Custom Mapper Code to manipulate unix timestamp
===============================================

CREATE TABLE u_data ( userid INT, movieid INT, rating INT, unixtime STRING) ROW
FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE;

And load it into the table that was just created:

LOAD DATA LOCAL INPATH '/home/hdpuser/u.data.txt' OVERWRITE INTO TABLE u_data;

Count the number of rows in table u_data:

SELECT COUNT(*) FROM u_data;

****Create weekday_mapper.py:

import sys
import datetime

# For each tab-separated input record, replace the unix timestamp
# with the ISO weekday (1 = Monday ... 7 = Sunday).
for line in sys.stdin:
    line = line.strip()
    userid, movieid, rating, unixtime = line.split('\t')
    weekday = datetime.datetime.fromtimestamp(float(unixtime)).isoweekday()
    print('\t'.join([userid, movieid, rating, str(weekday)]))

CREATE TABLE u_data_new (userid INT, movieid INT, rating INT, weekday INT) ROW
FORMAT DELIMITED FIELDS TERMINATED BY '\t';

add FILE /home/hdpuser/weekday_mapper.py;

****Note that columns will be transformed to string and delimited
****by TAB before feeding to the user script, and the standard output
****of the user script will be treated as TAB-separated string columns.

****The following command uses the TRANSFORM clause to embed the mapper script.

INSERT OVERWRITE TABLE u_data_new
SELECT TRANSFORM (userid, movieid, rating, unixtime)
USING 'python weekday_mapper.py'
AS (userid, movieid, rating, weekday)
FROM u_data;

SELECT weekday, COUNT(*) FROM u_data_new GROUP BY weekday;

===========
UDF
===========

import java.util.Date;
import java.text.DateFormat;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

public class UnixtimeToDate extends UDF {
    // Called once per row: converts a unix timestamp (seconds, as text)
    // into a formatted date string. Returns null for null input.
    public Text evaluate(Text text) {
        if (text == null) return null;
        long timestamp = Long.parseLong(text.toString());
        return new Text(toDate(timestamp));
    }

    // java.util.Date expects milliseconds, hence the * 1000.
    private String toDate(long timestamp) {
        Date date = new Date(timestamp * 1000);
        return DateFormat.getInstance().format(date);
    }
}

/usr/bin/javac -classpath /usr/local/hadoop-2.6.4/share/hadoop/common/hadoop-common-2.6.4.jar:/etc/hadoop/apache-hive-0.13.0-bin/apache-hive-0.13.0-bin/lib/hive-exec-0.13.0.jar UnixtimeToDate.java

****Pack this class file into a jar:

$/usr/bin/jar -cvf convert.jar UnixtimeToDate.class

****Verify the jar using the command:

$/usr/bin/jar -tvf convert.jar

****Add this jar at the hive prompt:

ADD JAR /home/hdpuser/convert.jar;

****Then you create your custom function as follows:

create temporary function userdate as 'UnixtimeToDate';

****Sample data (loaded below from /data/counter):
****one,1386023259550
****two,1389523259550
****three,1389523259550
****four,1389523259550

create table testing(id string, id_time string) row format delimited fields terminated by ',';

load data inpath '/data/counter' into table testing;

hive> select * from testing;


****OK
****one 1386023259550
****two 1389523259550
****three 1389523259550
****four 1389523259550

****Then use the function 'userdate' in a SQL query:

select id,userdate(id_time) from testing;

****OK
****four 3/28/02 8:12 PM
****one 4/30/91 1:59 PM
****two 3/28/02 8:12 PM
****three 3/28/02 8:12 PM

Hive View:
----------
If you get an S020 data storage error, check the ambari-server log to find out which table is missing, then create it as shown below.

mysql -u root -pwelcome1

CREATE TABLE ambari.DS_JOBIMPL_15 (DS_id VARCHAR(255) NOT NULL, DS_applicationId TEXT,
DS_confFile TEXT, DS_dagId TEXT, DS_dagName TEXT, DS_dataBase TEXT,
DS_dateSubmitted BIGINT, DS_duration BIGINT, DS_forcedContent TEXT,
DS_globalSettings TEXT, DS_guid TEXT, DS_hiveQueryId TEXT, DS_logFile TEXT,
DS_owner TEXT, DS_queryFile TEXT, DS_queryId TEXT, DS_referrer TEXT, DS_sessionTag TEXT,
DS_sqlState TEXT, DS_status TEXT, DS_statusDir TEXT, DS_statusMessage TEXT,
DS_title TEXT, PRIMARY KEY (ds_id));
