0% found this document useful (0 votes)
15 views3 pages

Commande HIVE

Uploaded by

aicha benslimen
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
15 views3 pages

Commande HIVE

Uploaded by

aicha benslimen
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 3

create table records (year string, temperature int, quality int) row format

delimited fields TERMINATED by '\t';


load data local inpath '/home/cloudera/Desktop/sample-with-tab.txt'
OVERWRITE INTO TABLE records;
show tables ;
describe records ;
select * from records ;
select distinct year from records ;
CREATE TABLE stations (usaf STRING, wban STRING, name STRING) ROW
FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe' WITH
SERDEPROPERTIES ("input.regex" = "(\\d{6})\\s(\\d{5})\\s(.{29}).*");
load data local inpath '/home/cloudera/Desktop/stations-fixed-width.txt'
OVERWRITE INTO TABLE stations;
select * from stations where name like 'PARIS%' ;
Drop table records ;

create external table records (year string, temperature int, quality int) row
format delimited fields TERMINATED by '/t' location '/user/cloudera/records'
load data local inpath '/home/cloudera/Desktop/sample-with-tab.txt'
OVERWRITE INTO TABLE records;
describe formatted records ;
create table records2 (station string, year string, temperature int, quality int)
row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe' with
SERDEPROPERTIES ("input.regex"=".{4}(.{6}).{5}(.{4}).{68}\\+?(-?\\d{4})(\\d).*")
location '/user/cloudera/records2';
Code python
import re
import sys
for line in sys.stdin:
(year, temp, q) = line.strip().split()
if(temp != "9999" and re.match("[01459]",q)):
print ("%s\t%s" % (year, temp))
add file /home/cloudera/Desktop/is_good_quality.py;
From records SELECT TRANSFORM(year, temperature, quality) USING
'is_good_quality' AS year, temperature;

SELECT year, max(temperature) FROM records2 WHERE temperature != 9999


AND quality IN (0, 1, 4, 5, 9) GROUP BY year;
SELECT year, MAX(temperature) FROM records2 WHERE temperature != 9999
AND quality IN (0, 1, 4, 5, 9) GROUP BY year;
SELECT rec.year, MAX(CAST(rec.temperature AS INT))FROM (FROM records2
SELECT TRANSFORM(year, temperature, quality) USING 'is_good_quality' AS
year,temperature) rec GROUP BY rec.year;
SELECT year, AVG(temperature) FROM records2 WHERE temperature != 9999
AND quality IN (0, 1, 4, 5, 9) GROUP BY year;
SELECT AVG(max_temperature) FROM (SELECT station, year,
MAX(temperature) AS max_temperature FROM records2 WHERE
temperature != 9999 AND quality IN (0, 1, 4, 5, 9) GROUP BY station, year) mt;
SELECT FLOOR(avg_temperature/50), COUNT(station) FROM (SELECT station,
AVG(temperature) AS avg_temperature FROM records2 WHERE temperature !
= 9999 AND quality IN (0, 1, 4, 5, 9) GROUP BY station) mt GROUP BY
FLOOR(avg_temperature/50);
SELECT DISTINCT name FROM stations JOIN records2 ON (records2.station =
stations.usaf) WHERE year = '1901' OR year = '1902';
SELECT AVG(temperature) FROM stations JOIN records2 ON (records2.station =
stations.usaf) WHERE name LIKE 'UT0%' OR name LIKE 'TURKU%';

SELECT name, AVG(temperature) FROM stations JOIN records2 ON


(records2.station = stations.usaf) WHERE name LIKE 'UT0%' OR name LIKE
'TURKU%' GROUP BY name;

You might also like