Commandes Hive
-- External table over the tab-separated weather sample: one row per reading,
-- with columns (year, temperature, quality).
-- The field delimiter is the tab escape '\t'. The earlier '/t' is not a tab
-- escape, so tab-separated rows would not be split into the three columns.
-- The closing ';' is required so the statement that follows is parsed on its own.
CREATE EXTERNAL TABLE records (
    year        STRING,
    temperature INT,
    quality     INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
LOCATION '/user/cloudera/records';
-- Copy the local sample file into the `records` table; OVERWRITE replaces
-- whatever data the table currently holds.
LOAD DATA LOCAL INPATH '/home/cloudera/Desktop/sample-with-tab.txt'
    OVERWRITE INTO TABLE records;
-- Print the metadata of `records` in formatted form.
DESCRIBE FORMATTED records;
-- Table whose rows are parsed from fixed-width text lines by RegexSerDe.
-- Capture groups of "input.regex" map to the columns in order:
--   skip 4 chars, group 1 = next 6 chars            -> station
--   skip 5 chars, group 2 = next 4 chars            -> year
--   skip 68 chars, optional '+',
--                 group 3 = optional '-' + 4 digits -> temperature
--                 group 4 = one digit               -> quality
-- Backslashes are doubled because the pattern sits inside a Hive string literal.
CREATE TABLE records2 (
    station     STRING,
    year        STRING,
    temperature INT,
    quality     INT
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    "input.regex"=".{4}(.{6}).{5}(.{4}).{68}\\+?(-?\\d{4})(\\d).*"
)
LOCATION '/user/cloudera/records2';
Code Python (is_good_quality.py)
import re
import sys
# Streaming filter: reads whitespace-separated (year, temperature, quality)
# rows from stdin and writes "year<TAB>temperature" for the rows it keeps.
# The loop and if bodies must be indented; without indentation Python
# rejects the script with an IndentationError.
for line in sys.stdin:
    # Each input line is expected to hold exactly three fields.
    (year, temp, q) = line.strip().split()
    # Drop rows whose temperature is the literal "9999" (presumably the
    # missing-reading marker -- confirm against the data set) and keep only
    # rows whose quality value starts with one of 0, 1, 4, 5 or 9.
    if temp != "9999" and re.match("[01459]", q):
        print("%s\t%s" % (year, temp))
-- Register the filter script as a session resource so the tasks running the
-- query below can execute it.
ADD FILE /home/cloudera/Desktop/is_good_quality.py;

-- Stream (year, temperature, quality) from `records` through the script and
-- read back its two tab-separated output fields as (year, temperature).
-- The command must name the file exactly as it was added (is_good_quality.py,
-- not is_good_quality). It is launched through the `python` interpreter so the
-- script needs neither a shebang line nor the executable bit
-- (assumes `python` is on the PATH of the worker nodes).
FROM records
SELECT TRANSFORM (year, temperature, quality)
    USING 'python is_good_quality.py'
    AS year, temperature;