SF Dataloading Commands

The document describes how to load data into Snowflake from files stored in external sources such as S3 and Azure Blob Storage. It shows creating stages in Snowflake, loading files into staging tables, creating external tables over staged files with auto-refresh enabled, and using Snowpipe to ingest files into target tables automatically, either on a schedule or when new files are detected.


--ubuntu

create or replace stage TRIPS_STG
file_format = (format_name = 'flatfiles_db.public.csv_format');

put file:///home/ubuntu/trips_data//JC-202009-citibike-tripdata.csv @TRIPS_STG parallel=10;
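
The named file format flatfiles_db.public.csv_format is referenced throughout but never defined in this file. A minimal sketch of how it might be created, assuming a plain comma-delimited CSV with a header row (the real options may differ):

create or replace file format flatfiles_db.public.csv_format
type = csv
field_delimiter = ','
skip_header = 1
field_optionally_enclosed_by = '"'
null_if = ('');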

create or replace table TRIPS_STG (
tripduration varchar(16777216) NULL,
starttime varchar(16777216) NULL,
stoptime varchar(16777216) NULL,
startstationid varchar(16777216) NULL,
startstationname varchar(16777216) NULL,
startstationlatitude varchar(16777216) NULL,
startstationlongitude varchar(16777216) NULL,
endstationid varchar(16777216) NULL,
endstationname varchar(16777216) NULL,
endstationlatitude varchar(16777216) NULL,
endstationlongitude varchar(16777216) NULL,
bikeid varchar(16777216) NULL,
usertype varchar(16777216) NULL,
birthyear varchar(16777216) NULL,
gender varchar(16777216) NULL);

copy into stage.TRIPS_STG (tripduration, starttime, stoptime, startstationid,
startstationname, startstationlatitude, startstationlongitude, endstationid,
endstationname, endstationlatitude, endstationlongitude, bikeid, usertype,
birthyear, gender)
from (select t.$1, t.$2, t.$3, t.$4, t.$5, t.$6, t.$7, t.$8, t.$9, t.$10, t.$11,
t.$12, t.$13, t.$14, t.$15 from @stage.TRIPS_STG t)
file_format = (format_name = 'flatfiles_db.public.csv_format');
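
The same COPY can be dry-run first with VALIDATION_MODE to surface parsing problems without loading any rows. A sketch, assuming the same stage and file format; VALIDATION_MODE does not support COPY statements that transform data, so the check is done as a plain COPY:

copy into stage.TRIPS_STG
from @stage.TRIPS_STG
file_format = (format_name = 'flatfiles_db.public.csv_format')
validation_mode = 'RETURN_ERRORS';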

-- overwrite TARGET.TRIPS with the full contents of the staging table
insert overwrite into TARGET.TRIPS
select t.$1, t.$2, t.$3, t.$4, t.$5, t.$6, t.$7, t.$8, t.$9, t.$10, t.$11, t.$12,
t.$13, t.$14, t.$15
from stage.TRIPS_STG t;

--------------------------------------------------------

--windows

create or replace stage TRIPS_STG
file_format = (format_name = 'flatfiles_db.public.csv_format');

put file://D:/trips_data//JC-202009-citibike-tripdata.csv @TRIPS_STG parallel=10;

create or replace table TRIPS_STG (
tripduration varchar(16777216) NULL,
starttime varchar(16777216) NULL,
stoptime varchar(16777216) NULL,
startstationid varchar(16777216) NULL,
startstationname varchar(16777216) NULL,
startstationlatitude varchar(16777216) NULL,
startstationlongitude varchar(16777216) NULL,
endstationid varchar(16777216) NULL,
endstationname varchar(16777216) NULL,
endstationlatitude varchar(16777216) NULL,
endstationlongitude varchar(16777216) NULL,
bikeid varchar(16777216) NULL,
usertype varchar(16777216) NULL,
birthyear varchar(16777216) NULL,
gender varchar(16777216) NULL);

copy into stage.TRIPS_STG (tripduration, starttime, stoptime, startstationid,
startstationname, startstationlatitude, startstationlongitude, endstationid,
endstationname, endstationlatitude, endstationlongitude, bikeid, usertype,
birthyear, gender)
from (select t.$1, t.$2, t.$3, t.$4, t.$5, t.$6, t.$7, t.$8, t.$9, t.$10, t.$11,
t.$12, t.$13, t.$14, t.$15 from @stage.TRIPS_STG t)
file_format = (format_name = 'flatfiles_db.public.csv_format');

insert overwrite into TARGET.TRIPS
select t.$1, t.$2, t.$3, t.$4, t.$5, t.$6, t.$7, t.$8, t.$9, t.$10, t.$11, t.$12,
t.$13, t.$14, t.$15
from stage.TRIPS_STG t;

s3-snowflake-event-based
-------------------------

CREATE OR REPLACE STAGE CDS_S3DB.STAGE.S3_STAGE
URL = 's3://cds-s3tosf-demo/tables'
CREDENTIALS = (AWS_KEY_ID = 'AKIA27V2MBPWBMMCIGHH'
AWS_SECRET_KEY = 'W4kZh95tbNu89Qc9McOBZyT2qtpD9qIgkrWBnwoY');
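
The stage above embeds long-lived AWS keys directly in SQL. An alternative is to front the bucket with a storage integration so no keys appear in the stage definition; a minimal sketch, with a hypothetical integration name and a placeholder IAM role ARN:

create or replace storage integration s3_int            -- hypothetical name
type = external_stage
storage_provider = 'S3'
enabled = true
storage_aws_role_arn = 'arn:aws:iam::<account-id>:role/<snowflake-access-role>'
storage_allowed_locations = ('s3://cds-s3tosf-demo/tables');

create or replace stage CDS_S3DB.STAGE.S3_STAGE
url = 's3://cds-s3tosf-demo/tables'
storage_integration = s3_int;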

CREATE OR REPLACE TABLE CDS_S3DB.TARGET.S3_TABLE (
id STRING,
name STRING,
salary STRING,
joining_date STRING
);

Create SnowPipe:
----------------
CREATE OR REPLACE PIPE CDS_S3DB.TARGET.S3_TABLE_snowpipe AUTO_INGEST = TRUE
AS COPY INTO CDS_S3DB.TARGET.S3_TABLE
FROM @CDS_S3DB.STAGE.S3_STAGE
FILE_FORMAT = (FORMAT_NAME = 'flatfiles_db.public.csv_format');

show PIPES like '%S3_TABLE_SNOWPIPE%';
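-- the notification_channel column in the SHOW PIPES output holds the SQS queue ARN;
-- the S3 bucket's event notification (s3:ObjectCreated:*) must publish to that queue
-- for AUTO_INGEST to pick up new files.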

ls @CDS_S3DB.STAGE.S3_STAGE

select system$pipe_status('CDS_S3DB.TARGET.S3_TABLE_snowpipe');
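-- returns a JSON document; executionState should be RUNNING and pendingFileCount
-- shows how many files are queued for ingestion.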

Refreshing Pipe:
-----------------
ALTER PIPE CDS_S3DB.TARGET.S3_TABLE_snowpipe refresh
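-- REFRESH queues files already sitting in the stage (staged within the last 7 days)
-- that the pipe has not loaded yet; useful for backfilling files that arrived before
-- the bucket event notification was wired up.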

select * from CDS_S3DB.information_schema.load_history
order by last_load_time desc
limit 10;
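-- note: LOAD_HISTORY only covers direct COPY INTO loads; files ingested by Snowpipe
-- show up in COPY_HISTORY instead, which is why the next query uses that function.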

select *
from table(information_schema.copy_history(table_name=>'CDS_S3DB.TARGET.S3_TABLE',
start_time=> dateadd(hours, -1, current_timestamp())));

s3-snowflake-time-based
=========================

CREATE OR REPLACE STAGE CDS_S3DB.STAGE.S3_STAGE
URL = 's3://cds-s3tosf-demo/tables'
CREDENTIALS = (AWS_KEY_ID = 'AKIA27V2MBPWBMMCIGHH'
AWS_SECRET_KEY = 'W4kZh95tbNu89Qc9McOBZyT2qtpD9qIgkrWBnwoY');

CREATE OR REPLACE TABLE CDS_S3DB.TARGET.S3_TABLE (
id STRING,
name STRING,
salary STRING,
joining_date STRING
);

COPY INTO CDS_S3DB.TARGET.S3_TABLE
FROM @CDS_S3DB.STAGE.S3_STAGE
FILE_FORMAT = (FORMAT_NAME = 'flatfiles_db.public.csv_format');
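
Rather than driving the time-based load from an external script, the same COPY can be wrapped in a Snowflake task that runs on a schedule. A minimal sketch, assuming a hypothetical task name and warehouse:

create or replace task CDS_S3DB.TARGET.S3_TABLE_load_task
warehouse = load_wh                     -- hypothetical warehouse
schedule = '60 MINUTE'
as
copy into CDS_S3DB.TARGET.S3_TABLE
from @CDS_S3DB.STAGE.S3_STAGE
file_format = (format_name = 'flatfiles_db.public.csv_format');

alter task CDS_S3DB.TARGET.S3_TABLE_load_task resume;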

--------------------------------------------------

py S3_Snowflake_event_based.py -i table -t CDS_S3DB.target.S3_table   -> event-based
py S3_Snowflake_time_based.py -i table -t CDS_S3DB.target.S3_table    -> time-based

---------------

create or replace stage mystage
url = 'azure://myaccount.blob.core.windows.net/mycontainer/files';

create or replace external table ext_twitter_feed
integration = 'MY_AZURE_INT'
with location = @mystage/daily/
auto_refresh = true
file_format = (type = parquet)
pattern = '.*sales.*[.]parquet';
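
MY_AZURE_INT is referenced above but not defined in this file. For auto_refresh on Azure it would be a notification integration pointed at the storage queue that receives Event Grid notifications for the container; a sketch with placeholder values:

create or replace notification integration MY_AZURE_INT
enabled = true
type = queue
notification_provider = azure_storage_queue
azure_storage_queue_primary_uri = 'https://myaccount.queue.core.windows.net/myqueue'   -- placeholder
azure_tenant_id = '<tenant-id>';                                                        -- placeholder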

alter external table ext_twitter_feed refresh;

insert into sf_per select * from ext_twitter_feed;

----------------------

create or replace stage sales_data_ext_part_s3_stage
url = 's3://sales-data-2009/'
credentials = (aws_key_id = 'AKIAZ6S4J6RNU7MVXNMJ'
aws_secret_key = 'TrT4zjKtaf8MEspXx5Kx7DrY15SVFz7OK6RFlVLW')
file_format = (type = csv);

list @sales_data_ext_part_s3_stage;

create or replace external table sales_data_ext_partition_table (
year varchar(4) as substr(metadata$filename, 1, 4),
month varchar(3) as substr(metadata$filename, 6, 3)
)
partition by (year, month)
location = @sales_data_ext_part_s3_stage
auto_refresh = true
file_format = (type = csv);
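
The partition expressions assume object keys laid out as <year>/<month>/<file>.csv under the stage URL (for example 2009/Jan/sales_001.csv), so the first four characters of metadata$filename give the year and characters 6-8 the month. A quick way to sanity-check that assumption against the stage:

select metadata$filename,
substr(metadata$filename, 1, 4) as year,
substr(metadata$filename, 6, 3) as month
from @sales_data_ext_part_s3_stage
limit 5;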

select * from sales_data_ext_partition_table limit 1;

alter external table sales_data_ext_partition_table refresh;

desc external table sales_data_ext_partition_table;

select *
from table(information_schema.external_table_file_registration_history(
TABLE_NAME => 'sales_data_ext_partition_table'));

select *
from table(information_schema.external_table_files
(TABLE_NAME=>'sales_data_ext_partition_table'));

DROP EXTERNAL TABLE sales_data_ext_partition_table;

------------------
