Snowflake Notes

Only SYSADMIN and ACCOUNTADMIN can create a warehouse.

https://www.snowflake.com/en/data-cloud/pricing-options

Creating Warehouse

create or replace warehouse second_wh
with
WAREHOUSE_SIZE=XSMALL
AUTO_SUSPEND=60
AUTO_RESUME=TRUE
SCALING_POLICY='Economy'
MIN_CLUSTER_COUNT=1
MAX_CLUSTER_COUNT=3
INITIALLY_SUSPENDED=TRUE
COMMENT='This is 2nd warehouse';

ALTER WAREHOUSE SECOND_WH SET WAREHOUSE_SIZE='SMALL';

ALTER WAREHOUSE SECOND_WH SET AUTO_SUSPEND=20;

DROP WAREHOUSE WAREHOUSE_NAME;

INFORMATION_SCHEMA and PUBLIC are the two default schemas created with a new database.

ALTER DATABASE FIRST_DB RENAME TO OUR_FIRST_DB;

create or replace stage public.customer_stf
url='s3://snowflake-assignments-mc/fileformat/'
file_format=(type=csv field_delimiter='|' skip_header=1);

COPY INTO LOAN_PAYMENT
FROM 's3://bucketsnowflakese/loan_payments_data.csv'
file_format=(type=csv field_delimiter=',' skip_header=1);

copy into orders from @aws_stage
file_format=(type='csv', field_delimiter=',', skip_header=1)
files=('OrderDetails.csv');

copy into orders from @aws_stage
file_format=(type='csv', field_delimiter=',', skip_header=1)
pattern='.*OrderDetails.*';

copy into public.orders_ex
from (select s.$1, s.$2 from @EXERCISE_DB.external_stages.aws_stage s)
file_format=(type=csv, field_delimiter=',', skip_header=1)
files=('OrderDetails.csv');

copy into public.orders_ex1
from (select s.$1, s.$2, s.$3,
CASE WHEN CAST(s.$3 as int)<0 THEN 'non profitable' ELSE 'profitable' END
from @EXERCISE_DB.external_stages.aws_stage s)
file_format=(type=csv, field_delimiter=',', skip_header=1)
files=('OrderDetails.csv');

COPY INTO public.orders_ex2 (ORDER_ID, PROFIT)
FROM (select s.$1, s.$3 from @EXERCISE_DB.external_stages.aws_stage s)
file_format=(type=csv, field_delimiter=',', skip_header=1)
files=('OrderDetails.csv');

COPY INTO public.orders_error
FROM @EXERCISE_DB.public.aws_stage_error
file_format=(type=csv, field_delimiter=',', skip_header=1)
files=('OrderDetails_error.csv')
ON_ERROR='CONTINUE';

Other ON_ERROR options:
ON_ERROR='ABORT_STATEMENT'
ON_ERROR='SKIP_FILE'
ON_ERROR='SKIP_FILE_3' (skip the file if at least 3 errors are found)

CREATE OR REPLACE FILE FORMAT <fileformatname>
TYPE=csv
FIELD_DELIMITER=','
SKIP_HEADER=1
NULL_IF=('NULL','null')
EMPTY_FIELD_AS_NULL=TRUE;

ALTER FILE FORMAT <fileformatname> SET SKIP_HEADER=1;

COPY INTO public.orders_ex2 (ORDER_ID, PROFIT)
FROM @EXERCISE_DB.external_stages.aws_stage
file_format=(FORMAT_NAME=<fileformatname>)
files=('OrderDetails.csv');

CREATE OR REPLACE STAGE EXERCISE_DB.public.aws_stage
url='s3://snowflake-assignments-mc/fileformat';
-- List files in stage
LIST @EXERCISE_DB.public.aws_stage;

-- create file format object


CREATE OR REPLACE FILE FORMAT EXERCISE_DB.public.aws_fileformat
TYPE = CSV
FIELD_DELIMITER='|'
SKIP_HEADER=1;

-- Load the data


COPY INTO EXERCISE_DB.PUBLIC.CUSTOMERS
FROM @aws_stage
file_format=(FORMAT_NAME=EXERCISE_DB.public.aws_fileformat);

-- Alternative
COPY INTO EXERCISE_DB.PUBLIC.CUSTOMERS
FROM @aws_stage
file_format=EXERCISE_DB.public.aws_fileformat;

COPY INTO public.orders_error
from @public.aws_stage_copy
file_format=(type=csv, field_delimiter=',', skip_header=1)
pattern='.*Order.*'
VALIDATION_MODE=RETURN_5_ROWS;

COPY INTO public.orders_error
from @public.aws_stage_copy
file_format=(type=csv, field_delimiter=',', skip_header=1)
pattern='.*Order.*'
VALIDATION_MODE=RETURN_ERRORS;

Other COPY options:
SIZE_LIMIT=
RETURN_FAILED_ONLY=TRUE|FALSE
TRUNCATECOLUMNS=TRUE|FALSE
FORCE=TRUE|FALSE
FORCE: loads all files, regardless of whether they have already been loaded and have not
changed since they were loaded; this option can duplicate data in the table.

Load History
Load history can be queried from the
snowflake.account_usage.LOAD_HISTORY view or
<DB>.INFORMATION_SCHEMA.LOAD_HISTORY.

Select * from snowflake.account_usage.LOAD_HISTORY where
DATE(LAST_LOAD_TIME) <= DATEADD(
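
A completed version of that filter might look like this; the one-day window and the column list are illustrative assumptions:

-- illustrative: files loaded during the last day
SELECT table_name, file_name, status, row_count, last_load_time
FROM snowflake.account_usage.LOAD_HISTORY
WHERE DATE(last_load_time) >= DATEADD(day, -1, CURRENT_DATE());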

Validating rejected records

-- run right after a COPY with VALIDATION_MODE=RETURN_ERRORS
create or replace table rejected_records as
select rejected_record from table(result_scan(last_query_id()));

With ON_ERROR='CONTINUE':

select * from table(validate(public.orders_error, job_id=>'_last'));

create or replace table rejected_values as
select
SPLIT_PART(rejected_record,',',1) as order_id,
SPLIT_PART(rejected_record,',',2) as amount
from rejected_records;

Snowflake Editions
Standard Edition
Enterprise Edition – Multi-cluster warehouses
Business Critical – For higher security with extremely sensitive data
Virtual Private – Highest level of security

Standard:
- Complete DWH
- Automatic data encryption
- Broad support for standard and special data types
- Time Travel up to 1 day
- Disaster recovery for 7 days beyond Time Travel
- Network policies
- Secure data sharing
- Federated authentication & SSO
- Premier support 24/7

Enterprise:
- All Standard features
- Multi-cluster warehouse
- Time Travel up to 90 days
- Materialized views
- Search Optimization
- Column-level security
- 24 hours early access to weekly new releases

Business Critical:
- All Enterprise features
- Additional security features such as customer-managed encryption
- Support for data-specific regulations
- Database failover/failback (disaster recovery)

Virtual Private:
- All Business Critical features
- Dedicated virtual servers and a separate Snowflake environment
- Dedicated metadata store
- Isolated from all other Snowflake accounts

Difference between a CTE and a recursive CTE


While a CTE is used to define a named query for simplifying complex queries, a
Recursive CTE allows you to repeatedly refer to the result of a CTE within itself,
which is useful for hierarchical data or recursive queries.

CTE:
 A CTE is typically used to simplify complex queries and make them more
readable.
 It does not involve recursion or repeated self-referencing.
 It is evaluated once and does not iterate.

Recursive CTE:
 A recursive CTE is used to work with hierarchical data, such as organizational
charts or tree-like structures.
 It involves recursion, meaning the CTE references itself.
 It consists of two parts: the anchor member and the recursive member.
The anchor member is the base result set, and the recursive member
repeatedly executes based on the previous result.
A Recursive CTE is useful for hierarchical data, where you need to traverse
multiple levels, such as finding all employees under a specific manager, no
matter how deep the hierarchy goes.
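
A minimal sketch of a recursive CTE for the manager example; the employees table and its columns (employee_id, manager_id, name) and the starting manager id are assumptions:

WITH RECURSIVE emp_hierarchy AS (
    -- anchor member: the manager we start from
    SELECT employee_id, manager_id, name, 1 AS depth
    FROM employees
    WHERE employee_id = 101
    UNION ALL
    -- recursive member: employees reporting to the previous level
    SELECT e.employee_id, e.manager_id, e.name, h.depth + 1
    FROM employees e
    JOIN emp_hierarchy h ON e.manager_id = h.employee_id
)
SELECT * FROM emp_hierarchy;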

A Resource Monitor can be set at the account level, on a single virtual warehouse,
or on multiple virtual warehouses.
Three types of actions can be configured:
1. Suspend immediately and notify when this % of credits is used.
2. Suspend and notify when this % of credits is used.
3. Notify when this % of credits is used.
Resource monitors can be created only by ACCOUNTADMIN.
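
A minimal sketch covering all three actions; the monitor name, credit quota, and warehouse name are assumptions:

CREATE OR REPLACE RESOURCE MONITOR monthly_monitor
  WITH CREDIT_QUOTA = 100
  FREQUENCY = MONTHLY
  START_TIMESTAMP = IMMEDIATELY
  TRIGGERS ON 75 PERCENT DO NOTIFY
           ON 90 PERCENT DO SUSPEND
           ON 100 PERCENT DO SUSPEND_IMMEDIATE;

-- attach the monitor to a warehouse
ALTER WAREHOUSE COMPUTE_WH SET RESOURCE_MONITOR = monthly_monitor;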

GRANT ROLE <role> TO USER <user>;

Types of Loading

Bulk Loading:
- Loading from stages
- Uses the COPY command
- Transformations possible

Continuous Loading:
- Designed to load small volumes of data
- Uses Snowpipe (a serverless feature)
- Loads files automatically once they are added to the stage

Creating a stage
create or replace stage EXERCISE_DB.external_stages.aws_stage
url='s3://bucketsnowflakes3'
credentials=(aws_key_id='ABCD_DUMMY_ID', aws_secret_key='1234abcd_key');

LIST @aws_stage;

Time Travel: recover objects that have been dropped, or query earlier versions of the data,
within the retention period.

1. Using a timestamp
Select * from <table> at|before (TIMESTAMP => <timestamp>);
2. Using an offset
Select * from <table> at (OFFSET => -10*60);  -- 10 minutes back
3. Using a query ID
Select * from <table> before (STATEMENT => '<query id>');
4. Using UNDROP
UNDROP TABLE <tablename>;
UNDROP SCHEMA <schemaname>;
UNDROP DATABASE <dbname>;
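
A common restore pattern combines Time Travel with a clone; the table names and the 10-minute offset below are assumptions:

CREATE OR REPLACE TABLE orders_restored CLONE orders AT (OFFSET => -10*60);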

alter table rejected_values
set DATA_RETENTION_TIME_IN_DAYS=1;

create table <tablename> (...) DATA_RETENTION_TIME_IN_DAYS=1;

select * from snowflake.account_usage.TABLE_STORAGE_METRICS;

Table Types
Snowpipe
1. Enables loading as soon as a file appears in a bucket
2. Use when data needs to be available immediately for analysis
3. Snowpipe uses serverless compute instead of a user-managed virtual warehouse
Configuration steps
1. Create a stage
2. Test the COPY command
3. Create a pipe – a pipe object wrapping the COPY command
4. Configure the S3 event notification

CREATE OR REPLACE PIPE db.schema.<pipename>
AUTO_INGEST=TRUE
AS
COPY INTO <table>
FROM @<stage>;

Desc pipe <pipename>

Refresh pipe
Alter pipe <pipename> refresh;

Validate the pipe

SELECT SYSTEM$PIPE_STATUS('<pipename>');

Snowpipe error message

SELECT * FROM TABLE(VALIDATE_PIPE_LOAD(
PIPE_NAME=>'<pipename>',
START_TIME=>DATEADD(HOUR,-2,CURRENT_TIMESTAMP())));

History table to see error messages

SELECT * FROM TABLE(INFORMATION_SCHEMA.COPY_HISTORY(
TABLE_NAME=>'<tablename>',
START_TIME=>DATEADD(HOUR,-2,CURRENT_TIMESTAMP())));

DESC PIPE <pipename>;

SHOW PIPES;
SHOW PIPES LIKE '<pattern>';
SHOW PIPES IN DATABASE <dbname>;
SHOW PIPES IN SCHEMA <schemaname>;
SHOW PIPES LIKE '<pattern>' IN DATABASE <dbname>;

Pause pipe
ALTER PIPE <pipename> SET PIPE_EXECUTION_PAUSED=TRUE;

Azure Integration

CREATE OR REPLACE STORAGE INTEGRATION azure_integration_snowpipe
TYPE=EXTERNAL_STAGE
STORAGE_PROVIDER=AZURE
ENABLED=TRUE
AZURE_TENANT_ID='7ccde15a-6f43-43ac-8b1e-9ec017a0e02f'
STORAGE_ALLOWED_LOCATIONS=('azure://snowflakesnow.blob.core.windows.net/snowpipe');

DESC STORAGE INTEGRATION azure_integration_snowpipe;

create or replace file format azure_Fileformat
TYPE=CSV
field_delimiter=','
SKIP_HEADER=1;

CREATE OR REPLACE stage azure_Stage
STORAGE_INTEGRATION=azure_integration_snowpipe
URL='azure://snowflakesnow.blob.core.windows.net/snowpipe'
FILE_FORMAT=azure_Fileformat;

list @azure_Stage;

How can you load historical data files from external storage using Snowpipe?
Use ALTER PIPE <pipename> REFRESH; it queues files already present in the stage
(only files staged within the last 7 days). For older files, run a manual COPY command.
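
A minimal sketch of such a refresh; the pipe name, path prefix, and timestamp are assumptions:

ALTER PIPE azure_pipe REFRESH
  PREFIX='2024/01/'
  MODIFIED_AFTER='2024-01-01T00:00:00Z';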

Performance Tuning
Roles
CREATE USER ds1 PASSWORD='DS1' LOGIN_NAME='DS1'
DEFAULT_ROLE='DATA_ENGINEER' DEFAULT_WAREHOUSE='DS_WH'
MUST_CHANGE_PASSWORD=FALSE;
CREATE ROLE DATA_ENGINEER;
GRANT USAGE ON WAREHOUSE COMPUTE_DW TO ROLE DATA_ENGINEER;

GRANT ROLE DATA_ENGINEER TO USER DS1;

Scaling up: increasing the size of the virtual warehouse for more complex queries.
Scaling out: adding warehouses (multi-cluster warehouses) for more concurrent
users or queries.
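
A minimal sketch of both operations on an existing warehouse; the size and cluster counts are assumptions:

-- scale up: bigger warehouse for more complex queries
ALTER WAREHOUSE COMPUTE_WH SET WAREHOUSE_SIZE='LARGE';

-- scale out: let the warehouse add clusters for more concurrent queries
ALTER WAREHOUSE COMPUTE_WH SET MIN_CLUSTER_COUNT=1 MAX_CLUSTER_COUNT=3;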
Clustering: use only for very large tables.
Cluster on columns that are frequently used in WHERE clauses
and columns that are frequently used in joins.
CREATE TABLE <name> ... CLUSTER BY (<columns>);
ALTER TABLE <name> DROP CLUSTERING KEY;
ALTER TABLE <name> CLUSTER BY (DATE);

Zero Copy Cloning

Syntax: CREATE TABLE new_table CLONE source_table;
CREATE TABLE new_table CLONE source_table BEFORE (TIMESTAMP => '<timestamp>');
We can clone databases, schemas, tables, streams, file formats, sequences, stages, tasks
and pipes.
Cloning a temporary table is not possible.
Stage: named internal stages cannot be cloned.
Pipe: only pipes that reference an external stage can be cloned.
What privileges are needed?
Table – SELECT
Pipe, Task and Stream – OWNERSHIP
All other objects – USAGE

Swapping : ALTER TABLE <table_name> SWAP WITH <TARGET_TABLE_NAME>

Swaps the metadata (like table names and properties) between the two tables.

Alter SESSION SET USE_CACHED_RESULT=TRUE|FALSE


ALTER WAREHOUSE COMPUTE SUSPEND;
ALTER WAREHOUSE COMPUTE RESUME;

Materialized view:
create or replace materialized view <viewname>
AS
<select statement>;

Select * from table(information_schema.materialized_view_refresh_history());


When to use a materialized view?
1. The view would take a long time to be processed and is used frequently.
2. The underlying data changes infrequently and on a rather irregular basis.
3. If the data is updated on a regular basis, tasks and streams are a better alternative.

Only available in Enterprise Edition.

The following cannot be used in a materialized view:
UDFs
HAVING clause
ORDER BY clause
LIMIT clause

Loading semi-structured data

1. Create a stage
2. Load the raw data into a VARIANT column
3. Analyze & parse
4. Flatten & load
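
A minimal sketch of steps 1-2; the file format, stage URL, and table name are assumptions:

CREATE OR REPLACE FILE FORMAT public.json_format TYPE = JSON;

CREATE OR REPLACE STAGE public.json_stage
  URL='s3://my-bucket/json/'              -- hypothetical bucket
  FILE_FORMAT = public.json_format;

-- raw JSON lands in a single VARIANT column
CREATE OR REPLACE TABLE public.json_raw (raw_data VARIANT);

COPY INTO public.json_raw FROM @public.json_stage;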

select raw_data:first_name::string as first_name
,raw_data:last_name::string as last_name
,raw_data:Skills[0]::string as skill_1
,raw_data:Skills[1]::string as skill_2
from public.json_assighment
where raw_data:first_name::string='Florina';

Using the FLATTEN function

select raw_data:first_name::string as first_name,
f.value:language::string as languages
,f.value:level::string as level
from public.json_data, table(flatten(raw_data:spoken_languages)) f;

select distinct d.key, typeof(d.value) as data_type


from my_docs
inner join lateral flatten(doc, recursive=>true) d
where typeof(d.value) <> 'OBJECT'
order by 1;
Parquet data

select
$1:__index_level_0__::int as id
,$1:"__index_level_0__"
,$1:"cat_id"
,$1:"date"
,DATE($1:date::int) as date
,METADATA$FILENAME AS FILENAME
,METADATA$FILE_ROW_NUMBER AS ROWNUMBER
,TO_TIMESTAMP_NTZ(current_timestamp) load_Date
from @public.parquetstage limit 100;

Snowflake Task: a Task allows scheduled execution of SQL
statements, including calling a stored procedure or running procedural logic written in
Snowflake Scripting.

CREATE TASK mytask
WAREHOUSE = COMPUTE_WH
SCHEDULE = '5 MINUTE'
AS
INSERT INTO employees
VALUES (EMPLOYEE_SEQUENCE.NEXTVAL, 'F_NAME', 'L_NAME', '101');
Serverless Snowflake Task
CREATE TASK mytask_serverless
USER_TASK_MANAGED_INITIAL_WAREHOUSE_SIZE = 'XSMALL'
SCHEDULE = '5 MINUTE'
AS
INSERT INTO employees
VALUES( EMPLOYEE_SEQUENCE.NEXTVAL,'F_NAME','L_NAME','101')
;

CREATE OR REPLACE TASK my_crontask
WAREHOUSE = COMPUTE_WH
SCHEDULE = 'USING CRON * 10 * * SUN UTC'
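-- CRON fields: minute hour day-of-month month day-of-week, followed by the time zone;
-- '* 10 * * SUN UTC' runs every minute of the 10:00 UTC hour on Sundays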
AS
INSERT INTO employees
VALUES( EMPLOYEE_SEQUENCE.NEXTVAL,'F_NAME','L_NAME','101')
;

To start the task, use ALTER TASK with RESUME:

Alter task mytask resume;

ALTER TASK mytask SUSPEND;

To verify the task history:

--CHECK TASK HISTORY
SELECT * FROM TABLE(INFORMATION_SCHEMA.TASK_HISTORY()) WHERE
NAME = 'MYTASK';

Manually executing a Snowflake Task


EXECUTE TASK mytask;

Tree of Tasks
CREATE TASK <child task>
AFTER <parent task>
AS
...

ALTER TASK <child task>
ADD AFTER <parent task>;
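
A minimal sketch of a child task, assuming the mytask task defined earlier as the parent; the DELETE statement and its column name are placeholders:

CREATE OR REPLACE TASK mytask_child
WAREHOUSE = COMPUTE_WH
AFTER mytask
AS
DELETE FROM employees WHERE last_name IS NULL;  -- hypothetical cleanup step

-- child tasks are typically resumed before the root task is resumed
ALTER TASK mytask_child RESUME;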

Access Control
DAC (Discretionary Access Control): each object has an owner, who can
grant access to that object.
RBAC (Role-Based Access Control): access privileges are assigned to roles,
which are in turn assigned to users.

GRANT <privilege> ON <object> TO ROLE <role>;
GRANT ROLE <role> TO USER <user>;

create user narayana password='snowflake@123'
DEFAULT_ROLE=ACCOUNTADMIN
MUST_CHANGE_PASSWORD=FALSE;

GRANT ROLE ACCOUNTADMIN to user narayana;

create user saikarthik password='snowflake@123'
DEFAULT_ROLE=securityadmin
MUST_CHANGE_PASSWORD=FALSE;

GRANT ROLE securityadmin to user saikarthik;

create user hasini password='snowflake@123'
DEFAULT_ROLE=sysadmin
MUST_CHANGE_PASSWORD=FALSE;

GRANT ROLE sysadmin to user hasini;

Stored Procedure (Snowflake Scripting)
declare
v_string varchar(100);
begin
select 'Welcome to snowflake scripting'
into v_string;
return v_string;
exception
when expression_error then return 'sorry my bad';
end;
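
To register that block as an actual stored procedure, a minimal sketch (the procedure name is an assumption):

CREATE OR REPLACE PROCEDURE public.welcome_proc()
RETURNS VARCHAR
LANGUAGE SQL
AS
$$
DECLARE
  v_string VARCHAR(100);
BEGIN
  SELECT 'Welcome to snowflake scripting' INTO v_string;
  RETURN v_string;
EXCEPTION
  WHEN expression_error THEN RETURN 'sorry my bad';
END;
$$;

CALL public.welcome_proc();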

Streams
CREATE STREAM <streamname> ON TABLE <tablename>;

SELECT * FROM <streamname>;
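
A minimal sketch of consuming a stream; the table names and columns are assumptions:

CREATE OR REPLACE STREAM orders_stream ON TABLE public.orders;

-- consuming the stream in a DML statement advances its offset
INSERT INTO public.orders_history
SELECT order_id, amount
FROM orders_stream
WHERE METADATA$ACTION = 'INSERT';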

Sharing
1. Create a share: CREATE SHARE my_share;
2. Grant privileges to the share:
GRANT USAGE ON DATABASE my_db TO SHARE my_share;
GRANT USAGE ON SCHEMA my_db.my_schema TO SHARE my_share;
GRANT SELECT ON TABLE my_db.my_schema.my_table TO SHARE my_share;
3. Add consumer accounts to the share:
ALTER SHARE my_share ADD ACCOUNTS = <account_identifier>;
4. Import the share (on the consumer account):
CREATE DATABASE my_db FROM SHARE my_share;
Objects that can be shared: tables, external tables, secure views, secure materialized
views, secure UDFs.

-- tables that are clones of another table
Select * from Information_schema.TABLE_STORAGE_METRICS where
id<>CLONE_GROUP_ID;

Select level from dual connect by level <= 5;

LISTAGG, REPLACE, REGEXP_COUNT, REGEXP_REPLACE
