0% found this document useful (0 votes)
138 views2 pages

Azure Synapse Serverless SQL Pools Cheat Sheet

Uploaded by

mprit009
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
138 views2 pages

Azure Synapse Serverless SQL Pools Cheat Sheet

Uploaded by

mprit009
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 2

AZURE SYNAPSE ANALYTICS SERVERLESS SQL POOLS CHEATSHEET

Cheat Sheet Overview Create Database Create View over data lake using CSV with header column and Working with Parquet Files
This cheat sheet covers using Synapse Serverless SQL Pools Create database, security objects, schema, and external data using external data source. Views do not support file format This section includes creating file formats, external tables
with Delimited files (e.g., CSV), Parquet, and Delta. source to be used with external tables and views. This cheat CREATE VIEW <schema_name>.<view_name> and views for working with Parquet files in Azure storage
When creating an Azure Synapse Analytics workspace, sheet will use Managed Identity to authorize to storage SELECT
Serverless SQL Pools is enabled by default using the in-built <source_column_name_1>, <source_column_name_2> CREATE EXTERNAL FILE FORMAT <parquet_format_name>
CREATE DATABASE <database_name>;
engine. FROM WITH (FORMAT_TYPE = PARQUET);
CREATE MASTER KEY ENCRYPTION OPENROWSET(
SQL queries below can be run connecting to default BY PASSWORD = '<strong_password>'; BULK 'rootfolder/**',
CREATE EXTERNAL TABLE <schema_name>.<table_name>
DATA_SOURCE = '<external_data_source>',
database where user has Storage Blob Data Read access to (
CREATE DATABASE SCOPED CREDENTIAL <credential_name> FORMAT = 'CSV',
underlying storage account WITH IDENTITY='Managed Identity'; FIELDTERMINATOR = ',',
[<column_name> <data_type>,
...
PARSER_VERSION = '2.0',
SELECT * CREATE EXTERNAL DATA SOURCE <external_data_source_name> )
HEADER_ROW = TRUE)
FROM WITH ( WITH (
AS r;
OPENROWSET( LOCATION = 'https://storage.dfs.core.windows.net/container', LOCATION = 'rootfolder/**',
BULK 'https://storage.dfs.core.windows.net/container/folder/**', CREDENTIAL = <credential_name> DATA_SOURCE = <external_data_source_name>,
FORMAT = 'CSV', ); Create View over data lake using CSV with no header column FILE_FORMAT = <parquet_format_name>
FIELDTERMINATOR = ',', );
CREATE SCHEMA <schema_name> AUTHORIZATION dbo; using ordinal positioning and using external data source.
PARSER_VERSION = '2.0', specifies data types and can be used in any View or Select
HEADER_ROW = TRUE) AS r;
Working with Delimited Files CREATE VIEW <schema_name>.<view_name> Create Views over data lake using Parquet
SELECT * SELECT
This section includes creating file formats, external tables and CREATE VIEW <schema_name>.<view_name>
FROM <user_defined_column_name_1>, <user_defined_column_name_2>
OPENROWSET( views for working with delimited files in Azure storage AS
FROM

Create Views over data lake


BULK 'https://storage.dfs.core.windows.net/container/folder/**', CREATE EXTERNAL FILE FORMAT tester SELECT *
OPENROWSET(
FORMAT = 'PARQUET') AS r; WITH (FORMAT_TYPE = DELIMITEDTEXT, FROM
BULK 'rootfolder/**',
FORMAT_OPTIONS( OPENROWSET(
DATA_SOURCE = '<external_data_source>',
SELECT *
FROM
OPENROWSET(
FIELD_TERMINATOR = ',',
STRING_DELIMITER = '"',
using CSV with header
FORMAT = 'CSV',
FIELDTERMINATOR = ',',
BULK 'https://storage.dfs.core.windows.net/container/folder/**',
FORMAT = 'PARQUET')
AS r;
column
FIRST_ROW=1, PARSER_VERSION = '2.0',
BULK 'https://storage.dfs.core.windows.net/container/folder',
<other_options>)); HEADER_ROW = FALSE
FORMAT = 'DELTA') AS r;
)
Create External Table using data source and file format WITH Create Views over data lake using Parquet and use WITH to
Select Statements with File Metadata CREATE EXTERNAL TABLE <schema_name>.<table_name> ( specify data types and using external data source
( <user_defined_column_name_1> VARCHAR(20) 1, CREATE VIEW <schema_name>.<view_name>
As above, with metadata function to return source file names [<column_name> <data_type>, <user_defined_column_name_2> INT 2) AS
... AS r; AS
SELECT *, ) SELECT
r.filename() AS ParquetFileName WITH ( <column_1_from_file>,
Create Views over partitioned folders in data lake using CSV, <column_2_from_file>
FROM LOCATION = 'rootfolder/**', we use the filepath() function to expose the partitioned folder FROM
OPENROWSET( DATA_SOURCE = <external_data_source_name>,
BULK 'https://storage.dfs.core.windows.net/container/folder/**', and this can be used in WHERE clauses to filter OPENROWSET(
FILE_FORMAT = <csv_format_name>
FORMAT = 'PARQUET') AS r ); BULK 'rootfolder/**',
CREATE VIEW <schema_name>.<view_name> DATA_SOURCE = '<external_data_source>',
SELECT
Select Statements with Partition Metadata Create View over data lake using CSV <source_column_name_1>, <source_column_name_2>, WITH
FORMAT = 'PARQUET')
As above, with metadata function to return partition folders. CREATE VIEW <schema_name>.<view_name> r.filepath(1) AS PartitionLevel1, r.filepath(2) AS PartitionLevel2 (
Filepath(x) indicates the folder depth in the data lake AS FROM <column_1_from_file> VARCHAR(20),
SELECT * OPENROWSET( <column_2_from_file> INT)
SELECT *, FROM BULK 'rootfolder/*/*/**', AS r;
r.filepath(1) AS PartitionLevel1, OPENROWSET( DATA_SOURCE = '<external_data_source>',
r.filepath(2) AS PartitionLevel2 BULK 'https://storage.dfs.core.windows.net/container/folder/**', FORMAT = 'CSV',
FROM FORMAT = 'CSV', FIELDTERMINATOR = ',',
OPENROWSET( FIELDTERMINATOR = ',', PARSER_VERSION = '2.0',
BULK 'https://storage.dfs.core.windows.net/container/folder/*/*/**', PARSER_VERSION = '2.0') HEADER_ROW = TRUE)
FORMAT = 'PARQUET') AS r AS r; AS r;

serverlesssql.com 07/03/2023 v1.0 datahai.co.uk


AZURE SYNAPSE ANALYTICS SERVERLESS SQL POOLS CHEATSHEET
Create Views over partitioned folders in data lake using Create Views over data lake using Delta and use WITH to Create Stored Procedure Query partitioned Parquet / CSV files using a View that exposes
Parquet and use WITH to specify data types and using external override data types and using external data source We can create and execute stored procedures in a the partition scheme as columns using the filepath function
data source Serverless SQL Pools database SELECT
CREATE VIEW <schema_name>.<view_name>
AS Column1,
CREATE VIEW <schema_name>.<view_name> CREATE OR ALTER PROC LDW.spGetResults
SELECT Column2,
AS AS
<column_1_from_file>, COUNT(*) AS TotalCount
SELECT BEGIN
<column_2_from_file> FROM <schema>.<view_over_parquet>
<column_1_from_file>, SELECT EventType, EventDateTime FROM LDW.vwWebTelemetryCSV
FROM WHERE <partition_column_from_filepath> = 'value'
<column_2_from_file>, END;
OPENROWSET( GROUP BY
r.filepath(1) AS PartitionLevel1, BULK 'deltafolder', Column1,
r.filepath(2) AS PartitionLevel2 EXEC LDW.spGetResults
DATA_SOURCE = '<external_data_source>', Column2;
FROM FORMAT = 'DELTA')
OPENROWSET( WITH
Create Temp Tables Query Delta Lake using a View that exposes the partition
BULK 'rootfolder/*/*/**', ( Creating temp tables is possible but with limitations. Temp scheme as columns and use this as a filter in the WHERE clause
DATA_SOURCE = '<external_data_source>', <column_1_from_file> VARCHAR(20), tables only support inserting using VALUES or the results of a SELECT Column1,Column2,
FORMAT = 'PARQUET') <column_2_from_file> INT) stored procedure COUNT(*) AS TotalCount
WITH AS r; FROM <schema>.<view_over_delta>
( CREATE TABLE #tmpResults WHERE <partition_column> = 'value'
<column_1_from_file> VARCHAR(20), ( EventType VARCHAR(20),
Create Views over partitioned data lake using Delta and use GROUP BY
<column_2_from_file> INT) EventDate DATETIME2(0)); Column1,
AS r; WITH to override data types and using external data source.
Column2;
We do not use the filepath function, we specify the folder INSERT INTO #tmpResults (EventType, EventDate)
partition names in the SELECT statement. VALUES ('Event Type','2023-03-07 15:30:54');
Select Objects Metadata
Working with Delta Files INSERT INTO #tmpResults EXEC LDW.spGetResults; Show metadata from External tables including file format and
This section includes creating file formats, external tables and Example 3 level partition structure in Azure storage: external data source
views for working with the Delta format in Azure storage.
Note that partitioned external tables are not supported with Select using External Tables SELECT
et.[name] AS TableName,
Delta. Views do support partitioning. Query the external tables and views using standard T-SQL et.[location] AS TableLocation,
ef.[name] AS FileFormatName,
CREATE EXTERNAL FILE FORMAT <delta_format_name> Select from an external table, in this example we’ll select all ef.[format_type] AS FileFormatType,
WITH (FORMAT_TYPE = DELTA); the data and use standard SQL aggregation syntax es.[name] AS DataSourceName,
es.[location] AS DataSourceLocation
CREATE VIEW <schema_name>.<view_name> SELECT
CREATE EXTERNAL TABLE <schema_name>.<table_name> FROM sys.external_tables et
AS Column1,
( INNER JOIN sys.external_file_formats ef
SELECT Column2,
[<column_name> <data_type>, ON ef.file_format_id = et.file_format_id
<column_1_from_file>, COUNT(*) AS TotalCount
... INNER JOIN sys.external_data_sources es
<column_2_from_file>, FROM <schema>.<external_table_over_csv_or_parquet_or_delta>
) ON es.data_source_id = et.data_source_id;
EventYear, GROUP BY
WITH (
LOCATION = 'deltafolder/**', EventMonth, Column1,
Column2;
Get Data Processed and Usage Limits
EventDate
DATA_SOURCE = <external_data_source_name>, Use system views to select usage stats and user-
FILE_FORMAT = <delta_format_name> FROM
OPENROWSET(
configured limits
);
BULK 'deltafolder', Query data using a view, this works similarly to querying an
SELECT
DATA_SOURCE = '<external_data_source>', external table [type] AS DataUsageWindow,
Create Views over data lake using Delta FORMAT = 'DELTA') data_processed_mb,
WITH SELECT CAST(data_processed_mb AS DECIMAL(10,3)) / 1000 AS GB,
CREATE VIEW <schema_name>.<view_name> ( Column1, (CAST(data_processed_mb AS DECIMAL(10,3)) / 1000) / 1000 AS TB
AS <column_1_from_file> VARCHAR(20), Column2, FROM sys.dm_external_data_processed
SELECT * <column_2_from_file> INT, COUNT(*) AS TotalCount
FROM EventYear INT, FROM <schema>.<view_over_csv_or_parquet_or_delta> SELECT
OPENROWSET( EventMonth TINYINT, GROUP BY [name] AS DataLimitWindow,
BULK 'https://storage.dfs.core.windows.net/container/deltafolder', EventDate DATE) Column1, value AS TBValue,
FORMAT = 'DELTA') AS r; Column2; CAST(value_in_use AS INT) AS TBValueInUse
AS r;
FROM sys.configurations WHERE [name] LIKE 'Data processed %'

serverlesssql.com 07/03/2023 v1.0 datahai.co.uk

You might also like