0% found this document useful (0 votes)
25 views

Resumão - SQL Com Databricks

The document provides a cheat sheet overview of common SQL commands and functions for Databricks SQL (DBSQL) including creating and modifying tables, inserting and updating data, joins, aggregations, and Delta Lake features like change data capture and cloning tables.

Uploaded by

Cristiano Silva
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
25 views

Resumão - SQL Com Databricks

The document provides a cheat sheet overview of common SQL commands and functions for Databricks SQL (DBSQL) including creating and modifying tables, inserting and updating data, joins, aggregations, and Delta Lake features like change data capture and cloning tables.

Uploaded by

Cristiano Silva
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 2

DBSQL CHEATSHEET DELETE / DROP A TABLE JOINS

DELETE JOIN
Databricks SQL (DB SQL) is a serverless data --Join two tables (via inner, outer, left, or right join)
--Delete rows in a table based upon a condition
warehouse on the Databricks Lakehouse Platform DELETE FROM sales SELECT city.name, country.name
that lets you run all your SQL and BI applications at WHERE predicate; FROM city
[INNER|OUTER|LEFT|RIGHT] JOIN country
scale with up to 12x better price/performance, a DROP TABLE ON city.country_id = country.id;

unified governance model, open formats and APIs, DROP TABLE [IF EXISTS] sales;

and your tools of choice – no lock-in. TRUNCATE COMMON SELECT QUERIES


--Keep a table but delete all of its data.
CREATE TABLES TRUNCATE TABLE sales; SUBQUERIES
--Query an intermediate result set using a subquery.
CREATE TABLE SELECT * FROM sales
WHERE sales_id IN (
--Create a table and define its schema.
CREATE TABLE default.sales ( ADD/MODIFY DATA SELECT DISTINCT sales_id
FROM visit
transaction_datetime TIMESTAMP,
UPDATE );
refund_datetime TIMESTAMP,
bank_zip INT, --Update column values for rows that match a predicate ALIAS COLUMN
customer_zip INT UPDATE sales
); --Alias a column
SET bank_office = 'Augusta' SELECT sales_id AS sales_id_new
WHERE employee_state = 'Maine';
CREATE VIEW FROM sales;

CREATE VIEW mytempview INSERT INTO ALIAS TABLE


AS SELECT * FROM default.sales; --Alias a table
--Insert comma separated values directly into a table.
INSERT [OVERWRITE] INTO mytable VALUES SELECT * FROM my_sales AS m;
CREATE OR REPLACE TABLE ('Harper Bryant', 'Employee', 98101),
CREATE OR REPLACE TABLE default.sales ('Sara Brown', 'Contractor', 48103); ORDER BY
parquet.`/path/to/data`; --Return a table sorted by a column's values. Values
MERGE INTO returned in ascending order by default, or specify DESC.
SELECT productname, sales_id FROM sales
--Upsert (update + insert) using MERGE
ORDER BY sales_id [DESC];
MERGE INTO target
ALTER TABLE USING updates
WHERE
ON target.Id = updates.Id
RENAME TABLE WHEN MATCHED AND target.delete_flag = "true" THEN --Filter a table based upon rows that match one or more
DELETE specific predicates (text or numeric filtering)
ALTER TABLE sales WHEN MATCHED THEN SELECT * FROM sales
RENAME TO salesperson; UPDATE SET * WHERE product_name = "Lego set" AND sales_id > 50000;
WHEN NOT MATCHED THEN
RENAME COLUMN INSERT (date, Id, data) -- or, use INSERT * JSON
ALTER TABLE sales VALUES (date, Id, data); --extract values from a JSON string using the : operator,
RENAME COLUMN customer_first_name TO customer_name; delimiters and identifiers
SELECT raw:owner, raw:OWNER, raw:['owner'], raw:['OWNER']
ADD COLUMNS FROM sales;
ALTER TABLE sales ADD columns (time TIMESTAMP, col_name1 IDENTITY COLUMNS --Extract nested fields from JSON string using the
data_type2); : operator and dot notation
AUTO-INCREMENTING IDENTITY COLUMNS SELECT raw:store.bicycle FROM sales;
CHECK (CONSTRAINTS)
--Add an auto-incrementing identity column --Extract values from an array in JSON using the
--Add a CHECK constraint CREATE TABLE sales : operator
ALTER TABLE sales (id BIGINT GENERATED ALWAYS AS IDENTITY COMMENT 'Surrogate
ADD CONSTRAINT dateWithinRange CHECK (year > '2000-01- key for AccountID',
01'); accountid BIGINT, CLONE
samplecolumn STRING -- Deep clone is a complete, independent copy of the source
NOT NULL (CONSTRAINTS) ); table
CREATE OR REPLACE TABLE default.sales DEEP CLONE
--Add a NOT NULL constraint SHOW IDENTITY COLUMNS parquet.`/path/to/data`;
ALTER TABLE sales --Returns the CREATE TABLE statement that was used to -- Shallow clone is a copy of the source table’s definition,
ADD CONSTRAINT customer_name IS NOT NULL; create a given table or view. Allows you to see which but refers to the source table’s files
column(s) are identity columns. CREATE OR REPLACE TABLE default.sales SHALLOW CLONE
DROP CONSTRAINT (CONSTRAINTS)
SHOW CREATE TABLE sales; parquet.`/path/to/data`;
ALTER TABLE default.sales
DROP CONSTRAINT dateWithinRange;
DBSQL CHEATSHEET DELTA LAKE PERFORMANCE TUNING
CHANGE DATA FEED CACHE
COMMON AGGREGATIONS --Read table changes starting at a specified version number --Cache a table in memory to speed up queries.
COUNT SELECT * FROM table_changes('sales', <start version #>) CACHE SELECT sales;
--Enable Change Data Feed on Delta Lake table
--View count of distinct records in a table
ALTER TABLE sales SET TBLPROPERTIES
EXPLAIN
SELECT COUNT([DISTINCT] sales) --View the physical plan for execution of a given SQL
FROM orderhistory; (delta.enableChangeDataFeed = true);
statement.
CONVERT TO DELTA EXPLAIN [EXTENDED] SELECT * FROM sales;
AVERAGE/MIN/MAX
--Convert a table to Delta Lake format
--View average (mean), sum, or min and max values in a
TUNE WIDE TABLES
CONVERT TO DELTA sales;
column --Sets the number of columns to collect statistics on
SELECT AVG(sales), SUM(sales), MIN(sales), MAX(sales) VACUUM ALTER TABLE SET TBLPROPERTIES
FROM orderhistory; ('delta.dataSkippingNumIndexedCols' = 64);
--Delete files no longer used by the table from cloud
storage
GROUP BY/HAVING OPTIMIZE
VACUUM sales [RETAIN num HOURS] [DRY RUN];
--View an aggregation grouped by a column's values. --OPTIMIZE Delta tables, bin packs tables for better
Optionally, specify a predicate using the HAVING clause TIME TRAVEL performance
that rows must match to be included in the aggregation. --Query historical versions of a Delta Lake table by OPTIMIZE sales
version number or timestamp
SELECT SUM(sales) ANALYZE
FROM orderhistory SELECT * FROM table_name [VERSION AS OF 0 | TIMESTAMP AS
OF "2020-12-18"] --Analyze table to collect statistics on entire column
GROUP BY country
ANALYZE TABLE sales COMPUTE STATISTICS FOR ALL COLUMNS;
[HAVING item_type="soup"]; --View Delta Lake transaction log (table history)
DESCRIBE HISTORY sales; OPTIMIZE/ZORDER
--Periodic OPTIMIZE and ZORDER, run on a nightly basis
DESCRIBE
OPTIMIZE customer_table ZORDER BY customer_id, customer_seq;
PERMISSIONS --View [detailed] information about a database or table
DESCRIBE [DETAIL] sales;
GRANT
-- Grant database and table permissions for admin group DATA INGESTION
GRANT ALL PRIVILEGES ON [DATABASE default|TABLE sales] TO GEOSPATIAL FUNCTIONS COPY INTO
`[email protected]`| admins;
H3 COPY INTO iot_devices
REVOKE --Returns the H3 cell ID (as a BIGINT) corresponding to the FROM "/databricks-datasets/iot/"
provided longitude and latitude at the specified resolution FILEFORMAT = JSON|CSV|PARQUET|etc.;
--Revoke privileges on databases or tables
SELECT h3_longlatash3(longitudeExpr, latitudeExpr,
REVOKE [SELECT TABLE|ALL PRIVILEGES|CREATE TABLE|etc.] ON
resolutionExpr)
sales FROM [`[email protected]`|admins];
--Returns an ARRAY of H3 cell IDs (represented as a BIGINTs) CREATE FUNCTION
SHOW GRANT corresponding to hexagons or pentagons, of the specified
resolution, that are contained by the input areal geography CREATE FUNCTION
--Show a user's permissions on a table SELECT h3_polyfillash3(geographyExpr, resolutionExpr)
SHOW GRANT `[email protected]` ON TABLE default.sales; -- Create a permanent function with parameters.
--Returns the H3 cell IDs that are within (grid) distance k CREATE FUNCTION area(x DOUBLE, y DOUBLE) RETURNS DOUBLE
of the origin cell ID RETURN x * y;
SELECT h3_kring(h3CellIdExpr, kExpr)
-- Use a SQL function in the SELECT clause of a query.
INFORMATION SCHEMA --Returns the grid distance of the two input H3 cell IDs
SELECT h3_distance(h3CellId1Expr, h3CellId2Expr)
SELECT area(c1, c2) AS area FROM t;
-- Use a SQL function in the WHERE clause of a query.
INFORMATION SCHEMA --Returns the parent H3 cell ID of the input H3 cell ID at
SELECT * FROM t WHERE area(c1, c2) > 0;
the specified resolution
--View all tables that have been created in the last 24 SELECT h3_toparent(h3CellIdExpr, resolutionExpr) -- Compose SQL functions.
hours CREATE FUNCTION square(x DOUBLE) RETURNS DOUBLE RETURN
SELECT table_name, table_owner, created_by, last_altered, area(x, x);
last_altered_by, table_catalog
FROM system.information_schema.tables CTE SELECT c1, square(c1) AS square FROM t

WHERE datediff(now(), last_altered) < 1; -- Create a non-deterministic function


CTE CREATE FUNCTION roll_dice()
--View how many tables you have in each schema RETURNS INT
--Create a common table expression (CTE) that can be
SELECT table_schema, count(table_name) NOT DETERMINISTIC
easily reused in other queries.
FROM system.information_schema.tables CONTAINS SQL
WITH common_table_expression_name
WHERE table_schema = 'tpch' COMMENT 'Roll a single 6 sided die'
AS (
GROUP BY table_schema RETURN (rand() * 6)::INT + 1;
SELECT
ORDER BY 2 DESC
product_name as product, -- Roll a single 6-sided die
AVG(sales) as avg_sales
USE SELECT roll_dice();
FROM orderhistory
--Switch to a different database; the database default is GROUP BY product
used if none is specified. Provided to the open source community by Databricks
)
USE database_name; SELECT * FROM common_table_expression_name ©️Databricks 2023. All rights reserved. Apache, Apache Spark, Spark and the Spark logo
are trademarks of the Apache Software Foundation.

You might also like