Window Functions Presentation MariaDB Foundation NY Developer Meeting
Window Functions Presentation MariaDB Foundation NY Developer Meeting
WITH engineers AS (
SELECT *
FROM employees
WHERE dept='Engineering'
)
SELECT *
FROM engineers
WHERE ...
What are CTEs?
Syntax
WITH engineers AS (    -- Keyword: WITH
SELECT *
FROM employees
WHERE dept='Engineering'
)
SELECT *
FROM engineers
WHERE ...
What are CTEs?
Syntax
WITH engineers AS (    -- CTE Name: engineers
SELECT *
FROM employees
WHERE dept='Engineering'
)
SELECT *
FROM engineers
WHERE ...
What are CTEs?
Syntax
WITH engineers AS (
-- CTE Body: the query between the parentheses
SELECT *
FROM employees
WHERE dept='Engineering'
)
SELECT *
FROM engineers
WHERE ...
What are CTEs?
Syntax
WITH engineers AS (
SELECT *
FROM employees
WHERE dept='Engineering'
)
SELECT *
FROM engineers    -- CTE Usage
WHERE ...
What are CTEs?
CTEs are similar to derived tables.
SELECT *
FROM
sales_product_year CUR,
sales_product_year PREV
WHERE
CUR.product = PREV.product AND
CUR.year = PREV.year + 1 AND
CUR.total_amt > PREV.total_amt
CTE optimization #1
CTE reuse
SELECT *
FROM
sales_product_year CUR,    -- We can reuse CTE here!
sales_product_year PREV
WHERE
CUR.product = PREV.product AND
CUR.year = PREV.year + 1 AND
CUR.total_amt > PREV.total_amt
CTE optimization #1
CTE reuse
SELECT *
FROM
sales_product_year CUR,    -- Materialize only once!
sales_product_year PREV
WHERE
CUR.product = PREV.product AND
CUR.year = PREV.year + 1 AND
CUR.total_amt > PREV.total_amt
CTE optimization #1
CTE reuse
SELECT *
FROM
sales_product_year CUR,    -- Materialize only once!
sales_product_year PREV
WHERE
CUR.product = PREV.product AND
CUR.year = PREV.year + 1 AND
CUR.total_amt > PREV.total_amt
CTE optimization #2
CTE merging
WITH engineers AS (
SELECT * FROM EMPLOYEES
WHERE
dept='Development'
)
SELECT
...
FROM
engineers E,
support_cases SC
WHERE
E.name=SC.assignee and
SC.created='2017-04-10' and
E.location='New York'
Requirements:
● CTE is used in a JOIN, no GROUP
BY, DISTINCT, etc.
CTE optimization #2
CTE merging
Requirements:
● CTE is used in a JOIN, no GROUP
BY, DISTINCT, etc.
CTE optimization #2
CTE merging
WITH sales_per_year AS (
SELECT
year(order.date) AS year,
sum(order.amount) AS sales
FROM
order
GROUP BY
year
)
SELECT *
FROM sales_per_year
WHERE
year in ('2015','2016')
CTE optimization #3
Condition pushdown
WITH sales_per_year AS (
SELECT
year(order.date) AS year,
sum(order.amount) AS sales
FROM
order
GROUP BY
year
)
SELECT *
FROM sales_per_year
WHERE
year in ('2015','2016')
Requirements:
● Merging is not possible (GROUP BY
exists)
● Conditions in outer select
CTE optimization #3
Condition pushdown
Requirements:
● Merging is not possible (GROUP BY
exists)
● Conditions in outer select
CTE optimization #3
Condition pushdown
MariaDB 10.2 ✔ ✔ ✘
MS SQL Server ✔ ✔ ✘
PostgreSQL ✘ ✘ ✔
MySQL 8.0.0-labs-optimizer ✔ ✘ ✔*
SELECT
email, first_name,
last_name, account_type
FROM users
ORDER BY email;
+------------------------+------------+-----------+--------------+
| email | first_name | last_name | account_type |
+------------------------+------------+-----------+--------------+
| [email protected] | Admin | Boss | admin |
| [email protected] | Bob | Carlsen | regular |
| [email protected] | Eddie | Stevens | regular |
| [email protected] | John | Smith | regular |
| [email protected] | Root | Chief | admin |
+------------------------+------------+-----------+--------------+
What are window functions?
Let’s start with a “function like” example
SELECT
row_number() over () as rnum,
email, first_name,
last_name, account_type
FROM users
ORDER BY email;
+------+------------------------+------------+-----------+--------------+
| rnum | email | first_name | last_name | account_type |
+------+------------------------+------------+-----------+--------------+
| 1 | [email protected] | Admin | Boss | admin |
| 2 | [email protected] | Bob | Carlsen | regular |
| 3 | [email protected] | Eddie | Stevens | regular |
| 4 | [email protected] | John | Smith | regular |
| 5 | [email protected] | Root | Chief | admin |
+------+------------------------+------------+-----------+--------------+
What are window functions?
Let’s start with a “function like” example
SELECT
row_number() over () as rnum,
email, first_name,
last_name, account_type
FROM users
ORDER BY email;
+------+------------------------+------------+-----------+--------------+
| rnum | email | first_name | last_name | account_type |
+------+------------------------+------------+-----------+--------------+
| 1 | [email protected] | Admin | Boss | admin |
| 2 | [email protected] | Bob | Carlsen | regular |
| 3 | [email protected] | Eddie | Stevens | regular |
| 4 | [email protected] | John | Smith | regular |
| 5 | [email protected] | Root | Chief | admin |
+------+------------------------+------------+-----------+--------------+
What are window functions?
Let’s start with a “function like” example
SELECT
row_number() over () as rnum,
email, first_name,
last_name, account_type
FROM users
ORDER BY email;
+------+------------------------+------------+-----------+--------------+
| rnum | email | first_name | last_name | account_type |
+------+------------------------+------------+-----------+--------------+
| 2 | [email protected] | Admin | Boss | admin |
| 1 | [email protected] | Bob | Carlsen | regular |
| 3 | [email protected] | Eddie | Stevens | regular |
| 5 | [email protected] | John | Smith | regular |
| 4 | [email protected] | Root | Chief | admin |
+------+------------------------+------------+-----------+--------------+
What are window functions?
Let’s start with a “function like” example
SELECT
row_number() over () as rnum,
email, first_name,
last_name, account_type
FROM users
ORDER BY email;
+------+------------------------+------------+-----------+--------------+
| rnum | email | first_name | last_name | account_type |
+------+------------------------+------------+-----------+--------------+
| 5 | [email protected] | Admin | Boss | admin |
| 4 | [email protected] | Bob | Carlsen | regular |
| 3 | [email protected] | Eddie | Stevens | regular |
| 2 | [email protected] | John | Smith | regular |
| 1 | [email protected] | Root | Chief | admin |
+------+------------------------+------------+-----------+--------------+
What are window functions?
Let’s start with a “function like” example
SELECT
row_number() over (ORDER BY email) as rnum,
email, first_name,
last_name, account_type
FROM users
ORDER BY email;
+------+------------------------+------------+-----------+--------------+
| rnum | email | first_name | last_name | account_type |
+------+------------------------+------------+-----------+--------------+
| 1 | [email protected] | Admin | Boss | admin |
| 2 | [email protected] | Bob | Carlsen | regular |
| 3 | [email protected] | Eddie | Stevens | regular |
| 4 | [email protected] | John | Smith | regular |
| 5 | [email protected] | Root | Chief | admin |
+------+------------------------+------------+-----------+--------------+
What are window functions?
Let’s start with a “function like” example
SELECT
row_number() over (ORDER BY email) as rnum,
email, first_name,
last_name, account_type
FROM users
ORDER BY email;
+------+------------------------+------------+-----------+--------------+
| rnum | email | first_name | last_name | account_type |
+------+------------------------+------------+-----------+--------------+
| 1 | [email protected] | Admin | Boss | admin |
| 2 | [email protected] | Bob | Carlsen | regular |
| 3 | [email protected] | Eddie | Stevens | regular |
| 4 | [email protected] | John | Smith | regular |
| 5 | [email protected] | Root | Chief | admin |
+------+------------------------+------------+-----------+--------------+
What are window functions?
Let’s start with a “function like” example
SELECT
row_number() over (PARTITION BY account_type ORDER BY email) as rnum,
email, first_name,
last_name, account_type
FROM users
ORDER BY account_type, email;
SELECT
time, value
FROM data_points
ORDER BY time;
What are window functions?
How about that aggregate similarity?
SELECT
time, value
FROM data_points
ORDER BY time;
SELECT
time, value,
avg(value) over (ORDER BY time
ROWS BETWEEN 3 PRECEDING
AND 3 FOLLOWING)
FROM data_points
ORDER BY time;
What are window functions?
How about that aggregate similarity?
SELECT
time, value
FROM data_points
ORDER BY time;
SELECT
time, value,
avg(value) over (ORDER BY time
ROWS BETWEEN 3 PRECEDING
AND 3 FOLLOWING)
FROM data_points
ORDER BY time;
What are window functions?
How about that aggregate similarity?
SELECT
time, value
FROM data_points
ORDER BY time;
SELECT
time, value,
avg(value) over (ORDER BY time
ROWS BETWEEN 6 PRECEDING
AND 6 FOLLOWING)
FROM data_points
ORDER BY time;
What are window functions?
So how do frames work?
SELECT SELECT
time, value, time, value,
sum(value) OVER ( sum(value) OVER (
ORDER BY time ORDER BY time
ROWS BETWEEN 1 PRECEDING ROWS BETWEEN 2 PRECEDING
AND 1 FOLLOWING) AND 2 FOLLOWING)
FROM data_points FROM data_points
ORDER BY time; ORDER BY time;
+----------+-------+------+ +----------+-------+------+
| time | value | sum | | time | value | sum |
+----------+-------+------+ +----------+-------+------+
| 10:00:00 | 2 | | | 10:00:00 | 2 | |
| 11:00:00 | 5 | | | 11:00:00 | 5 | |
| 12:00:00 | 4 | | | 12:00:00 | 4 | |
| 13:00:00 | 4 | | | 13:00:00 | 4 | |
| 14:00:00 | 1 | | | 14:00:00 | 1 | |
| 15:00:00 | 5 | | | 15:00:00 | 5 | |
| 15:00:00 | 2 | | | 15:00:00 | 2 | |
| 15:00:00 | 2 | | | 15:00:00 | 2 | |
+----------+-------+------+ +----------+-------+------+
What are window functions?
So how do frames work?
SELECT SELECT
time, value, time, value,
sum(value) OVER ( sum(value) OVER (
ORDER BY time ORDER BY time
ROWS BETWEEN 1 PRECEDING ROWS BETWEEN 2 PRECEDING
AND 1 FOLLOWING) AND 2 FOLLOWING)
FROM data_points FROM data_points
ORDER BY time; ORDER BY time;
+----------+-------+------+ +----------+-------+------+
| time | value | sum | | time | value | sum |
+----------+-------+------+ +----------+-------+------+
| 10:00:00 | 2 | 7 | (2 + 5) | 10:00:00 | 2 | 11 | (2 + 5 + 4)
| 11:00:00 | 5 | | | 11:00:00 | 5 | |
| 12:00:00 | 4 | | | 12:00:00 | 4 | |
| 13:00:00 | 4 | | | 13:00:00 | 4 | |
| 14:00:00 | 1 | | | 14:00:00 | 1 | |
| 15:00:00 | 5 | | | 15:00:00 | 5 | |
| 15:00:00 | 2 | | | 15:00:00 | 2 | |
| 15:00:00 | 2 | | | 15:00:00 | 2 | |
+----------+-------+------+ +----------+-------+------+
What are window functions?
So how do frames work?
SELECT SELECT
time, value, time, value,
sum(value) OVER ( sum(value) OVER (
ORDER BY time ORDER BY time
ROWS BETWEEN 1 PRECEDING ROWS BETWEEN 2 PRECEDING
AND 1 FOLLOWING) AND 2 FOLLOWING)
FROM data_points FROM data_points
ORDER BY time; ORDER BY time;
+----------+-------+------+ +----------+-------+------+
| time | value | sum | | time | value | sum |
+----------+-------+------+ +----------+-------+------+
| 10:00:00 | 2 | 7 | (2 + 5) | 10:00:00 | 2 | 11 | (2 + 5 + 4)
| 11:00:00 | 5 | 11 | (2 + 5 + 4) | 11:00:00 | 5 | 15 | (2 + 5 + 4 + 4)
| 12:00:00 | 4 | | | 12:00:00 | 4 | |
| 13:00:00 | 4 | | | 13:00:00 | 4 | |
| 14:00:00 | 1 | | | 14:00:00 | 1 | |
| 15:00:00 | 5 | | | 15:00:00 | 5 | |
| 15:00:00 | 2 | | | 15:00:00 | 2 | |
| 15:00:00 | 2 | | | 15:00:00 | 2 | |
+----------+-------+------+ +----------+-------+------+
What are window functions?
So how do frames work?
SELECT SELECT
time, value, time, value,
sum(value) OVER ( sum(value) OVER (
ORDER BY time ORDER BY time
ROWS BETWEEN 1 PRECEDING ROWS BETWEEN 2 PRECEDING
AND 1 FOLLOWING) AND 2 FOLLOWING)
FROM data_points FROM data_points
ORDER BY time; ORDER BY time;
+----------+-------+------+ +----------+-------+------+
| time | value | sum | | time | value | sum |
+----------+-------+------+ +----------+-------+------+
| 10:00:00 | 2 | 7 | (2 + 5) | 10:00:00 | 2 | 11 | (2 + 5 + 4)
| 11:00:00 | 5 | 11 | (2 + 5 + 4) | 11:00:00 | 5 | 15 | (2 + 5 + 4 + 4)
| 12:00:00 | 4 | 13 | (5 + 4 + 4) | 12:00:00 | 4 | 16 | (2 + 5 + 4 + 4 + 1)
| 13:00:00 | 4 | | | 13:00:00 | 4 | |
| 14:00:00 | 1 | | | 14:00:00 | 1 | |
| 15:00:00 | 5 | | | 15:00:00 | 5 | |
| 15:00:00 | 2 | | | 15:00:00 | 2 | |
| 15:00:00 | 2 | | | 15:00:00 | 2 | |
+----------+-------+------+ +----------+-------+------+
What are window functions?
So how do frames work?
SELECT SELECT
time, value, time, value,
sum(value) OVER ( sum(value) OVER (
ORDER BY time ORDER BY time
ROWS BETWEEN 1 PRECEDING ROWS BETWEEN 2 PRECEDING
AND 1 FOLLOWING) AND 2 FOLLOWING)
FROM data_points FROM data_points
ORDER BY time; ORDER BY time;
+----------+-------+------+ +----------+-------+------+
| time | value | sum | | time | value | sum |
+----------+-------+------+ +----------+-------+------+
| 10:00:00 | 2 | 7 | (2 + 5) | 10:00:00 | 2 | 11 | (2 + 5 + 4)
| 11:00:00 | 5 | 11 | (2 + 5 + 4) | 11:00:00 | 5 | 15 | (2 + 5 + 4 + 4)
| 12:00:00 | 4 | 13 | (5 + 4 + 4) | 12:00:00 | 4 | 16 | (2 + 5 + 4 + 4 + 1)
| 13:00:00 | 4 | 9 | (4 + 4 + 1) | 13:00:00 | 4 | 19 | (5 + 4 + 4 + 1 + 5)
| 14:00:00 | 1 | | | 14:00:00 | 1 | |
| 15:00:00 | 5 | | | 15:00:00 | 5 | |
| 15:00:00 | 2 | | | 15:00:00 | 2 | |
| 15:00:00 | 2 | | | 15:00:00 | 2 | |
+----------+-------+------+ +----------+-------+------+
What are window functions?
So how do frames work?
SELECT SELECT
time, value, time, value,
sum(value) OVER ( sum(value) OVER (
ORDER BY time ORDER BY time
ROWS BETWEEN 1 PRECEDING ROWS BETWEEN 2 PRECEDING
AND 1 FOLLOWING) AND 2 FOLLOWING)
FROM data_points FROM data_points
ORDER BY time; ORDER BY time;
SELECT SELECT
time, value, time, value,
sum(value) OVER ( sum(value) OVER (
ORDER BY time ORDER BY time
ROWS BETWEEN 1 PRECEDING ROWS BETWEEN 2 PRECEDING
AND 1 FOLLOWING) AND 2 FOLLOWING)
FROM data_points FROM data_points
ORDER BY time; ORDER BY time;
SELECT SELECT
time, value, time, value,
sum(value) OVER ( sum(value) OVER (
ORDER BY time ORDER BY time
ROWS BETWEEN 1 PRECEDING ROWS BETWEEN 2 PRECEDING
AND 1 FOLLOWING) AND 2 FOLLOWING)
FROM data_points FROM data_points
ORDER BY time; ORDER BY time;
+----------+-------+------+ +----------+-------+------+
| time | value | sum | | time | value | sum |
+----------+-------+------+ +----------+-------+------+
| 10:00:00 | 2 | 7 | (2 + 5) | 10:00:00 | 2 | 11 | (2 + 5 + 4)
| 11:00:00 | 5 | 11 | (2 + 5 + 4) | 11:00:00 | 5 | 15 | (2 + 5 + 4 + 4)
| 12:00:00 | 4 | 13 | (5 + 4 + 4) | 12:00:00 | 4 | 16 | (2 + 5 + 4 + 4 + 1)
| 13:00:00 | 4 | 9 | (4 + 4 + 1) | 13:00:00 | 4 | 19 | (5 + 4 + 4 + 1 + 5)
| 14:00:00 | 1 | 10 | (4 + 1 + 5) | 14:00:00 | 1 | 16 | (4 + 4 + 1 + 5 + 2)
| 15:00:00 | 5 | 8 | (1 + 5 + 2) | 15:00:00 | 5 | 14 | (4 + 1 + 5 + 2 + 2)
| 15:00:00 | 2 | 9 | (5 + 2 + 2) | 15:00:00 | 2 | 10 | (1 + 5 + 2 + 2)
| 15:00:00 | 2 | 4 | (2 + 2) | 15:00:00 | 2 | 9 | (5 + 2 + 2)
+----------+-------+------+ +----------+-------+------+
Scenario 1 - Regular SQL
Given a set of bank transactions,
compute the account balance after each transaction.
+---------------------+----------------+-------------+--------+
| timestamp | transaction_id | customer_id | amount |
+---------------------+----------------+-------------+--------+
| 2016-09-01 10:00:00 | 1 | 1 | 1000 |
| 2016-09-01 11:00:00 | 2 | 1 | -200 |
| 2016-09-01 12:00:00 | 3 | 1 | -600 |
| 2016-09-01 13:00:00 | 5 | 1 | 400 |
| 2016-09-01 12:10:00 | 4 | 2 | 300 |
| 2016-09-01 14:00:00 | 6 | 2 | 500 |
| 2016-09-01 15:00:00 | 7 | 2 | 400 |
+---------------------+----------------+-------------+--------+
Scenario 1 - Regular SQL
Given a set of bank transactions,
compute the account balance after each transaction.
+---------------------+----------------+-------------+--------+---------+
| timestamp | transaction_id | customer_id | amount | balance |
+---------------------+----------------+-------------+--------+---------+
| 2016-09-01 10:00:00 | 1 | 1 | 1000 | 1000 |
| 2016-09-01 11:00:00 | 2 | 1 | -200 | 800 |
| 2016-09-01 12:00:00 | 3 | 1 | -600 | 200 |
| 2016-09-01 13:00:00 | 5 | 1 | 400 | 600 |
| 2016-09-01 12:10:00 | 4 | 2 | 300 | 300 |
| 2016-09-01 14:00:00 | 6 | 2 | 500 | 800 |
| 2016-09-01 15:00:00 | 7 | 2 | 400 | 1200 |
+---------------------+----------------+-------------+--------+---------+
Scenario 1 - Window Functions
Given a set of bank transactions,
compute the account balance after each transaction.
+---------------------+----------------+-------------+--------+---------+
| timestamp | transaction_id | customer_id | amount | balance |
+---------------------+----------------+-------------+--------+---------+
| 2016-09-01 10:00:00 | 1 | 1 | 1000 | 1000 |
| 2016-09-01 11:00:00 | 2 | 1 | -200 | 800 |
| 2016-09-01 12:00:00 | 3 | 1 | -600 | 200 |
| 2016-09-01 13:00:00 | 5 | 1 | 400 | 600 |
| 2016-09-01 12:10:00 | 4 | 2 | 300 | 300 |
| 2016-09-01 14:00:00 | 6 | 2 | 500 | 800 |
| 2016-09-01 15:00:00 | 7 | 2 | 400 | 1200 |
+---------------------+----------------+-------------+--------+---------+
Scenario 1 - Performance
Given a set of bank transactions,
compute the account balance after each transaction.
■ “Top-N” queries
SELECT
(SELECT count(*) + 1
FROM employee_salaries AS t2
WHERE t1.name != t2.name AND
t1.dept = t2.dept AND
t2.salary > t1.salary)
AS ranking,
dept, name, salary
FROM employee_salaries AS t1
WHERE (SELECT count(*)
FROM employee_salaries AS t2
WHERE t1.name != t2.name AND
t1.dept = t2.dept AND
t2.salary > t1.salary) < 5
ORDER BY dept, salary DESC;
+---------+-------+----------+--------+
| ranking | dept | name | salary |
+---------+-------+----------+--------+
| 1 | Eng | Kristian | 3500 |
| 2 | Eng | Sergei | 3000 |
| 3 | Eng | Sami | 2800 |
| 4 | Eng | Arnold | 2500 |
| 5 | Eng | Scarlett | 2200 |
| 1 | Sales | Bob | 500 |
| 2 | Sales | Jill | 400 |
| 3 | Sales | Lucy | 300 |
| 3 | Sales | Tom | 300 |
| 5 | Sales | Axel | 250 |
+---------+-------+----------+--------+
What if I want a “rank” column?
Scenario 2 - Window Functions
Retrieve the top 5 earners by department.
Contact me at:
[email protected]
[email protected]
Blog: vicentiu.ciorbaru.io