SQL1 Merged
SQL1 Merged
Find out which vendors primarily sell fresh products and which don’t. Add an
identifier column for the same.
-- Flag each vendor as primarily selling fresh products or not, based on vendor_type.
-- NOTE(review): the text at this spot was two interleaved, truncated fragments
-- (a CASE version and an IF version). The labels and the source table below are
-- a reconstruction -- confirm against the original notes.
SELECT
    vendor_id,
    vendor_name,
    vendor_type,
    CASE
        WHEN LOWER(vendor_type) LIKE "%fresh%" THEN "Fresh Produce"
        ELSE "Other"
    END AS vendor_type_condensed
    -- Equivalent BigQuery shorthand:
    -- IF(UPPER(vendor_type) LIKE "%FRESH%", "Fresh Produce", "Other")
FROM `farmers_market.vendor`
-- Number of purchase rows per customer on each market date.
SELECT
    cp.customer_id,
    cp.market_date,
    COUNT(*) AS num_orders
FROM `farmers_market.customer_purchases` AS cp
GROUP BY
    cp.customer_id,
    cp.market_date
ORDER BY
    cp.market_date,
    cp.customer_id
Calculate the total quantity purchased by each customer per market_date.
-- Total quantity bought by each customer on each market date.
SELECT
    cp.customer_id,
    cp.market_date,
    SUM(cp.quantity) AS total_quantity
FROM `farmers_market.customer_purchases` AS cp
GROUP BY
    cp.customer_id,
    cp.market_date
ORDER BY
    cp.market_date,
    cp.customer_id
How many different kinds of products were purchased by each customer on each
market date?
-- Number of distinct products each customer bought on each market date.
SELECT
    cp.customer_id,
    cp.market_date,
    COUNT(DISTINCT cp.product_id) AS num_products
FROM `farmers_market.customer_purchases` AS cp
GROUP BY
    cp.customer_id,
    cp.market_date
ORDER BY
    cp.market_date,
    cp.customer_id
CASE & WHEN
Find out which vendors primarily sell fresh products and which don’t. Add an
identifier column for the same.
-- Flag each vendor as primarily selling fresh products or not, based on vendor_type.
-- NOTE(review): the text at this spot was two interleaved, truncated fragments
-- (a CASE version and an IF version). The labels and the source table below are
-- a reconstruction -- confirm against the original notes.
SELECT
    vendor_id,
    vendor_name,
    vendor_type,
    CASE
        WHEN LOWER(vendor_type) LIKE "%fresh%" THEN "Fresh Produce"
        ELSE "Other"
    END AS vendor_type_condensed
    -- Equivalent BigQuery shorthand:
    -- IF(UPPER(vendor_type) LIKE "%FRESH%", "Fresh Produce", "Other")
FROM `farmers_market.vendor`
-- Number of purchase rows per customer on each market date.
SELECT
    cp.customer_id,
    cp.market_date,
    COUNT(*) AS num_orders
FROM `farmers_market.customer_purchases` AS cp
GROUP BY
    cp.customer_id,
    cp.market_date
ORDER BY
    cp.market_date,
    cp.customer_id
Calculate the total quantity purchased by each customer per market_date.
-- Total quantity bought by each customer on each market date.
SELECT
    cp.customer_id,
    cp.market_date,
    SUM(cp.quantity) AS total_quantity
FROM `farmers_market.customer_purchases` AS cp
GROUP BY
    cp.customer_id,
    cp.market_date
ORDER BY
    cp.market_date,
    cp.customer_id
How many different kinds of products were purchased by each customer on each
market date?
-- Number of distinct products each customer bought on each market date.
SELECT
    cp.customer_id,
    cp.market_date,
    COUNT(DISTINCT cp.product_id) AS num_products
FROM `farmers_market.customer_purchases` AS cp
GROUP BY
    cp.customer_id,
    cp.market_date
ORDER BY
    cp.market_date,
    cp.customer_id
Group By & Agg Queries
Calculate the total price paid by customer_id 3 per market_date.
-- Amount paid by customer 3 on each market date (quantity x unit cost).
SELECT
    cp.customer_id,
    cp.market_date,
    SUM(cp.quantity * cp.cost_to_customer_per_qty) AS total_amt
FROM `farmers_market.customer_purchases` AS cp
WHERE cp.customer_id = 3
GROUP BY
    cp.customer_id,
    cp.market_date
Determine how much each customer has paid to each vendor regardless of the date.
-- Amount each customer has paid to each vendor across all dates.
SELECT
    cp.customer_id,
    cp.vendor_id,
    SUM(cp.quantity * cp.cost_to_customer_per_qty) AS total_amt
FROM `farmers_market.customer_purchases` AS cp
GROUP BY
    cp.customer_id,
    cp.vendor_id
Give me the least and most expensive product.
Least - MIN(col)
Highest - MAX(col)
-- Lowest and highest original_price across the whole inventory table.
SELECT
    MIN(vi.original_price) AS least_expensive,
    MAX(vi.original_price) AS most_expensive
FROM `farmers_market.vendor_inventory` AS vi
Least and most expensive price of each vendor.
-- Lowest and highest original_price within each vendor's inventory.
SELECT
    vi.vendor_id,
    MIN(vi.original_price) AS least_expensive,
    MAX(vi.original_price) AS most_expensive
FROM `farmers_market.vendor_inventory` AS vi
GROUP BY vi.vendor_id
Count how many products were on sale on each market date.
-- Number of inventory rows (products on sale) on each market date.
-- The grouping key must be market_date: it is the selected column and the
-- question asks for a count per date, not per vendor.
SELECT
    market_date,
    COUNT(product_id) AS num_products
FROM `farmers_market.vendor_inventory`
GROUP BY market_date
Count the number of different products brought to the market between
2019-04-03 and 2019-05-16 by each vendor.
-- Number of distinct products each vendor brought between 2019-04-03 and
-- 2019-05-16 (both dates inclusive).
-- The selected key is vendor_id so that it matches the GROUP BY and the
-- question ("by each vendor").
SELECT
    vendor_id,
    COUNT(DISTINCT product_id) AS num_products
FROM `farmers_market.vendor_inventory`
WHERE market_date BETWEEN "2019-04-03" AND "2019-05-16"
GROUP BY vendor_id
Along with count, calculate the average of the original price of a product per
vendor.
-- Average original_price for every (vendor, product) pair within the date window.
SELECT
    vi.vendor_id,
    vi.product_id,
    AVG(vi.original_price) AS avg_price
FROM `farmers_market.vendor_inventory` AS vi
WHERE vi.market_date BETWEEN "2019-04-03" AND "2019-05-16"
GROUP BY
    vi.vendor_id,
    vi.product_id
Find the vendors who brought at least 100 items to the market during the
period 2019-05-02 to 2019-05-16.
-- Vendors whose summed inventory quantity in the window is 100 or more.
SELECT
    vi.vendor_id,
    SUM(vi.quantity) AS total_quantity
FROM `farmers_market.vendor_inventory` AS vi
WHERE vi.market_date BETWEEN "2019-05-02" AND "2019-05-16"
GROUP BY vi.vendor_id
HAVING SUM(vi.quantity) >= 100
Joins-1 Queries
List all the products along with their product category names.
-- Every product with its category name; products without a matching category
-- are kept (category name is NULL for them).
SELECT
    prod.product_id,
    prod.product_name,
    prod.product_category_id,
    cat.product_category_name
FROM `farmers_market.product` AS prod
LEFT JOIN `farmers_market.product_category` AS cat
    ON prod.product_category_id = cat.product_category_id
Get the zip codes of the customers who made purchases on 2019-04-06.
-- Distinct (customer, zip) pairs for customers with a purchase on 2019-04-06.
SELECT DISTINCT
    cust.customer_id,
    cust.customer_zip
FROM `farmers_market.customer` AS cust
INNER JOIN `farmers_market.customer_purchases` AS purch
    ON cust.customer_id = purch.customer_id
WHERE purch.market_date = "2019-04-06"
Find the customers from the database who have never made a purchase
from the market.
-- Customers with no row in customer_purchases (anti-join written as NOT EXISTS).
SELECT
    cust.customer_id
FROM `farmers_market.customer` AS cust
WHERE NOT EXISTS (
    SELECT 1
    FROM `farmers_market.customer_purchases` AS purch
    WHERE purch.customer_id = cust.customer_id
)
JOINS - 3
Find out all customers who have either not made any purchase or they have
deleted their accounts.
-- Branch 1: customers present in `customer` with no purchases.
SELECT
    cust.customer_id,
    "New Customer" AS customer_type
FROM `farmers_market.customer` AS cust
LEFT JOIN `farmers_market.customer_purchases` AS purch
    ON cust.customer_id = purch.customer_id
WHERE purch.customer_id IS NULL

UNION DISTINCT

-- Branch 2: purchase rows whose customer_id has no row in `customer`.
-- Output column names come from the first branch of the UNION.
SELECT
    purch.customer_id,
    "Deleted Customer" AS customer_type
FROM `farmers_market.customer_purchases` AS purch
LEFT JOIN `farmers_market.customer` AS cust
    ON cust.customer_id = purch.customer_id
WHERE cust.customer_id IS NULL
Find out all customers who have either not made any purchase or they have
deleted their accounts.
-- Same question answered with one FULL JOIN: keep only the rows that are
-- unmatched on either side.
SELECT
    cust.customer_id AS new_customers,
    purch.customer_id AS deleted_customers
FROM `farmers_market.customer` AS cust
FULL OUTER JOIN `farmers_market.customer_purchases` AS purch
    ON cust.customer_id = purch.customer_id
WHERE cust.customer_id IS NULL
    OR purch.customer_id IS NULL
Get details about all market booths and every vendor booth assignment for every
market date along with the vendor details.
-- All booths, with any vendor assignment and the assigned vendor's name.
-- Both joins are LEFT so booths without an assignment still appear.
SELECT
    assign.market_date,
    booth.booth_number,
    booth.booth_type,
    booth.booth_price_level,
    assign.vendor_id,
    vend.vendor_name
FROM `farmers_market.booth` AS booth
LEFT JOIN `farmers_market.vba` AS assign
    ON booth.booth_number = assign.booth_number
LEFT JOIN `farmers_market.vendor` AS vend
    ON assign.vendor_id = vend.vendor_id
Self Join - Display the name of the manager
-- Self join: pair each employee with the row referenced by reportsTo.
-- LEFT JOIN keeps employees whose reportsTo matches no row.
SELECT
    staff.employeeNumber,
    staff.firstName AS employee_name,
    boss.firstName AS manager_name
FROM employees AS staff
LEFT JOIN employees AS boss
    ON staff.reportsTo = boss.employeeNumber
Window Functions - 1
Get the price of the most expensive item per vendor?
-- Highest original_price per vendor.
SELECT
    vi.vendor_id,
    MAX(vi.original_price) AS most_expensive_price
FROM `farmers_market.vendor_inventory` AS vi
GROUP BY vi.vendor_id
Rank the products in each vendor’s inventory. Expensive products should get a
lower rank.
-- Three ranking flavours over the same per-vendor, price-descending window,
-- declared once in a named WINDOW clause.
SELECT
    vendor_id,
    market_date,
    product_id,
    original_price,
    ROW_NUMBER() OVER by_vendor_price AS num_rankings,
    RANK() OVER by_vendor_price AS rnk,
    DENSE_RANK() OVER by_vendor_price AS dense_rnk
FROM `farmers_market.vendor_inventory`
WINDOW by_vendor_price AS (PARTITION BY vendor_id ORDER BY original_price DESC)
Follow up: extract all rows where the ranking is 1 in the previous question.
-- Rank inventory rows per vendor in a CTE, then keep the rows whose dense
-- rank is 1 (all rows tied at the vendor's top price).
WITH ranked AS (
    SELECT
        vendor_id,
        market_date,
        product_id,
        original_price,
        ROW_NUMBER() OVER (PARTITION BY vendor_id ORDER BY original_price DESC) AS num_rankings,
        RANK() OVER (PARTITION BY vendor_id ORDER BY original_price DESC) AS rnk,
        DENSE_RANK() OVER (PARTITION BY vendor_id ORDER BY original_price DESC) AS dense_rnk
    FROM `farmers_market.vendor_inventory`
)
SELECT *
FROM ranked
WHERE ranked.dense_rnk = 1
Being a vendor, you want to find which of your products were above the
average price on each market date.
-- Inventory rows priced above that market date's average original_price.
-- product_id is included so the result identifies which product each row is.
-- The window function cannot be filtered in the same SELECT, so it is computed
-- in a subquery; the ORDER BY is on the outer query, where it takes effect.
SELECT *
FROM (
    SELECT
        market_date,
        vendor_id,
        product_id,
        original_price,
        ROUND(AVG(original_price) OVER (PARTITION BY market_date), 2) AS avg_price
    FROM `farmers_market.vendor_inventory`
) AS x
WHERE x.original_price > x.avg_price
ORDER BY x.market_date
Count how many products each vendor brought to the market on each
date and display the count on each row.
-- Keep every inventory row and attach the distinct-product count of its
-- (vendor, market date) group.
SELECT
    vi.vendor_id,
    vi.market_date,
    vi.product_id,
    COUNT(DISTINCT vi.product_id) OVER (
        PARTITION BY vi.vendor_id, vi.market_date
    ) AS count_of_products
FROM `farmers_market.vendor_inventory` AS vi
ORDER BY
    vi.market_date,
    vi.vendor_id
Window Functions - 2
-- Running total of sale in date order; the frame runs from the first row up
-- to and including the current row.
SELECT
    employee,
    date,
    sale,
    SUM(sale) OVER (
        ORDER BY date
        ROWS UNBOUNDED PRECEDING
    ) AS total_sales
FROM sales
-- Moving average of sale over the previous, current and next row in date order.
SELECT
    s.employee,
    s.date,
    s.sale,
    AVG(s.sale) OVER (
        ORDER BY s.date
        ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
    ) AS dma
FROM `farmers_market.sales` AS s
ORDER BY
    s.date,
    s.employee
Display each vendor’s booth assignment for each market_date alongside
their previous booth assignment.
-- Each vendor's booth per market date next to the booth from that vendor's
-- preceding market date (NULL on the vendor's first row).
SELECT
    assign.market_date,
    assign.vendor_id,
    assign.booth_number,
    LAG(assign.booth_number, 1) OVER (
        PARTITION BY assign.vendor_id
        ORDER BY assign.market_date
    ) AS prev_booth
FROM `farmers_market.vba` AS assign
Determine which vendors are new or changing booths that day, so we can contact and
ensure smooth booth setup.
Check it for Date: 2019-04-10
-- Vendors that are new (no previous booth) or changed booth on 2019-04-10.
-- LAG is computed over all dates first, then the date filter is applied.
SELECT *
FROM (
    SELECT
        market_date,
        vendor_id,
        booth_number,
        LAG(booth_number, 1) OVER (PARTITION BY vendor_id ORDER BY market_date) AS prev_booth
    FROM `farmers_market.vba`
) AS x
WHERE x.market_date = "2019-04-10"
    -- Parentheses are required: AND binds tighter than OR, so without them the
    -- IS NULL branch would match first-time rows from every market date.
    AND (x.booth_number != x.prev_booth OR x.prev_booth IS NULL)
Find out the total revenue on each date, and compare it with the previous date to
check whether it is higher or lower.
-- Revenue per market date, the previous date's revenue, and an explicit
-- higher/lower comparison between the two.
WITH daily_revenue AS (
    SELECT
        market_date,
        SUM(cost_to_customer_per_qty * quantity) AS revenue
    FROM `farmers_market.customer_purchases`
    GROUP BY market_date
)
SELECT
    market_date,
    revenue,
    LAG(revenue) OVER by_date AS prev_rev,
    CASE
        -- The first date has no predecessor, so there is nothing to compare.
        WHEN LAG(revenue) OVER by_date IS NULL THEN NULL
        WHEN revenue > LAG(revenue) OVER by_date THEN "Higher"
        WHEN revenue < LAG(revenue) OVER by_date THEN "Lower"
        ELSE "Same"
    END AS rev_trend
FROM daily_revenue
WINDOW by_date AS (ORDER BY market_date)
ORDER BY market_date
Display 3rd highest salary in each department across all rows.
-- Show, on every row, the 3rd highest salary of that row's department.
-- NOTE(review): the column names `salary` and `department` are assumed from the
-- question text; the original query was a stub -- confirm against the table.
-- The explicit full-partition frame makes NTH_VALUE see the whole department
-- instead of only the rows up to the current one.
-- NTH_VALUE counts rows, so tied salaries occupy separate positions.
SELECT
    *,
    NTH_VALUE(salary, 3) OVER (
        PARTITION BY department
        ORDER BY salary DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS third_highest_salary
FROM employee
Date & Time Fns
From market_start_datetime, extract the following:
- Date,
- Time,
- Day of month,
- Month of year,
- Year,
- Hour,
- Minute
-- NOTE(review): reconstructed from an interleaved list/query fragment. Only the
-- first EXTRACT and the mkt_date alias were legible; the other aliases and the
-- source table are assumptions -- confirm against the original notes.
SELECT
    market_start_datetime,
    EXTRACT(DATE FROM market_start_datetime) AS mkt_date,
    EXTRACT(TIME FROM market_start_datetime) AS mkt_time,
    EXTRACT(DAY FROM market_start_datetime) AS mkt_day,
    EXTRACT(MONTH FROM market_start_datetime) AS mkt_month,
    EXTRACT(YEAR FROM market_start_datetime) AS mkt_year,
    EXTRACT(HOUR FROM market_start_datetime) AS mkt_hour,
    EXTRACT(MINUTE FROM market_start_datetime) AS mkt_minute
FROM `farmers_market.datetime_demo`
-- The start datetime alongside the same value shifted 150 minutes forward
-- and 150 minutes back.
SELECT
    demo.market_start_datetime,
    DATE_ADD(demo.market_start_datetime, INTERVAL 150 MINUTE),
    DATE_SUB(demo.market_start_datetime, INTERVAL 150 MINUTE)
FROM `farmers_market.datetime_demo` AS demo
Today is 17th July, find out how many orders were placed in the last 30 days.
-- Distinct orders dated from 30 days before 2023-07-17 through 2023-07-17
-- (BETWEEN includes both end dates).
SELECT
    COUNT(DISTINCT o.order_id)
FROM orders AS o
WHERE o.market_date BETWEEN DATE_SUB("2023-07-17", INTERVAL 30 DAY)
    AND "2023-07-17"
What is the time period for which the data is recorded in the farmer’s
market dataset?
-- Earliest and latest purchase dates, and the number of days between them.
SELECT
    MIN(cp.market_date) AS first_prch_date,
    MAX(cp.market_date) AS last_prch_date,
    DATE_DIFF(MAX(cp.market_date), MIN(cp.market_date), DAY)
FROM `farmers_market.customer_purchases` AS cp
Customer Profiling
- First purchase
- Most recent purchase
- How many times they have visited the market
-- Per-customer profile: first and last purchase date, days between them,
-- and number of distinct market dates visited.
SELECT
    cp.customer_id,
    MIN(cp.market_date) AS first_prch_date,
    MAX(cp.market_date) AS last_prch_date,
    DATE_DIFF(MAX(cp.market_date), MIN(cp.market_date), DAY) AS customer_time_period,
    COUNT(DISTINCT cp.market_date) AS uniq_visits
FROM `farmers_market.customer_purchases` AS cp
GROUP BY cp.customer_id
ORDER BY cp.customer_id
Query Optimisation Tips & Tricks
Below are some uncommon yet important SQL query optimization tips. Each tip compares
the regular approach with the optimized approach, illustrated with a real-world example:
2. Minimize Subqueries:
- Regular Approach:
```sql
-- Regular approach: a correlated scalar subquery evaluated for each product row.
SELECT
    product_name,
    (
        SELECT AVG(price)
        FROM sales
        WHERE product_id = products.id
    ) AS avg_price
FROM products;
```
- Optimized Approach:
```sql
-- Join-and-aggregate form of the correlated subquery.
-- LEFT JOIN keeps products with no sales (avg_price is NULL, as in the
-- subquery version), and grouping by p.id keeps products that share a name
-- on separate rows.
SELECT
    p.product_name,
    AVG(s.price) AS avg_price
FROM products AS p
LEFT JOIN sales AS s
    ON p.id = s.product_id
GROUP BY
    p.id,
    p.product_name;
```
Explanation: In the regular approach, we use a subquery to calculate the average price for
each product. This can be slow and inefficient, especially for large datasets. The optimized
approach uses a JOIN and GROUP BY to achieve the same result, resulting in better
performance.
3. Use EXISTS or JOIN instead of IN:
- Regular Approach:
```sql
-- Regular approach: membership test against a subquery result with IN.
SELECT
    order_id,
    order_date
FROM orders
WHERE customer_id IN (
    SELECT id
    FROM customers
    WHERE country = 'USA'
);
```
- Optimized Approach:
```sql
-- Optimized approach: EXISTS checks whether a matching customer row exists.
SELECT
    ord.order_id,
    ord.order_date
FROM orders AS ord
WHERE EXISTS (
    SELECT 1
    FROM customers AS cust
    WHERE cust.id = ord.customer_id
        AND cust.country = 'USA'
);
```
Explanation: The regular approach uses the IN operator with a subquery, which can be
inefficient when the subquery returns a large result set. The optimized approach uses EXISTS
to check for the existence of a matching record, which is often faster.
Remember, optimization techniques may vary based on the database system and specific use
cases. Always analyze the query execution plans, profile query performance, and test different
optimization approaches to determine the most effective strategy for your environment.
JOINS
Below are some uncommon yet important tips for using JOINs in SQL queries to optimize their performance:
Remember to analyze query execution plans, profile query performance, and test different join
strategies to find the most efficient approach for your specific use case and database system.
Properly setting up JOINs is crucial for query optimization and can significantly impact the
overall performance of your SQL queries.
GROUP BY & Aggregations
Following are some tips for setting up GROUP BY clauses in SQL queries to optimize their
performance:
Remember to analyze query execution plans, profile query performance, and test different
GROUP BY strategies to find the most efficient approach for your specific use case and
database system. Properly setting up GROUP BY clauses is essential for query optimization
and can significantly impact the overall performance of your SQL queries.
WINDOW FUNCTIONS
Window functions and GROUP BY serve different purposes, but they can often achieve similar
results. Here are some tips on using window functions in comparison with GROUP BY and other
use cases:
When using window functions, consider the specific use case, the window frame, and the
desired result set. Window functions offer powerful capabilities to perform complex calculations
and aggregations without collapsing the data, making them suitable for various analytical tasks.
However, for simple aggregations and grouping, GROUP BY remains a suitable choice. Choose
the appropriate approach based on the query's complexity, performance requirements, and the
specific analysis needed for your data.
Query Optimisation
COUNT(*) vs COUNT(1)
-- Row count using COUNT(*).
SELECT COUNT(*) AS num_of_rows
FROM `bigquery-public-data.san_francisco.bikeshare_trips`;
-- Row count using COUNT(1), for comparison with COUNT(*).
SELECT COUNT(1) AS num_of_rows
FROM `bigquery-public-data.san_francisco.bikeshare_trips`
Tip 1: Only select columns that you really need
-- Baseline for the tip: reads every column of the trips table.
SELECT
    *
FROM `bigquery-public-data.san_francisco.bikeshare_trips`
-- Preferred form: read only the three columns that are needed.
SELECT
    trips.trip_id,
    trips.start_station_name,
    trips.end_station_name
FROM `bigquery-public-data.san_francisco.bikeshare_trips` AS trips
Always filter your data according to requirements
-- Trips whose start_date falls in calendar year 2015.
SELECT
    *
FROM `bigquery-public-data.san_francisco.bikeshare_trips`
WHERE EXTRACT(YEAR FROM start_date) = 2015
Tip: Read less data
How long are bike trips usually? Calculate the average duration of one-way bike trips in any one
of the cities in SF.
-- Average duration_sec per (start, end) station pair, one-way trips only
-- (start station differs from end station).
SELECT
    trips.start_station_name,
    trips.end_station_name,
    AVG(trips.duration_sec) AS avg_time
FROM `bigquery-public-data.san_francisco.bikeshare_trips` AS trips
WHERE trips.start_station_name != trips.end_station_name
GROUP BY
    trips.start_station_name,
    trips.end_station_name
Tip 4: Use GROUP BY instead of DISTINCT
Unique list of stations
-- Unique start stations via DISTINCT.
SELECT DISTINCT
    trips.start_station_name
FROM `bigquery-public-data.san_francisco.bikeshare_trips` AS trips
-- Unique start stations via GROUP BY.
SELECT
    trips.start_station_name
FROM `bigquery-public-data.san_francisco.bikeshare_trips` AS trips
GROUP BY trips.start_station_name
Tip 5: Order your JOINs from larger table to smaller tables
Find the number of bikes and docks currently available at all stations in SF so that proper
restocking can be done.
-- Bikes and docks available per status row for stations whose landmark is
-- "San Francisco". The status table is listed first, then the stations table.
SELECT
    stations.station_id,
    stations.name,
    status.bikes_available,
    status.docks_available
FROM `bigquery-public-data.san_francisco.bikeshare_status` AS status
INNER JOIN `bigquery-public-data.san_francisco.bikeshare_stations` AS stations
    ON stations.station_id = status.station_id
WHERE stations.landmark = "San Francisco"
Business Case Solving
Tables to be Downloaded
1. Customers
2. Suppliers
3. Employees
4. Products
5. Shippers
6. Orders
7. Order_Details
Schema
Ques. Fetch the full name and hiring date of all Employees who work as
Sales Representatives.
-- Full name (first + space + last) and hire date of employees titled
-- "Sales Representative".
SELECT
    CONCAT(emp.firstname, " ", emp.lastname) AS full_name,
    emp.hiredate
FROM `cochin_traders.employees` AS emp
WHERE emp.title = "Sales Representative"
Ques. Which of the products in our inventory need to be reordered?
Note: For now, just use the fields UnitsInStock and ReorderLevel, where UnitsInStock is less than the ReorderLevel, ignoring the fields
UnitsOnOrder and Discontinued.
-- Products whose stock is at or below the reorder level.
-- NOTE(review): the question text says "less than" but the predicate is `<=`
-- -- confirm which is intended.
SELECT
    prod.productid,
    prod.productname
FROM `cochin_traders.products` AS prod
WHERE prod.unitsinstock <= prod.reorderlevel
ORDER BY prod.productid
ORDER BY productid
Ques. Find and display the details of customers who have placed more than
5 orders.
-- Customers whose number of distinct orders exceeds 5.
SELECT
    cust.customerid,
    cust.contactname
FROM `cochin_traders.customers` AS cust
WHERE cust.customerid IN (
    SELECT ord.customerid
    FROM `cochin_traders.orders` AS ord
    GROUP BY ord.customerid
    HAVING COUNT(DISTINCT ord.orderid) > 5
)
Ques: An employee of ours (Margaret Peacock, EmployeeID 4) has the record of
completing most orders. However, there are some customers who've never placed an
order with her. Show such customers.
-- Customers who have never placed an order with employee 4.
-- Anti-join: attach only employee 4's orders to each customer, then keep the
-- customers for whom nothing was attached. The employee filter must stay in
-- the ON clause; moving it to WHERE would discard the unmatched customers.
SELECT
    c.customerid,
    c.contactname
FROM `cochin_traders.customers` AS c
LEFT JOIN `cochin_traders.orders` AS o
    ON c.customerid = o.customerid
    AND o.employeeid = 4
WHERE o.customerid IS NULL
Ques. Retrieve the top 5 best-selling products on the basis of the quantity ordered.
-- Five product names with the largest total ordered quantity.
SELECT
    prod.productname,
    SUM(det.quantity) AS total_qty
FROM `cochin_traders.products` AS prod
INNER JOIN `cochin_traders.order_details` AS det
    ON prod.productid = det.productid
GROUP BY prod.productname
ORDER BY total_qty DESC
LIMIT 5
Ques. Analyze the monthly order count for the year 1997.
-- Number of distinct orders per calendar month of 1997, busiest month first.
-- The order key is `orderid`, the column name used by the other queries in
-- this file against `cochin_traders.orders`.
SELECT
    EXTRACT(MONTH FROM orderdate) AS month,
    COUNT(DISTINCT orderid) AS num_orders
FROM `cochin_traders.orders`
WHERE EXTRACT(YEAR FROM orderdate) = 1997
GROUP BY month
ORDER BY num_orders DESC
Ques: Calculate the difference in sales revenue for each month compared to
the previous month.
-- Step 1: revenue per (month, year), net of discount.
-- NOTE(review): the formula divides discount by 100, i.e. treats it as a
-- percentage -- confirm how discount is stored.
WITH monthly_rev AS (
    SELECT
        EXTRACT(MONTH FROM orderdate) AS month,
        EXTRACT(YEAR FROM orderdate) AS year,
        ROUND(
            SUM(
                (od.unitprice * od.quantity)
                - (od.discount * (od.unitprice * od.quantity) / 100)
            ),
            2
        ) AS revenue
    FROM `cochin_traders.orders` AS o
    INNER JOIN `cochin_traders.order_details` AS od
        ON o.orderid = od.orderid
    GROUP BY month, year
)
-- Step 2: previous month's revenue and the difference, in chronological order.
SELECT
    *,
    LAG(m.revenue) OVER chronological AS prev_mon_rev,
    revenue - LAG(m.revenue) OVER chronological AS difference_in_revenue
FROM monthly_rev AS m
WINDOW chronological AS (ORDER BY year, month)
ORDER BY year, month
Ques: Calculate the percentage of total sales revenue for each product.
-- total_rev: a single row holding overall revenue (unit price x quantity).
WITH total_rev AS (
    SELECT
        SUM(unitprice * quantity) AS rev
    FROM `cochin_traders.order_details`
),
-- product_rev: revenue per product name.
product_rev AS (
    SELECT
        prod.productname,
        SUM(det.unitprice * det.quantity) AS product_rev
    FROM `cochin_traders.products` AS prod
    INNER JOIN `cochin_traders.order_details` AS det
        ON prod.productid = det.productid
    GROUP BY prod.productname
)
-- total_rev has exactly one row, so the cross join attaches the grand total
-- to every product row.
SELECT
    pr.productname,
    pr.product_rev,
    (pr.product_rev / tr.rev) * 100 AS percentage_revenue
FROM total_rev AS tr
CROSS JOIN product_rev AS pr
ORDER BY percentage_revenue DESC