-- Tech Mahindra
-- SQL interview questions for a Data Engineer
-- (3-5 years of experience)
---=================
-- 1. Find the top 3 cities with the highest sales per month.
--    Sample data (sales_table):

-- Guarded drop: the script can be re-run, and the first run does not fail
-- because the global temp table does not exist yet.
IF OBJECT_ID('tempdb..##sales') IS NOT NULL
    DROP TABLE ##sales;

CREATE TABLE ##sales
(
    sale_id   int,
    city      varchar(50),
    sale_date date,
    amount    int
);

-- Amounts are written as integer literals (the column is int), and every
-- sale_id is unique (the last row previously reused id 7).
INSERT INTO ##sales (sale_id, city, sale_date, amount)
VALUES
    (1, 'Mumbai',    '2024-01-10',  5000),
    (2, 'Delhi',     '2024-01-15',  7000),
    (3, 'Bangalore', '2024-01-20', 10000),
    (4, 'Mangalore', '2024-01-20', 12000),
    (5, 'Chennai',   '2024-02-05',  3000),
    (6, 'Mumbai',    '2024-02-08',  4000),
    (7, 'Patna',     '2024-02-08',  5000),
    (8, 'Mumbai',    '2024-03-08',  5000);

-- Step 1: total sales per city and calendar month.
-- Step 2: rank the cities inside each month by that total.
WITH monthly_city_sales AS
(
    SELECT
        city,
        FORMAT(sale_date, 'yyyy-MM') AS sale_month,  -- format string kept on one line
        SUM(amount)                  AS amt
    FROM ##sales
    GROUP BY
        city,
        FORMAT(sale_date, 'yyyy-MM')
),
ranked_city_sales AS
(
    SELECT
        city,
        sale_month,
        amt,
        -- ROW_NUMBER returns exactly three rows per month; city is the
        -- tie-breaker so equal totals rank the same way on every run.
        -- Use DENSE_RANK instead if tied cities should all be listed.
        ROW_NUMBER() OVER (
            PARTITION BY sale_month
            ORDER BY amt DESC, city
        ) AS rn
    FROM monthly_city_sales
)
SELECT
    city,
    sale_month,
    amt,
    rn
FROM ranked_city_sales
WHERE rn <= 3
ORDER BY
    sale_month,
    rn;
-- 2. Write an SQL query to calculate the running total of sales for
--    each city. Sample data (sales_data):

-- Guarded drop so the script is re-runnable and safe on first execution.
IF OBJECT_ID('tempdb..##sales') IS NOT NULL
    DROP TABLE ##sales;

CREATE TABLE ##sales
(
    sale_id   int,
    city      varchar(50),
    sale_date date,
    amount    int
);

INSERT INTO ##sales (sale_id, city, sale_date, amount)
VALUES
    (1, 'Mumbai', '2024-01-10', 5000),
    (2, 'Delhi ', '2024-01-15', 7000),
    (3, 'Mumbai', '2024-01-20', 3000),
    (4, 'Delhi ', '2024-02-05', 6000),
    (5, 'Mumbai', '2024-02-08', 8000);

-- Running total per city in chronological order.
-- An explicit ROWS frame is used because the default frame for
-- "ORDER BY" is RANGE, which gives every row sharing the same sale_date
-- the same (already fully summed) total. sale_id breaks date ties so the
-- accumulation order is deterministic.
SELECT
    sale_id,
    city,
    sale_date,
    amount,
    SUM(amount) OVER (
        PARTITION BY city
        ORDER BY sale_date, sale_id
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS calculatetotal
FROM ##sales
ORDER BY
    city,
    sale_date,
    sale_id;
-- 3. Find the second highest salary of employees. Sample data (employees):

-- Guarded drop so the script is re-runnable and safe on first execution.
IF OBJECT_ID('tempdb..##employees') IS NOT NULL
    DROP TABLE ##employees;

CREATE TABLE ##employees
(
    emp_id     int,
    emp_name   varchar(50),
    salary     int,
    department varchar(50)
);

INSERT INTO ##employees (emp_id, emp_name, salary, department)
VALUES
    (1, 'Ravi',  70000, 'HR'),
    (2, 'Priya', 90000, 'IT'),
    (3, 'Kunal', 85000, 'Finance'),
    (4, 'Aisha', 60000, 'IT'),
    (5, 'Rahul', 95000, 'HR');

-- All four methods return every employee who earns the second highest
-- DISTINCT salary, and no rows when there is no second distinct salary.

--- Method 1: DENSE_RANK in a CTE.
-- DENSE_RANK gives tied salaries the same rank without gaps, so rank 2 is
-- always the second distinct salary.
WITH ranked_salaries AS
(
    SELECT
        emp_id,
        emp_name,
        salary,
        department,
        DENSE_RANK() OVER (ORDER BY salary DESC) AS rn
    FROM ##employees
)
SELECT
    emp_id,
    emp_name,
    salary,
    department,
    rn
FROM ranked_salaries
WHERE rn = 2;

--- Method 2: highest salary strictly below the maximum.
-- WITH TIES keeps every employee sharing that salary, matching methods 1 and 4.
SELECT TOP (1) WITH TIES
    emp_id,
    emp_name,
    salary,
    department
FROM ##employees
WHERE salary < (SELECT MAX(salary) FROM ##employees)
ORDER BY salary DESC;

--- Method 3: smallest of the top two DISTINCT salaries.
-- Taking the top 2 rows (rather than the top 2 distinct salaries) returns
-- the highest salary when the maximum is tied or the table has one row.
-- DISTINCT plus the HAVING COUNT(*) = 2 guard avoids both cases.
WITH top_two_salaries AS
(
    SELECT DISTINCT TOP (2)
        salary
    FROM ##employees
    ORDER BY salary DESC
)
SELECT
    e.emp_id,
    e.emp_name,
    e.salary,
    e.department
FROM ##employees AS e
WHERE e.salary = (
    SELECT MIN(t.salary)
    FROM top_two_salaries AS t
    HAVING COUNT(*) = 2   -- no second distinct salary => no result
);

--- Method 4: DENSE_RANK in a derived table (subquery form of method 1).
SELECT
    ranked.emp_id,
    ranked.emp_name,
    ranked.salary,
    ranked.department,
    ranked.rn
FROM (
    SELECT
        emp_id,
        emp_name,
        salary,
        department,
        DENSE_RANK() OVER (ORDER BY salary DESC) AS rn
    FROM ##employees
) AS ranked
WHERE ranked.rn = 2;
-- 4. Find employees who have the same salary as someone in the
--    same department. Sample data (employee_salary):

-- Guarded drop so the script is re-runnable and safe on first execution.
IF OBJECT_ID('tempdb..##employees') IS NOT NULL
    DROP TABLE ##employees;

CREATE TABLE ##employees
(
    emp_id     int,
    emp_name   varchar(50),
    salary     int,
    department varchar(50)
);

INSERT INTO ##employees (emp_id, emp_name, salary, department)
VALUES
    (1, 'Neha',  50000, 'HR'),
    (2, 'Ravi',  70000, 'IT'),
    (3, 'Aman',  50000, 'HR'),
    (4, 'Pooja', 90000, 'IT'),
    (5, 'Karan', 70000, 'IT');

-- Method 1: count the employees sharing each (department, salary) pair.
-- Ranking salaries inside a department and keeping rank 1 only finds the
-- lowest-paid employees; it matches the expected answer on this sample
-- purely by coincidence. A per-pair count expresses the requirement directly.
WITH salary_peers AS
(
    SELECT
        emp_id,
        emp_name,
        salary,
        department,
        COUNT(*) OVER (PARTITION BY department, salary) AS same_salary_count
    FROM ##employees
)
SELECT
    emp_id,
    emp_name,
    salary,
    department,
    same_salary_count
FROM salary_peers
WHERE same_salary_count > 1
ORDER BY
    department,
    salary,
    emp_id;

-- Method 2: correlated EXISTS against the same table.
-- EXISTS returns each employee once; a plain self-join repeats an employee
-- once per matching colleague when three or more share a salary.
SELECT
    e1.emp_id,
    e1.emp_name,
    e1.salary,
    e1.department
FROM ##employees AS e1
WHERE EXISTS (
    SELECT 1
    FROM ##employees AS e2
    WHERE e2.department = e1.department
      AND e2.salary     = e1.salary
      AND e2.emp_id    <> e1.emp_id
)
ORDER BY
    e1.department,
    e1.salary,
    e1.emp_id;
-- 5. Write an SQL query to find duplicate records in a table.
--    Sample data (users):

-- Guarded drop so the script is re-runnable.
IF OBJECT_ID('tempdb..##users') IS NOT NULL
    DROP TABLE ##users;

CREATE TABLE ##users
(
    users_id   int,
    users_name varchar(10),
    email      varchar(50)
);

-- NOTE(review): the original e-mail literals were all the placeholder
-- '[email protected]' (apparently an e-mail obfuscation artifact), each
-- split across two lines so the stored value began with a line break.
-- The example.com addresses below are stand-ins; rows 1/3 and 4/5 are
-- assumed to be the intended duplicates - TODO confirm against the
-- original data.
INSERT INTO ##users (users_id, users_name, email)
VALUES
    (1, 'Sameer', 'sameer@example.com'),
    (2, 'Anjali', 'anjali@example.com'),
    (3, 'Sameer', 'sameer@example.com'),
    (4, 'Rohan',  'rohan@example.com'),
    (5, 'Rohan',  'rohan@example.com');

-- Number the rows inside each (users_name, email) group. Ordering by
-- users_id makes the lowest id the "original" (rn = 1) deterministically,
-- so every row with rn > 1 is a duplicate of an earlier record.
WITH numbered_users AS
(
    SELECT
        users_id,
        users_name,
        email,
        ROW_NUMBER() OVER (
            PARTITION BY users_name, email
            ORDER BY users_id
        ) AS rn
    FROM ##users
)
SELECT
    users_id,
    users_name,
    email,
    rn
FROM numbered_users
WHERE rn > 1          -- the duplicates; rn = 1 would list the rows to keep
ORDER BY
    users_name,
    email,
    users_id;
-- 6. Write an SQL query to delete duplicate rows while keeping only
--    one unique record. (Same sample data as Question 5)

-- The leading semicolon terminates whatever statement precedes this one:
-- T-SQL requires the statement before a WITH (CTE) in a batch to be
-- terminated with a semicolon.
;WITH numbered_users AS
(
    SELECT
        users_id,
        users_name,
        email,
        -- Ordering by users_id keeps the row with the lowest id in each
        -- (users_name, email) group; ordering by the partition columns
        -- themselves would leave the surviving row arbitrary.
        ROW_NUMBER() OVER (
            PARTITION BY users_name, email
            ORDER BY users_id
        ) AS rn
    FROM ##users
)
-- Deleting through the CTE removes the underlying ##users rows.
DELETE FROM numbered_users
WHERE rn > 1;
-- 7. Write an SQL query to pivot a table by months.
--    Sample data (sales_data):

-- Guarded drop so the script is re-runnable.
IF OBJECT_ID('tempdb..##sales_data') IS NOT NULL
    DROP TABLE ##sales_data;

CREATE TABLE ##sales_data
(
    sale_id   int,
    city      varchar(50),
    sale_date date,
    amount    int
);

INSERT INTO ##sales_data (sale_id, city, sale_date, amount)
VALUES
    (1, 'Mumbai', '2024-01-10', 5000),
    (2, 'Delhi ', '2024-02-15', 7000),
    (3, 'Mumbai', '2024-01-20', 3000),
    (4, 'Delhi ', '2024-03-05', 6000),
    (5, 'Mumbai', '2024-02-08', 8000);

-- PIVOT groups by every source column that is neither the aggregated value
-- nor the spreading column, so the source is narrowed to exactly
-- (city, sale_month, amount). That yields one row per city directly, with
-- no second SUM/GROUP BY on top of the pivot.
-- The month NUMBER is pivoted instead of an abbreviated month name:
-- DATENAME output depends on the session language, and matching 'Jan'
-- against [jan] depends on the collation being case-insensitive.
-- NOTE: months are not split by year; the same month of different years
-- is summed into one column.
WITH monthly_source AS
(
    SELECT
        city,
        MONTH(sale_date) AS sale_month,
        amount
    FROM ##sales_data
)
SELECT
    city,
    [1] AS [jan],
    [2] AS [Feb],
    [3] AS [Mar]
FROM monthly_source
PIVOT (
    SUM(amount)
    FOR sale_month IN ([1], [2], [3])
) AS pvt
ORDER BY city;
-- 8. Find customers who placed at least 3 orders in the last 6
--    months. Sample data (orders):

-- Guarded drop so the script is re-runnable.
IF OBJECT_ID('tempdb..##orders') IS NOT NULL
    DROP TABLE ##orders;

CREATE TABLE ##orders
(
    order_id    int,
    customer_id int,
    order_date  date,
    amount      int
);

INSERT INTO ##orders (order_id, customer_id, order_date, amount)
VALUES
    (1, 101, '2024-01-10', 1000),
    (2, 102, '2024-02-15', 2000),
    (3, 101, '2024-03-20', 1500),
    (4, 103, '2024-04-05', 2500),
    (5, 101, '2024-05-08', 3000);

-- Reference date for "the last 6 months". It defaults to today; the sample
-- orders are all dated 2024, so set @as_of to e.g. '2024-06-01' to see
-- customer 101 returned for the sample data.
DECLARE @as_of        date = CAST(GETDATE() AS date);
DECLARE @window_start date = DATEADD(MONTH, -6, @as_of);

-- One row per customer with the number of orders inside the window.
-- The requirement is a per-customer count, so GROUP BY / HAVING is used:
-- a windowed COUNT ordered by customer_id is a running count across ALL
-- customers, and comparing it with "= 3" would also miss customers who
-- placed more than three orders.
SELECT
    customer_id,
    COUNT(*) AS order_count
FROM ##orders
WHERE order_date >= @window_start
  AND order_date <= @as_of        -- ignore orders dated after the reference date
GROUP BY customer_id
HAVING COUNT(*) >= 3
ORDER BY customer_id;
-- 9. NORMALIZATION VS. DENORMALIZATION – WHAT ARE THEY, AND WHEN
--    SHOULD EACH BE USED IN A DATA PIPELINE?
-- FEATURE           | NORMALIZATION (OLTP)                        | DENORMALIZATION (OLAP)
-- ------------------+---------------------------------------------+------------------------------------------------
-- GOAL              | REDUCE REDUNDANCY, ENSURE INTEGRITY         | IMPROVE READ/QUERY PERFORMANCE
-- JOINS             | MORE JOINS (COMPLEX QUERIES)                | FEWER JOINS (FASTER QUERIES)
-- STORAGE           | LESS STORAGE REQUIRED                       | MORE STORAGE DUE TO REDUNDANCY
-- USE CASE          | TRANSACTIONAL SYSTEMS (BANKING, E-COMMERCE) | ANALYTICAL SYSTEMS (DATA WAREHOUSES, REPORTING)
-- UPDATE SPEED      | FASTER UPDATES (LESS REDUNDANT DATA)        | SLOWER UPDATES (MULTIPLE COPIES OF DATA)
-- QUERY PERFORMANCE | SLOWER (DUE TO JOINS)                       | FASTER (PRE-AGGREGATED OR REDUNDANT DATA)
-- 10. INDEXING IN SQL – EXPLAIN CLUSTERED VS. NON-CLUSTERED
--     INDEXES. HOW DO THEY IMPACT QUERY PERFORMANCE?
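-- CLUSTERED INDEX: DETERMINES THE PHYSICAL ORDER OF DATA
-- IN A TABLE. IT CHANGES THE WAY THE DATA IS STORED ON DISK.
-- ITS KEY CAN CONSIST OF ONE OR MORE COLUMNS, BUT A TABLE CAN HAVE
-- ONLY ONE CLUSTERED INDEX. BECAUSE THE ROWS THEMSELVES ARE STORED IN
-- KEY ORDER, RANGE SCANS AND SORTED READS ON THE KEY ARE FAST.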
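-- NON-CLUSTERED INDEX: DOES NOT AFFECT THE
-- PHYSICAL ORDER OF DATA IN A TABLE. IT IS STORED SEPARATELY
-- AND CONTAINS A POINTER TO THE ACTUAL DATA. A TABLE
-- CAN HAVE MULTIPLE NON-CLUSTERED INDEXES. IT SPEEDS UP LOOKUPS ON
-- THE INDEXED COLUMNS, BUT READING COLUMNS NOT STORED IN THE INDEX
-- NEEDS AN EXTRA LOOKUP, AND EVERY ADDITIONAL INDEX ADDS WORK TO
-- INSERTS, UPDATES AND DELETES.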