0% found this document useful (0 votes)
3 views

Tech Mahindra SQL Interview Questions for Data Engineer

The document contains SQL interview questions tailored for Data Engineers with 3-5 years of experience, covering various topics such as sales analysis, employee salary queries, duplicate record handling, and data normalization versus denormalization. It includes practical SQL queries for tasks like finding top cities by sales, calculating running totals, and identifying duplicate records. Additionally, it discusses indexing in SQL, explaining the differences between clustered and non-clustered indexes and their impact on query performance.

Uploaded by

Prasad Sangitrao
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
3 views

Tech Mahindra SQL Interview Questions for Data Engineer

The document contains SQL interview questions tailored for Data Engineers with 3-5 years of experience, covering various topics such as sales analysis, employee salary queries, duplicate record handling, and data normalization versus denormalization. It includes practical SQL queries for tasks like finding top cities by sales, calculating running totals, and identifying duplicate records. Additionally, it discusses indexing in SQL, explaining the differences between clustered and non-clustered indexes and their impact on query performance.

Uploaded by

Prasad Sangitrao
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 6

Tech Mahindra

SQL interview questions for a Data Engineer


(3-5 years of experience.)

---=================
-- 1. Find the top 3 cities with the highest sales per month. (sales_table)

-- Rebuild the fixture. IF EXISTS keeps the script re-runnable in a fresh
-- session, where a bare DROP TABLE would fail because ##sales does not exist.
DROP TABLE IF EXISTS ##sales;

CREATE TABLE ##sales (
    sale_id   int,
    city      varchar(50),
    sale_date date,
    amount    int
);

-- amount is an int column, so the values are numeric literals (no implicit
-- string-to-int conversion). The last row originally re-used sale_id 7.
INSERT INTO ##sales (sale_id, city, sale_date, amount)
VALUES
    (1, 'Mumbai',    '2024-01-10',  5000),
    (2, 'Delhi',     '2024-01-15',  7000),
    (3, 'Bangalore', '2024-01-20', 10000),
    (4, 'Mangalore', '2024-01-20', 12000),
    (5, 'Chennai',   '2024-02-05',  3000),
    (6, 'Mumbai',    '2024-02-08',  4000),
    (7, 'Patna',     '2024-02-08',  5000),
    (8, 'Mumbai',    '2024-03-08',  5000);

-- The INSERT above must end with ';' because T-SQL rejects a WITH clause that
-- follows an unterminated statement.
WITH monthly_city_sales AS (
    -- One row per (city, month) with the month's total sales.
    SELECT
        city,
        FORMAT(sale_date, 'yyyy-MM') AS sale_month,
        SUM(amount)                  AS amt
    FROM ##sales
    GROUP BY
        city,
        FORMAT(sale_date, 'yyyy-MM')
),
ranked_city_sales AS (
    -- Rank cities inside each month; city is the tie-breaker so that equal
    -- totals are ordered deterministically.
    SELECT
        city,
        sale_month,
        amt,
        ROW_NUMBER() OVER (
            PARTITION BY sale_month
            ORDER BY amt DESC, city
        ) AS rn
    FROM monthly_city_sales
)
SELECT
    city,
    sale_month,
    amt,
    rn
FROM ranked_city_sales
WHERE rn <= 3
ORDER BY
    sale_month,
    rn;

-- 2. Write an SQL query to calculate the running total of sales for each
--    city. (sales_data)

-- Rebuild the fixture; IF EXISTS avoids an error when ##sales is absent.
DROP TABLE IF EXISTS ##sales;

CREATE TABLE ##sales (
    sale_id   int,
    city      varchar(50),
    sale_date date,
    amount    int
);

-- amount is an int column, so the values are numeric literals.
INSERT INTO ##sales (sale_id, city, sale_date, amount)
VALUES
    (1, 'Mumbai', '2024-01-10', 5000),
    (2, 'Delhi ', '2024-01-15', 7000),
    (3, 'Mumbai', '2024-01-20', 3000),
    (4, 'Delhi ', '2024-02-05', 6000),
    (5, 'Mumbai', '2024-02-08', 8000);

-- An explicit ROWS frame is used because the default frame for
-- SUM() OVER (ORDER BY ...) is RANGE, which adds all rows sharing the same
-- sale_date at once and so repeats one total for every tied row.
-- sale_id breaks ties so the row order inside a day is deterministic.
SELECT
    sale_id,
    city,
    sale_date,
    amount,
    SUM(amount) OVER (
        PARTITION BY city
        ORDER BY sale_date, sale_id
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS calculatetotal
FROM ##sales
ORDER BY
    city,
    sale_date,
    sale_id;

-- 3. Find the second highest salary of employees. (employees)

-- Rebuild the fixture; IF EXISTS avoids an error when ##employees is absent.
DROP TABLE IF EXISTS ##employees;

CREATE TABLE ##employees (
    emp_id     int,
    emp_name   varchar(50),
    salary     int,
    department varchar(50)
);

-- salary is an int column, so the values are numeric literals.
INSERT INTO ##employees (emp_id, emp_name, salary, department)
VALUES
    (1, 'Ravi',  70000, 'HR'),
    (2, 'Priya', 90000, 'IT'),
    (3, 'Kunal', 85000, 'Finance'),
    (4, 'Aisha', 60000, 'IT'),
    (5, 'Rahul', 95000, 'HR');

-- Method 1: DENSE_RANK in a CTE.
-- DENSE_RANK gives tied salaries the same rank without gaps, so rn = 2 is the
-- second highest distinct salary and every employee earning it is returned.
-- Each statement ends with ';' because T-SQL requires the statement before a
-- WITH clause to be terminated.
WITH ranked_salaries AS (
    SELECT
        emp_id,
        emp_name,
        salary,
        department,
        DENSE_RANK() OVER (ORDER BY salary DESC) AS rn
    FROM ##employees
)
SELECT
    emp_id,
    emp_name,
    salary,
    department,
    rn
FROM ranked_salaries
WHERE rn = 2;

-- Method 2: highest salary strictly below the maximum.
-- WITH TIES keeps every employee who shares that salary instead of an
-- arbitrary single row.
SELECT TOP (1) WITH TIES
    emp_id,
    emp_name,
    salary,
    department
FROM ##employees
WHERE salary < (SELECT MAX(salary) FROM ##employees)
ORDER BY salary DESC;

-- Method 3: take the two highest DISTINCT salaries, then the lower of them.
-- Working on distinct values prevents a tie for first place from being
-- reported as the "second" salary; the COUNT guard returns no rows when the
-- table holds fewer than two distinct salaries.
WITH top_two_salaries AS (
    SELECT DISTINCT TOP (2)
        salary
    FROM ##employees
    ORDER BY salary DESC
)
SELECT
    e.emp_id,
    e.emp_name,
    e.salary,
    e.department
FROM ##employees AS e
WHERE e.salary = (SELECT MIN(salary) FROM top_two_salaries)
  AND (SELECT COUNT(*) FROM top_two_salaries) = 2;

-- Method 4: the same ranking as Method 1, written as a derived table.
SELECT
    ranked.emp_id,
    ranked.emp_name,
    ranked.salary,
    ranked.department,
    ranked.rn
FROM (
    SELECT
        emp_id,
        emp_name,
        salary,
        department,
        DENSE_RANK() OVER (ORDER BY salary DESC) AS rn
    FROM ##employees
) AS ranked
WHERE ranked.rn = 2;

-- 4. Find employees who have the same salary as someone in the same
--    department. (employee_salary)

-- Rebuild the fixture; IF EXISTS avoids an error when ##employees is absent.
DROP TABLE IF EXISTS ##employees;

CREATE TABLE ##employees (
    emp_id     int,
    emp_name   varchar(50),
    salary     int,
    department varchar(50)
);

-- salary is an int column, so the values are numeric literals.
INSERT INTO ##employees (emp_id, emp_name, salary, department)
VALUES
    (1, 'Neha',  50000, 'HR'),
    (2, 'Ravi',  70000, 'IT'),
    (3, 'Aman',  50000, 'HR'),
    (4, 'Pooja', 90000, 'IT'),
    (5, 'Karan', 70000, 'IT');

-- Method 1: count the employees in each (department, salary) group.
-- A rank ordered by salary only identifies the lowest-paid rows of a
-- department; counting the group members is what detects a shared salary.
-- The INSERT above ends with ';' because T-SQL requires the statement before
-- a WITH clause to be terminated.
WITH salary_groups AS (
    SELECT
        emp_id,
        emp_name,
        salary,
        department,
        COUNT(*) OVER (PARTITION BY department, salary) AS same_salary_count
    FROM ##employees
)
SELECT
    emp_id,
    emp_name,
    salary,
    department,
    same_salary_count
FROM salary_groups
WHERE same_salary_count > 1
ORDER BY
    department,
    salary,
    emp_id;

-- Method 2: EXISTS against another employee of the same department and salary.
-- EXISTS returns each employee once; a plain self-join would repeat an
-- employee once per matching colleague when three or more share a salary.
SELECT
    e1.emp_id,
    e1.emp_name,
    e1.salary,
    e1.department
FROM ##employees AS e1
WHERE EXISTS (
    SELECT 1
    FROM ##employees AS e2
    WHERE e2.department = e1.department
      AND e2.salary     = e1.salary
      AND e2.emp_id    <> e1.emp_id
)
ORDER BY
    e1.department,
    e1.salary,
    e1.emp_id;

-- 5. Write an SQL query to find duplicate records in a table. (users)

-- Rebuild the fixture so the script can be re-run; without the DROP the
-- CREATE TABLE fails on a second run because ##users already exists.
DROP TABLE IF EXISTS ##users;

CREATE TABLE ##users (
    users_id   int,
    users_name varchar(10),
    email      varchar(50)
);

-- NOTE(review): the e-mail literals look like redaction placeholders left by
-- the page this script was copied from - replace with real sample addresses.
INSERT INTO ##users (users_id, users_name, email)
VALUES
    (1, 'Sameer', '[email protected]'),
    (2, 'Anjali', '[email protected]'),
    (3, 'Sameer', '[email protected]'),
    (4, 'Rohan',  '[email protected]'),
    (5, 'Rohan',  '[email protected]');

-- Number the rows inside each (users_name, email) group. Ordering by users_id
-- makes the numbering deterministic: the lowest id is row 1 (the "original")
-- and every later row is a duplicate of it.
-- The INSERT above ends with ';' because T-SQL requires the statement before
-- a WITH clause to be terminated.
WITH numbered_users AS (
    SELECT
        users_id,
        users_name,
        email,
        ROW_NUMBER() OVER (
            PARTITION BY users_name, email
            ORDER BY users_id
        ) AS rn
    FROM ##users
)
-- rn > 1 returns the duplicate copies; rn = 1 would return the rows to keep.
SELECT
    users_id,
    users_name,
    email,
    rn
FROM numbered_users
WHERE rn > 1
ORDER BY
    users_name,
    email,
    users_id;

-- 6. Write an SQL query to delete duplicate rows while keeping only one
--    unique record. (Same sample data as Question 5)

-- The leading ';' terminates whatever statement precedes this one; T-SQL
-- rejects a WITH clause that follows an unterminated statement.
-- Rows are numbered per (users_name, email) group by ascending users_id, so
-- the row with the lowest users_id is the one that is kept.
;WITH numbered_users AS (
    SELECT
        users_id,
        users_name,
        email,
        ROW_NUMBER() OVER (
            PARTITION BY users_name, email
            ORDER BY users_id
        ) AS rn
    FROM ##users
)
-- Deleting through the CTE removes the underlying ##users rows.
DELETE FROM numbered_users
WHERE rn > 1;

-- 7. Write an SQL query to pivot a table by months. Sample Data (sales_data)

-- Rebuild the fixture so the script can be re-run; without the DROP the
-- CREATE TABLE fails on a second run because ##sales_data already exists.
DROP TABLE IF EXISTS ##sales_data;

CREATE TABLE ##sales_data (
    sale_id   int,
    city      varchar(50),
    sale_date date,
    amount    int
);

-- amount is an int column, so the values are numeric literals.
INSERT INTO ##sales_data (sale_id, city, sale_date, amount)
VALUES
    (1, 'Mumbai', '2024-01-10', 5000),
    (2, 'Delhi ', '2024-02-15', 7000),
    (3, 'Mumbai', '2024-01-20', 3000),
    (4, 'Delhi ', '2024-03-05', 6000),
    (5, 'Mumbai', '2024-02-08', 8000);

-- Only city, month label and amount are fed to PIVOT. PIVOT groups by every
-- input column that is not the pivoted or aggregated one, so passing sale_id
-- and sale_date through (SELECT *) would produce one row per sale and need an
-- extra GROUP BY to collapse them again.
-- The INSERT above ends with ';' because T-SQL requires the statement before
-- a WITH clause to be terminated.
WITH monthly_amounts AS (
    SELECT
        city,
        -- First three letters of the month name, e.g. 'Jan'.
        -- NOTE(review): DATENAME follows the session language - this assumes
        -- English month names; confirm for other language settings.
        CONVERT(varchar(3), DATENAME(MONTH, sale_date)) AS mon,
        amount
    FROM ##sales_data
)
-- Column labels are capitalised exactly like the DATENAME output so the match
-- also works under a case-sensitive collation. Months without sales are NULL.
SELECT
    city,
    [Jan],
    [Feb],
    [Mar]
FROM monthly_amounts
PIVOT (
    SUM(amount) FOR mon IN ([Jan], [Feb], [Mar])
) AS pvt
ORDER BY city;

-- 8. Find customers who placed at least 3 orders in the last 6 months.
--    Sample Data (orders)

-- Rebuild the fixture so the script can be re-run; without the DROP the
-- CREATE TABLE fails on a second run because ##orders already exists.
DROP TABLE IF EXISTS ##orders;

CREATE TABLE ##orders (
    order_id    int,
    customer_id int,
    order_date  date,
    amount      int
);

INSERT INTO ##orders (order_id, customer_id, order_date, amount)
VALUES
    (1, 101, '2024-01-10', 1000),
    (2, 102, '2024-02-15', 2000),
    (3, 101, '2024-03-20', 1500),
    (4, 103, '2024-04-05', 2500),
    (5, 101, '2024-05-08', 3000);

-- Reference date for the 6-month window. It defaults to today; the sample
-- rows are all dated 2024, so set it to e.g. '2024-06-01' to see customer 101
-- returned from the fixture.
DECLARE @as_of date = GETDATE();

-- Count orders per customer inside the window and keep customers with three
-- or more. A windowed COUNT ordered by customer_id would be a running count
-- across all customers, not a per-customer count, and an equality test on 3
-- would miss customers with more than three orders.
SELECT
    customer_id,
    COUNT(*) AS order_count
FROM ##orders
WHERE order_date >= DATEADD(MONTH, -6, @as_of)
GROUP BY customer_id
HAVING COUNT(*) >= 3
ORDER BY customer_id;
-- 9. NORMALIZATION VS. DENORMALIZATION – WHAT ARE THEY, AND WHEN SHOULD EACH BE USED IN A DATA PIPELINE?

| FEATURE           | NORMALIZATION (OLTP)                        | DENORMALIZATION (OLAP)                          |
|-------------------|---------------------------------------------|-------------------------------------------------|
| GOAL              | REDUCE REDUNDANCY, ENSURE INTEGRITY         | IMPROVE READ/QUERY PERFORMANCE                  |
| JOINS             | MORE JOINS (COMPLEX QUERIES)                | FEWER JOINS (FASTER QUERIES)                    |
| STORAGE           | LESS STORAGE REQUIRED                       | MORE STORAGE DUE TO REDUNDANCY                  |
| USE CASE          | TRANSACTIONAL SYSTEMS (BANKING, E-COMMERCE) | ANALYTICAL SYSTEMS (DATA WAREHOUSES, REPORTING) |
| UPDATE SPEED      | FASTER UPDATES (LESS REDUNDANT DATA)        | SLOWER UPDATES (MULTIPLE COPIES OF DATA)        |
| QUERY PERFORMANCE | SLOWER (DUE TO JOINS)                       | FASTER (PRE-AGGREGATED OR REDUNDANT DATA)       |

-- 10. INDEXING IN SQL – EXPLAIN CLUSTERED VS. NON-CLUSTERED INDEXES. HOW DO THEY IMPACT QUERY PERFORMANCE?

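CLUSTERED INDEX: DETERMINES THE PHYSICAL ORDER OF DATA IN A TABLE. IT CHANGES THE WAY THE DATA IS STORED ON DISK. ITS KEY CAN BE ONE COLUMN OR A COMBINATION OF COLUMNS, BUT A TABLE CAN HAVE ONLY ONE CLUSTERED INDEX. BECAUSE THE ROWS ARE STORED IN KEY ORDER, LOOKUPS AND RANGE SCANS ON THE CLUSTERING KEY ARE FAST.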

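NON-CLUSTERED INDEX: DOES NOT AFFECT THE PHYSICAL ORDER OF DATA IN A TABLE. IT IS STORED SEPARATELY AND CONTAINS A POINTER TO THE ACTUAL DATA. A TABLE CAN HAVE MULTIPLE NON-CLUSTERED INDEXES. EACH ONE SPEEDS UP LOOKUPS ON ITS INDEXED COLUMNS, BUT TAKES EXTRA STORAGE AND SLOWS DOWN INSERTS, UPDATES AND DELETES BECAUSE THE INDEX MUST BE MAINTAINED.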

You might also like