0% found this document useful (0 votes)
26 views7 pages

Data Engineer (3-5 Years of Experience.) PDF

The document contains a series of SQL interview questions tailored for Data Engineers with 3-5 years of experience, focusing on various SQL queries and concepts. It includes practical examples such as finding top cities by sales, calculating running totals, identifying duplicate records, and discussing normalization versus denormalization. Additionally, it explains the differences between clustered and non-clustered indexes and their impact on query performance.

Uploaded by

Shabber Shaik
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
26 views7 pages

Data Engineer (3-5 Years of Experience.) PDF

The document contains a series of SQL interview questions tailored for Data Engineers with 3-5 years of experience, focusing on various SQL queries and concepts. It includes practical examples such as finding top cities by sales, calculating running totals, identifying duplicate records, and discussing normalization versus denormalization. Additionally, it explains the differences between clustered and non-clustered indexes and their impact on query performance.

Uploaded by

Shabber Shaik
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 7

Tech Mahindra SQL interview questions for a Data Engineer (3-5 years of experience)

Aitolla Venkatesh
-- =====================================================================
-- 1. Find the top 3 cities with the highest sales per month. (sales_table)
-- =====================================================================
-- Dialect: T-SQL (SQL Server). ##sales is a global temporary table.

-- Guarded drop so the script also runs when the table does not exist yet.
IF OBJECT_ID('tempdb..##sales') IS NOT NULL
    DROP TABLE ##sales;

CREATE TABLE ##sales (
    sale_id   INT,
    city      VARCHAR(50),
    sale_date DATE,
    amount    INT
);

-- Amounts are written as integer literals (the column is INT), and every
-- date literal is kept on a single line so it parses as a valid DATE.
-- The last row is given sale_id 8; the original sample reused id 7.
INSERT INTO ##sales (sale_id, city, sale_date, amount)
VALUES
    (1, 'Mumbai',    '2024-01-10',  5000),
    (2, 'Delhi',     '2024-01-15',  7000),
    (3, 'Bangalore', '2024-01-20', 10000),
    (4, 'Mangalore', '2024-01-20', 12000),
    (5, 'Chennai',   '2024-02-05',  3000),
    (6, 'Mumbai',    '2024-02-08',  4000),
    (7, 'Patna',     '2024-02-08',  5000),
    (8, 'Mumbai',    '2024-03-08',  5000);

-- Aggregate sales per (city, month), then number the cities inside each
-- month from highest to lowest total. The statement before WITH must be
-- terminated with a semicolon in T-SQL, which the INSERT above now is.
WITH monthly_city_sales AS (
    SELECT
        city,
        FORMAT(sale_date, 'yyyy-MM') AS sale_month,
        SUM(amount)                  AS amt,
        ROW_NUMBER() OVER (
            PARTITION BY FORMAT(sale_date, 'yyyy-MM')
            -- city is a tie-breaker so equal totals rank deterministically
            ORDER BY SUM(amount) DESC, city
        )                            AS rn
    FROM ##sales
    GROUP BY city, FORMAT(sale_date, 'yyyy-MM')
)
SELECT
    city,
    sale_month,
    amt,
    rn
FROM monthly_city_sales
WHERE rn <= 3
ORDER BY sale_month, rn;

-- 2. Write an SQL query to calculate the running total of sales for
--    each city. (sales_data)
-- Dialect: T-SQL (SQL Server). ##sales is a global temporary table.

-- Guarded drop so the script also runs when the table does not exist yet.
IF OBJECT_ID('tempdb..##sales') IS NOT NULL
    DROP TABLE ##sales;

CREATE TABLE ##sales (
    sale_id   INT,
    city      VARCHAR(50),
    sale_date DATE,
    amount    INT
);

-- Amounts are integer literals to match the INT column.
INSERT INTO ##sales (sale_id, city, sale_date, amount)
VALUES
    (1, 'Mumbai', '2024-01-10', 5000),
    (2, 'Delhi ', '2024-01-15', 7000),
    (3, 'Mumbai', '2024-01-20', 3000),
    (4, 'Delhi ', '2024-02-05', 6000),
    (5, 'Mumbai', '2024-02-08', 8000);

-- Running total per city in date order.
-- An explicit ROWS frame is used because the default frame for
-- "ORDER BY" in a window is RANGE, which treats rows with the same
-- sale_date as peers and gives them all the same (already summed) total.
-- sale_id breaks ties so same-day sales accumulate in a stable order.
SELECT
    sale_id,
    city,
    sale_date,
    amount,
    SUM(amount) OVER (
        PARTITION BY city
        ORDER BY sale_date, sale_id
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS calculatetotal
FROM ##sales
ORDER BY city, sale_date, sale_id;

-- 3. Find the second highest salary of employees. (employees)
-- Dialect: T-SQL (SQL Server). ##employees is a global temporary table.

-- Guarded drop so the script also runs when the table does not exist yet.
IF OBJECT_ID('tempdb..##employees') IS NOT NULL
    DROP TABLE ##employees;

CREATE TABLE ##employees (
    emp_id     INT,
    emp_name   VARCHAR(50),
    salary     INT,
    department VARCHAR(50)
);

-- Salaries are integer literals to match the INT column.
INSERT INTO ##employees (emp_id, emp_name, salary, department)
VALUES
    (1, 'Ravi',  70000, 'HR'),
    (2, 'Priya', 90000, 'IT'),
    (3, 'Kunal', 85000, 'Finance'),
    (4, 'Aisha', 60000, 'IT'),
    (5, 'Rahul', 95000, 'HR');

-- Method 1: DENSE_RANK in a CTE.
-- DENSE_RANK leaves no gaps on ties, so rn = 2 is always the second
-- distinct salary, and every employee earning it is returned.
WITH ranked_employees AS (
    SELECT
        emp_id,
        emp_name,
        salary,
        department,
        DENSE_RANK() OVER (ORDER BY salary DESC) AS rn
    FROM ##employees
)
SELECT
    emp_id,
    emp_name,
    salary,
    department,
    rn
FROM ranked_employees
WHERE rn = 2;

-- Method 2: highest salary strictly below the maximum.
-- WITH TIES returns every employee on that salary instead of an
-- arbitrary single row.
SELECT TOP (1) WITH TIES
    emp_id,
    emp_name,
    salary,
    department
FROM ##employees
WHERE salary < (SELECT MAX(salary) FROM ##employees)
ORDER BY salary DESC;

-- Method 3: take the two highest DISTINCT salaries, then keep the lower.
-- DISTINCT is required: with plain "TOP 2 *" a tie on the maximum salary
-- would make the "second" row carry the highest salary.
-- The COUNT guard returns no rows when only one distinct salary exists.
WITH top_two_salaries AS (
    SELECT DISTINCT TOP (2)
        salary
    FROM ##employees
    ORDER BY salary DESC
)
SELECT
    e.emp_id,
    e.emp_name,
    e.salary,
    e.department
FROM ##employees AS e
WHERE e.salary = (SELECT MIN(t.salary) FROM top_two_salaries AS t)
  AND (SELECT COUNT(*) FROM top_two_salaries) = 2;

-- Method 4: same ranking as method 1, written as a derived table
-- (subquery) instead of a CTE.
SELECT
    aa.emp_id,
    aa.emp_name,
    aa.salary,
    aa.department,
    aa.rn
FROM (
    SELECT
        emp_id,
        emp_name,
        salary,
        department,
        DENSE_RANK() OVER (ORDER BY salary DESC) AS rn
    FROM ##employees
) AS aa
WHERE aa.rn = 2;
-- 4. Find employees who have the same salary as someone in the
--    same department. (employee_salary)
-- Dialect: T-SQL (SQL Server). ##employees is a global temporary table.

-- Guarded drop so the script also runs when the table does not exist yet.
IF OBJECT_ID('tempdb..##employees') IS NOT NULL
    DROP TABLE ##employees;

CREATE TABLE ##employees (
    emp_id     INT,
    emp_name   VARCHAR(50),
    salary     INT,
    department VARCHAR(50)
);

-- Salaries are integer literals to match the INT column.
INSERT INTO ##employees (emp_id, emp_name, salary, department)
VALUES
    (1, 'Neha',  50000, 'HR'),
    (2, 'Ravi',  70000, 'IT'),
    (3, 'Aman',  50000, 'HR'),
    (4, 'Pooja', 90000, 'IT'),
    (5, 'Karan', 70000, 'IT');

-- Method 1: count how many employees share each (department, salary)
-- pair and keep the rows where that count exceeds one.
-- (Filtering on DENSE_RANK() = 1 ordered by salary would instead return
-- the lowest-paid employees of each department, shared salary or not.)
WITH salary_peers AS (
    SELECT
        emp_id,
        emp_name,
        salary,
        department,
        COUNT(*) OVER (PARTITION BY department, salary) AS peer_count
    FROM ##employees
)
SELECT
    emp_id,
    emp_name,
    salary,
    department,
    peer_count
FROM salary_peers
WHERE peer_count > 1
ORDER BY department, salary, emp_id;

-- Method 2: correlated EXISTS.
-- EXISTS returns each employee once; a plain self-join would repeat an
-- employee once per matching colleague when three or more share a salary.
SELECT
    e1.emp_id,
    e1.emp_name,
    e1.salary,
    e1.department
FROM ##employees AS e1
WHERE EXISTS (
    SELECT 1
    FROM ##employees AS e2
    WHERE e2.department = e1.department
      AND e2.salary     = e1.salary
      AND e2.emp_id    <> e1.emp_id
)
ORDER BY e1.department, e1.salary, e1.emp_id;
-- 5. Write an SQL query to find duplicate records in a table. (users)
-- Dialect: T-SQL (SQL Server). ##users is a global temporary table.

-- Guarded drop so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..##users') IS NOT NULL
    DROP TABLE ##users;

CREATE TABLE ##users (
    users_id   INT,
    users_name VARCHAR(10),
    email      VARCHAR(50)
);

-- NOTE(review): every email literal below is the placeholder text
-- '[email protected]' exactly as found in the source; presumably the
-- real addresses were redacted. Confirm the intended values.
INSERT INTO ##users (users_id, users_name, email)
VALUES
    (1, 'Sameer', '[email protected]'),
    (2, 'Anjali', '[email protected]'),
    (3, 'Sameer', '[email protected]'),
    (4, 'Rohan',  '[email protected]'),
    (5, 'Rohan',  '[email protected]');

-- Number the rows inside each (users_name, email) group; any row numbered
-- above 1 is a duplicate of an earlier row in the same group.
-- Ordering by users_id makes the lowest id the "original" deterministically.
-- To list one representative row per group instead, filter on rn = 1.
WITH numbered_users AS (
    SELECT
        users_id,
        users_name,
        email,
        ROW_NUMBER() OVER (
            PARTITION BY users_name, email
            ORDER BY users_id
        ) AS rn
    FROM ##users
)
SELECT
    users_id,
    users_name,
    email,
    rn
FROM numbered_users
WHERE rn > 1
ORDER BY users_id;
-- 6. Write an SQL query to delete duplicate rows while keeping only
--    one unique record. (Same sample data as Question 5.)
-- Dialect: T-SQL (SQL Server). Operates on the ##users table built for
-- Question 5.
--
-- Rows are numbered inside each (users_name, email) group and everything
-- after the first is deleted through the CTE. Ordering by users_id means
-- the row with the lowest users_id is the one that survives; ordering by
-- the partition columns themselves would leave that choice arbitrary.
-- The leading semicolon terminates whatever statement precedes this one,
-- which T-SQL requires before WITH.
;WITH numbered_users AS (
    SELECT
        users_id,
        users_name,
        email,
        ROW_NUMBER() OVER (
            PARTITION BY users_name, email
            ORDER BY users_id
        ) AS rn
    FROM ##users
)
DELETE FROM numbered_users
WHERE rn > 1;

-- 7. Write an SQL query to pivot a table by months.
--    Sample data (sales_data):
-- Dialect: T-SQL (SQL Server). ##sales_data is a global temporary table.

-- Guarded drop so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..##sales_data') IS NOT NULL
    DROP TABLE ##sales_data;

CREATE TABLE ##sales_data (
    sale_id   INT,
    city      VARCHAR(50),
    sale_date DATE,
    amount    INT
);

-- Amounts are integer literals to match the INT column.
INSERT INTO ##sales_data (sale_id, city, sale_date, amount)
VALUES
    (1, 'Mumbai', '2024-01-10', 5000),
    (2, 'Delhi ', '2024-02-15', 7000),
    (3, 'Mumbai', '2024-01-20', 3000),
    (4, 'Delhi ', '2024-03-05', 6000),
    (5, 'Mumbai', '2024-02-08', 8000);

-- Only city, amount and the month label are fed into PIVOT. PIVOT groups
-- by every column it is not told about, so passing sale_id / sale_date
-- through would yield one row per sale and need a second GROUP BY.
-- The month label is the first three characters of DATENAME(MONTH, ...);
-- the IN list uses the same capitalisation ('Jan', 'Feb', 'Mar') so the
-- match does not rely on a case-insensitive collation.
-- NOTE(review): DATENAME output follows the session language; the labels
-- below assume English month names.
WITH sales_by_month AS (
    SELECT
        city,
        amount,
        CONVERT(VARCHAR(3), DATENAME(MONTH, sale_date)) AS mon
    FROM ##sales_data
)
SELECT
    city,
    [Jan] AS [jan],
    [Feb] AS [Feb],
    [Mar] AS [Mar]
FROM sales_by_month
PIVOT (
    SUM(amount) FOR mon IN ([Jan], [Feb], [Mar])
) AS pvt
ORDER BY city;
-- 8. Find customers who placed at least 3 orders in the last 6
--    months. Sample data (orders):
-- Dialect: T-SQL (SQL Server). ##orders is a global temporary table.

-- Guarded drop so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..##orders') IS NOT NULL
    DROP TABLE ##orders;

CREATE TABLE ##orders (
    order_id    INT,
    customer_id INT,
    order_date  DATE,
    amount      INT
);

INSERT INTO ##orders (order_id, customer_id, order_date, amount)
VALUES
    (1, 101, '2024-01-10', 1000),
    (2, 102, '2024-02-15', 2000),
    (3, 101, '2024-03-20', 1500),
    (4, 103, '2024-04-05', 2500),
    (5, 101, '2024-05-08', 3000);

-- Reference date for the 6-month window. It defaults to today; because
-- the sample rows are dated 2024, set it to e.g. '2024-06-01' to see
-- customer 101 returned from the sample data.
DECLARE @as_of_date DATE = CAST(GETDATE() AS DATE);

-- Count orders PER CUSTOMER inside the window and keep customers with
-- three or more. (A windowed COUNT ordered by customer_id is a cumulative
-- count across customers, and "= 3" would miss customers with 4+ orders.)
SELECT
    customer_id,
    COUNT(*) AS order_count
FROM ##orders
WHERE order_date >= DATEADD(MONTH, -6, @as_of_date)
GROUP BY customer_id
HAVING COUNT(*) >= 3
ORDER BY customer_id;
-- 9. NORMALIZATION VS. DENORMALIZATION – WHAT ARE THEY, AND WHEN SHOULD
--    EACH BE USED IN A DATA PIPELINE?
--
-- | FEATURE           | NORMALIZATION (OLTP)                        | DENORMALIZATION (OLAP)                          |
-- |-------------------|---------------------------------------------|-------------------------------------------------|
-- | GOAL              | REDUCE REDUNDANCY, ENSURE INTEGRITY         | IMPROVE READ/QUERY PERFORMANCE                  |
-- | JOINS             | MORE JOINS (COMPLEX QUERIES)                | FEWER JOINS (FASTER QUERIES)                    |
-- | STORAGE           | LESS STORAGE REQUIRED                       | MORE STORAGE DUE TO REDUNDANCY                  |
-- | USE CASE          | TRANSACTIONAL SYSTEMS (BANKING, E-COMMERCE) | ANALYTICAL SYSTEMS (DATA WAREHOUSES, REPORTING) |
-- | UPDATE SPEED      | FASTER UPDATES (LESS REDUNDANT DATA)        | SLOWER UPDATES (MULTIPLE COPIES OF DATA)        |
-- | QUERY PERFORMANCE | SLOWER (DUE TO JOINS)                       | FASTER (PRE-AGGREGATED OR REDUNDANT DATA)       |

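-- 10. INDEXING IN SQL – EXPLAIN CLUSTERED VS. NON-CLUSTERED INDEXES.
--     HOW DO THEY IMPACT QUERY PERFORMANCE?
--
-- CLUSTERED INDEX: DETERMINES THE PHYSICAL ORDER OF DATA IN A TABLE. IT
-- CHANGES THE WAY THE DATA IS STORED ON DISK. ITS KEY CAN BE ONE COLUMN OR
-- SEVERAL COLUMNS, BUT A TABLE CAN HAVE ONLY ONE CLUSTERED INDEX.
--
-- NON-CLUSTERED INDEX: DOES NOT AFFECT THE PHYSICAL ORDER OF DATA IN A
-- TABLE. IT IS STORED SEPARATELY AND CONTAINS A POINTER TO THE ACTUAL DATA.
-- A TABLE CAN HAVE MULTIPLE NON-CLUSTERED INDEXES.
--
-- PERFORMANCE IMPACT: A CLUSTERED INDEX MAKES RANGE SCANS AND LOOKUPS ON ITS
-- KEY FAST BECAUSE THE ROWS THEMSELVES ARE STORED IN KEY ORDER. A
-- NON-CLUSTERED INDEX SPEEDS UP SELECTIVE LOOKUPS ON OTHER COLUMNS, BUT MAY
-- NEED AN EXTRA LOOKUP TO FETCH THE ROW. EVERY INDEX ADDS COST TO INSERT,
-- UPDATE AND DELETE.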

You might also like