FDS Slips Solution
FDS Slips Solution
Q.2 A) Write a Python program to create a Pie plot to get the frequency of the three
species of the Iris data (Use iris.csv)
B) Write a Python program to view basic statistical details of the data. (Use
winequality-red.csv)
import pandas as pd

# Load the red-wine quality dataset from the current working directory.
wine_data = pd.read_csv('winequality-red.csv')

# describe() produces the summary-statistics table (count, mean, std, min,
# quartiles, max) that is printed below.
statistical_details = wine_data.describe()
print(statistical_details)
SLIP-2 / SLIP-6
Q.2 A) Write a Python program for Handling Missing Value. Replace missing value of
salary, age column with mean of that column.(Use Data.csv file).
import pandas as pd

# Load the dataset; the 'salary' and 'age' columns are the ones imputed below.
data = pd.read_csv('Data.csv')
print("Original Data:")
print(data)

# Column means; mean() skips missing values by default.
mean_salary = data['salary'].mean()
mean_age = data['age'].mean()

# Assign the filled columns back instead of calling fillna(inplace=True) on a
# column selection: the in-place form may act on a temporary object and newer
# pandas versions warn about it.
data['salary'] = data['salary'].fillna(mean_salary)
data['age'] = data['age'].fillna(mean_age)

print("\nData after handling missing values:")
print(data)
import pandas as pd
import matplotlib.pyplot as plt

# NOTE(review): 'Datal.csv' may be a mis-scan of another file name
# (e.g. 'Data1.csv') - confirm against the files shipped with the slips.
data = pd.read_csv('Datal.csv')

# Dotted line with a circle marker at every (name, salary) point.
plt.figure(figsize=(10, 6))
plt.plot(data['name'], data['salary'], linestyle='dotted', marker='o')
plt.title('Name vs Salary')
plt.xlabel('Name')
plt.ylabel('Salary')
plt.xticks(rotation=45)  # tilt the x tick labels by 45 degrees
plt.show()
C) Download the heights and weights dataset and load the dataset from a given csv file
into a dataframe. Print the first 10 rows, the last 10 rows and 20 random rows; also
display the shape of the dataset.
import pandas as pd

# Load the heights/weights dataset into a DataFrame.
df = pd.read_csv('height_weight.csv')

# Each entry pairs a heading with a callable producing the value to print.
# The callables are evaluated lazily so every heading is printed before its
# value is computed, exactly as in a straight-line sequence of prints.
views = (
    ("First 10 rows:", lambda: df.head(10)),
    ("\nLast 10 rows:", lambda: df.tail(10)),
    ("\nRandom 20 rows:", lambda: df.sample(20)),
    ("\nShape of the dataset:", lambda: df.shape),
)
for heading, make_view in views:
    print(heading)
    print(make_view())
SLIP-3 /SLIP-18
A) Write a Python program to create box plots to see how each feature i.e. Sepal Length,
Sepal Width, Petal Length, Petal Width are distributed across the three species. (Use
iris.csv dataset)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

iris_data = pd.read_csv('iris.csv')

# One figure holding a 2x2 grid of box plots, one per measured feature.
plt.figure(figsize=(14, 8))
plt.subplots_adjust(wspace=0.5, hspace=0.5)

# (column to plot on the y axis, subplot title), in grid order.
feature_panels = [
    ('SepalLengthCm', 'Sepal Length Distribution'),
    ('SepalWidthCm', 'Sepal Width Distribution'),
    ('PetalLengthCm', 'Petal Length Distribution'),
    ('PetalWidthCm', 'Petal Width Distribution'),
]

# Subplot positions are 1-based, hence start=1.
for position, (column, panel_title) in enumerate(feature_panels, start=1):
    plt.subplot(2, 2, position)
    sns.boxplot(x='Species', y=column, data=iris_data)
    plt.title(panel_title)

plt.show()
B) Write a Python program to view basic statistical details of the data (Use Heights and
Weights Dataset)
import pandas as pd

# Read the heights/weights dataset from the working directory.
df = pd.read_csv('height_weight.csv')

# Build the summary-statistics table first, then print it.
statistical_details = df.describe()
print(statistical_details)
SLIP-4 / SLIP-5
A) Generate a random array of 50 integers and display them using a line chart, scatter
plot, histogram and box plot. Apply appropriate color, labels and styling options.
import numpy as np
import matplotlib.pyplot as plt

# Fixed seed so the same 50 integers are generated on every run.
np.random.seed(42)
random_array = np.random.randint(1, 100, 50)  # 50 integers in [1, 100)

# Four charts side by side in a single row.
plt.figure(figsize=(12, 4))

# 1) Line chart of value against array index.
plt.subplot(1, 4, 1)
plt.plot(random_array, marker='o', color='blue')
plt.title('Line Chart')
plt.xlabel('Index')
plt.ylabel('Value')

# 2) Scatter plot of value against array index.
plt.subplot(1, 4, 2)
plt.scatter(range(len(random_array)), random_array, color='green', marker='^')
plt.title('Scatter Plot')
plt.xlabel('Index')
plt.ylabel('Value')

# 3) Histogram of the values in 10 bins.
plt.subplot(1, 4, 3)
plt.hist(random_array, bins=10, color='orange', edgecolor='black')
plt.title('Histogram')
plt.xlabel('Value')
plt.ylabel('Frequency')

# 4) Horizontal box plot; patch_artist=True is required for the fill colour.
plt.subplot(1, 4, 4)
plt.boxplot(random_array, vert=False, widths=0.7, patch_artist=True,
            boxprops=dict(facecolor='pink'))
plt.title('Box Plot')
plt.xlabel('Value')

plt.tight_layout()
plt.show()
B) Write a Python program to print the shape, number of rows-columns, data types,
feature names and the description of the data. (Use User_Data.csv)
import pandas as pd

# Load the user dataset and report its basic structure.
df = pd.read_csv('user_data.csv')

# df.shape is (rows, columns); unpack once for the two count lines.
row_total, column_total = df.shape
print("Shape of the data:", df.shape)
print("Number of rows:", row_total)
print("Number of columns:", column_total)

# Heading and value on separate lines, emitted by a single print call.
print("\nData types:", df.dtypes, sep="\n")
print("\nFeature names:", df.columns, sep="\n")

print("\nDescription of the data:")
summary = df.describe()
print(summary)
SLIP-7
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

df = pd.read_csv('data2.csv')
print("Original Dataset:")
print(df)

# One-hot encode 'Country' into a separate frame; df itself is left unchanged.
df_onehot = pd.get_dummies(df, columns=['Country'], prefix='Country')
print("\nDataset after OneHot encoding:")
print(df_onehot)

# Label-encode 'Purchased', overwriting the column in df with integer codes.
label_encoder = LabelEncoder()
df['Purchased'] = label_encoder.fit_transform(df['Purchased'])
print("\nDataset after Label encoding:")
print(df)
SLIP-8
Write a program in Python to perform the following task: Standardizing Data (transform
the data into a standard Gaussian distribution with a mean of 0 and a standard deviation
of 1). (Use winequality-red.csv)
import pandas as pd
from sklearn.preprocessing import StandardScaler

df = pd.read_csv('winequality-red.csv')
print("Original Dataset:")
print(df.head())

# Standardize every column except the 'quality' column.
features = df.drop('quality', axis=1)
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)

# Rebuild a labelled DataFrame from the scaled values, then re-attach the
# untouched 'quality' column.
df_standardized = pd.DataFrame(features_standardized, columns=features.columns)
df_standardized['quality'] = df['quality']

print("\nDataset after Standardization:")
print(df_standardized.head())
SLIP-9
A) Generate a random array of 50 integers and display them using a line chart, scatter
plot. Apply appropriate color, labels and styling options.
import numpy as np
import matplotlib.pyplot as plt

# Fixed seed so the same 50 integers are generated on every run.
np.random.seed(42)
random_array = np.random.randint(1, 100, 50)  # 50 integers in [1, 100)

# Four charts side by side in a single row.
plt.figure(figsize=(12, 4))

# 1) Line chart of value against array index.
plt.subplot(1, 4, 1)
plt.plot(random_array, marker='o', color='blue')
plt.title('Line Chart')
plt.xlabel('Index')
plt.ylabel('Value')

# 2) Scatter plot of value against array index.
plt.subplot(1, 4, 2)
plt.scatter(range(len(random_array)), random_array, color='green', marker='^')
plt.title('Scatter Plot')
plt.xlabel('Index')
plt.ylabel('Value')

# 3) Histogram of the values in 10 bins.
plt.subplot(1, 4, 3)
plt.hist(random_array, bins=10, color='orange', edgecolor='black')
plt.title('Histogram')
plt.xlabel('Value')
plt.ylabel('Frequency')

# 4) Horizontal box plot; patch_artist=True is required for the fill colour.
plt.subplot(1, 4, 4)
plt.boxplot(random_array, vert=False, widths=0.7, patch_artist=True,
            boxprops=dict(facecolor='pink'))
plt.title('Box Plot')
plt.xlabel('Value')

plt.tight_layout()
plt.show()
B) Create two lists, one representing subject names and the other representing marks
obtained in those subjects. Display the data in a pie chart.
import pandas as pd

# NOTE(review): this script summarises the wine-quality dataset; it does not
# draw the subject/marks pie chart described in the task text preceding it.
df = pd.read_csv('winequality-red.csv')

# Heading paired with a callable producing the value; evaluation is deferred
# so each heading is printed before its value is computed.
report = [
    ("a) Describing the dataset:", lambda: df.describe()),
    ("\nb) Shape of the dataset:", lambda: df.shape),
    ("\nc) Display first 3 rows from the dataset:", lambda: df.head(3)),
]
for heading, compute in report:
    print(heading)
    print(compute())
SLIP-10
A) Write a Python program to display the column-wise mean and median for the SOCR-
HeightWeight dataset.
import pandas as pd

df = pd.read_csv('height_weight.csv')

# numeric_only=True restricts the aggregation to numeric columns, so a text
# column in the CSV (if any) is skipped instead of raising a TypeError.
print("Column-wise Mean:")
mean_values = df.mean(numeric_only=True)
print(mean_values)

print("\nColumn-wise Median:")
median_values = df.median(numeric_only=True)
print(median_values)
B) Write a Python program to compute the sum of Manhattan distances between all pairs of points.
import itertools


def manhattan_distance(point1, point2):
    """Return the Manhattan (L1) distance between two 2-D points.

    Args:
        point1: Indexable pair ``(x, y)``.
        point2: Indexable pair ``(x, y)``.

    Returns:
        ``|x1 - x2| + |y1 - y2|``.
    """
    return abs(point1[0] - point2[0]) + abs(point1[1] - point2[1])


points = [(1, 2), (3, 4), (5, 6), (7, 8)]

# combinations(points, 2) yields each unordered pair exactly once, so no
# distance is counted twice and no point is paired with itself.
total_distance = sum(
    manhattan_distance(point1, point2)
    for point1, point2 in itertools.combinations(points, 2)
)
print("Sum of Manhattan distance between all pairs of points:", total_distance)
SLIP-21
A) Import dataset “iris.csv”. Write a Python program to create a Bar plot to get the
frequency of the three species of the Iris data.
import pandas as pd
import matplotlib.pyplot as plt

iris_data = pd.read_csv('iris.csv')

# Number of rows per distinct value in the 'Species' column.
species_counts = iris_data['Species'].value_counts()

plt.figure(figsize=(8, 6))
# One bar per species, each with its own colour.
species_counts.plot(kind='bar', color=['skyblue', 'lightgreen', 'coral'])
plt.title('Frequency of Iris Species')
plt.xlabel('Species')
plt.ylabel('Count')
plt.xticks(rotation=0)  # keep the species labels horizontal
plt.show()
B) Write a Python program to create a histogram of the three species of the Iris data.
import pandas as pd
import matplotlib.pyplot as plt

iris_data = pd.read_csv('iris.csv')

plt.figure(figsize=(10, 6))

# Overlay one semi-transparent sepal-length histogram per species on the same
# axes; alpha=0.5 keeps overlapping bars visible.
for species in iris_data['Species'].unique():
    subset = iris_data[iris_data['Species'] == species]
    plt.hist(subset['SepalLengthCm'], bins=20, alpha=0.5, label=species)

plt.title('Histogram of Sepal Length for Each Iris Species')
plt.xlabel('Sepal Length')
plt.ylabel('Frequency')
plt.legend()  # uses the label= given to each hist call
plt.show()