Python Code for the Given Problems
SRB-30.073034
Problem 1:
# Central tendency: arithmetic mean and median of a small sample.
import numpy as np

data = [12, 15, 11, 20, 14, 18, 19, 10]
values = np.asarray(data)

# Mean: sum of the observations divided by how many there are.
mean = values.mean()
# Median: middle of the sorted observations; with an even sample size this is
# the average of the two central values.
median = np.median(values)

print("Mean:", mean)
print("Median:", median)
Problem 2:
# Mode: the most frequently occurring value in the dataset.
import numpy as np
from scipy import stats

data = [5, 7, 8, 5, 10, 7, 8, 7, 10]

# stats.mode returns a result object with `.mode` and `.count` fields.
mode = stats.mode(data)

# Older SciPy releases return `.mode` as a one-element array, while newer
# releases return a scalar for 1-D input, so indexing it directly with [0]
# raises IndexError there. np.atleast_1d accepts both shapes, which keeps the
# snippet working regardless of the installed SciPy version.
mode_value = np.atleast_1d(mode.mode)[0]
print("Mode:", mode_value)
Problem 3:
# Spread of a dataset: overall range and interquartile range (IQR).
import numpy as np

data = [3, 7, 8, 5, 12, 14, 21, 13, 18]
values = np.asarray(data)

# Range: distance between the largest and smallest observation.
range_value = values.max() - values.min()
print("Range:", range_value)

# IQR: distance between the 75th and 25th percentiles, i.e. the width of the
# middle half of the data.
q1, q3 = np.percentile(values, [25, 75])
iqr = q3 - q1
print("IQR:", iqr)
Problem 4:
# Sample variance and sample standard deviation.
import numpy as np

data = [4, 8, 6, 5, 3, 7, 10, 9]
sample = np.asarray(data)

# ddof=1 divides by (n - 1) instead of n, giving the sample (not population)
# variance.
variance = sample.var(ddof=1)
print("Variance:", variance)

# The standard deviation uses the same (n - 1) divisor.
std_dev = sample.std(ddof=1)
print("Standard Deviation:", std_dev)
Problem 5:
# Five-number summary: minimum, first quartile, median, third quartile, maximum.
import numpy as np

data = [22, 29, 15, 31, 25, 18, 23, 20]

# Each statistic is computed with NumPy and is therefore a NumPy scalar.
minimum = np.min(data)
q1 = np.percentile(data, 25)
median = np.median(data)
q3 = np.percentile(data, 75)
maximum = np.max(data)

# Printing a list shows the repr of every element, and under NumPy 2 the repr
# of a scalar looks like `np.int64(15)` / `np.float64(19.5)`. Converting each
# value to the matching built-in Python number with .item() keeps the printed
# summary readable on every NumPy version.
summary = [stat.item() for stat in (minimum, q1, median, q3, maximum)]
print("Five-Number Summary:", summary)
Problem 7:
# Z-score: how many sample standard deviations a value lies from the mean.
import numpy as np

data = [30, 35, 40, 45, 50, 55, 60]
sample = np.asarray(data)
target = 45  # observation whose z-score is reported

# Location and spread of the sample (ddof=1 -> sample standard deviation).
mean = sample.mean()
std_dev = sample.std(ddof=1)

# Standardize the target value against the sample.
z_score = (target - mean) / std_dev
print("Z-Score for 45:", z_score)
Problem 8:
# Pearson correlation coefficient between two variables.
import numpy as np

X = [2, 4, 6, 8, 10]
Y = [1, 2, 3, 4, 5]

# np.corrcoef returns the 2x2 correlation matrix of X and Y; the off-diagonal
# entry is the correlation between the two variables.
corr_matrix = np.corrcoef(X, Y)
correlation = corr_matrix[0, 1]
print("Correlation Coefficient:", correlation)
Problem 9:
# Sample covariance between two variables.
import numpy as np

X = [1, 3, 5, 7, 9]
Y = [2, 4, 6, 8, 10]

# np.cov returns the 2x2 covariance matrix; the diagonal holds the variances
# of X and Y and the off-diagonal entry is their covariance.
xy_cov_matrix = np.cov(X, Y)
covariance = xy_cov_matrix[0, 1]
print("Covariance:", covariance)
Problem 10:
# Normal distribution: share of students taller than a given height.
import scipy.stats as stats

# Parameters of the assumed normal height distribution (cm) and the cut-off.
mean = 170
std_dev = 10
value = 180

# Standardize the cut-off so the standard normal CDF can be used.
z_score = (value - mean) / std_dev

# Despite its name, `percentile` holds the cumulative probability
# P(Z <= z_score), a fraction between 0 and 1.
percentile = stats.norm.cdf(z_score)

# The upper tail is the complement of the CDF, expressed as a percentage.
upper_tail = 1 - percentile
percentage_taller = upper_tail * 100
print("Percentage of students taller than 180 cm:", percentage_taller)
Plotting
Problem 3: Range and Interquartile Range (IQR)
# Horizontal box plot visualizing the range and interquartile range.
import numpy as np
import matplotlib.pyplot as plt

data = [3, 7, 8, 5, 12, 14, 21, 13, 18]

# Draw on an explicit Axes object instead of the implicit pyplot state.
fig, ax = plt.subplots()
ax.boxplot(data, vert=False)  # vert=False lays the box out horizontally
ax.set_title("Box Plot of the Dataset")
ax.set_xlabel("Value")
plt.show()
Problem 4: Variance and Standard Deviation
# Histogram of the dataset with dashed markers for the mean and for one
# sample standard deviation on either side of it.
import numpy as np
import matplotlib.pyplot as plt

data = [4, 8, 6, 5, 3, 7, 10, 9]

# Location and spread (ddof=1 -> sample standard deviation).
mean = np.mean(data)
std_dev = np.std(data, ddof=1)

# Line styling shared by all three vertical markers.
dashed = dict(linestyle='dashed', linewidth=1)

fig, ax = plt.subplots()
ax.hist(data, bins=8, edgecolor='black', alpha=0.7)
ax.axvline(mean, color='r', label=f'Mean: {mean}', **dashed)
# Only the upper marker carries a label so the legend lists the standard
# deviation once.
ax.axvline(mean + std_dev, color='g',
           label=f'Standard Deviation: {std_dev}', **dashed)
ax.axvline(mean - std_dev, color='g', **dashed)
ax.legend()
ax.set_title("Histogram of the Dataset with Mean and Standard Deviation")
ax.set_xlabel("Value")
ax.set_ylabel("Frequency")
plt.show()
Problem 7: Z-Score
# Visualize the normal distribution fitted to the data and mark the value 45
# together with its z-score.
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

data = [30, 35, 40, 45, 50, 55, 60]

# Mean and sample standard deviation (ddof=1) of the data.
mean = np.mean(data)
std_dev = np.std(data, ddof=1)

# Evaluate the normal density on 100 evenly spaced points spanning the data.
x = np.linspace(min(data), max(data), 100)
y = stats.norm.pdf(x, mean, std_dev)

# Z-score for 45: distance from the mean in units of the standard deviation.
z_score = (45 - mean) / std_dev

# Plot the density curve and a dashed vertical marker at the value 45.
plt.plot(x, y, label='Normal Distribution')
# The label must be a single-line string literal: a single-quoted f-string
# cannot contain a raw line break, so splitting it is a SyntaxError.
plt.axvline(45, color='r', linestyle='dashed', linewidth=1,
            label=f'Z-Score for 45: {z_score:.2f}')
plt.legend()
plt.title("Normal Distribution with Z-Score for 45")
plt.xlabel("Value")
plt.ylabel("Density")
plt.show()
Problem 8: Correlation
# Scatter plot visualizing the correlation between two variables.
import numpy as np
import matplotlib.pyplot as plt

X = [2, 4, 6, 8, 10]
Y = [1, 2, 3, 4, 5]

# Off-diagonal entry of the 2x2 correlation matrix = correlation of X and Y.
correlation = np.corrcoef(X, Y)[0, 1]

# Draw the points and show the coefficient (two decimals) in the title.
fig, ax = plt.subplots()
ax.scatter(X, Y)
ax.set(
    title=f"Scatter Plot of X and Y (Correlation: {correlation:.2f})",
    xlabel="X",
    ylabel="Y",
)
plt.show()
Extra 1:
# Compute the Pearson correlation of two variables, print it, and visualize
# the relationship with a scatter plot.
import numpy as np
import matplotlib.pyplot as plt

# Example data
X = [2, 4, 6, 8, 10]
Y = [1, 2, 3, 4, 5]

# The correlation matrix is 2x2; entry [0, 1] relates X to Y.
pearson_matrix = np.corrcoef(X, Y)
correlation = pearson_matrix[0, 1]
print("Pearson Correlation Coefficient:", correlation)

# Scatter plot on an explicit Axes, titled with the rounded coefficient.
fig, ax = plt.subplots()
ax.scatter(X, Y)
ax.set_title(f"Scatter Plot of X and Y (Correlation: {correlation:.2f})")
ax.set_xlabel("X")
ax.set_ylabel("Y")
plt.show()
Extra 2:
# Standardize a dataset (zero mean, unit variance) with scikit-learn's
# StandardScaler.
import numpy as np
from sklearn.preprocessing import StandardScaler

# Example dataset as a single-feature column: one row per observation.
data = np.array([70, 75, 80, 85, 90]).reshape(-1, 1)

# Learn the scaling parameters first, then apply them to the same data.
# NOTE(review): StandardScaler presumably scales by the population standard
# deviation (ddof=0), unlike the ddof=1 used in the other snippets - verify
# against the scikit-learn documentation.
scaler = StandardScaler().fit(data)
standardized_data = scaler.transform(data)

# Flatten the column vectors so each prints on a single line.
print("Original Data:", data.ravel())
print("Standardized Data:", standardized_data.ravel())
Extra 3:
# Compute the covariance matrix of two variables and read off their covariance.
import numpy as np

# Example data
X = [1, 2, 3, 4, 5]
Y = [2, 4, 6, 8, 10]

# 2x2 covariance matrix: variances of X and Y on the diagonal, covariance of
# X and Y off the diagonal.
cov_matrix = np.cov(X, Y)

# First row, second column: covariance between X and Y. A positive value
# means the two variables tend to increase together.
first_row = cov_matrix[0]
covariance = first_row[1]

print("Covariance matrix:\n", cov_matrix)
print("Covariance between X and Y:", covariance)