SH Assignment
SH Assignment
# Hand-entered sample table: 'n' numbers the rows 1..20, while 'z' and 'y'
# hold the two measured variables row by row (later cells compare them
# element-wise, so their order must stay aligned).
data = {
    'n': np.arange(1, 21),
    'z': [1.7, 6.26, 7.56, 7.92, 0.96, 2.47, 2.55, 0.28, 1.34, 0.71,
          1.66, 2.99, 8.71, 0.09, 0.62, 0.99, 10.27, 2.96, 5.54, 3.61],
    'y': [1.3, 17.02, 19.74, 12.01, 0.66, 1.8, 15.91, 0.62, 2.15, 2.07,
          4.68, 2.74, 11.72, 0.24, 2.3, 0.52, 5.67, 3.17, 5.92, 5.03],
}

# Guard against a dropped or duplicated value when the lists are edited.
assert len(data['n']) == len(data['z']) == len(data['y'])

# Array views of the two variables, used by every question below.
z = np.array(data['z'])
y = np.array(data['y'])
[2]: # Question 1
# Bin edges every 5 units, extended far enough to cover the largest z value.
# hist() ignores samples outside the outermost edges, so fixed edges of
# [0, 5, 10] would silently leave z = 10.27 out of the plot.
bin_width = 5
upper_edge = bin_width * np.ceil(z.max() / bin_width)
bin_edges = np.arange(0, upper_edge + bin_width, bin_width)

plt.hist(z, bins=bin_edges, edgecolor='black', alpha=0.7)
plt.title('Histogram of z')
plt.xlabel('Value Ranges')
plt.ylabel('Frequency')
plt.grid(axis='y')
plt.show()

# Share of samples in the half-open interval [5, 10).
fraction = np.sum((z >= 5) & (z < 10)) / len(z)
print(f"Fraction of data with z-values between 5 and 10: {fraction:.2f}")
1
Fraction of data with z-values between 5 and 10: 0.25
[12]: # Question 2
# Empirical cumulative frequencies: the i-th smallest value (1-based) is
# paired with i / N, so the largest observation maps to 1.0.
z_sorted, y_sorted = np.sort(z), np.sort(y)
z_cumulative = np.arange(1, z_sorted.size + 1) / z_sorted.size
y_cumulative = np.arange(1, y_sorted.size + 1) / y_sorted.size
2
5 0.99 0.30
6 1.34 0.35
7 1.66 0.40
8 1.70 0.45
9 2.47 0.50
10 2.55 0.55
11 2.96 0.60
12 2.99 0.65
13 3.61 0.70
14 5.54 0.75
15 6.26 0.80
16 7.56 0.85
17 7.92 0.90
18 8.71 0.95
19 10.27 1.00
[26]: # Question 3
def calculate_statistics(data):
    """Return summary statistics for a one-dimensional numeric sample.

    Parameters
    ----------
    data : array-like
        Numeric observations; must contain at least one value.

    Returns
    -------
    tuple
        ``(mean, variance, skewness, quantiles, median, iqr)`` where
        ``variance`` is the sample variance (``ddof=1``), ``skewness`` is
        ``scipy.stats.skew`` with its default settings, ``quantiles`` is the
        array of the 0.25, 0.5 and 0.75 quantiles, ``median`` is the middle
        entry of ``quantiles`` and ``iqr`` is the distance between the outer
        two.

    Raises
    ------
    ValueError
        If ``data`` is empty.
    """
    values = np.asarray(data, dtype=float)
    if values.size == 0:
        # Fail with a clear message instead of propagating NaNs or an
        # obscure error from deep inside the NumPy/SciPy routines.
        raise ValueError("calculate_statistics() requires at least one observation")

    mean = np.mean(values)
    variance = np.var(values, ddof=1)
    skewness = skew(values)
    quantiles = np.quantile(values, [0.25, 0.5, 0.75])
    iqr = quantiles[2] - quantiles[0]
    return mean, variance, skewness, quantiles, quantiles[1], iqr
3
# Run the same summary helper on both variables; each result is the tuple
# returned by calculate_statistics (z is evaluated first, then y).
z_stats, y_stats = (calculate_statistics(sample) for sample in (z, y))
Statistics for z:
Mean: 3.46, Variance: 9.76, Skewness: 0.85
Quantiles: [0.9825 2.51 5.72 ], Median: 2.51, Interquantile Range: 4.74
Statistics for y:
Mean: 5.76, Variance: 36.94, Skewness: 1.14
Quantiles: [1.675 2.955 7.37 ], Median: 2.955, Interquantile Range: 5.70
[15]: # Question 4
# Side-by-side box plots of both variables, with the mean marked.
# The tick labels are set through plt.xticks rather than boxplot(labels=...):
# Matplotlib 3.9 renamed that keyword to tick_labels and warns on the old
# name, whereas xticks works on older and newer releases alike.
plt.boxplot([z, y], showmeans=True)
plt.xticks([1, 2], ['z', 'y'])  # boxes sit at positions 1..N by default
plt.title('Box-and-Whisker Plot of z and y')
plt.ylabel('Values')
plt.grid(axis='y')
plt.show()
C:\Users\satya\AppData\Local\Temp\ipykernel_9812\2455904157.py:2:
MatplotlibDeprecationWarning: The 'labels' parameter of boxplot() has been
renamed 'tick_labels' since Matplotlib 3.9; support for the old name will be
dropped in 3.11.
plt.boxplot([z, y], labels=['z', 'y'], showmeans=True)
4
[22]: # Question 5
# Arithmetic mean of each variable.
z_mean, y_mean = z.mean(), y.mean()
[24]: # Question 6
critical_concentration = 5
site_area = 8000

# Share of samples on each side of the critical concentration.
fraction_below_critical = np.sum(z < critical_concentration) / len(z)
fraction_above_critical = np.sum(z >= critical_concentration) / len(z)

# The area to clean up is estimated from the samples that REACH OR EXCEED the
# critical concentration, scaled to the whole site; scaling the below-threshold
# share instead would report the area that needs no cleanup.
# NOTE(review): assumes cleanup targets exceedances - confirm against the
# wording of Question 6.
cleanup_area = fraction_above_critical * site_area
print(f"Approximate area of the site cleaned up: {cleanup_area:.2f} m²")
5
[27]: # Question 7
# Joint condition: both thresholds must hold for the same sample.
both_below = np.logical_and(z < 5, y < 10)
fraction = both_below.sum() / len(z)
print(f"Fraction of data with z < 5 and y < 10: {fraction:.2f}")
[10]: # Question 8
# Union condition: a sample counts if at least one of the thresholds holds.
fraction_z_less_5_or_y_less_10 = np.sum((z < 5) | (y < 10)) / len(z)
# The format spec is kept on one line; splitting ":.2f" across a line break
# leaves the f-string unterminated.
print(f"Fraction of data with z < 5 or y < 10: {fraction_z_less_5_or_y_less_10:.2f}")