# ML Theory 2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Read the power-consumption CSV (path is relative to the working directory)
# into a DataFrame.
data = pd.read_csv(filepath_or_buffer='powerconsumption_3.csv')
# Bare expression: echoes the frame when this runs as a notebook cell.
data
PowerConsumption_Zone2 PowerConsumption_Zone3
0 16128.87538 20240.96386
1 19375.07599 20131.08434
2 19006.68693 19668.43373
3 18361.09422 18899.27711
4 17872.34043 18442.40964
... ... ...
52411 26857.31820 14780.31212
52412 26124.57809 14428.81152
52413 25277.69254 13806.48259
52414 24692.23688 13512.60504
52415 24055.23167 13345.49820
# => The Central Limit Theorem (CLT) states that, regardless of the original
# population distribution, the sampling distribution of the sample mean
# approaches a normal distribution as the sample size increases, provided the
# samples are independent and identically distributed with finite variance.
# This holds true even if the population itself is not normally distributed.
# The theorem is fundamental in probability and statistics, as it justifies
# the use of the normal distribution in inferential statistics, enabling
# hypothesis testing and confidence interval estimation.
# Q.2] Plot the distribution of energy consumption for all the zones and
# check for normality.
# Kernel-density estimate of the Zone 1 readings: a visual check of how close
# the raw consumption distribution is to a normal curve.
# `fill` is a boolean flag, so pass True rather than the string 'True'
# (a non-empty string is merely truthy).
zone1_axes = sns.kdeplot(
    data['PowerConsumption_Zone1'],
    color='yellow',
    fill=True,
)
# seaborn returns the Axes it drew on; set the title on it directly.
zone1_axes.set_title("kde distribution")
# Sampling plan: 10000 samples, each holding 50 readings.
total_sample = 10000
sample_size = 50
# Draw every reading in one call, producing a (total_sample, sample_size)
# array. np.random.choice samples with replacement by default.
sample = np.random.choice(
    data['PowerConsumption_Zone3'],
    size=(total_sample, sample_size),
)
# Bare expression: echoes the array shape when run as a notebook cell.
sample.shape
(10000, 50)
# Reduce the (total_sample, sample_size) array along each axis in turn.
# axis=1 collapses the columns: one mean per drawn sample.
sample_mean1 = sample.mean(axis=1)
# axis=0 collapses the rows: one mean per draw position, taken across all
# of the samples.
sample_mean2 = sample.mean(axis=0)
# Side-by-side KDE plots of the two mean arrays (per-sample means on the
# left, per-position means on the right).
kde_fig, (kde_left, kde_right) = plt.subplots(1, 2, figsize=(10, 6))

# Titles go through Axes.set_title(). Writing `plt.title = "..."` would rebind
# the pyplot function to a string: no title is drawn and every later
# plt.title(...) call raises TypeError.
sns.kdeplot(sample_mean1, color='yellow', fill=True, ax=kde_left)
kde_left.set_title("mean by first type")

sns.kdeplot(sample_mean2, color='blue', fill=True, ax=kde_right)
kde_right.set_title("mean by second type")

plt.show()
# Side-by-side histograms of the two mean arrays (per-sample means on the
# left, per-position means on the right).
hist_fig, (hist_left, hist_right) = plt.subplots(1, 2, figsize=(10, 6))

# Titles go through Axes.set_title(). Writing `plt.title = "..."` would rebind
# the pyplot function to a string: no title is drawn and every later
# plt.title(...) call raises TypeError.
sns.histplot(sample_mean1, color='red', fill=True, ax=hist_left)
hist_left.set_title("mean by first type")

sns.histplot(sample_mean2, color='grey', fill=True, ax=hist_right)
hist_right.set_title("mean by second type")

plt.show()
# Side-by-side horizontal box plots of the two mean arrays (per-sample means
# on the left, per-position means on the right), on a default-sized figure.
box_fig, (box_left, box_right) = plt.subplots(1, 2)

# Titles go through Axes.set_title(). Writing `plt.title = "..."` would rebind
# the pyplot function to a string: no title is drawn and every later
# plt.title(...) call raises TypeError.
sns.boxplot(sample_mean1, color='pink', orient='h', ax=box_left)
box_left.set_title("mean by first type")

sns.boxplot(sample_mean2, color='yellow', orient='h', ax=box_right)
box_right.set_title("mean by second type")

plt.show()
#median