Python Programs
Python Programs
1. Probability
a. Calculating simple probabilities.
b. Applications of probability distributions to real-life problems.
# Simple probability: chance of rolling a 4 on a six-sided die.
# Only one face (the 4) out of the six faces counts as favorable.
total_outcomes, favorable_outcomes = 6, 1
# Probability = favorable outcomes / total outcomes
probability_4 = favorable_outcomes / total_outcomes
print(f"Probability of rolling a 4: {probability_4}")
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, poisson, binom, expon
# Normal Distribution - Quality Control example
# Draw 1000 random samples from a normal distribution, then overlay the
# theoretical density curve on a histogram of those samples.
mean = 50
std_dev = 10
samples = np.random.normal(mean, std_dev, 1000)
# Grid of 100 points spanning four standard deviations either side of the mean
x = np.linspace(mean - 4*std_dev, mean + 4*std_dev, 100)
fig, ax = plt.subplots(figsize=(8, 6))
# density=True normalizes the histogram so it is comparable with the PDF curve
ax.hist(samples, bins=30, density=True, alpha=0.6, color='blue')
ax.plot(x, norm.pdf(x, mean, std_dev), 'r-', lw=2, label='Normal Distribution')
ax.set_title('Normal Distribution Example (Quality Control)')
ax.set_xlabel('Values')
ax.set_ylabel('Probability Density')
ax.legend()
ax.grid(True)
plt.show()
-------------------------------------------------------------------------------------------
Page 1 | 7
BCA V SEM DA Part-B LAB MANUAL(NEP)
OUTPUT
Probability of rolling a 4: 0.16666666666666666
-------------------------------------------------------------------------------------------
Page 2 | 7
BCA V SEM DA Part-B LAB MANUAL(NEP)
2. Test of Significance
a. T-Test: one sample, two independent samples and paired
b. ANOVA & Chi-Square Test.
import pandas as pd
from scipy import stats
# Load the Titanic dataset into a DataFrame
titanic_data = pd.read_csv('train.csv')  # Replace 'train.csv' with your dataset file
# One Sample T-Test: compare the mean passenger age with a hypothetical mean
hypothetical_mean_age = 30
# Rows with a missing Age are dropped before the test is run
known_ages = titanic_data['Age'].dropna()
ttest_one_sample = stats.ttest_1samp(known_ages, hypothetical_mean_age)
print("One Sample T-Test:")
print("T-statistic:", ttest_one_sample.statistic)
print("p-value:", ttest_one_sample.pvalue)
-------------------------------------------------------------------------------------------
Page 3 | 7
BCA V SEM DA Part-B LAB MANUAL(NEP)
# Two Independent Samples T-Test: compare the ages of male and female passengers.
# Each group is selected by its 'Sex' value; rows with a missing Age are dropped.
is_male = titanic_data['Sex'] == 'male'
is_female = titanic_data['Sex'] == 'female'
male_ages = titanic_data.loc[is_male, 'Age'].dropna()
female_ages = titanic_data.loc[is_female, 'Age'].dropna()
ttest_two_ind_samples = stats.ttest_ind(male_ages, female_ages)
print("\nTwo Independent Samples T-Test:")
print("T-statistic:", ttest_two_ind_samples.statistic)
print("p-value:", ttest_two_ind_samples.pvalue)
# Paired T-Test: compare each fare with a simulated "after" fare.
# The "after" values are not real data: they are the original fares scaled up.
fare_increase_factor = 1.2  # Assuming a 20% increase in fares
before_fares = titanic_data['Fare'].dropna()
after_fares = before_fares * fare_increase_factor
ttest_paired = stats.ttest_rel(before_fares, after_fares)
print("\nPaired T-Test:")
print("T-statistic:", ttest_paired.statistic)
print("p-value:", ttest_paired.pvalue)
# ANOVA Test: impact of passenger class on fares.
# Build one group of fares per passenger class (1, 2, 3), dropping missing
# values, and pass the three groups to the one-way ANOVA.
fares_by_class = [
    titanic_data.loc[titanic_data['Pclass'] == pclass, 'Fare'].dropna()
    for pclass in (1, 2, 3)
]
anova_result = stats.f_oneway(*fares_by_class)
print("\nANOVA Test Result:")
print("F-statistic:", anova_result.statistic)
print("p-value:", anova_result.pvalue)
# Chi-Square Test: relationship between survival status and passenger class.
# Cross-tabulate the two columns into a contingency table of counts.
chi2_table = pd.crosstab(titanic_data['Survived'], titanic_data['Pclass'])
chi2_result = stats.chi2_contingency(chi2_table)
# The first two entries of the result are the test statistic and the p-value
chi2_statistic = chi2_result[0]
chi2_p_value = chi2_result[1]
print("\nChi-Square Test Result:")
print("Chi-Square statistic:", chi2_statistic)
print("p-value:", chi2_p_value)
OUTPUT
One Sample T-Test:
T-statistic: -0.5534583115970276
p-value: 0.5801231230388639
Paired T-Test:
T-statistic: -19.344277455944212
p-value: 7.255925461999273e-70
-------------------------------------------------------------------------------------------
Page 4 | 7
BCA V SEM DA Part-B LAB MANUAL(NEP)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
-------------------------------------------------------------------------------------------
Page 5 | 7
BCA V SEM DA Part-B LAB MANUAL(NEP)
# NOTE(review): X_test, y_test, y_pred, xx, yy, Z, X_iris and y_iris are not
# defined anywhere in the visible part of this manual. The code that creates
# them (presumably the model fitting and prediction steps - confirm against
# the original lab manual) must run before this section.

# Figure 1: test points drawn as a black scatter, with the predicted values
# drawn over them as a blue line.
plt.figure(figsize=(8, 4))
plt.scatter(X_test, y_test, color='black')
plt.plot(X_test, y_pred, color='blue', linewidth=3)
plt.title('Linear Regression Prediction')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
# Figure 2: filled contour of Z over the (xx, yy) grid, with the first two
# columns of X_iris scattered on top and colored by y_iris.
# Presumably Z holds the classifier's predictions on the grid - TODO confirm.
plt.figure(figsize=(8, 6))
plt.contourf(xx, yy, Z, alpha=0.4)
plt.scatter(X_iris[:, 0], X_iris[:, 1], c=y_iris, s=20, edgecolor='k')
plt.title('Logistic Regression (Iris dataset)')
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.grid(True)
# Display the figures created above.
plt.show()
-------------------------------------------------------------------------------------------
Page 6 | 7
BCA V SEM DA Part-B LAB MANUAL(NEP)
OUTPUT
Correlation Coefficient: 0.9529657473628446
Mean Squared Error: 2.6147980548680083
R-squared Score: 0.9287298556395622
-------------------------------------------------------------------------------------------
Page 7 | 7