DSC Project 442
DSC Project 442
CSV File: bank_system_data.csv
DESCRIPTIVE STATISTICS:
Here I have applied descriptive statistics (mean, median, mode, IQR, variance,
standard deviation, and range) to the first 100 rows of the CSV file I uploaded.
CODE:
import pandas as pd
# Load the dataset from the Google Drive path and preview the first 100 rows.
# NOTE(review): outside a notebook the bare `df.head(100)` expression has no
# visible effect; it only renders when it is the last expression of a cell.
path = "/content/drive/MyDrive/bank_system_data.csv"
df = pd.read_csv(path)
df.head(100)

# Second load attempt, this time from the current working directory.
# NOTE(review): this overwrites the DataFrame loaded above, and on failure
# `df` is set to None even if the first read succeeded -- presumably these
# were two independent notebook cells; confirm which load is intended.
try:
    df = pd.read_csv('bank_system_data.csv')
    display(df.head())  # `display` is provided by IPython/Jupyter
    print(df.shape)
except FileNotFoundError:
    print("Error: 'bank_system_data.csv' not found.")
    df = None
except pd.errors.ParserError:
    print("Error: Could not parse the CSV file.")
    df = None
except Exception as e:
    # Last-resort handler: report the problem and leave `df` unset (None).
    print(f"An unexpected error occurred: {e}")
    df = None
import numpy as np
# Numerical features
# NOTE(review): the original definitions were missing at this point; the
# column list below mirrors the one used later in this report, and the
# 100-row subset follows the report's description -- confirm both.
numerical_features = ['Balance', 'Loan_Amount', 'Interest_Rate']
top_100_df = df.head(100)

# Descriptive statistics
# For each numerical column, print the mean, median, mode, IQR, variance,
# standard deviation and range of the 100-row subset.
for feature in numerical_features:
    values = top_100_df[feature]
    # IQR is the distance between the 75th and 25th percentiles.
    q1, q3 = np.percentile(values, [25, 75])

    print(f"Descriptive statistics for {feature}:")
    print(f"Mean: {np.mean(values)}")
    print(f"Median: {np.median(values)}")
    # mode() can return several values; the first one is reported.
    print(f"Mode: {values.mode()[0]}")
    print(f"IQR: {q3 - q1}")
    # NOTE: np.var / np.std default to ddof=0 (population formulas).
    print(f"Variance: {np.var(values)}")
    print(f"Standard Deviation: {np.std(values)}")
    print(f"Range: {np.max(values) - np.min(values)}")
    print("-" * 20)
OUTPUT:
CODE:
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
# Assuming 'df' is your DataFrame and 'Balance', 'Loan_Amount', 'Interest_Rate'
# are your numerical features.
numerical_features = ['Balance', 'Loan_Amount', 'Interest_Rate']
Args:
data: The pandas DataFrame containing the data.
feature: The name of the numerical feature to analyze.
"""
skewness = stats.skew(data[feature])
kurtosis = stats.kurtosis(data[feature])
# Interpretation of skewness
if skewness > 0:
print("Distribution is right-skewed (positive skew).")
elif skewness < 0:
print("Distribution is left-skewed (negative skew).")
else:
print("Distribution is approximately symmetrical.")
# Interpretation of kurtosis
if kurtosis > 0:
print("Distribution is leptokurtic (heavy-tailed).")
elif kurtosis < 0:
print("Distribution is platykurtic (light-tailed).")
else:
print("Distribution is mesokurtic (normal-tailed).")
# Display histogram
plt.figure(figsize=(8, 6))
sns.histplot(data[feature], kde=True)
plt.title(f"Distribution of {feature}")
plt.xlabel(feature)
plt.ylabel("Frequency")
plt.show()
print("-" * 30)
OUTPUT:
Analysis for Balance:
Skewness: -0.02
Kurtosis: -1.16
Distribution is left-skewed (negative skew).
Distribution is platykurtic (light-tailed).
------------------------------
Analysis for Loan_Amount:
Skewness: -0.08
Kurtosis: -1.21
Distribution is left-skewed (negative skew).
Distribution is platykurtic (light-tailed).
INFERENTIAL STATISTICS:
Here I have used Python to perform LINEAR REGRESSION and represent the result
graphically.
CODE:
import pandas as pd
from scipy import stats
import statsmodels.api as sm
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np
OUTPUT:
—>Logistic Regression:
CODE:
OUTPUT: