Model2.ipynb - Colab
Model2.ipynb - Colab
ipynb - Colab
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import fbeta_score, roc_auc_score, classification_report, confusion_matrix
import os

# Locations of the training and test CSV files read later in the notebook.
train_path = "/content/train.csv"
test_path = "/content/test.csv"

# Print one True/False line per path (train first, then test) so a missing
# upload is visible before any loading is attempted.
for _csv_path in (train_path, test_path):
    print(os.path.exists(_csv_path))
True
True
# Fail fast with an explicit message if either CSV is missing. Both paths are
# checked before anything is read, so no frame is loaded when one is absent.
train_file_present = os.path.isfile(train_path)
if not train_file_present:
    raise FileNotFoundError(f"Train file not found at {train_path}")

test_file_present = os.path.isfile(test_path)
if not test_file_present:
    raise FileNotFoundError(f"Test file not found at {test_path}")

# Load the two datasets into DataFrames.
train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)
smoking_status stroke
count 2555 2554
unique 4 4
top never smoked 0
freq 945 2429
mean NaN NaN
std NaN NaN
min NaN NaN
25% NaN NaN
50% NaN NaN
75% NaN NaN
max NaN NaN
https://colab.research.google.com/drive/1UbZkmbbE6dt3FGmI0-hzfE9Rn7LMAa7M#scrollTo=UcGITmAB3D1r&printMode=true 2/11
2/23/25, 2:52 PM Model2.ipynb - Colab
bmi 0
smoking_status 0
stroke 1
dtype: int64
https://colab.research.google.com/drive/1UbZkmbbE6dt3FGmI0-hzfE9Rn7LMAa7M#scrollTo=UcGITmAB3D1r&printMode=true 3/11
2/23/25, 2:52 PM Model2.ipynb - Colab
# Winsorize BMI: limit extreme values to the 1st and 99th percentiles.
# Both caps are computed from the training data only and then applied
# unchanged to the test data, so the test set does not influence the bounds.
bmi_lower_cap = train_df['bmi'].quantile(0.01)
bmi_upper_cap = train_df['bmi'].quantile(0.99)

train_df['bmi'] = train_df['bmi'].clip(lower=bmi_lower_cap, upper=bmi_upper_cap)
test_df['bmi'] = test_df['bmi'].clip(lower=bmi_lower_cap, upper=bmi_upper_cap)
# For every categorical column, report the number of distinct values, the
# distinct values themselves, and how often each one occurs in the training
# data. The three lines are emitted by a single print call joined with "\n";
# the last string already ends in a newline, which leaves a blank line
# between consecutive features.
for feature in cat_features:
    print(
        f"{feature} unique values count: {train_df[feature].nunique()}",
        f"{feature} unique values: {list(train_df[feature].unique())}",
        f"{feature} value counts:\n{train_df[feature].value_counts()}\n",
        sep="\n",
    )
https://colab.research.google.com/drive/1UbZkmbbE6dt3FGmI0-hzfE9Rn7LMAa7M#scrollTo=UcGITmAB3D1r&printMode=true 5/11
2/23/25, 2:52 PM Model2.ipynb - Colab
https://colab.research.google.com/drive/1UbZkmbbE6dt3FGmI0-hzfE9Rn7LMAa7M#scrollTo=UcGITmAB3D1r&printMode=true 6/11
2/23/25, 2:52 PM Model2.ipynb - Colab
# Replace the "other" gender label with the most common gender observed in
# the training data. The mode is taken from train_df only and the same
# substitution is applied to both frames.
most_frequent_gender = train_df["gender"].mode()[0]
for _frame in (train_df, test_df):
    _frame["gender"] = _frame["gender"].replace("other", most_frequent_gender)
# Plot the pairwise correlation matrix of numeric_cols as an annotated
# heatmap (values shown to two decimals, cells separated by thin lines).
_heatmap_style = {
    "annot": True,
    "cmap": "coolwarm",
    "fmt": ".2f",
    "linewidths": 0.5,
}

plt.figure(figsize=(10,6))
sns.heatmap(numeric_cols.corr(), **_heatmap_style)
plt.title("Feature Correlation Matrix")
plt.show()
https://colab.research.google.com/drive/1UbZkmbbE6dt3FGmI0-hzfE9Rn7LMAa7M#scrollTo=UcGITmAB3D1r&printMode=true 7/11
2/23/25, 2:52 PM Model2.ipynb - Colab
Correlation Matrix:
# Standardize the numeric features. The scaler is fitted on the training
# data only; the test data is transformed with those same fitted statistics.
scaler = StandardScaler()
_train_scaled_values = scaler.fit_transform(train_df[num_features])
_test_scaled_values = scaler.transform(test_df[num_features])

# Wrap the scaled arrays back into DataFrames with the original column names.
# NOTE(review): no index is passed, so both frames get a fresh default index
# rather than the index of train_df / test_df.
scaled_train = pd.DataFrame(_train_scaled_values, columns=num_features)
scaled_test = pd.DataFrame(_test_scaled_values, columns=num_features)
smoking_status_smokes
0 1.0
1 0.0
2 0.0
3 0.0
4 0.0
# Target vector: the "stroke" column of the training frame, cast to int.
y_train_final = train_df["stroke"].astype(int)
# Train-Test Split
# Holds out 20% of the rows (test_size=0.2) as a validation set, with a fixed
# seed (random_state=42) so the split is reproducible.
# NOTE(review): the call below is cut off at the right margin in this export
# (it ends mid-token at "strati"). Restore the rest of the line from the
# original notebook before running -- presumably a stratify argument; confirm.
X_train, X_val, y_train, y_val = train_test_split(X_train_final, y_train_final, test_size=0.2, random_state=42, strati
print("Preprocessing complete.")
Preprocessing complete.
https://colab.research.google.com/drive/1UbZkmbbE6dt3FGmI0-hzfE9Rn7LMAa7M#scrollTo=UcGITmAB3D1r&printMode=true 9/11
2/23/25, 2:52 PM Model2.ipynb - Colab
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:33] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:33] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:33] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:33] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:33] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:33] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:33] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:34] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:34] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:34] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:34] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:34] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:34] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:35] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:35] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:35] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:35] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
https://colab.research.google.com/drive/1UbZkmbbE6dt3FGmI0-hzfE9Rn7LMAa7M#scrollTo=UcGITmAB3D1r&printMode=true 10/11
2/23/25, 2:52 PM Model2.ipynb - Colab
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:35] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:35] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:36] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:36] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:36] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:36] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:37] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:37] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:37] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:37] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:38] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:38] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:40] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:40] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:41] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:41] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:42] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
warnings.warn(smsg, UserWarning)
/usr/local/lib/python3.11/dist-packages/xgboost/core.py:158: UserWarning: [12:48:42] WARNING: /workspace/src/learn
Parameters: { "use_label_encoder" } are not used.
https://colab.research.google.com/drive/1UbZkmbbE6dt3FGmI0-hzfE9Rn7LMAa7M#scrollTo=UcGITmAB3D1r&printMode=true 11/11